@@ -125,12 +125,6 @@ pub struct Io {
125125pub const KEEPALIVE_PACKET_SIZE : u64 = 32 ;
126126/// Default wireguard keepalive duration
127127pub const WG_KEEPALIVE : Duration = Duration :: from_secs ( 10 ) ;
128- /// Used to ensure uapi errors happen for at least this long before considering critical error event.
129- /// The failures are usually transient and observed during power transitions when using WireguardNT.
130- const REQUIRED_CRITICAL_ERROR_GAP : Duration = Duration :: from_secs ( 15 ) ;
131- /// Used to detect long gaps between UAPI commands that suggest the device might have been suspended.
132- /// Usually libtelio should poll every second.
133- const MAX_UAPI_CMD_GAP : Duration = Duration :: from_secs ( 30 ) ;
134128
135129pub use telio_utils:: { BytesAndTimestamps , Instant } ;
136130
@@ -139,12 +133,12 @@ pub use telio_utils::{BytesAndTimestamps, Instant};
139133#[ derive( Debug ) ]
140134struct UAPIErrorTracker {
141135 // Minimum required time between first error and later ones to consider returning critical verdict.
142- required_critical_error_gap : Duration ,
136+ adapter_gone_error_threshold : Duration ,
143137
144138 // Maximum time allowed between uapi commands.
145- max_uapi_cmd_gap : Duration ,
139+ adapter_gone_max_uapi_inerval : Duration ,
146140
147- // Timestamp of the first error from which the required_critical_error_gap is measured.
141+ // Timestamp of the first error from which the adapter_gone_error_threshold is measured.
148142 first_error_at : Option < Instant > ,
149143
150144 // Timestamp of the most recent UAPI command.
@@ -160,10 +154,13 @@ enum UAPIErrorTrackerVerdict {
160154}
161155
162156impl UAPIErrorTracker {
163- fn new ( required_critical_error_gap : Duration , max_uapi_cmd_gap : Duration ) -> Self {
157+ fn new (
158+ adapter_gone_error_threshold : Duration ,
159+ adapter_gone_max_uapi_inerval : Duration ,
160+ ) -> Self {
164161 UAPIErrorTracker {
165- required_critical_error_gap ,
166- max_uapi_cmd_gap ,
162+ adapter_gone_error_threshold ,
163+ adapter_gone_max_uapi_inerval ,
167164 first_error_at : None ,
168165 last_uapi_cmd_at : None ,
169166 }
@@ -177,16 +174,16 @@ impl UAPIErrorTracker {
177174 return UAPIErrorTrackerVerdict :: OK ;
178175 }
179176
180- let uapi_within_allowed_window =
181- prev_uapi_cmd_time . is_none_or ( |t| inst. duration_since ( t) < self . max_uapi_cmd_gap ) ;
177+ let uapi_within_allowed_window = prev_uapi_cmd_time
178+ . is_none_or ( |t| inst. duration_since ( t) < self . adapter_gone_max_uapi_inerval ) ;
182179
183180 if !uapi_within_allowed_window {
184181 self . first_error_at = Some ( inst) ;
185182 }
186183
187184 match self . first_error_at {
188185 Some ( t) => {
189- if inst. duration_since ( t) >= self . required_critical_error_gap
186+ if inst. duration_since ( t) >= self . adapter_gone_error_threshold
190187 && resp. interface . is_none ( )
191188 {
192189 self . first_error_at = None ;
@@ -298,7 +295,9 @@ impl DynamicWg {
298295 /// },
299296 /// None,
300297 /// Duration::from_millis(1000),
301- /// Duration::from_millis(50)
298+ /// Duration::from_millis(50),
299+ /// Duration::from_secs(15),
300+ /// Duration::from_secs(30),
302301 /// );
303302 /// }
304303 /// ```
@@ -308,6 +307,8 @@ impl DynamicWg {
308307 link_detection : Option < LinkDetection > ,
309308 polling_period : Duration ,
310309 polling_period_after_update : Duration ,
310+ adapter_gone_error_duration : Duration ,
311+ adapter_gone_error_max_uapi_interval : Duration ,
311312 ) -> Result < Self , Error >
312313 where
313314 Self : Sized ,
@@ -321,6 +322,8 @@ impl DynamicWg {
321322 cfg,
322323 polling_period,
323324 polling_period_after_update,
325+ adapter_gone_error_duration,
326+ adapter_gone_error_max_uapi_interval,
324327 ) ) ;
325328 #[ cfg( windows) ]
326329 return Ok ( Self :: start_with (
@@ -329,16 +332,21 @@ impl DynamicWg {
329332 link_detection,
330333 polling_period,
331334 polling_period_after_update,
335+ adapter_gone_error_duration,
336+ adapter_gone_error_max_uapi_interval,
332337 ) ) ;
333338 }
334339
340+ #[ allow( clippy:: too_many_arguments) ]
335341 fn start_with (
336342 io : Io ,
337343 adapter : Box < dyn Adapter > ,
338344 link_detection : Option < LinkDetection > ,
339345 #[ cfg( unix) ] cfg : Config ,
340346 polling_period : Duration ,
341347 polling_period_after_update : Duration ,
348+ adapter_gone_error_duration : Duration ,
349+ adapter_gone_error_max_uapi_interval : Duration ,
342350 ) -> Self {
343351 let interval = interval ( polling_period) ;
344352 Self {
@@ -351,8 +359,8 @@ impl DynamicWg {
351359 event : io. events ,
352360 analytics_tx : io. analytics_tx ,
353361 uapi_error_tracker : UAPIErrorTracker :: new (
354- REQUIRED_CRITICAL_ERROR_GAP ,
355- MAX_UAPI_CMD_GAP ,
362+ adapter_gone_error_duration ,
363+ adapter_gone_error_max_uapi_interval ,
356364 ) ,
357365 link_detection,
358366 libtelio_event : io. libtelio_wide_event_publisher ,
@@ -1095,6 +1103,8 @@ pub mod tests {
10951103
10961104 const DEFAULT_POLLING_PERIOD_MS : u64 = 1000 ;
10971105 const DEFAULT_POLLING_PERIOD_AFTER_UPDATE_MS : u64 = 50 ;
1106+ const ADAPTER_GONE_ERROR_THRESHOLD_S : u64 = 15 ;
1107+ const ADAPTER_GONE_MAX_UAPI_INERVAL_S : u64 = 30 ;
10981108
10991109 fn random_interface ( ) -> Interface {
11001110 let mut rng = rand:: thread_rng ( ) ;
@@ -1253,6 +1263,8 @@ pub mod tests {
12531263 cfg,
12541264 Duration :: from_millis ( DEFAULT_POLLING_PERIOD_MS ) ,
12551265 Duration :: from_millis ( DEFAULT_POLLING_PERIOD_AFTER_UPDATE_MS ) ,
1266+ Duration :: from_secs ( ADAPTER_GONE_ERROR_THRESHOLD_S ) ,
1267+ Duration :: from_secs ( ADAPTER_GONE_MAX_UAPI_INERVAL_S ) ,
12561268 ) ;
12571269 time:: advance ( Duration :: from_millis ( 0 ) ) . await ;
12581270 adapter. lock ( ) . await . checkpoint ( ) ;
@@ -1276,6 +1288,138 @@ pub mod tests {
12761288 wg. stop ( ) . await ;
12771289 }
12781290
1291+ #[ tokio:: test( start_paused = true ) ]
1292+ async fn wg_test_uapi_critical_error ( ) {
1293+ struct ErrorReturningAdapter { }
1294+
1295+ #[ async_trait]
1296+ impl Adapter for ErrorReturningAdapter {
1297+ async fn send_uapi_cmd ( & self , _cmd : & Cmd ) -> Result < Response , AdapterError > {
1298+ return Ok ( Response {
1299+ errno : 2 ,
1300+ interface : None ,
1301+ } ) ;
1302+ }
1303+
1304+ async fn stop ( & self ) { }
1305+ fn get_adapter_luid ( & self ) -> u64 {
1306+ 0
1307+ }
1308+ async fn set_tun ( & self , _tun : Tun ) -> Result < ( ) , Error > {
1309+ Ok ( ( ) )
1310+ }
1311+ fn clone_box ( & self ) -> Option < Box < dyn Adapter > > {
1312+ None
1313+ }
1314+ }
1315+
1316+ let events_ch = Chan :: default ( ) ;
1317+ let ( event_tx, mut event_rx) = tokio:: sync:: broadcast:: channel ( 256 ) ;
1318+
1319+ let analytics_ch = Some ( McChan :: default ( ) . tx ) ;
1320+ let wg = DynamicWg :: start_with (
1321+ Io {
1322+ events : events_ch. tx ,
1323+ analytics_tx : analytics_ch. clone ( ) ,
1324+ libtelio_wide_event_publisher : Some ( event_tx) ,
1325+ } ,
1326+ Box :: new ( ErrorReturningAdapter { } ) ,
1327+ None ,
1328+ #[ cfg( all( unix, test) ) ]
1329+ Config :: new ( ) . unwrap ( ) ,
1330+ #[ cfg( all( unix, not( test) ) ) ]
1331+ cfg,
1332+ Duration :: from_millis ( DEFAULT_POLLING_PERIOD_MS ) ,
1333+ Duration :: from_millis ( DEFAULT_POLLING_PERIOD_AFTER_UPDATE_MS ) ,
1334+ Duration :: from_secs ( ADAPTER_GONE_ERROR_THRESHOLD_S ) ,
1335+ Duration :: from_secs ( ADAPTER_GONE_MAX_UAPI_INERVAL_S ) ,
1336+ ) ;
1337+
1338+ // Now lets advance time and eventually expect critical error since adapter is gone
1339+ for _ in 0 ..ADAPTER_GONE_ERROR_THRESHOLD_S {
1340+ tokio:: time:: advance ( Duration :: from_secs ( 1 ) ) . await ;
1341+ tokio:: task:: yield_now ( ) . await ;
1342+ }
1343+
1344+ assert_eq ! ( event_rx. len( ) , 0 ) ;
1345+
1346+ tokio:: time:: advance ( Duration :: from_secs ( 1 ) ) . await ;
1347+ tokio:: task:: yield_now ( ) . await ;
1348+
1349+ assert ! ( matches!(
1350+ * event_rx. try_recv( ) . unwrap( ) ,
1351+ telio_model:: event:: Event :: Error { body }
1352+ if body. code == ErrorCode :: Unknown
1353+ && body. level == ErrorLevel :: Critical
1354+ && body. msg == "Interface gone"
1355+ ) ) ;
1356+
1357+ wg. stop ( ) . await ;
1358+ }
1359+
1360+ #[ tokio:: test( start_paused = true ) ]
1361+ async fn wg_test_uapi_no_critical_error ( ) {
1362+ struct ErrorReturningAdapter { }
1363+
1364+ #[ async_trait]
1365+ impl Adapter for ErrorReturningAdapter {
1366+ async fn send_uapi_cmd ( & self , _cmd : & Cmd ) -> Result < Response , AdapterError > {
1367+ return Ok ( Response {
1368+ errno : 2 ,
1369+ interface : Some ( Interface :: default ( ) ) ,
1370+ } ) ;
1371+ }
1372+
1373+ async fn stop ( & self ) { }
1374+ fn get_adapter_luid ( & self ) -> u64 {
1375+ 0
1376+ }
1377+ async fn set_tun ( & self , _tun : Tun ) -> Result < ( ) , Error > {
1378+ Ok ( ( ) )
1379+ }
1380+ fn clone_box ( & self ) -> Option < Box < dyn Adapter > > {
1381+ None
1382+ }
1383+ }
1384+
1385+ let events_ch = Chan :: default ( ) ;
1386+ let ( event_tx, event_rx) = tokio:: sync:: broadcast:: channel ( 256 ) ;
1387+
1388+ let analytics_ch = Some ( McChan :: default ( ) . tx ) ;
1389+ let wg = DynamicWg :: start_with (
1390+ Io {
1391+ events : events_ch. tx ,
1392+ analytics_tx : analytics_ch. clone ( ) ,
1393+ libtelio_wide_event_publisher : Some ( event_tx) ,
1394+ } ,
1395+ Box :: new ( ErrorReturningAdapter { } ) ,
1396+ None ,
1397+ #[ cfg( all( unix, test) ) ]
1398+ Config :: new ( ) . unwrap ( ) ,
1399+ #[ cfg( all( unix, not( test) ) ) ]
1400+ cfg,
1401+ Duration :: from_millis ( DEFAULT_POLLING_PERIOD_MS ) ,
1402+ Duration :: from_millis ( DEFAULT_POLLING_PERIOD_AFTER_UPDATE_MS ) ,
1403+ Duration :: from_secs ( ADAPTER_GONE_ERROR_THRESHOLD_S ) ,
1404+ Duration :: from_secs ( ADAPTER_GONE_MAX_UAPI_INERVAL_S ) ,
1405+ ) ;
1406+
1407+ // Now lets advance time and since adapter is present, no critical event should be there
1408+ for _ in 0 ..ADAPTER_GONE_ERROR_THRESHOLD_S {
1409+ tokio:: time:: advance ( Duration :: from_secs ( 1 ) ) . await ;
1410+ tokio:: task:: yield_now ( ) . await ;
1411+ }
1412+
1413+ assert_eq ! ( event_rx. len( ) , 0 ) ;
1414+
1415+ tokio:: time:: advance ( Duration :: from_secs ( 1 ) ) . await ;
1416+ tokio:: task:: yield_now ( ) . await ;
1417+
1418+ assert_eq ! ( event_rx. len( ) , 0 ) ;
1419+
1420+ wg. stop ( ) . await ;
1421+ }
1422+
12791423 /// See RFC LLT-0083 for more details on how and why polling speed up was added.
12801424 #[ tokio:: test( start_paused = true ) ]
12811425 async fn faster_polling ( ) {
@@ -2079,7 +2223,12 @@ pub mod tests {
20792223 interface : Some ( Interface :: default ( ) ) ,
20802224 } ;
20812225
2082- let mut tracker = UAPIErrorTracker :: new ( REQUIRED_CRITICAL_ERROR_GAP , MAX_UAPI_CMD_GAP ) ;
2226+ use super :: * ;
2227+
2228+ let mut tracker = UAPIErrorTracker :: new (
2229+ Duration :: from_secs ( ADAPTER_GONE_ERROR_THRESHOLD_S ) ,
2230+ Duration :: from_secs ( ADAPTER_GONE_MAX_UAPI_INERVAL_S ) ,
2231+ ) ;
20832232 let mut now = Instant :: now ( ) ;
20842233
20852234 // Initially, no error: should Ignore
@@ -2092,12 +2241,12 @@ pub mod tests {
20922241 assert_eq ! ( verdict, UAPIErrorTrackerVerdict :: OK ) ;
20932242
20942243 // After threshold duration, should Ignore (gap too big between uapi calls)
2095- now += MAX_UAPI_CMD_GAP ;
2244+ now += Duration :: from_secs ( ADAPTER_GONE_MAX_UAPI_INERVAL_S ) ;
20962245 let verdict = tracker. update ( & err_resp, now) ;
20972246 assert_eq ! ( verdict, UAPIErrorTrackerVerdict :: OK ) ;
20982247
20992248 // Trigger errors every second, until just before the threshold, should Ignore
2100- for _ in 1 ..REQUIRED_CRITICAL_ERROR_GAP . as_secs ( ) {
2249+ for _ in 1 ..ADAPTER_GONE_ERROR_THRESHOLD_S {
21012250 now += Duration :: from_secs ( 1 ) ;
21022251 let verdict = tracker. update ( & err_resp, now) ;
21032252 assert_eq ! ( verdict, UAPIErrorTrackerVerdict :: OK ) ;
@@ -2113,7 +2262,7 @@ pub mod tests {
21132262 assert_eq ! ( verdict, UAPIErrorTrackerVerdict :: OK ) ;
21142263
21152264 // Trigger errors every second, until just before the threshold, should Ignore
2116- for _ in 1 ..REQUIRED_CRITICAL_ERROR_GAP . as_secs ( ) {
2265+ for _ in 1 ..ADAPTER_GONE_ERROR_THRESHOLD_S {
21172266 now += Duration :: from_secs ( 1 ) ;
21182267 let verdict = tracker. update ( & err_resp, now) ;
21192268 assert_eq ! ( verdict, UAPIErrorTrackerVerdict :: OK ) ;
@@ -2131,7 +2280,7 @@ pub mod tests {
21312280
21322281 // Trigger errors repeatedly with big time gaps in between, should be Ignore
21332282 for _ in 0 ..50 {
2134- now += MAX_UAPI_CMD_GAP ;
2283+ now += Duration :: from_secs ( ADAPTER_GONE_MAX_UAPI_INERVAL_S ) ;
21352284 let verdict = tracker. update ( & err_resp, now) ;
21362285 assert_eq ! ( verdict, UAPIErrorTrackerVerdict :: OK ) ;
21372286 }
0 commit comments