Skip to content

Commit 853eb12

Browse files
committed
Improve UAPI error handler
Replace the fixed count-based error handler with a duration-based mechanism which is configurable with FeatureInterfaceHealth. Signed-off-by: Lukas Pukenis <lukas.pukenis@nordsec.com>
1 parent 8d50da7 commit 853eb12

File tree

7 files changed

+379
-27
lines changed

7 files changed

+379
-27
lines changed

.unreleased/LLT-6859

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix critical error handler and make it configurable

crates/telio-model/src/event.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ macro_rules! report_event {
2222
}
2323

2424
/// Error levels. Used for app to decide what to do with `telio` device when error happens.
25-
#[derive(Clone, Debug, Default, Serialize)]
25+
#[derive(Clone, Debug, Default, Serialize, Eq, PartialEq)]
2626
#[serde(rename_all = "lowercase")]
2727
pub enum ErrorLevel {
2828
/// The error level is critical (highest priority)
@@ -37,7 +37,7 @@ pub enum ErrorLevel {
3737
}
3838

3939
/// Error code. Common error code representation (for statistics).
40-
#[derive(Clone, Debug, Default, Serialize)]
40+
#[derive(Clone, Debug, Default, Serialize, Eq, PartialEq)]
4141
#[serde(rename_all = "lowercase")]
4242
pub enum ErrorCode {
4343
/// There is no error in the execution

crates/telio-model/src/features.rs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,9 @@ pub struct FeatureWireguard {
102102
/// Configurable wireguard polling period
103103
#[serde(default)]
104104
pub polling: FeaturePolling,
105+
/// Configurable wireguard interface health monitor
106+
#[serde(default)]
107+
pub interface_health: FeatureInterfaceHealth,
105108
/// Configurable up/down behavior of WireGuard-NT adapter. See RFC LLT-0089 for details
106109
#[serde(default)]
107110
pub enable_dynamic_wg_nt_control: bool,
@@ -148,6 +151,23 @@ pub struct FeaturePersistentKeepalive {
148151
pub stun: Option<u32>,
149152
}
150153

154+
/// Configurable interface health monitor
155+
#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize, SmartDefault)]
156+
#[serde(default)]
157+
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
158+
pub struct FeatureInterfaceHealth {
159+
/// Minimum time separation between UAPI failures required to classify
160+
/// a failure as non-transient (in seconds). [default 10s]
161+
#[default(10)]
162+
pub transient_failure_separation_threshold: u32,
163+
164+
/// Maximum poll gap before treating libtelio as suspended for transient failure filtering
165+
/// for WireGuard-NT. Recommended to be slightly higher than twice the `FeaturePolling.wireguard_polling_period`
166+
/// (in seconds) [default 5s]
167+
#[default(5)]
168+
pub uapi_poll_suspension_threshold: u32,
169+
}
170+
151171
/// Configurable Wireguard polling period
152172
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, SmartDefault)]
153173
#[serde(default)]
@@ -698,6 +718,10 @@ mod tests {
698718
"wireguard_polling_period": 1000,
699719
"wireguard_polling_period_after_state_change": 50
700720
},
721+
"interface_health": {
722+
"transient_failure_separation_threshold": 44,
723+
"uapi_poll_suspension_threshold": 55
724+
},
701725
"enable_dynamic_wg_nt_control": true,
702726
"skt_buffer_size": 123456,
703727
"inter_thread_channel_size": 123456,
@@ -801,6 +825,10 @@ mod tests {
801825
wireguard_polling_period: 1000,
802826
wireguard_polling_period_after_state_change: 50
803827
},
828+
interface_health: FeatureInterfaceHealth {
829+
transient_failure_separation_threshold: 44,
830+
uapi_poll_suspension_threshold: 55
831+
},
804832
enable_dynamic_wg_nt_control: true,
805833
skt_buffer_size: Some(123456),
806834
inter_thread_channel_size: Some(123456),

0 commit comments

Comments
 (0)