Skip to content
This repository was archived by the owner on Dec 1, 2020. It is now read-only.

Commit 7ace5ca

Browse files
authored
Merge pull request #282 from project-march/fix/ethercat-master-watchdog
Fix; ethercat master watchdog
2 parents 55d6524 + fcab98e commit 7ace5ca

File tree

10 files changed

+184
-27
lines changed

10 files changed

+184
-27
lines changed

march_hardware/include/march_hardware/error/error_type.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ enum class ErrorType
3131
MISSING_REQUIRED_KEY = 119,
3232
INIT_URDF_FAILED = 120,
3333
INVALID_SW_STRING = 121,
34+
SLAVE_LOST_TIMOUT = 122,
3435
UNKNOWN = 999,
3536
};
3637

march_hardware/include/march_hardware/ethercat/ethercat_master.h

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#ifndef MARCH_HARDWARE_ETHERCAT_ETHERCATMASTER_H
33
#define MARCH_HARDWARE_ETHERCAT_ETHERCATMASTER_H
44
#include <atomic>
5+
#include <exception>
56
#include <vector>
67
#include <string>
78
#include <thread>
@@ -23,7 +24,7 @@ namespace march
2324
class EthercatMaster
2425
{
2526
public:
26-
EthercatMaster(std::string ifname, int max_slave_index, int cycle_time);
27+
EthercatMaster(std::string ifname, int max_slave_index, int cycle_time, int slave_timeout);
2728
~EthercatMaster();
2829

2930
/* Delete copy constructor/assignment since the member thread can not be copied */
@@ -37,6 +38,8 @@ class EthercatMaster
3738
bool isOperational() const;
3839
void waitForPdo();
3940

41+
std::exception_ptr getLastException() const noexcept;
42+
4043
/**
4144
* Returns the cycle time in milliseconds.
4245
*/
@@ -75,13 +78,27 @@ class EthercatMaster
7578

7679
/**
7780
* Sends the PDO and receives the working counter and check if this is lower than expected.
81+
*
82+
* @returns true if and only if all PDOs have been successfully sent and received, otherwise false.
7883
*/
79-
void sendReceivePdo();
84+
bool sendReceivePdo();
8085

8186
/**
8287
* Checks if all the slaves are connected and in operational state.
8388
*/
84-
static void monitorSlaveConnection();
89+
void monitorSlaveConnection();
90+
91+
/**
92+
* Attempts to recover a slave to operational state.
93+
*
94+
* @returns true when recovery was successfull, otherwise false.
95+
*/
96+
bool attemptSlaveRecover(int slave);
97+
98+
/**
99+
* Sets ethercat state to INIT and closes port.
100+
*/
101+
void closeEthercat();
85102

86103
/**
87104
* Sets the ethercat thread priority and scheduling
@@ -105,7 +122,12 @@ class EthercatMaster
105122
char io_map_[4096] = { 0 };
106123
int expected_working_counter_ = 0;
107124

125+
int latest_lost_slave_ = -1;
126+
const int slave_watchdog_timeout_;
127+
std::chrono::high_resolution_clock::time_point valid_slaves_timestamp_ms_;
128+
108129
std::thread ethercat_thread_;
130+
std::exception_ptr last_exception_;
109131
};
110132

111133
} // namespace march

march_hardware/include/march_hardware/march_robot.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,12 @@ class MarchRobot
2626
public:
2727
using iterator = std::vector<Joint>::iterator;
2828

29-
MarchRobot(::std::vector<Joint> jointList, urdf::Model urdf, ::std::string ifName, int ecatCycleTime);
29+
MarchRobot(::std::vector<Joint> jointList, urdf::Model urdf, ::std::string ifName, int ecatCycleTime,
30+
int ecatSlaveTimeout);
3031

3132
MarchRobot(::std::vector<Joint> jointList, urdf::Model urdf,
32-
std::unique_ptr<PowerDistributionBoard> powerDistributionBoard, ::std::string ifName, int ecatCycleTime);
33+
std::unique_ptr<PowerDistributionBoard> powerDistributionBoard, ::std::string ifName, int ecatCycleTime,
34+
int ecatSlaveTimeout);
3335

3436
~MarchRobot();
3537

@@ -53,6 +55,8 @@ class MarchRobot
5355

5456
bool isEthercatOperational();
5557

58+
std::exception_ptr getLastEthercatException() const noexcept;
59+
5660
void waitForPdo();
5761

5862
int getEthercatCycleTime() const;

march_hardware/src/error/error_type.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ const char* getErrorDescription(ErrorType type)
5151
return "Failed to load URDF from parameter server";
5252
case ErrorType::INVALID_SW_STRING:
5353
return "Slave has incorrect SW file";
54+
case ErrorType::SLAVE_LOST_TIMOUT:
55+
return "EtherCAT slave monitor timer elapsed, connection has been lost";
5456
default:
5557
return "Unknown error occurred. Please create/use a documented error";
5658
}

march_hardware/src/ethercat/ethercat_master.cpp

Lines changed: 128 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include "march_hardware/error/hardware_exception.h"
44

55
#include <chrono>
6+
#include <exception>
67
#include <sstream>
78
#include <string>
89
#include <vector>
@@ -13,8 +14,12 @@
1314

1415
namespace march
1516
{
16-
EthercatMaster::EthercatMaster(std::string ifname, int max_slave_index, int cycle_time)
17-
: is_operational_(false), ifname_(std::move(ifname)), max_slave_index_(max_slave_index), cycle_time_ms_(cycle_time)
17+
EthercatMaster::EthercatMaster(std::string ifname, int max_slave_index, int cycle_time, int slave_timeout)
18+
: is_operational_(false)
19+
, ifname_(std::move(ifname))
20+
, max_slave_index_(max_slave_index)
21+
, cycle_time_ms_(cycle_time)
22+
, slave_watchdog_timeout_(slave_timeout)
1823
{
1924
}
2025

@@ -36,12 +41,18 @@ int EthercatMaster::getCycleTime() const
3641
void EthercatMaster::waitForPdo()
3742
{
3843
std::unique_lock<std::mutex> lock(this->wait_on_pdo_condition_mutex_);
39-
this->wait_on_pdo_condition_var_.wait(lock, [&] { return this->pdo_received_; });
44+
this->wait_on_pdo_condition_var_.wait(lock, [&] { return this->pdo_received_ || !this->is_operational_; });
4045
this->pdo_received_ = false;
4146
}
4247

48+
std::exception_ptr EthercatMaster::getLastException() const noexcept
49+
{
50+
return this->last_exception_;
51+
}
52+
4353
bool EthercatMaster::start(std::vector<Joint>& joints)
4454
{
55+
this->last_exception_ = nullptr;
4556
this->ethercatMasterInitiation();
4657
return this->ethercatSlaveInitiation(joints);
4758
}
@@ -152,15 +163,15 @@ void EthercatMaster::ethercatLoop()
152163
{
153164
const auto begin_time = std::chrono::high_resolution_clock::now();
154165

155-
this->sendReceivePdo();
156-
monitorSlaveConnection();
166+
const bool pdo_received = this->sendReceivePdo();
167+
this->monitorSlaveConnection();
157168

158169
const auto end_time = std::chrono::high_resolution_clock::now();
159170
const auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - begin_time);
160171

161172
{
162173
std::lock_guard<std::mutex> lock(this->wait_on_pdo_condition_mutex_);
163-
this->pdo_received_ = true;
174+
this->pdo_received_ = pdo_received;
164175
}
165176
this->wait_on_pdo_condition_var_.notify_one();
166177

@@ -185,30 +196,132 @@ void EthercatMaster::ethercatLoop()
185196
total_loops = 0;
186197
not_achieved_count = 0;
187198
}
199+
200+
const auto delta_t = std::chrono::high_resolution_clock::now() - this->valid_slaves_timestamp_ms_;
201+
const auto slave_lost_duration = std::chrono::duration_cast<std::chrono::milliseconds>(delta_t);
202+
const std::chrono::milliseconds slave_watchdog_timeout(this->slave_watchdog_timeout_);
203+
204+
if (slave_lost_duration > slave_watchdog_timeout)
205+
{
206+
this->last_exception_ = std::make_exception_ptr(error::HardwareException(
207+
error::ErrorType::SLAVE_LOST_TIMOUT, "Slave connection lost for %i ms from slave %i and onwards.",
208+
this->slave_watchdog_timeout_, this->latest_lost_slave_));
209+
this->is_operational_ = false;
210+
this->wait_on_pdo_condition_var_.notify_one();
211+
212+
this->closeEthercat();
213+
}
188214
}
189215
}
190216

191-
void EthercatMaster::sendReceivePdo()
217+
bool EthercatMaster::sendReceivePdo()
192218
{
193-
ec_send_processdata();
194-
int wkc = ec_receive_processdata(EC_TIMEOUTRET);
195-
if (wkc < this->expected_working_counter_)
219+
if (this->latest_lost_slave_ == -1)
196220
{
197-
ROS_WARN_THROTTLE(1, "Working counter: %d is lower than expected: %d", wkc, this->expected_working_counter_);
221+
ec_send_processdata();
222+
const int wkc = ec_receive_processdata(EC_TIMEOUTRET);
223+
if (wkc < this->expected_working_counter_)
224+
{
225+
ROS_WARN_THROTTLE(1, "Working counter: %d is lower than expected: %d", wkc, this->expected_working_counter_);
226+
return false;
227+
}
228+
return true;
198229
}
230+
return false;
199231
}
200232

201233
void EthercatMaster::monitorSlaveConnection()
202234
{
235+
ec_readstate();
203236
for (int slave = 1; slave <= ec_slavecount; slave++)
204237
{
205-
ec_statecheck(slave, EC_STATE_OPERATIONAL, EC_TIMEOUTRET);
206-
if (!ec_slave[slave].state)
238+
if (ec_slave[slave].state != EC_STATE_OPERATIONAL)
207239
{
208240
ROS_WARN_THROTTLE(1, "EtherCAT train lost connection from slave %d onwards", slave);
209-
return;
241+
242+
if (!this->attemptSlaveRecover(slave))
243+
{
244+
this->latest_lost_slave_ = slave;
245+
return;
246+
}
247+
}
248+
}
249+
250+
if (this->latest_lost_slave_ > -1)
251+
{
252+
ROS_INFO("All slaves returned to operational state.");
253+
}
254+
255+
this->latest_lost_slave_ = -1;
256+
this->valid_slaves_timestamp_ms_ = std::chrono::high_resolution_clock::now();
257+
}
258+
259+
bool EthercatMaster::attemptSlaveRecover(int slave)
260+
{
261+
if (ec_slave[slave].state != EC_STATE_OPERATIONAL)
262+
{
263+
if (ec_slave[slave].state == EC_STATE_PRE_OP)
264+
{
265+
ec_slave[slave].state = EC_STATE_SAFE_OP;
266+
ec_writestate(slave);
267+
}
268+
269+
if (ec_slave[slave].state == EC_STATE_SAFE_OP)
270+
{
271+
ec_slave[slave].state = EC_STATE_OPERATIONAL;
272+
ec_writestate(slave);
273+
}
274+
275+
if (ec_slave[slave].state > EC_STATE_NONE)
276+
{
277+
if (ec_reconfig_slave(slave, 500))
278+
{
279+
ec_slave[slave].islost = FALSE;
280+
}
281+
}
282+
283+
if (!ec_slave[slave].islost)
284+
{
285+
ec_statecheck(slave, EC_STATE_OPERATIONAL, EC_TIMEOUTRET);
286+
if (ec_slave[slave].state == EC_STATE_NONE)
287+
{
288+
ec_slave[slave].islost = TRUE;
289+
ROS_ERROR("Ethercat lost connection to slave %d", slave);
290+
}
291+
}
292+
}
293+
294+
if (ec_slave[slave].islost)
295+
{
296+
if (ec_slave[slave].state == EC_STATE_NONE)
297+
{
298+
if (ec_recover_slave(slave, 500))
299+
{
300+
ec_slave[slave].islost = FALSE;
301+
}
302+
}
303+
else
304+
{
305+
ec_slave[slave].islost = FALSE;
210306
}
211307
}
308+
309+
if (ec_slave[slave].state == EC_STATE_OPERATIONAL)
310+
{
311+
ROS_INFO("Slave %i resumed operational state", slave);
312+
return true;
313+
}
314+
else
315+
{
316+
return false;
317+
}
318+
}
319+
320+
void EthercatMaster::closeEthercat()
321+
{
322+
ec_slave[0].state = EC_STATE_INIT;
323+
ec_writestate(0);
324+
ec_close();
212325
}
213326

214327
void EthercatMaster::stop()
@@ -219,9 +332,7 @@ void EthercatMaster::stop()
219332
this->is_operational_ = false;
220333
this->ethercat_thread_.join();
221334

222-
ec_slave[0].state = EC_STATE_INIT;
223-
ec_writestate(0);
224-
ec_close();
335+
this->closeEthercat();
225336
}
226337
}
227338

march_hardware/src/march_robot.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,20 +14,21 @@
1414

1515
namespace march
1616
{
17-
MarchRobot::MarchRobot(::std::vector<Joint> jointList, urdf::Model urdf, ::std::string ifName, int ecatCycleTime)
17+
MarchRobot::MarchRobot(::std::vector<Joint> jointList, urdf::Model urdf, ::std::string ifName, int ecatCycleTime,
18+
int ecatSlaveTimeout)
1819
: jointList(std::move(jointList))
1920
, urdf_(std::move(urdf))
20-
, ethercatMaster(ifName, this->getMaxSlaveIndex(), ecatCycleTime)
21+
, ethercatMaster(ifName, this->getMaxSlaveIndex(), ecatCycleTime, ecatSlaveTimeout)
2122
, pdb_(nullptr)
2223
{
2324
}
2425

2526
MarchRobot::MarchRobot(::std::vector<Joint> jointList, urdf::Model urdf,
2627
std::unique_ptr<PowerDistributionBoard> powerDistributionBoard, ::std::string ifName,
27-
int ecatCycleTime)
28+
int ecatCycleTime, int ecatSlaveTimeout)
2829
: jointList(std::move(jointList))
2930
, urdf_(std::move(urdf))
30-
, ethercatMaster(ifName, this->getMaxSlaveIndex(), ecatCycleTime)
31+
, ethercatMaster(ifName, this->getMaxSlaveIndex(), ecatCycleTime, ecatSlaveTimeout)
3132
, pdb_(std::move(powerDistributionBoard))
3233
{
3334
}
@@ -156,6 +157,11 @@ bool MarchRobot::isEthercatOperational()
156157
return ethercatMaster.isOperational();
157158
}
158159

160+
std::exception_ptr MarchRobot::getLastEthercatException() const noexcept
161+
{
162+
return this->ethercatMaster.getLastException();
163+
}
164+
159165
void MarchRobot::waitForPdo()
160166
{
161167
this->ethercatMaster.waitForPdo();

march_hardware_builder/robots/march4.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
march4:
33
ifName: enp2s0
44
ecatCycleTime: 4
5+
ecatSlaveTimeout: 50
56
joints:
67
- left_ankle:
78
actuationMode: torque

march_hardware_builder/robots/test_joint_linear.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
testjoint_linear:
22
ifName: enp2s0
33
ecatCycleTime: 4
4+
ecatSlaveTimeout: 50
45
joints:
56
- linear_joint:
67
actuationMode: torque

march_hardware_builder/src/hardware_builder.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,13 +65,15 @@ std::unique_ptr<march::MarchRobot> HardwareBuilder::createMarchRobot()
6565
YAML::Node config = this->robot_config_[robot_name];
6666
const auto if_name = config["ifName"].as<std::string>();
6767
const auto cycle_time = config["ecatCycleTime"].as<int>();
68+
const auto slave_timeout = config["ecatSlaveTimeout"].as<int>();
6869

6970
std::vector<march::Joint> joints = this->createJoints(config["joints"], pdo_interface, sdo_interface);
7071

7172
ROS_INFO_STREAM("Robot config:\n" << config);
7273
YAML::Node pdb_config = config["powerDistributionBoard"];
7374
auto pdb = HardwareBuilder::createPowerDistributionBoard(pdb_config, pdo_interface, sdo_interface);
74-
return std::make_unique<march::MarchRobot>(std::move(joints), this->urdf_, std::move(pdb), if_name, cycle_time);
75+
return std::make_unique<march::MarchRobot>(std::move(joints), this->urdf_, std::move(pdb), if_name, cycle_time,
76+
slave_timeout);
7577
}
7678

7779
march::Joint HardwareBuilder::createJoint(const YAML::Node& joint_config, const std::string& joint_name,

0 commit comments

Comments
 (0)