@@ -19,6 +19,7 @@ limitations under the License.
1919#include < absl/time/time.h>
2020#include < folly/futures/Future.h>
2121
22+ #include < chrono>
2223#include < cstdint>
2324#include < vector>
2425
@@ -81,6 +82,44 @@ struct SequenceParams {
8182 StoppingChecker* stopping_checker; // not owned
8283};
8384
85+ static uint32_t timeout_ms = 0 ;
86+ class TimeoutChecker {
87+ private:
88+ std::chrono::steady_clock::time_point timeout_start_;
89+ bool is_timeout_set_ = false ;
90+
91+ public:
92+ TimeoutChecker () { init (); }
93+
94+ bool check_timeout () {
95+ if (!is_timeout_set_) {
96+ timeout_start_ = std::chrono::steady_clock::now ();
97+ is_timeout_set_ = true ;
98+
99+ return false ;
100+ } else {
101+ auto now = std::chrono::steady_clock::now ();
102+ auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(
103+ now - timeout_start_);
104+
105+ return elapsed.count () >= timeout_ms;
106+ }
107+ }
108+
109+ void reset () { is_timeout_set_ = false ; }
110+
111+ private:
112+ static void init_timeout () {
113+ const char * env_str = std::getenv (" PREFETCH_TIMEOUT_MS" );
114+ timeout_ms = env_str ? std::strtoul (env_str, nullptr , 10 ) : 0 ;
115+ LOG (INFO) << " Prefetch timeout set as: " << timeout_ms;
116+ }
117+ static void init () {
118+ static std::once_flag flag_;
119+ std::call_once (flag_, init_timeout);
120+ }
121+ };
122+
84123class Sequence final {
85124 public:
86125 Sequence (size_t index,
@@ -242,12 +281,12 @@ class Sequence final {
242281 const Tokenizer& tokenizer,
243282 std::optional<std::vector<LogProb>>& out_logprobs);
244283
245- const std::atomic<bool >& get_termination_flag () { return termination_flag_; }
284+ std::atomic<bool >* get_termination_flag () { return & termination_flag_; }
246285 std::vector<std::shared_ptr<std::atomic<uint32_t >>>* get_prefetch_results () {
247286 return &prefetch_results_;
248287 }
249288
250- void update_prefetch_result ();
289+ bool update_prefetch_result ();
251290
252291 void reset ();
253292
@@ -361,6 +400,8 @@ class Sequence final {
361400 // kvcache store copy async result
362401 std::atomic<bool > termination_flag_{false };
363402 std::vector<std::shared_ptr<std::atomic<uint32_t >>> prefetch_results_;
403+
404+ TimeoutChecker timeout_checker_;
364405};
365406
366407} // namespace xllm
0 commit comments