@@ -557,6 +557,44 @@ def test_multiprocessing_with_parallelism(self):
557557 multiprocessing_with_parallelism (tokenizer , False )
558558 multiprocessing_with_parallelism (tokenizer , True )
559559
def test_multithreaded_concurrency(self):
    """Run many batch-encoding calls on a thread pool and check they all finish.

    Every task builds its own ``Tokenizer(BPE())`` and encodes one batch with
    either ``encode_batch`` or ``encode_batch_fast``. The tasks are submitted
    to a 4-worker ``ThreadPoolExecutor``; the test passes when every future
    returns and each result has one entry per input string.
    """

    # Thread worker functions: each call constructs a fresh tokenizer, so no
    # tokenizer instance is shared between tasks.
    def encode_batch(batch):
        tokenizer = Tokenizer(BPE())
        return tokenizer.encode_batch(batch)

    def encode_batch_fast(batch):
        tokenizer = Tokenizer(BPE())
        return tokenizer.encode_batch_fast(batch)

    # Create some significant workload: 3 batches of 20 long strings each.
    batches = [
        ["my name is john " * 50] * 20,
        ["my name is paul " * 50] * 20,
        ["my name is ringo " * 50] * 20,
    ]

    # Many encoding operations to run concurrently (3 distinct tasks x 10).
    tasks = [
        (encode_batch, batches[0]),
        (encode_batch_fast, batches[1]),
        (encode_batch, batches[2]),
    ] * 10

    # The context manager shuts the pool down (joining its worker threads)
    # even if a task raises, so the test never leaks threads.
    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
        futures = [executor.submit(worker, batch) for worker, batch in tasks]

        # All tasks should complete successfully; .result() re-raises any
        # exception that occurred inside a worker.
        results = [future.result() for future in futures]

    # Verify results: one result per task, one encoding per input string.
    assert len(results) == len(tasks) == 30
    assert all(
        len(result) == len(batch) == 20
        for (_, batch), result in zip(tasks, results)
    )
597+
560598 def test_from_pretrained (self ):
561599 tokenizer = Tokenizer .from_pretrained ("bert-base-cased" )
562600 output = tokenizer .encode ("Hey there dear friend!" , add_special_tokens = False )
0 commit comments