
Commit fa4b031

Commit message: update
1 parent 2a5c018

3 files changed: +76 additions, -34 deletions


inference/flexllm/peft_train.cc

Lines changed: 34 additions & 34 deletions
@@ -364,12 +364,12 @@ void FlexFlow::top_level_task(Task const *task,
       num_layers_per_finetuning_step,
       temporal_sharing_frequency,
       run_warmup);
-  assert(peft_finetuning_enabled(ffconfig.peft_support_mode) &&
-         "Cannot train LORA adapter if finetuning is not enabled");
-  assert(!file_paths.dataset_file_path.empty() &&
-         "Cannot train LORA adapter if dataset path is empty");
-  assert(!peft_model_name.empty() &&
-         "PEFT model name should not be left empty");
+  // assert(peft_finetuning_enabled(ffconfig.peft_support_mode) &&
+  //        "Cannot train LORA adapter if finetuning is not enabled");
+  // assert(!file_paths.dataset_file_path.empty() &&
+  //        "Cannot train LORA adapter if dataset path is empty");
+  // assert(!peft_model_name.empty() &&
+  //        "PEFT model name should not be left empty");
 
   if (num_kv_cache_slots == -1) {
     num_kv_cache_slots = max_sequence_length * max_requests_per_batch;
@@ -520,17 +520,17 @@ void FlexFlow::top_level_task(Task const *task,
   rm->start_background_server(&model);
 
   // Add PEFT adapter(s)
-  PEFTModelID *peft_model_id_finetuning =
-      model.register_peft_adapter(peft_config_finetuning);
-
-  if (run_warmup) {
-    std::vector<Request> warmup_requests =
-        make_warmup_requests(10, 1000, peft_model_id_finetuning);
-    std::vector<GenerationResult> warmup_result =
-        model.generate(warmup_requests);
-    rm->set_inference_finished(false); // reset inference finished flag
-    std::cout << "----------warmup finished--------------" << std::endl;
-  }
+  // PEFTModelID *peft_model_id_finetuning =
+  //     model.register_peft_adapter(peft_config_finetuning);
+
+  // if (run_warmup) {
+  //   std::vector<Request> warmup_requests =
+  //       make_warmup_requests(10, 1000, peft_model_id_finetuning);
+  //   std::vector<GenerationResult> warmup_result =
+  //       model.generate(warmup_requests);
+  //   rm->set_inference_finished(false); // reset inference finished flag
+  //   std::cout << "----------warmup finished--------------" << std::endl;
+  // }
 
   // Run workload
   {
@@ -544,23 +544,23 @@ void FlexFlow::top_level_task(Task const *task,
     }
 
     // Add fine-tuning request
-    assert(!file_paths.dataset_file_path.empty() &&
-           "Dataset file path is required for fine-tuning.");
-    printf("Finetuning request with dataset %s\n",
-           file_paths.dataset_file_path.c_str());
-    Request fine_tuning_req;
-    fine_tuning_req.req_type = RequestType::REQ_FINETUNING;
-    fine_tuning_req.peft_model_id = *peft_model_id_finetuning;
-    fine_tuning_req.peft_finetuning_info.dataset_filepath =
-        file_paths.dataset_file_path;
-    fine_tuning_req.peft_finetuning_info.max_samples = max_finetuning_samples;
-    fine_tuning_req.peft_finetuning_info.max_training_epochs =
-        max_training_epochs;
-    fine_tuning_req.peft_finetuning_info.gradient_accumulation_steps =
-        gradient_accumulation_steps;
-    fine_tuning_req.peft_finetuning_info.num_logging_steps = num_logging_steps;
+    // assert(!file_paths.dataset_file_path.empty() &&
+    //        "Dataset file path is required for fine-tuning.");
+    // printf("Finetuning request with dataset %s\n",
+    //        file_paths.dataset_file_path.c_str());
+    // Request fine_tuning_req;
+    // fine_tuning_req.req_type = RequestType::REQ_FINETUNING;
+    // fine_tuning_req.peft_model_id = *peft_model_id_finetuning;
+    // fine_tuning_req.peft_finetuning_info.dataset_filepath =
+    //     file_paths.dataset_file_path;
+    // fine_tuning_req.peft_finetuning_info.max_samples = max_finetuning_samples;
+    // fine_tuning_req.peft_finetuning_info.max_training_epochs =
+    //     max_training_epochs;
+    // fine_tuning_req.peft_finetuning_info.gradient_accumulation_steps =
+    //     gradient_accumulation_steps;
+    // fine_tuning_req.peft_finetuning_info.num_logging_steps = num_logging_steps;
     std::vector<Request> finetuning_requests;
-    finetuning_requests.push_back(fine_tuning_req);
+    // finetuning_requests.push_back(fine_tuning_req);
 
     std::cout << "----------inference started--------------" << std::endl;
     std::vector<GenerationResult> result =
@@ -598,7 +598,7 @@ void FlexFlow::top_level_task(Task const *task,
         run_warmup ? 10 : 0); // num_warmup_requests
   }
 
-  free(peft_model_id_finetuning);
+  // free(peft_model_id_finetuning);
 
   std::cout << "----------inference finished--------------" << std::endl;
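
The peft_train.cc change switches the LoRA fine-tuning path off by commenting it out. If the goal is to disable fine-tuning only for profiling runs, one alternative (a sketch, not part of this commit) is to keep the code compiled and gate it behind a boolean; enable_finetuning below is a hypothetical flag, and every call inside the block is copied from the lines commented out above:

// Sketch only: enable_finetuning is a hypothetical flag, not in the commit.
// The calls themselves are taken from the code the diff comments out.
std::vector<Request> finetuning_requests;
PEFTModelID *peft_model_id_finetuning = nullptr;
if (enable_finetuning) {
  assert(!file_paths.dataset_file_path.empty() &&
         "Dataset file path is required for fine-tuning.");
  peft_model_id_finetuning =
      model.register_peft_adapter(peft_config_finetuning);
  Request fine_tuning_req;
  fine_tuning_req.req_type = RequestType::REQ_FINETUNING;
  fine_tuning_req.peft_model_id = *peft_model_id_finetuning;
  fine_tuning_req.peft_finetuning_info.dataset_filepath =
      file_paths.dataset_file_path;
  fine_tuning_req.peft_finetuning_info.max_samples = max_finetuning_samples;
  fine_tuning_req.peft_finetuning_info.max_training_epochs =
      max_training_epochs;
  fine_tuning_req.peft_finetuning_info.gradient_accumulation_steps =
      gradient_accumulation_steps;
  fine_tuning_req.peft_finetuning_info.num_logging_steps = num_logging_steps;
  finetuning_requests.push_back(fine_tuning_req);
}
// ... run the workload as before ...
if (peft_model_id_finetuning != nullptr) {
  free(peft_model_id_finetuning); // freed after the run, as in the original
}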

src/ops/fused.cu

Lines changed: 22 additions & 0 deletions
@@ -184,6 +184,17 @@ __host__ void
      printf("\tmy_output_accessor[%i] = output_accessor[%i]\n", i, my_off);
 #endif
     }
+
+
+    int shard_id = task->index_point.point_data[0];
+    cudaStream_t stream;
+    checkCUDA(get_legion_stream(&stream));
+    cudaEvent_t t_start, t_end;
+    cudaEventCreate(&t_start);
+    cudaEventCreate(&t_end);
+    cudaEventRecord(t_start, stream);
+
+
     switch (fused->op_op_type[op]) {
       case OP_CONCAT: {
         assert(fused->op_num_weights[op] == 0);
@@ -620,6 +631,17 @@ __host__ void
         assert(false && "Fusion currently does not support type");
       }
     }
+    cudaEventRecord(t_end, stream);
+    checkCUDA(cudaEventSynchronize(t_end));
+    float elapsed = 0;
+    checkCUDA(cudaEventElapsedTime(&elapsed, t_start, t_end));
+    cudaEventDestroy(t_start);
+    cudaEventDestroy(t_end);
+    std::string op_name_without_uid = get_op_name_without_uid(metas->meta[op]);
+    if (shard_id == 0) {
+      std::cout << "OPTIME[" << op_name_without_uid << "]= " << elapsed << " ms" << std::endl;
+    }
+
     if (metas->meta[op]->inference_debugging) {
       std::vector<GenericTensorAccessorR> input_accessors_to_save;
       std::vector<GenericTensorAccessorR> weight_accessors_to_save;
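
The fused.cu hunks bracket the fused-operator dispatch with CUDA events on the task's Legion stream and print a per-operator OPTIME line from shard 0. Below is a self-contained sketch of the same event-timing pattern using only the standard CUDA runtime API; the kernel, sizes, and label are placeholders, not FlexFlow code:

// Minimal sketch of stream-based event timing, independent of FlexFlow/Legion.
// dummy_kernel stands in for the fused operator body.
#include <cstdio>
#include <cuda_runtime.h>

__global__ void dummy_kernel(float *data, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    data[i] = data[i] * 2.0f;
  }
}

int main() {
  int const n = 1 << 20;
  float *data;
  cudaMalloc(&data, n * sizeof(float));

  cudaStream_t stream;
  cudaStreamCreate(&stream);

  // Same pattern as the diff: record start, launch work on the stream,
  // record end, synchronize on the end event, then read elapsed milliseconds.
  cudaEvent_t t_start, t_end;
  cudaEventCreate(&t_start);
  cudaEventCreate(&t_end);
  cudaEventRecord(t_start, stream);

  dummy_kernel<<<(n + 255) / 256, 256, 0, stream>>>(data, n);

  cudaEventRecord(t_end, stream);
  cudaEventSynchronize(t_end);
  float elapsed = 0;
  cudaEventElapsedTime(&elapsed, t_start, t_end);
  printf("OPTIME[dummy_kernel]= %f ms\n", elapsed);

  cudaEventDestroy(t_start);
  cudaEventDestroy(t_end);
  cudaStreamDestroy(stream);
  cudaFree(data);
  return 0;
}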

src/parallel_ops/combine.cc

Lines changed: 20 additions & 0 deletions
@@ -477,6 +477,15 @@ void Combine::forward_task_with_type(Task const *task,
                                      std::vector<PhysicalRegion> const &regions,
                                      Context ctx,
                                      Runtime *runtime) {
+
+  int shard_id = task->index_point.point_data[0];
+  cudaStream_t stream;
+  checkCUDA(get_legion_stream(&stream));
+  cudaEvent_t t_start, t_end;
+  cudaEventCreate(&t_start);
+  cudaEventCreate(&t_end);
+  cudaEventRecord(t_start, stream);
+
   Domain input_domain = runtime->get_index_space_domain(
       ctx, task->regions[0].region.get_index_space());
   Domain output_domain = runtime->get_index_space_domain(
@@ -489,6 +498,17 @@ void Combine::forward_task_with_type(Task const *task,
       regions[1], task->regions[1], FID_DATA, ctx, runtime);
 
   forward_kernel<DT>(input_ptr, output_ptr, output_domain.get_volume());
+
+  cudaEventRecord(t_end, stream);
+  checkCUDA(cudaEventSynchronize(t_end));
+  float elapsed = 0;
+  checkCUDA(cudaEventElapsedTime(&elapsed, t_start, t_end));
+  cudaEventDestroy(t_start);
+  cudaEventDestroy(t_end);
+  if (shard_id == 0) {
+    std::cout << "OPTIME[Combine]= " << elapsed << " ms" << std::endl;
+  }
+
 }
 
 bool Combine::peft_bwd_task(Task const *task,
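
The combine.cc hunks repeat the same create/record/synchronize/destroy boilerplate that fused.cu adds. One way to factor it out (a sketch, not part of this commit; ScopedGpuTimer is a hypothetical helper) is an RAII wrapper that starts timing on construction and reports on destruction:

// Hypothetical helper, not in the commit: times the enclosing scope on the
// given stream and prints an OPTIME line from shard 0 only.
#include <iostream>
#include <string>
#include <cuda_runtime.h>

class ScopedGpuTimer {
public:
  ScopedGpuTimer(std::string name, cudaStream_t stream, int shard_id)
      : name_(std::move(name)), stream_(stream), shard_id_(shard_id) {
    cudaEventCreate(&t_start_);
    cudaEventCreate(&t_end_);
    cudaEventRecord(t_start_, stream_);
  }
  ~ScopedGpuTimer() {
    cudaEventRecord(t_end_, stream_);
    cudaEventSynchronize(t_end_);
    float elapsed = 0;
    cudaEventElapsedTime(&elapsed, t_start_, t_end_);
    cudaEventDestroy(t_start_);
    cudaEventDestroy(t_end_);
    if (shard_id_ == 0) {
      std::cout << "OPTIME[" << name_ << "]= " << elapsed << " ms" << std::endl;
    }
  }

private:
  std::string name_;
  cudaStream_t stream_;
  int shard_id_;
  cudaEvent_t t_start_;
  cudaEvent_t t_end_;
};

// Usage inside a task body, mirroring the combine.cc hunk:
//   ScopedGpuTimer timer("Combine", stream, shard_id);
//   forward_kernel<DT>(input_ptr, output_ptr, output_domain.get_volume());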
