-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Open
Description
Describe the bug
FATE 1.11.3,多方训练(一个 guest、两个 host、一个 arbiter),参数如下:
guest_save_path='/data/projects/fate/fate/upload/315/mnist_train_changeA' host_save_path=['/data/projects/fate/fate/upload/315/mnist_train_changeB', '/data/projects/fate/fate/upload/315/mnist_train_changeC'] guest=9999 host=[10000, 9000] arbiter=9999
其中 host 10000 和 host 9000 在 FATEBoard 中显示的数据集均为 experiment.mnist_train_changeC(按上述配置,host 10000 应显示为 experiment.mnist_train_changeB)。
日志如下
# Reader
[INFO] [2025-11-28 10:20:47,814] [202511281020383644780] [445336:140139240544064] - [task_executor._run_] [line:158]: run reader_0 202511281020383644780_reader_0 0 on host 10000 task
[INFO] [2025-11-28 10:20:47,815] [202511281020383644780] [445336:140139240544064] - [task_executor._run_] [line:159]: component parameters on party:
{
"dsl_version": 2,
"initiator": {
"role": "guest",
"party_id": 9999
},
"role": {
"guest": [
9999
],
"host": [
10000,
9000
]
},
"job_parameters": {
"job_type": "train",
"inheritance_info": {},
"computing_engine": "STANDALONE",
"federation_engine": "STANDALONE",
"storage_engine": "STANDALONE",
"engines_address": {
"computing": {
"cores_per_node": 4,
"nodes": 4
},
"federation": {
"cores_per_node": 4,
"nodes": 4
},
"storage": {
"cores_per_node": 4,
"nodes": 4
}
},
"federated_mode": "SINGLE",
"task_parallelism": 1,
"computing_partitions": 4,
"federated_status_collect_type": "PUSH",
"model_id": "arbiter-9999#guest-9999#host-9000_10000#model",
"model_version": "202511281020383644780",
"auto_retries": 0,
"auto_retry_delay": 1,
"eggroll_run": {
"eggroll.session.processors.per.node": 1
},
"spark_run": {},
"rabbitmq_run": {},
"pulsar_run": {},
"adaptation_parameters": {
"task_nodes": 4,
"task_cores_per_node": 1,
"task_memory_per_node": 0,
"request_task_cores": 1,
"if_initiator_baseline": false
},
"task_conf": {},
"roles": {
"guest": [
9999
],
"host": [
10000,
9000
],
"arbiter": [
9999
]
},
"role_parameters": {}
},
"local": {
"role": "host",
"party_id": 10000
},
"module": "Reader",
"CodePath": "Reader",
"ComponentParam": {
"table": {
"name": "mnist_train_changeB",
"namespace": "experiment"
},
"_name": "Reader#reader_0"
},
"ComponentParameterSource": "reader_0"
}
[INFO] [2025-11-28 10:20:47,996] [202511281020383644780] [445336:140139240544064] - [reader.save_table] [line:207]: source table name: mnist_train_changeB namespace: experiment engine: PATH
[INFO] [2025-11-28 10:20:47,996] [202511281020383644780] [445336:140139240544064] - [reader.save_table] [line:210]: destination table name: e8278626cc4311f087dc0242ac110005 namespace: output_data_202511281020383644780_reader_0_0 engine: PATH
# nn_0
[DEBUG] [2025-11-28 10:23:15,187] [202511281020383644780] [445525:140606553147200] - [data.load_dataset] [line:30]: use cached dataset, cached id /data/projects/fate/fate/upload/315/mnist_train_changeB
# eval_0未出现
请问该如何解决,或者应如何排查?目前训练本身是成功的。
Metadata
Metadata
Assignees
Labels
No labels