diff --git a/dataset_toolkits/download.py b/dataset_toolkits/download.py
index 36e684ff..8040bd40 100644
--- a/dataset_toolkits/download.py
+++ b/dataset_toolkits/download.py
@@ -27,7 +27,12 @@
     # get file list
     if not os.path.exists(os.path.join(opt.output_dir, 'metadata.csv')):
         raise ValueError('metadata.csv not found')
-    metadata = pd.read_csv(os.path.join(opt.output_dir, 'metadata.csv'))
+
+    # Read as UTF-8 and silently drop undecodable bytes (errors='ignore')
+    # instead of raising on a metadata.csv that contains invalid sequences.
+    with open(os.path.join(opt.output_dir, 'metadata.csv'), 'r', encoding='utf-8', errors='ignore') as f:
+        metadata = pd.read_csv(f)
+
     if opt.instances is None:
         if opt.filter_low_aesthetic_score is not None:
             metadata = metadata[metadata['aesthetic_score'] >= opt.filter_low_aesthetic_score]
@@ -49,4 +54,7 @@
 
     # process objects
     downloaded = dataset_utils.download(metadata, **opt)
-    downloaded.to_csv(os.path.join(opt.output_dir, f'downloaded_{opt.rank}.csv'), index=False)
+    # newline='' leaves line termination to the CSV writer; without it an
+    # extra '\r' is written per row on platforms whose line ending is '\r\n'.
+    with open(os.path.join(opt.output_dir, f'downloaded_{opt.rank}.csv'), 'w', encoding='utf-8', errors='ignore', newline='') as f:
+        downloaded.to_csv(f, index=False)