"""Annotate scored entries with their ``error_type`` and report its distribution.

Reads the scored entries and the merged training data, copies each matching
``error_type`` onto the scored entry, writes the annotated entries to a new
JSON file, and prints how often each ``error_type`` occurs.
"""
import json
from collections import Counter

# Scored entries to annotate; the code iterates this as a list of dicts that
# may carry a 'key' field.
SCORES_PATH = 'ms-swift/matched_scores_2_1.json'
# Training data used for the lookup; the code indexes it by entry key.
MERGED_PATH = '/root/autodl-tmp/600_train/merged_shuffled_train.json'
# Destination for the annotated entries.
OUTPUT_PATH = 'ms-swift/allcorrect_with_error_type.json'


def attach_error_types(entries, merged):
    """Copy ``error_type`` from *merged* onto every entry whose key matches.

    Args:
        entries: Iterable of dicts; each may contain a ``'key'`` field.
            Entries are modified in place.
        merged: Lookup indexed by entry key whose values expose ``.get``.
            NOTE(review): assumed to be a dict of dicts -- confirm against
            the actual layout of the merged training file.

    Returns:
        The same *entries* object, for convenience.
    """
    for entry in entries:
        key = entry.get('key')
        # Entries with a missing/empty key, or a key unknown to the lookup,
        # are left untouched (no 'error_type' field is added).
        if key and key in merged:
            entry['error_type'] = merged[key].get('error_type')
    return entries


def count_error_types(entries):
    """Count how many entries carry each ``error_type``.

    Entries whose ``error_type`` is missing or falsy (``None``, ``''``) are
    grouped under the ``'no_error_type'`` bucket.

    Returns:
        A ``Counter`` mapping error type to count, in first-seen order.
    """
    return Counter(
        entry.get('error_type') or 'no_error_type' for entry in entries
    )


def main():
    """Run the annotate -> save -> report pipeline on the configured files."""
    # Load the scored entries.
    with open(SCORES_PATH, 'r', encoding='utf-8') as f:
        allcorrect_data = json.load(f)

    # Load the merged training data used as the error_type lookup.
    with open(MERGED_PATH, 'r', encoding='utf-8') as f:
        merged_data = json.load(f)

    attach_error_types(allcorrect_data, merged_data)

    # Write the annotated entries to a separate file; the input is not
    # overwritten.
    output_file = OUTPUT_PATH
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(allcorrect_data, f, ensure_ascii=False, indent=2)

    print(f"处理完成,结果已保存到 {output_file}")

    # Report the distribution of error types.
    error_type_stats = count_error_types(allcorrect_data)
    print("\nError Type 统计:")
    for error_type, count in error_type_stats.items():
        print(f"{error_type}: {count}")


if __name__ == "__main__":
    main()