Student0809
/

interactSpeech

Model card Files Files and versions

interactSpeech / add_errorType.py

Student0809's picture

Add files using upload-large-folder tool

fd421e2 verified 5 months ago

history blame contribute delete

1.4 kB

	import json
	from collections import Counter

	# Input: scored entries to be annotated (a JSON array of objects).
	SCORES_PATH = 'ms-swift/matched_scores_2_1.json'
	# Input: training records that carry the ``error_type`` labels.
	MERGED_PATH = '/root/autodl-tmp/600_train/merged_shuffled_train.json'
	# Output: the scored entries with ``error_type`` attached.
	OUTPUT_PATH = 'ms-swift/allcorrect_with_error_type.json'
	# Bucket used in the statistics for entries without a usable error type.
	NO_ERROR_TYPE = 'no_error_type'


	def load_json(path):
	    """Return the parsed content of the UTF-8 encoded JSON file at *path*."""
	    with open(path, 'r', encoding='utf-8') as f:
	        return json.load(f)


	def attach_error_types(entries: list, merged) -> list:
	    """Copy ``error_type`` from *merged* onto each matching entry, in place.

	    An entry matches when its ``'key'`` value is truthy and is present in
	    *merged*. A matching entry gets ``entry['error_type']`` set to the
	    record's ``error_type`` (``None`` when the record has no such field).
	    Entries with a missing/empty key, or with no match, are left untouched.

	    NOTE(review): *merged* is assumed to be a mapping from key to a dict
	    record; the on-disk schema is not visible here -- confirm.

	    Returns the same *entries* list for convenience.
	    """
	    for entry in entries:
	        key = entry.get('key')
	        if key and key in merged:
	            entry['error_type'] = merged[key].get('error_type')
	    return entries


	def count_error_types(entries) -> Counter:
	    """Count entries per ``error_type``.

	    Entries whose ``error_type`` is missing or falsy are counted under
	    ``'no_error_type'``. Keys appear in first-seen order.
	    """
	    return Counter(
	        entry.get('error_type') or NO_ERROR_TYPE for entry in entries
	    )


	def main():
	    """Annotate the scored entries, write them out and print the statistics."""
	    scored = load_json(SCORES_PATH)
	    merged = load_json(MERGED_PATH)

	    attach_error_types(scored, merged)

	    # Write the updated entries to a new file; the inputs are not modified.
	    with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
	        json.dump(scored, f, ensure_ascii=False, indent=2)

	    print(f"处理完成，结果已保存到 {OUTPUT_PATH}")

	    # Report the distribution of error types.
	    print("\nError Type 统计:")
	    for error_type, count in count_error_types(scored).items():
	        print(f"{error_type}: {count}")


	# Guarded so that importing this module performs no file I/O.
	if __name__ == "__main__":
	    main()