Student0809's picture
Add files using upload-large-folder tool
7feac49 verified
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import Type
import gradio as gr
from swift.ui.base import BaseUI
from swift.utils import get_logger
logger = get_logger()
class Eval(BaseUI):
group = 'llm_eval'
locale_dict = {
'eval_backend': {
'label': {
'zh': '评测后端',
'en': 'Eval backend'
},
'info': {
'zh': '选择评测后端',
'en': 'Select eval backend'
}
},
'eval_dataset': {
'label': {
'zh': '评测数据集',
'en': 'Evaluation dataset'
},
'info': {
'zh': '选择评测数据集,支持多选 (先选择评测后端)',
'en': 'Select eval dataset, multiple datasets supported (select eval backend first)'
}
},
'eval_limit': {
'label': {
'zh': '评测数据个数',
'en': 'Eval numbers for each dataset'
},
'info': {
'zh': '每个评测集的取样数',
'en': 'Number of rows sampled from each dataset'
}
},
'eval_output_dir': {
'label': {
'zh': '评测输出目录',
'en': 'Eval output dir'
},
'info': {
'zh': '评测结果的输出目录',
'en': 'The dir to save the eval results'
}
},
'custom_eval_config': {
'label': {
'zh': '自定义数据集评测配置',
'en': 'Custom eval config'
},
'info': {
'zh': '可以使用该配置评测自己的数据集,详见github文档的评测部分',
'en': 'Use this config to eval your own datasets, check the docs in github for details'
}
},
'eval_url': {
'label': {
'zh': '评测链接',
'en': 'The eval url'
},
'info': {
'zh':
'OpenAI样式的评测链接(如:http://localhost:8080/v1/chat/completions),用于评测接口(模型类型输入为实际模型类型)',
'en':
'The OpenAI style link(like: http://localhost:8080/v1/chat/completions) for '
'evaluation(Input actual model type into model_type)'
}
},
'api_key': {
'label': {
'zh': '接口token',
'en': 'The url token'
},
'info': {
'zh': 'eval_url的token',
'en': 'The token used with eval_url'
}
},
'infer_backend': {
'label': {
'zh': '推理框架',
'en': 'Infer backend'
},
}
}
@classmethod
def do_build_ui(cls, base_tab: Type['BaseUI']):
try:
from swift.llm.argument.eval_args import EvalArguments
eval_dataset_dict = EvalArguments.list_eval_dataset()
default_backend = EvalArguments.eval_backend
except Exception as e:
logger.warn(e)
eval_dataset_dict = {}
default_backend = None
with gr.Row():
gr.Dropdown(elem_id='eval_backend', choices=list(eval_dataset_dict.keys()), value=default_backend, scale=20)
gr.Dropdown(
elem_id='eval_dataset',
is_list=True,
choices=eval_dataset_dict.get(default_backend, []),
multiselect=True,
allow_custom_value=True,
scale=20)
gr.Textbox(elem_id='eval_limit', scale=20)
gr.Dropdown(elem_id='infer_backend', scale=20)
with gr.Row():
gr.Textbox(elem_id='custom_eval_config', scale=20)
gr.Textbox(elem_id='eval_output_dir', scale=20)
gr.Textbox(elem_id='eval_url', scale=20)
gr.Textbox(elem_id='api_key', scale=20)
def update_eval_dataset(backend):
return gr.update(choices=eval_dataset_dict[backend])
cls.element('eval_backend').change(update_eval_dataset, [cls.element('eval_backend')],
[cls.element('eval_dataset')])