# Copyright (c) Alibaba, Inc. and its affiliates. from typing import Type import gradio as gr from swift.ui.base import BaseUI from swift.utils import get_logger logger = get_logger() class Eval(BaseUI): group = 'llm_eval' locale_dict = { 'eval_backend': { 'label': { 'zh': '评测后端', 'en': 'Eval backend' }, 'info': { 'zh': '选择评测后端', 'en': 'Select eval backend' } }, 'eval_dataset': { 'label': { 'zh': '评测数据集', 'en': 'Evaluation dataset' }, 'info': { 'zh': '选择评测数据集,支持多选 (先选择评测后端)', 'en': 'Select eval dataset, multiple datasets supported (select eval backend first)' } }, 'eval_limit': { 'label': { 'zh': '评测数据个数', 'en': 'Eval numbers for each dataset' }, 'info': { 'zh': '每个评测集的取样数', 'en': 'Number of rows sampled from each dataset' } }, 'eval_output_dir': { 'label': { 'zh': '评测输出目录', 'en': 'Eval output dir' }, 'info': { 'zh': '评测结果的输出目录', 'en': 'The dir to save the eval results' } }, 'custom_eval_config': { 'label': { 'zh': '自定义数据集评测配置', 'en': 'Custom eval config' }, 'info': { 'zh': '可以使用该配置评测自己的数据集,详见github文档的评测部分', 'en': 'Use this config to eval your own datasets, check the docs in github for details' } }, 'eval_url': { 'label': { 'zh': '评测链接', 'en': 'The eval url' }, 'info': { 'zh': 'OpenAI样式的评测链接(如:http://localhost:8080/v1/chat/completions),用于评测接口(模型类型输入为实际模型类型)', 'en': 'The OpenAI style link(like: http://localhost:8080/v1/chat/completions) for ' 'evaluation(Input actual model type into model_type)' } }, 'api_key': { 'label': { 'zh': '接口token', 'en': 'The url token' }, 'info': { 'zh': 'eval_url的token', 'en': 'The token used with eval_url' } }, 'infer_backend': { 'label': { 'zh': '推理框架', 'en': 'Infer backend' }, } } @classmethod def do_build_ui(cls, base_tab: Type['BaseUI']): try: from swift.llm.argument.eval_args import EvalArguments eval_dataset_dict = EvalArguments.list_eval_dataset() default_backend = EvalArguments.eval_backend except Exception as e: logger.warn(e) eval_dataset_dict = {} default_backend = None with gr.Row(): gr.Dropdown(elem_id='eval_backend', choices=list(eval_dataset_dict.keys()), value=default_backend, scale=20) gr.Dropdown( elem_id='eval_dataset', is_list=True, choices=eval_dataset_dict.get(default_backend, []), multiselect=True, allow_custom_value=True, scale=20) gr.Textbox(elem_id='eval_limit', scale=20) gr.Dropdown(elem_id='infer_backend', scale=20) with gr.Row(): gr.Textbox(elem_id='custom_eval_config', scale=20) gr.Textbox(elem_id='eval_output_dir', scale=20) gr.Textbox(elem_id='eval_url', scale=20) gr.Textbox(elem_id='api_key', scale=20) def update_eval_dataset(backend): return gr.update(choices=eval_dataset_dict[backend]) cls.element('eval_backend').change(update_eval_dataset, [cls.element('eval_backend')], [cls.element('eval_dataset')])