# smugri4-mt translation demo (Hugging Face Space)
| import gradio as gr | |
| import torch | |
| from kuidastaltsutadalaamat.trainllm import load_model, load_tokenizer | |
| from kuidastaltsutadalaamat.inference import llm_generate | |
| from kuidastaltsutadalaamat.data import LazyTokenizingInferenceDataset | |
| from kuidastaltsutadalaamat.promptops import * | |
| accel = None | |
| model_id = "tartuNLP/smugri4-mt" | |
| device = torch.device("cuda" if torch.cuda.is_available() else "cpu") | |
| model = load_model(model_id, device, accelerator=accel, attention="eager") #eager for cpu | |
| model.eval() | |
| tokenizer = load_tokenizer(model_id, accelerator=accel) | |
| lang_raw_to_label = {"English": "English", | |
| "Erzya": "Erzya", | |
| "Estonian": "Estonian", | |
| "Estonian, Alutaguse, Lüg, dictionary": "Alutaguse", | |
| "Estonian, Hiiu, Rei, dictionary": "Hiiu", | |
| "Estonian, Ida, Kod, dictionary": "Ida", | |
| "Estonian, Kesk, Kjn, dictionary": "Keskmurre", | |
| "Estonian, Kihnu, dictionary": "Kihnu", | |
| "Estonian, Lääne, Mar, dictionary": "Lääne", | |
| "Estonian, Muhu, dictionary": "Muhu", | |
| "Estonian, Ranna, Kuu, dictionary": "Rannakeel", | |
| "Estonian, Saare, Khk, dictionary": "Saare", | |
| "Southern Estonian, Mulgi, Krk, dictionary": "Mulgi", | |
| "Southern Estonian, Seto, dictionary": "Seto", | |
| "Southern Estonian, Tartu, Nõo, dictionary": "Tartu", | |
| "Southern Estonian, Võro, Lei, dictionary": "Leivu", | |
| "Southern Estonian, Võro, Lut, dictionary": "Lutsi", | |
| "Southern Estonian, Võro, Sõnaq": "Võro (Sõnaq orth)", | |
| "Southern Estonian, Võro, Uma": "Võro (Umaleht orth)", | |
| "Finnish": "Finnish", | |
| "Kven": "Kven", | |
| "Meänkieli": "Meänkieli", | |
| "Hill Mari": "Hill Mari", | |
| "Meadow Mari": "Meadow Mari", | |
| "Hungarian": "Hungarian", | |
| "Inari Sami": "Inari Sami", | |
| "Pite Sami": "Pite Sami", | |
| "Kildin Sami, Antonova": "Kildin Sami (Antonova orth)", | |
| "Kildin Sami, Kuruch": "Kildin Sami (Kuruch orth)", | |
| "Lule Sami": "Lule Sami", | |
| "Northern Sami": "Northern Sami", | |
| "Skolt Sami": "Skolt Sami", | |
| "Southern Sami": "Southern Sami", | |
| "Ume Sami": "Ume Sami", | |
| "Izhorian, Mehmet": "Ingrian (Ala-Laukaa / simplified)", | |
| "Izhorian, Alamaluuga, speech": "Ingrian (Ala-Laukaa)", | |
| "Izhorian, Soikkola": "Ingrian (Soikkola)", | |
| "Votic, Standard": "Votic", | |
| "Komi-Permyak": "Komi-Permyak", | |
| "Komi-Zyrian": "Komi-Zyrian", | |
| "Latvian": "Latvian", | |
| "Livonian, Standard": "Livonian", | |
| "Livvi, Newwritten": "Livvi", | |
| "Ludian, Miikul": "Ludian (ü)", | |
| "Ludian, Newwritten": "Ludian (y)", | |
| "Mansi, Unk": "Mansi (Northern)", | |
| "Moksha": "Moksha", | |
| "Norwegian": "Norwegian", | |
| "Kazym Khanty, 2013": "Kazym Khanty", | |
| "Priur Khanty": "Priur Khanty", | |
| "Shur Khanty, 2013": "Shur Khanty", | |
| "Sred Khanty": "Sred Khanty", | |
| "Surgut Khanty, 2013": "Surgut Khanty", | |
| "Vakh Khanty, 2013": "Vakh Khanty", | |
| "Proper Karelian, Newwritten": "Proper Karelian", | |
| "Russian": "Russian", | |
| "Swedish": "Swedish", | |
| "Udmurt": "Udmurt", | |
| "Veps, Newwritten": "Veps" | |
| } | |
| label_to_raw = { e[1]: e[0] for e in lang_raw_to_label.items() } | |
| languages_labels = sorted(list(label_to_raw.keys())) | |
| def run_inference(text, from_lang, to_lang, mode): | |
| entry = {"src_segm": text, "task": mode} | |
| if mode == "translate": | |
| entry.update({"src_lang": label_to_raw[from_lang], "tgt_lang": label_to_raw[to_lang]}) | |
| prompt_format = PF_SMUGRI_MT | |
| else: | |
| prompt_format = PF_SMUGRI_LID | |
| ds = LazyTokenizingInferenceDataset([entry], tokenizer, prompt_format) | |
| tok = ds[0] | |
| output = llm_generate(model, tokenizer, tok, debug=False, max_len=512) | |
| return output[0] | |
| with gr.Blocks() as demo: | |
| text_input = gr.Textbox(label="Text", lines=6, placeholder="Enter text...") | |
| #identify_btn = gr.Button("Identify language", interactive=False) | |
| with gr.Row(): | |
| from_dropdown = gr.Dropdown(choices=languages_labels, label="From", value=None) | |
| to_dropdown = gr.Dropdown(choices=languages_labels, label="To", value=None) | |
| translate_btn = gr.Button("Translate", interactive=False) | |
| output = gr.Textbox(label="Output", lines=6) | |
| #def toggle_identify(text): | |
| # return gr.update(interactive=bool(text.strip())) | |
| #text_input.change(toggle_identify, [text_input], [identify_btn]) | |
| def toggle_translate(text, f, t): | |
| return gr.update(interactive=bool(text.strip() and f and t)) | |
| text_input.change(toggle_translate, [text_input, from_dropdown, to_dropdown], [translate_btn]) | |
| from_dropdown.change(toggle_translate, [text_input, from_dropdown, to_dropdown], [translate_btn]) | |
| to_dropdown.change(toggle_translate, [text_input, from_dropdown, to_dropdown], [translate_btn]) | |
| #identify_btn.click( | |
| # fn=lambda text: run_inference(text, None, None, mode="identify"), | |
| # inputs=[text_input], | |
| # outputs=[from_dropdown], | |
| #).then( | |
| # lambda *args: gr.update(interactive=bool(text_input.value.strip() and from_dropdown.value and to_dropdown.value)), | |
| # [], [translate_btn] | |
| #) | |
| translate_btn.click( | |
| fn=lambda text, f, t: run_inference(text, f, t, mode="translate"), | |
| inputs=[text_input, from_dropdown, to_dropdown], | |
| outputs=[output] | |
| ) | |
| if __name__ == "__main__": | |
| demo.launch() | |