import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "openchat/openchat-3.5-1210"

# Load the tokenizer and model. float16 halves memory use, and
# device_map="auto" places the weights on whatever GPU is available.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float16,
)


def build_prompt(history, user_input):
    """Flatten the running conversation into a single prompt string."""
    system_prompt = "<|system|>\nYou are a helpful AI assistant.\n"
    messages = system_prompt
    for user, bot in history:
        messages += f"<|user|>\n{user}\n<|assistant|>\n{bot}\n"
    messages += f"<|user|>\n{user_input}\n<|assistant|>\n"
    return messages


def chat(user_input, history):
    # gr.ChatInterface manages the history itself, passing it in as a
    # list of (user, assistant) pairs and expecting only the new reply
    # back, so the function must not keep a mutable default argument
    # or return the history.
    prompt = build_prompt(history, user_input)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(
        **inputs,
        max_new_tokens=300,
        temperature=0.7,
        do_sample=True,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Decode only the newly generated tokens; slicing off the prompt is
    # more robust than splitting the full decoded string on a tag.
    new_tokens = output[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()


gr.ChatInterface(chat, title="OpenChat AI Assistant").launch()
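# Note: the openchat-3.5 tokenizer ships its own chat template (the
# "GPT4 Correct User:" / "GPT4 Correct Assistant:" format the model was
# trained on), so the hand-rolled <|user|>/<|assistant|> tags above may
# not match the model's training format exactly. A minimal alternative
# sketch, assuming the standard transformers apply_chat_template API:
#
#   def build_prompt(history, user_input):
#       messages = []
#       for user, bot in history:
#           messages.append({"role": "user", "content": user})
#           messages.append({"role": "assistant", "content": bot})
#       messages.append({"role": "user", "content": user_input})
#       return tokenizer.apply_chat_template(
#           messages, tokenize=False, add_generation_prompt=True
#       )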