kimhyunwoo committed on
Commit
b9185e7
·
verified ·
1 Parent(s): 0c5d476

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -279
app.py CHANGED
@@ -1,4 +1,7 @@
1
  import os
 
 
 
2
  import threading
3
  import torch
4
  import torch._dynamo
@@ -12,55 +15,17 @@ from transformers import (
12
  import gradio as gr
13
  import spaces
14
 
15
- # 필요한 경우 Bitnet 지원을 위한 transformers 설치
16
- # Hugging Face Spaces에서는 Dockerfile 등을 통해 미리 설치하는 것이 더 일반적입니다.
17
- # 로컬에서 테스트 시에는 필요할 수 있습니다.
18
- # print("Installing required transformers branch...")
19
- # try:
20
- # os.system("pip install git+https://github.com/shumingma/transformers.git -q")
21
- # print("transformers branch installed.")
22
- # except Exception as e:
23
- # print(f"Error installing transformers branch: {e}")
24
- # print("Proceeding with potentially default transformers version.")
25
-
26
- # os.system("pip install accelerate bitsandbytes -q") # bitsandbytes, accelerate도 필요할 수 있습니다.
27
-
28
-
29
  model_id = "microsoft/bitnet-b1.58-2B-4T"
30
 
31
- # λͺ¨λΈ 및 ν† ν¬λ‚˜μ΄μ € λ‘œλ“œ
32
- print(f"Loading model: {model_id}")
33
- try:
34
- tokenizer = AutoTokenizer.from_pretrained(model_id)
35
- # device_map="auto"는 여러 GPU 또는 CPU로 모델을 자동으로 분산 로드합니다.
36
- # bfloat16은 모델 가중치에 사용되는 데이터 타입입니다.
37
- model = AutoModelForCausalLM.from_pretrained(
38
- model_id,
39
- torch_dtype=torch.bfloat16,
40
- device_map="auto",
41
- # load_in_8bit=True # Bitnet은 1.58bit이므로 8bit 로딩이 의미 없을 수 있습니다.
42
- )
43
- print(f"Model loaded successfully on device: {model.device}")
44
- except Exception as e:
45
- print(f"Error loading model: {e}")
46
- # λͺ¨λΈ λ‘œλ”© μ‹€νŒ¨ μ‹œ 더미 λͺ¨λΈ μ‚¬μš© λ˜λŠ” 였λ₯˜ 처리
47
- class DummyModel:
48
- def generate(self, **kwargs):
49
- # 더미 응답 생성
50
- input_ids = kwargs.get('input_ids')
51
- streamer = kwargs.get('streamer')
52
- if streamer:
53
- # 간단한 더미 응답 스트리밍
54
- dummy_response = "λͺ¨λΈ λ‘œλ”©μ— μ‹€νŒ¨ν•˜μ—¬ 더미 응닡을 μ œκ³΅ν•©λ‹ˆλ‹€. μ„€μ •/경둜λ₯Ό ν™•μΈν•˜μ„Έμš”."
55
- for char in dummy_response:
56
- streamer.put(char)
57
- streamer.end()
58
- model = DummyModel()
59
- tokenizer = AutoTokenizer.from_pretrained("gpt2") # 더미 ν† ν¬λ‚˜μ΄μ €
60
- print("Using dummy model due to loading failure.")
61
-
62
 
63
- @spaces.GPU # Hugging Face Spaces에서 GPU 사용을 지정합니다.
64
  def respond(
65
  message: str,
66
  history: list[tuple[str, str]],
@@ -81,11 +46,6 @@ def respond(
81
  Yields:
82
  The growing response text as new tokens are generated.
83
  """
84
- # 더미 λͺ¨λΈ μ‚¬μš© μ‹œ 슀트리밍 였λ₯˜ λ°©μ§€
85
- if isinstance(model, DummyModel):
86
- yield "λͺ¨λΈ λ‘œλ”©μ— μ‹€νŒ¨ν•˜μ—¬ 응닡을 생성할 수 μ—†μŠ΅λ‹ˆλ‹€."
87
- return
88
-
89
  messages = [{"role": "system", "content": system_message}]
90
  for user_msg, bot_msg in history:
91
  if user_msg:
@@ -94,236 +54,45 @@ def respond(
94
  messages.append({"role": "assistant", "content": bot_msg})
95
  messages.append({"role": "user", "content": message})
96
 
97
- try:
98
- prompt = tokenizer.apply_chat_template(
99
- messages, tokenize=False, add_generation_prompt=True
100
- )
101
- inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
102
-
103
- streamer = TextIteratorStreamer(
104
- tokenizer, skip_prompt=True, skip_special_tokens=True
105
- )
106
- generate_kwargs = dict(
107
- **inputs,
108
- streamer=streamer,
109
- max_new_tokens=max_tokens,
110
- temperature=temperature,
111
- top_p=top_p,
112
- do_sample=True,
113
- # Bitnet λͺ¨λΈμ— ν•„μš”ν•œ μΆ”κ°€ 인자 μ„€μ • (λͺ¨λΈ λ¬Έμ„œ 확인 ν•„μš”)
114
- # 예를 들어, quantize_config 등
115
- )
116
-
117
- # 쓰레드에서 모델 생성 실행
118
- thread = threading.Thread(target=model.generate, kwargs=generate_kwargs)
119
- thread.start()
120
-
121
- # 스트리머로부터 텍스트를 읽어와 yield
122
- response = ""
123
- for new_text in streamer:
124
- # yield 하기 전에 불필요한 공백/토큰 제거 또는 처리 가능
125
- response += new_text
126
- yield response
127
-
128
- except Exception as e:
129
- print(f"Error during response generation: {e}")
130
- yield f"응닡 생성 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€: {e}"
131
-
132
-
133
- # --- 디자인 개선을 위한 CSS 코드 ---
134
- css_styles = """
135
- /* 전체 νŽ˜μ΄μ§€ λ°°κ²½ 및 κΈ°λ³Έ 폰트 μ„€μ • */
136
- body {
137
- font-family: 'Segoe UI', 'Roboto', 'Arial', sans-serif;
138
- line-height: 1.6;
139
- margin: 0;
140
- padding: 20px; /* μ•± μ£Όλ³€ μ—¬λ°± μΆ”κ°€ */
141
- background-color: #f4f7f6; /* λΆ€λ“œλŸ¬μš΄ 배경색 */
142
- }
143
-
144
- /* 메인 μ•± μ»¨ν…Œμ΄λ„ˆ μŠ€νƒ€μΌ */
145
- .gradio-container {
146
- max-width: 900px; /* 쀑앙 μ •λ ¬ 및 μ΅œλŒ€ λ„ˆλΉ„ μ œν•œ */
147
- margin: 20px auto;
148
- border-radius: 12px; /* λ‘₯κ·Ό λͺ¨μ„œλ¦¬ */
149
- overflow: hidden; /* μžμ‹ μš”μ†Œλ“€μ΄ λͺ¨μ„œλ¦¬λ₯Ό λ„˜μ§€ μ•Šλ„λ‘ */
150
- box-shadow: 0 8px 16px rgba(0, 0, 0, 0.1); /* 그림자 효과 */
151
- background-color: #ffffff; /* μ•± λ‚΄μš© μ˜μ—­ 배경색 */
152
- }
153
-
154
- /* 타이틀 및 μ„€λͺ… μ˜μ—­ (ChatInterface의 κΈ°λ³Έ 타이틀/μ„€λͺ…) */
155
- /* 이 μ˜μ—­μ€ ChatInterface ꡬ쑰에 따라 μ •ν™•ν•œ 클래슀 이름이 λ‹€λ₯Ό 수 μžˆμœΌλ‚˜,
156
- .gradio-container λ‚΄λΆ€μ˜ 첫 λΈ”λ‘μ΄λ‚˜ H1/P νƒœκ·Έλ₯Ό νƒ€κ²Ÿν•  수 μžˆμŠ΅λ‹ˆλ‹€.
157
- ν…Œλ§ˆμ™€ ν•¨κ»˜ μ‚¬μš©ν•˜λ©΄ λŒ€λΆ€λΆ„ 잘 μ²˜λ¦¬λ©λ‹ˆλ‹€. μ—¬κΈ°μ„œλŠ” 좔가적인 νŒ¨λ”© λ“±λ§Œ κ³ λ € */
158
- .gradio-container > .gradio-block:first-child {
159
- padding: 20px 20px 10px 20px; /* 상단 νŒ¨λ”© μ‘°μ • */
160
- }
161
-
162
- /* μ±„νŒ… λ°•μŠ€ μ˜μ—­ μŠ€νƒ€μΌ */
163
- .gradio-chatbox {
164
- /* ν…Œλ§ˆμ— μ˜ν•΄ μŠ€νƒ€μΌλ§λ˜μ§€λ§Œ, 좔가적인 λ‚΄λΆ€ νŒ¨λ”© λ“± μ‘°μ • κ°€λŠ₯ */
165
- padding: 15px;
166
- background-color: #fefefe; /* μ±„νŒ… μ˜μ—­ 배경색 */
167
- border-radius: 8px; /* μ±„νŒ… μ˜μ—­ λ‚΄λΆ€ λͺ¨μ„œλ¦¬ */
168
- border: 1px solid #e0e0e0; /* 경계선 */
169
- }
170
-
171
- /* μ±„νŒ… λ©”μ‹œμ§€ μŠ€νƒ€μΌ */
172
- .gradio-chatmessage {
173
- margin-bottom: 12px;
174
- padding: 10px 15px;
175
- border-radius: 20px; /* λ‘₯κ·Ό λ©”μ‹œμ§€ λͺ¨μ„œλ¦¬ */
176
- max-width: 75%; /* λ©”μ‹œμ§€ λ„ˆλΉ„ μ œν•œ */
177
- word-wrap: break-word; /* κΈ΄ 단어 μ€„λ°”κΏˆ */
178
- white-space: pre-wrap; /* 곡백 및 μ€„λ°”κΏˆ μœ μ§€ */
179
- box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05); /* λ©”μ‹œμ§€μ— μ•½κ°„μ˜ 그림자 */
180
- }
181
-
182
- /* μ‚¬μš©μž λ©”μ‹œμ§€ μŠ€νƒ€μΌ */
183
- .gradio-chatmessage.user {
184
- background-color: #007bff; /* νŒŒλž€μƒ‰ 계열 */
185
- color: white;
186
- margin-left: auto; /* 였λ₯Έμͺ½ μ •λ ¬ */
187
- border-bottom-right-radius: 2px; /* 였λ₯Έμͺ½ μ•„λž˜ λͺ¨μ„œλ¦¬ κ°μ§€κ²Œ */
188
- }
189
-
190
- /* 봇 λ©”μ‹œμ§€ μŠ€νƒ€μΌ */
191
- .gradio-chatmessage.bot {
192
- background-color: #e9ecef; /* 밝은 νšŒμƒ‰ */
193
- color: #333; /* μ–΄λ‘μš΄ ν…μŠ€νŠΈ */
194
- margin-right: auto; /* μ™Όμͺ½ μ •λ ¬ */
195
- border-bottom-left-radius: 2px; /* μ™Όμͺ½ μ•„λž˜ λͺ¨μ„œλ¦¬ κ°μ§€κ²Œ */
196
- }
197
-
198
- /* μž…λ ₯μ°½ 및 λ²„νŠΌ μ˜μ—­ μŠ€νƒ€μΌ */
199
- .gradio-input-box {
200
- padding: 15px;
201
- border-top: 1px solid #eee; /* μœ„μͺ½ 경계선 */
202
- background-color: #f8f9fa; /* μž…λ ₯ μ˜μ—­ 배경색 */
203
- }
204
- /* μž…λ ₯ ν…μŠ€νŠΈ 에어리어 μŠ€νƒ€μΌ */
205
- .gradio-input-box textarea {
206
- border-radius: 8px;
207
- padding: 10px;
208
- border: 1px solid #ccc;
209
- resize: none !important; /* μž…λ ₯μ°½ 크기 쑰절 λΉ„ν™œμ„±ν™” (선택 사항) */
210
- min-height: 50px; /* μ΅œμ†Œ 높이 */
211
- max-height: 150px; /* μ΅œλŒ€ 높이 */
212
- overflow-y: auto; /* λ‚΄μš© λ„˜μΉ  경우 슀크둀 */
213
- }
214
- /* μŠ€ν¬λ‘€λ°” μŠ€νƒ€μΌ (선택 사항) */
215
- .gradio-input-box textarea::-webkit-scrollbar {
216
- width: 8px;
217
- }
218
- .gradio-input-box textarea::-webkit-scrollbar-thumb {
219
- background-color: #ccc;
220
- border-radius: 4px;
221
- }
222
- .gradio-input-box textarea::-webkit-scrollbar-track {
223
- background-color: #f1f1f1;
224
- }
225
-
226
-
227
- /* λ²„νŠΌ μŠ€νƒ€μΌ */
228
- .gradio-button {
229
- border-radius: 8px;
230
- padding: 10px 20px;
231
- font-weight: bold;
232
- transition: background-color 0.2s ease, opacity 0.2s ease; /* ν˜Έλ²„ μ• λ‹ˆλ©”μ΄μ…˜ */
233
- border: none; /* κΈ°λ³Έ ν…Œλ‘λ¦¬ 제거 */
234
- cursor: pointer;
235
- }
236
-
237
- .gradio-button:not(.clear-button) { /* Send λ²„νŠΌ */
238
- background-color: #28a745; /* μ΄ˆλ‘μƒ‰ */
239
- color: white;
240
- }
241
- .gradio-button:not(.clear-button):hover {
242
- background-color: #218838;
243
- }
244
- .gradio-button:disabled { /* λΉ„ν™œμ„±ν™”λœ λ²„νŠΌ */
245
- opacity: 0.6;
246
- cursor: not-allowed;
247
- }
248
-
249
-
250
- .gradio-button.clear-button { /* Clear λ²„νŠΌ */
251
- background-color: #dc3545; /* 빨간색 */
252
- color: white;
253
- }
254
- .gradio-button.clear-button:hover {
255
- background-color: #c82333;
256
- }
257
-
258
- /* Additional inputs (μΆ”κ°€ μ„€μ •) μ˜μ—­ μŠ€νƒ€μΌ */
259
- /* 이 μ˜μ—­μ€ 보톡 μ•„μ½”λ””μ–Έ ν˜•νƒœλ‘œ λ˜μ–΄ 있으며, .gradio-accordion 클래슀λ₯Ό κ°€μ§‘λ‹ˆλ‹€. */
260
- .gradio-accordion {
261
- border-radius: 12px; /* μ™ΈλΆ€ μ»¨ν…Œμ΄λ„ˆμ™€ λ™μΌν•œ λͺ¨μ„œλ¦¬ */
262
- margin-top: 15px; /* μ±„νŒ… μ˜μ—­κ³Όμ˜ 간격 */
263
- border: 1px solid #ddd; /* 경계선 */
264
- box-shadow: none; /* λ‚΄λΆ€ 그림자 제거 */
265
- }
266
- /* μ•„μ½”λ””μ–Έ 헀더 (라벨) μŠ€νƒ€μΌ */
267
- .gradio-accordion .label {
268
- font-weight: bold;
269
- color: #007bff; /* νŒŒλž€μƒ‰ 계열 */
270
- padding: 15px; /* 헀더 νŒ¨λ”© */
271
- background-color: #e9ecef; /* 헀더 배경색 */
272
- border-bottom: 1px solid #ddd; /* 헀더 μ•„λž˜ 경계선 */
273
- border-top-left-radius: 11px; /* 상단 λͺ¨μ„œλ¦¬ */
274
- border-top-right-radius: 11px;
275
- }
276
- /* μ•„μ½”λ””μ–Έ λ‚΄μš© μ˜μ—­ μŠ€νƒ€μΌ */
277
- .gradio-accordion .wrap {
278
- padding: 15px; /* λ‚΄μš© νŒ¨λ”© */
279
- background-color: #fefefe; /* λ‚΄μš© 배경색 */
280
- border-bottom-left-radius: 11px; /* ν•˜λ‹¨ λͺ¨μ„œλ¦¬ */
281
- border-bottom-right-radius: 11px;
282
- }
283
- /* μΆ”κ°€ μ„€μ • λ‚΄ κ°œλ³„ μž…λ ₯ μ»΄ν¬λ„ŒνŠΈ 슀���일 (μŠ¬λΌμ΄λ”, ν…μŠ€νŠΈλ°•μŠ€ λ“±) */
284
- .gradio-slider, .gradio-textbox, .gradio-number {
285
- margin-bottom: 10px; /* 각 μž…λ ₯ μš”μ†Œ μ•„λž˜ 간격 */
286
- padding: 8px; /* λ‚΄λΆ€ νŒ¨λ”© */
287
- border: 1px solid #e0e0e0; /* 경계선 */
288
- border-radius: 8px; /* λ‘₯κ·Ό λͺ¨μ„œλ¦¬ */
289
- background-color: #fff; /* 배경색 */
290
- }
291
- /* μž…λ ₯ ν•„λ“œ 라벨 μŠ€νƒ€μΌ */
292
- .gradio-label {
293
- font-weight: normal; /* 라벨 폰트 ꡡ기 */
294
- margin-bottom: 5px; /* 라벨과 μž…λ ₯ ν•„λ“œ κ°„ 간격 */
295
- color: #555; /* 라벨 색상 */
296
- display: block; /* 라벨을 블둝 μš”μ†Œλ‘œ λ§Œλ“€μ–΄ μœ„λ‘œ 올림 */
297
- }
298
- /* μŠ¬λΌμ΄λ” νŠΈλž™ 및 ν•Έλ“€ μŠ€νƒ€μΌ (더 μ„Έλ°€ν•œ μ‘°μ • κ°€λŠ₯) */
299
- /* 예: .gradio-slider input[type="range"]::-webkit-slider-thumb {} */
300
-
301
 
302
- /* λ§ˆν¬λ‹€μš΄/HTML μ»΄ν¬λ„ŒνŠΈ λ‚΄ μŠ€νƒ€μΌ */
303
- .gradio-markdown, .gradio-html {
304
- padding: 10px 0; /* μƒν•˜ νŒ¨λ”© */
305
- }
306
- """
307
- # --- 디자인 개선을 위한 CSS 코드 끝 ---
 
 
 
 
 
 
 
308
 
 
 
 
 
309
 
310
- # Gradio 인터페이스 설정
311
  demo = gr.ChatInterface(
312
  fn=respond,
313
- # 타이틀 및 설명에 HTML 태그 사용 예시 (<br> 태그 사용)
314
- title="<h1 style='text-align: center; color: #007bff;'>Bitnet-b1.58-2B-4T Chatbot</h1>",
315
- description="<p style='text-align: center; color: #555;'>This chat application is powered by Microsoft's SOTA Bitnet-b1.58-2B-4T and designed for natural and fast conversations.</p>",
316
  examples=[
317
  [
318
- "Hello! How are you?",
319
- "You are a helpful AI assistant for everyday tasks.",
320
  512,
321
  0.7,
322
  0.95,
323
  ],
324
  [
325
- "Can you code a snake game in Python?",
326
- "You are a helpful AI assistant for coding.",
327
  2048,
328
  0.7,
329
  0.95,
@@ -332,8 +101,7 @@ demo = gr.ChatInterface(
332
  additional_inputs=[
333
  gr.Textbox(
334
  value="You are a helpful AI assistant.",
335
- label="System message",
336
- lines=3 # 시스템 메시지 입력창 높이 조절
337
  ),
338
  gr.Slider(
339
  minimum=1,
@@ -357,14 +125,7 @@ demo = gr.ChatInterface(
357
  label="Top-p (nucleus sampling)"
358
  ),
359
  ],
360
- # 테마 적용 (여러 테마 중 선택 가능: gr.themes.Soft(), gr.themes.Glass(), gr.themes.Default(), etc.)
361
- theme=gr.themes.Soft(),
362
- # 커스텀 CSS 적용
363
- css=css_styles,
364
  )
365
 
366
- # 애플리케이션 실행
367
  if __name__ == "__main__":
368
- # launch(share=True)는 퍼블릭 URL 생성 (디버깅/공유 목적, 주의 필요)
369
- demo.launch()
370
- # demo.launch(debug=True) # 디버깅 모드 활성화
 
1
  import os
2
+
3
+ os.system("pip install git+https://github.com/shumingma/transformers.git")
4
+
5
  import threading
6
  import torch
7
  import torch._dynamo
 
15
  import gradio as gr
16
  import spaces
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  model_id = "microsoft/bitnet-b1.58-2B-4T"
19
 
20
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
21
+ model = AutoModelForCausalLM.from_pretrained(
22
+ model_id,
23
+ torch_dtype=torch.bfloat16,
24
+ device_map="auto"
25
+ )
26
+ print(model.device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
+ @spaces.GPU
29
  def respond(
30
  message: str,
31
  history: list[tuple[str, str]],
 
46
  Yields:
47
  The growing response text as new tokens are generated.
48
  """
 
 
 
 
 
49
  messages = [{"role": "system", "content": system_message}]
50
  for user_msg, bot_msg in history:
51
  if user_msg:
 
54
  messages.append({"role": "assistant", "content": bot_msg})
55
  messages.append({"role": "user", "content": message})
56
 
57
+ prompt = tokenizer.apply_chat_template(
58
+ messages, tokenize=False, add_generation_prompt=True
59
+ )
60
+ inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
+ streamer = TextIteratorStreamer(
63
+ tokenizer, skip_prompt=True, skip_special_tokens=True
64
+ )
65
+ generate_kwargs = dict(
66
+ **inputs,
67
+ streamer=streamer,
68
+ max_new_tokens=max_tokens,
69
+ temperature=temperature,
70
+ top_p=top_p,
71
+ do_sample=True,
72
+ )
73
+ thread = threading.Thread(target=model.generate, kwargs=generate_kwargs)
74
+ thread.start()
75
 
76
+ response = ""
77
+ for new_text in streamer:
78
+ response += new_text
79
+ yield response
80
 
 
81
  demo = gr.ChatInterface(
82
  fn=respond,
83
+ title="Bitnet-b1.58-2B-4T",
84
+ description="Bitnet-b1.58-2B-4T",
 
85
  examples=[
86
  [
87
+ "Hello!",
88
+ "You are a helpful AI.",
89
  512,
90
  0.7,
91
  0.95,
92
  ],
93
  [
94
+ "Can you code a snake game?",
95
+ "You are a helpful AI.",
96
  2048,
97
  0.7,
98
  0.95,
 
101
  additional_inputs=[
102
  gr.Textbox(
103
  value="You are a helpful AI assistant.",
104
+ label="System message"
 
105
  ),
106
  gr.Slider(
107
  minimum=1,
 
125
  label="Top-p (nucleus sampling)"
126
  ),
127
  ],
 
 
 
 
128
  )
129
 
 
130
  if __name__ == "__main__":
131
+ demo.launch()