Update app.py
app.py CHANGED

@@ -12,6 +12,8 @@ key = os.environ.get("key")
 from huggingface_hub import login
 login(key)
 from bitnet import replace_linears_in_hf
+os.system("mkdir c4ai-command-r-v01-exl2")
+os.system("huggingface-cli download bartowski/c4ai-command-r-v01-exl2 --revision 6_5 --local-dir c4ai-command-r-v01-exl2 --local-dir-use-symlinks False")
 # os.system("pip install flash-attn --no-build-isolation")
 nf4_config = BitsAndBytesConfig(
     load_in_4bit=True,
@@ -22,12 +24,12 @@ nf4_config = BitsAndBytesConfig(
 
 model_id = "CohereForAI/c4ai-command-r-v01"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(
+model = AutoModelForCausalLM.from_pretrained("c4ai-command-r-v01-exl2",
     # load_in_8bit=True,
-    quantization_config=nf4_config,
+    #quantization_config=nf4_config,
     # attn_implementation="flash_attention_2",
     # torch_dtype = torch.bfloat16,
-    device_map="auto"
+    #device_map="auto"
 )
 
 # replace_linears_in_hf(model)
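Note on the added lines: the two new os.system() calls shell out to huggingface-cli to fetch the pre-quantized exl2 checkpoint into a local directory, which from_pretrained then points at instead of applying the NF4 BitsAndBytesConfig on the fly. A minimal in-process alternative, assuming the same repo, revision, and target directory as in the diff, is huggingface_hub.snapshot_download (the script already imports from huggingface_hub); it creates the target directory itself, so the mkdir call would not be needed. This is a sketch, not part of the committed change:

from huggingface_hub import snapshot_download

# Sketch: in-process equivalent of the huggingface-cli download above.
snapshot_download(
    repo_id="bartowski/c4ai-command-r-v01-exl2",
    revision="6_5",
    local_dir="c4ai-command-r-v01-exl2",
    local_dir_use_symlinks=False,  # mirrors --local-dir-use-symlinks False
)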