Set main branch to 4bit-128g-True, sharded
Browse files- ACCEPTABLE_USE_POLICY.txt +11 -0
- config.json +2 -4
- model-00001-of-00010.safetensors +2 -2
- model-00002-of-00010.safetensors +2 -2
- model-00003-of-00010.safetensors +2 -2
- model-00004-of-00010.safetensors +2 -2
- model-00005-of-00010.safetensors +2 -2
- model-00006-of-00010.safetensors +2 -2
- model-00007-of-00010.safetensors +2 -2
- model-00008-of-00010.safetensors +2 -2
- model-00009-of-00010.safetensors +2 -2
- model-00010-of-00010.safetensors +2 -2
- model.safetensors.index.json +478 -478
- quantize_config.json +2 -2
ACCEPTABLE_USE_POLICY.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FALCON 180B TII LICENSE VERSION 1.0
|
| 2 |
+
September 2023
|
| 3 |
+
falconllm.tii.ae
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
ACCEPTABLE USE POLICY
|
| 7 |
+
You agree not to use Falcon 180B or any Work or Derivative Work (as such terms are defined in the Falcon 180B TII License Version 1.0):
|
| 8 |
+
1. In any way that violates any applicable national, federal, state, local or international law or regulation;
|
| 9 |
+
2. For the purpose of exploiting, harming or attempting to exploit or harm minors and/or living beings in any way;
|
| 10 |
+
3. To generate or disseminate verifiably false information with the purpose of harming others; and/or
|
| 11 |
+
4. To defame, disparage or otherwise harass others.
|
config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "/workspace/
|
| 3 |
"alibi": false,
|
| 4 |
"architectures": [
|
| 5 |
"FalconForCausalLM"
|
|
@@ -19,9 +19,7 @@
|
|
| 19 |
"num_attention_heads": 232,
|
| 20 |
"num_hidden_layers": 80,
|
| 21 |
"num_kv_heads": 8,
|
| 22 |
-
"pad_token_id": 0,
|
| 23 |
"parallel_attn": true,
|
| 24 |
-
"pretraining_tp": 1,
|
| 25 |
"quantization_config": {
|
| 26 |
"batch_size": 1,
|
| 27 |
"bits": 4,
|
|
@@ -30,7 +28,7 @@
|
|
| 30 |
"dataset": null,
|
| 31 |
"desc_act": true,
|
| 32 |
"disable_exllama": true,
|
| 33 |
-
"group_size":
|
| 34 |
"model_seqlen": null,
|
| 35 |
"module_name_preceding_first_block": null,
|
| 36 |
"pad_token_id": null,
|
|
|
|
| 1 |
{
|
| 2 |
+
"_name_or_path": "/workspace/process/tiiuae_falcon-180b-chat/gptq/gptq-4bit-128g-actorder_True/",
|
| 3 |
"alibi": false,
|
| 4 |
"architectures": [
|
| 5 |
"FalconForCausalLM"
|
|
|
|
| 19 |
"num_attention_heads": 232,
|
| 20 |
"num_hidden_layers": 80,
|
| 21 |
"num_kv_heads": 8,
|
|
|
|
| 22 |
"parallel_attn": true,
|
|
|
|
| 23 |
"quantization_config": {
|
| 24 |
"batch_size": 1,
|
| 25 |
"bits": 4,
|
|
|
|
| 28 |
"dataset": null,
|
| 29 |
"desc_act": true,
|
| 30 |
"disable_exllama": true,
|
| 31 |
+
"group_size": 128,
|
| 32 |
"model_seqlen": null,
|
| 33 |
"module_name_preceding_first_block": null,
|
| 34 |
"pad_token_id": null,
|
model-00001-of-00010.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0710855b0308e1a37410e6b67253974815c0f97d5c303d03e1132ab7ad7c55f7
|
| 3 |
+
size 9995023968
|
model-00002-of-00010.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ecbfc54c8304049dd69afd6280886fad89bc7e566f5b9570749e4be935448f6
|
| 3 |
+
size 9941818992
|
model-00003-of-00010.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f3568ba7cefbf02ad057e9a32e1f883b13ec912ff79ef511b3f589451cd2106f
|
| 3 |
+
size 9927772520
|
model-00004-of-00010.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f6d8e65ea85221c1b8ce251bec6ea9e5b498ccf2f4677f59faa8a6b54869be4a
|
| 3 |
+
size 9690586856
|
model-00005-of-00010.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fca596b681e4541b2d1e168ecceb3f2fa2b675ee9ace856032ac6c26f104e795
|
| 3 |
+
size 9927772520
|
model-00006-of-00010.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d73cd57d08723ae9efc442bae774702b518ab65c6fcbdfc35637dd2f9d791538
|
| 3 |
+
size 9690586856
|
model-00007-of-00010.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1cf2e1595288202bdf9abcf4bfc14cb8f6142466adf1a1e9c64df19cf1367dfe
|
| 3 |
+
size 9927772520
|
model-00008-of-00010.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1b60dff7f3adf1fb7f49aa484ffc6d13f797acd61865bf9a1270e6f368f354f6
|
| 3 |
+
size 9690586856
|
model-00009-of-00010.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:09092c16c2675c5d1621dda898f57aa9b6a6fb10c39a7baa9a3db35a5c1d8cdd
|
| 3 |
+
size 9927772520
|
model-00010-of-00010.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5724bbe7696a4281087fd85831fc8fa6489117332176c9243145d4b424aab3a7
|
| 3 |
+
size 5533009736
|
model.safetensors.index.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
-
"total_size":
|
| 4 |
},
|
| 5 |
"weight_map": {
|
| 6 |
"transformer.h.0.ln_attn.bias": "model-00001-of-00010.safetensors",
|
|
@@ -171,20 +171,20 @@
|
|
| 171 |
"transformer.h.14.self_attention.query_key_value.qweight": "model-00002-of-00010.safetensors",
|
| 172 |
"transformer.h.14.self_attention.query_key_value.qzeros": "model-00002-of-00010.safetensors",
|
| 173 |
"transformer.h.14.self_attention.query_key_value.scales": "model-00002-of-00010.safetensors",
|
| 174 |
-
"transformer.h.15.ln_attn.bias": "model-
|
| 175 |
-
"transformer.h.15.ln_attn.weight": "model-
|
| 176 |
-
"transformer.h.15.ln_mlp.bias": "model-
|
| 177 |
-
"transformer.h.15.ln_mlp.weight": "model-
|
| 178 |
"transformer.h.15.mlp.dense_4h_to_h.bias": "model-00002-of-00010.safetensors",
|
| 179 |
"transformer.h.15.mlp.dense_4h_to_h.g_idx": "model-00002-of-00010.safetensors",
|
| 180 |
"transformer.h.15.mlp.dense_4h_to_h.qweight": "model-00002-of-00010.safetensors",
|
| 181 |
"transformer.h.15.mlp.dense_4h_to_h.qzeros": "model-00002-of-00010.safetensors",
|
| 182 |
"transformer.h.15.mlp.dense_4h_to_h.scales": "model-00002-of-00010.safetensors",
|
| 183 |
-
"transformer.h.15.mlp.dense_h_to_4h.bias": "model-
|
| 184 |
-
"transformer.h.15.mlp.dense_h_to_4h.g_idx": "model-
|
| 185 |
-
"transformer.h.15.mlp.dense_h_to_4h.qweight": "model-
|
| 186 |
-
"transformer.h.15.mlp.dense_h_to_4h.qzeros": "model-
|
| 187 |
-
"transformer.h.15.mlp.dense_h_to_4h.scales": "model-
|
| 188 |
"transformer.h.15.self_attention.dense.bias": "model-00002-of-00010.safetensors",
|
| 189 |
"transformer.h.15.self_attention.dense.g_idx": "model-00002-of-00010.safetensors",
|
| 190 |
"transformer.h.15.self_attention.dense.qweight": "model-00002-of-00010.safetensors",
|
|
@@ -209,16 +209,16 @@
|
|
| 209 |
"transformer.h.16.mlp.dense_h_to_4h.qweight": "model-00003-of-00010.safetensors",
|
| 210 |
"transformer.h.16.mlp.dense_h_to_4h.qzeros": "model-00003-of-00010.safetensors",
|
| 211 |
"transformer.h.16.mlp.dense_h_to_4h.scales": "model-00003-of-00010.safetensors",
|
| 212 |
-
"transformer.h.16.self_attention.dense.bias": "model-
|
| 213 |
-
"transformer.h.16.self_attention.dense.g_idx": "model-
|
| 214 |
-
"transformer.h.16.self_attention.dense.qweight": "model-
|
| 215 |
-
"transformer.h.16.self_attention.dense.qzeros": "model-
|
| 216 |
-
"transformer.h.16.self_attention.dense.scales": "model-
|
| 217 |
-
"transformer.h.16.self_attention.query_key_value.bias": "model-
|
| 218 |
-
"transformer.h.16.self_attention.query_key_value.g_idx": "model-
|
| 219 |
-
"transformer.h.16.self_attention.query_key_value.qweight": "model-
|
| 220 |
-
"transformer.h.16.self_attention.query_key_value.qzeros": "model-
|
| 221 |
-
"transformer.h.16.self_attention.query_key_value.scales": "model-
|
| 222 |
"transformer.h.17.ln_attn.bias": "model-00003-of-00010.safetensors",
|
| 223 |
"transformer.h.17.ln_attn.weight": "model-00003-of-00010.safetensors",
|
| 224 |
"transformer.h.17.ln_mlp.bias": "model-00003-of-00010.safetensors",
|
|
@@ -411,20 +411,20 @@
|
|
| 411 |
"transformer.h.23.self_attention.query_key_value.qweight": "model-00003-of-00010.safetensors",
|
| 412 |
"transformer.h.23.self_attention.query_key_value.qzeros": "model-00003-of-00010.safetensors",
|
| 413 |
"transformer.h.23.self_attention.query_key_value.scales": "model-00003-of-00010.safetensors",
|
| 414 |
-
"transformer.h.24.ln_attn.bias": "model-
|
| 415 |
-
"transformer.h.24.ln_attn.weight": "model-
|
| 416 |
-
"transformer.h.24.ln_mlp.bias": "model-
|
| 417 |
-
"transformer.h.24.ln_mlp.weight": "model-
|
| 418 |
-
"transformer.h.24.mlp.dense_4h_to_h.bias": "model-
|
| 419 |
-
"transformer.h.24.mlp.dense_4h_to_h.g_idx": "model-
|
| 420 |
-
"transformer.h.24.mlp.dense_4h_to_h.qweight": "model-
|
| 421 |
-
"transformer.h.24.mlp.dense_4h_to_h.qzeros": "model-
|
| 422 |
-
"transformer.h.24.mlp.dense_4h_to_h.scales": "model-
|
| 423 |
-
"transformer.h.24.mlp.dense_h_to_4h.bias": "model-
|
| 424 |
-
"transformer.h.24.mlp.dense_h_to_4h.g_idx": "model-
|
| 425 |
-
"transformer.h.24.mlp.dense_h_to_4h.qweight": "model-
|
| 426 |
-
"transformer.h.24.mlp.dense_h_to_4h.qzeros": "model-
|
| 427 |
-
"transformer.h.24.mlp.dense_h_to_4h.scales": "model-
|
| 428 |
"transformer.h.24.self_attention.dense.bias": "model-00003-of-00010.safetensors",
|
| 429 |
"transformer.h.24.self_attention.dense.g_idx": "model-00003-of-00010.safetensors",
|
| 430 |
"transformer.h.24.self_attention.dense.qweight": "model-00003-of-00010.safetensors",
|
|
@@ -449,16 +449,16 @@
|
|
| 449 |
"transformer.h.25.mlp.dense_h_to_4h.qweight": "model-00004-of-00010.safetensors",
|
| 450 |
"transformer.h.25.mlp.dense_h_to_4h.qzeros": "model-00004-of-00010.safetensors",
|
| 451 |
"transformer.h.25.mlp.dense_h_to_4h.scales": "model-00004-of-00010.safetensors",
|
| 452 |
-
"transformer.h.25.self_attention.dense.bias": "model-
|
| 453 |
-
"transformer.h.25.self_attention.dense.g_idx": "model-
|
| 454 |
-
"transformer.h.25.self_attention.dense.qweight": "model-
|
| 455 |
-
"transformer.h.25.self_attention.dense.qzeros": "model-
|
| 456 |
-
"transformer.h.25.self_attention.dense.scales": "model-
|
| 457 |
-
"transformer.h.25.self_attention.query_key_value.bias": "model-
|
| 458 |
-
"transformer.h.25.self_attention.query_key_value.g_idx": "model-
|
| 459 |
-
"transformer.h.25.self_attention.query_key_value.qweight": "model-
|
| 460 |
-
"transformer.h.25.self_attention.query_key_value.qzeros": "model-
|
| 461 |
-
"transformer.h.25.self_attention.query_key_value.scales": "model-
|
| 462 |
"transformer.h.26.ln_attn.bias": "model-00004-of-00010.safetensors",
|
| 463 |
"transformer.h.26.ln_attn.weight": "model-00004-of-00010.safetensors",
|
| 464 |
"transformer.h.26.ln_mlp.bias": "model-00004-of-00010.safetensors",
|
|
@@ -627,20 +627,20 @@
|
|
| 627 |
"transformer.h.31.self_attention.query_key_value.qweight": "model-00004-of-00010.safetensors",
|
| 628 |
"transformer.h.31.self_attention.query_key_value.qzeros": "model-00004-of-00010.safetensors",
|
| 629 |
"transformer.h.31.self_attention.query_key_value.scales": "model-00004-of-00010.safetensors",
|
| 630 |
-
"transformer.h.32.ln_attn.bias": "model-
|
| 631 |
-
"transformer.h.32.ln_attn.weight": "model-
|
| 632 |
-
"transformer.h.32.ln_mlp.bias": "model-
|
| 633 |
-
"transformer.h.32.ln_mlp.weight": "model-
|
| 634 |
"transformer.h.32.mlp.dense_4h_to_h.bias": "model-00004-of-00010.safetensors",
|
| 635 |
"transformer.h.32.mlp.dense_4h_to_h.g_idx": "model-00004-of-00010.safetensors",
|
| 636 |
"transformer.h.32.mlp.dense_4h_to_h.qweight": "model-00004-of-00010.safetensors",
|
| 637 |
"transformer.h.32.mlp.dense_4h_to_h.qzeros": "model-00004-of-00010.safetensors",
|
| 638 |
"transformer.h.32.mlp.dense_4h_to_h.scales": "model-00004-of-00010.safetensors",
|
| 639 |
-
"transformer.h.32.mlp.dense_h_to_4h.bias": "model-
|
| 640 |
-
"transformer.h.32.mlp.dense_h_to_4h.g_idx": "model-
|
| 641 |
-
"transformer.h.32.mlp.dense_h_to_4h.qweight": "model-
|
| 642 |
-
"transformer.h.32.mlp.dense_h_to_4h.qzeros": "model-
|
| 643 |
-
"transformer.h.32.mlp.dense_h_to_4h.scales": "model-
|
| 644 |
"transformer.h.32.self_attention.dense.bias": "model-00004-of-00010.safetensors",
|
| 645 |
"transformer.h.32.self_attention.dense.g_idx": "model-00004-of-00010.safetensors",
|
| 646 |
"transformer.h.32.self_attention.dense.qweight": "model-00004-of-00010.safetensors",
|
|
@@ -651,30 +651,30 @@
|
|
| 651 |
"transformer.h.32.self_attention.query_key_value.qweight": "model-00004-of-00010.safetensors",
|
| 652 |
"transformer.h.32.self_attention.query_key_value.qzeros": "model-00004-of-00010.safetensors",
|
| 653 |
"transformer.h.32.self_attention.query_key_value.scales": "model-00004-of-00010.safetensors",
|
| 654 |
-
"transformer.h.33.ln_attn.bias": "model-
|
| 655 |
-
"transformer.h.33.ln_attn.weight": "model-
|
| 656 |
-
"transformer.h.33.ln_mlp.bias": "model-
|
| 657 |
-
"transformer.h.33.ln_mlp.weight": "model-
|
| 658 |
-
"transformer.h.33.mlp.dense_4h_to_h.bias": "model-
|
| 659 |
-
"transformer.h.33.mlp.dense_4h_to_h.g_idx": "model-
|
| 660 |
-
"transformer.h.33.mlp.dense_4h_to_h.qweight": "model-
|
| 661 |
-
"transformer.h.33.mlp.dense_4h_to_h.qzeros": "model-
|
| 662 |
-
"transformer.h.33.mlp.dense_4h_to_h.scales": "model-
|
| 663 |
-
"transformer.h.33.mlp.dense_h_to_4h.bias": "model-
|
| 664 |
-
"transformer.h.33.mlp.dense_h_to_4h.g_idx": "model-
|
| 665 |
-
"transformer.h.33.mlp.dense_h_to_4h.qweight": "model-
|
| 666 |
-
"transformer.h.33.mlp.dense_h_to_4h.qzeros": "model-
|
| 667 |
-
"transformer.h.33.mlp.dense_h_to_4h.scales": "model-
|
| 668 |
-
"transformer.h.33.self_attention.dense.bias": "model-
|
| 669 |
-
"transformer.h.33.self_attention.dense.g_idx": "model-
|
| 670 |
-
"transformer.h.33.self_attention.dense.qweight": "model-
|
| 671 |
-
"transformer.h.33.self_attention.dense.qzeros": "model-
|
| 672 |
-
"transformer.h.33.self_attention.dense.scales": "model-
|
| 673 |
-
"transformer.h.33.self_attention.query_key_value.bias": "model-
|
| 674 |
-
"transformer.h.33.self_attention.query_key_value.g_idx": "model-
|
| 675 |
-
"transformer.h.33.self_attention.query_key_value.qweight": "model-
|
| 676 |
-
"transformer.h.33.self_attention.query_key_value.qzeros": "model-
|
| 677 |
-
"transformer.h.33.self_attention.query_key_value.scales": "model-
|
| 678 |
"transformer.h.34.ln_attn.bias": "model-00005-of-00010.safetensors",
|
| 679 |
"transformer.h.34.ln_attn.weight": "model-00005-of-00010.safetensors",
|
| 680 |
"transformer.h.34.ln_mlp.bias": "model-00005-of-00010.safetensors",
|
|
@@ -689,16 +689,16 @@
|
|
| 689 |
"transformer.h.34.mlp.dense_h_to_4h.qweight": "model-00005-of-00010.safetensors",
|
| 690 |
"transformer.h.34.mlp.dense_h_to_4h.qzeros": "model-00005-of-00010.safetensors",
|
| 691 |
"transformer.h.34.mlp.dense_h_to_4h.scales": "model-00005-of-00010.safetensors",
|
| 692 |
-
"transformer.h.34.self_attention.dense.bias": "model-
|
| 693 |
-
"transformer.h.34.self_attention.dense.g_idx": "model-
|
| 694 |
-
"transformer.h.34.self_attention.dense.qweight": "model-
|
| 695 |
-
"transformer.h.34.self_attention.dense.qzeros": "model-
|
| 696 |
-
"transformer.h.34.self_attention.dense.scales": "model-
|
| 697 |
-
"transformer.h.34.self_attention.query_key_value.bias": "model-
|
| 698 |
-
"transformer.h.34.self_attention.query_key_value.g_idx": "model-
|
| 699 |
-
"transformer.h.34.self_attention.query_key_value.qweight": "model-
|
| 700 |
-
"transformer.h.34.self_attention.query_key_value.qzeros": "model-
|
| 701 |
-
"transformer.h.34.self_attention.query_key_value.scales": "model-
|
| 702 |
"transformer.h.35.ln_attn.bias": "model-00005-of-00010.safetensors",
|
| 703 |
"transformer.h.35.ln_attn.weight": "model-00005-of-00010.safetensors",
|
| 704 |
"transformer.h.35.ln_mlp.bias": "model-00005-of-00010.safetensors",
|
|
@@ -867,20 +867,20 @@
|
|
| 867 |
"transformer.h.40.self_attention.query_key_value.qweight": "model-00005-of-00010.safetensors",
|
| 868 |
"transformer.h.40.self_attention.query_key_value.qzeros": "model-00005-of-00010.safetensors",
|
| 869 |
"transformer.h.40.self_attention.query_key_value.scales": "model-00005-of-00010.safetensors",
|
| 870 |
-
"transformer.h.41.ln_attn.bias": "model-
|
| 871 |
-
"transformer.h.41.ln_attn.weight": "model-
|
| 872 |
-
"transformer.h.41.ln_mlp.bias": "model-
|
| 873 |
-
"transformer.h.41.ln_mlp.weight": "model-
|
| 874 |
-
"transformer.h.41.mlp.dense_4h_to_h.bias": "model-
|
| 875 |
-
"transformer.h.41.mlp.dense_4h_to_h.g_idx": "model-
|
| 876 |
-
"transformer.h.41.mlp.dense_4h_to_h.qweight": "model-
|
| 877 |
-
"transformer.h.41.mlp.dense_4h_to_h.qzeros": "model-
|
| 878 |
-
"transformer.h.41.mlp.dense_4h_to_h.scales": "model-
|
| 879 |
-
"transformer.h.41.mlp.dense_h_to_4h.bias": "model-
|
| 880 |
-
"transformer.h.41.mlp.dense_h_to_4h.g_idx": "model-
|
| 881 |
-
"transformer.h.41.mlp.dense_h_to_4h.qweight": "model-
|
| 882 |
-
"transformer.h.41.mlp.dense_h_to_4h.qzeros": "model-
|
| 883 |
-
"transformer.h.41.mlp.dense_h_to_4h.scales": "model-
|
| 884 |
"transformer.h.41.self_attention.dense.bias": "model-00005-of-00010.safetensors",
|
| 885 |
"transformer.h.41.self_attention.dense.g_idx": "model-00005-of-00010.safetensors",
|
| 886 |
"transformer.h.41.self_attention.dense.qweight": "model-00005-of-00010.safetensors",
|
|
@@ -891,30 +891,30 @@
|
|
| 891 |
"transformer.h.41.self_attention.query_key_value.qweight": "model-00005-of-00010.safetensors",
|
| 892 |
"transformer.h.41.self_attention.query_key_value.qzeros": "model-00005-of-00010.safetensors",
|
| 893 |
"transformer.h.41.self_attention.query_key_value.scales": "model-00005-of-00010.safetensors",
|
| 894 |
-
"transformer.h.42.ln_attn.bias": "model-
|
| 895 |
-
"transformer.h.42.ln_attn.weight": "model-
|
| 896 |
-
"transformer.h.42.ln_mlp.bias": "model-
|
| 897 |
-
"transformer.h.42.ln_mlp.weight": "model-
|
| 898 |
-
"transformer.h.42.mlp.dense_4h_to_h.bias": "model-
|
| 899 |
-
"transformer.h.42.mlp.dense_4h_to_h.g_idx": "model-
|
| 900 |
-
"transformer.h.42.mlp.dense_4h_to_h.qweight": "model-
|
| 901 |
-
"transformer.h.42.mlp.dense_4h_to_h.qzeros": "model-
|
| 902 |
-
"transformer.h.42.mlp.dense_4h_to_h.scales": "model-
|
| 903 |
-
"transformer.h.42.mlp.dense_h_to_4h.bias": "model-
|
| 904 |
-
"transformer.h.42.mlp.dense_h_to_4h.g_idx": "model-
|
| 905 |
-
"transformer.h.42.mlp.dense_h_to_4h.qweight": "model-
|
| 906 |
-
"transformer.h.42.mlp.dense_h_to_4h.qzeros": "model-
|
| 907 |
-
"transformer.h.42.mlp.dense_h_to_4h.scales": "model-
|
| 908 |
-
"transformer.h.42.self_attention.dense.bias": "model-
|
| 909 |
-
"transformer.h.42.self_attention.dense.g_idx": "model-
|
| 910 |
-
"transformer.h.42.self_attention.dense.qweight": "model-
|
| 911 |
-
"transformer.h.42.self_attention.dense.qzeros": "model-
|
| 912 |
-
"transformer.h.42.self_attention.dense.scales": "model-
|
| 913 |
-
"transformer.h.42.self_attention.query_key_value.bias": "model-
|
| 914 |
-
"transformer.h.42.self_attention.query_key_value.g_idx": "model-
|
| 915 |
-
"transformer.h.42.self_attention.query_key_value.qweight": "model-
|
| 916 |
-
"transformer.h.42.self_attention.query_key_value.qzeros": "model-
|
| 917 |
-
"transformer.h.42.self_attention.query_key_value.scales": "model-
|
| 918 |
"transformer.h.43.ln_attn.bias": "model-00006-of-00010.safetensors",
|
| 919 |
"transformer.h.43.ln_attn.weight": "model-00006-of-00010.safetensors",
|
| 920 |
"transformer.h.43.ln_mlp.bias": "model-00006-of-00010.safetensors",
|
|
@@ -929,16 +929,16 @@
|
|
| 929 |
"transformer.h.43.mlp.dense_h_to_4h.qweight": "model-00006-of-00010.safetensors",
|
| 930 |
"transformer.h.43.mlp.dense_h_to_4h.qzeros": "model-00006-of-00010.safetensors",
|
| 931 |
"transformer.h.43.mlp.dense_h_to_4h.scales": "model-00006-of-00010.safetensors",
|
| 932 |
-
"transformer.h.43.self_attention.dense.bias": "model-
|
| 933 |
-
"transformer.h.43.self_attention.dense.g_idx": "model-
|
| 934 |
-
"transformer.h.43.self_attention.dense.qweight": "model-
|
| 935 |
-
"transformer.h.43.self_attention.dense.qzeros": "model-
|
| 936 |
-
"transformer.h.43.self_attention.dense.scales": "model-
|
| 937 |
-
"transformer.h.43.self_attention.query_key_value.bias": "model-
|
| 938 |
-
"transformer.h.43.self_attention.query_key_value.g_idx": "model-
|
| 939 |
-
"transformer.h.43.self_attention.query_key_value.qweight": "model-
|
| 940 |
-
"transformer.h.43.self_attention.query_key_value.qzeros": "model-
|
| 941 |
-
"transformer.h.43.self_attention.query_key_value.scales": "model-
|
| 942 |
"transformer.h.44.ln_attn.bias": "model-00006-of-00010.safetensors",
|
| 943 |
"transformer.h.44.ln_attn.weight": "model-00006-of-00010.safetensors",
|
| 944 |
"transformer.h.44.ln_mlp.bias": "model-00006-of-00010.safetensors",
|
|
@@ -1059,20 +1059,20 @@
|
|
| 1059 |
"transformer.h.48.self_attention.query_key_value.qweight": "model-00006-of-00010.safetensors",
|
| 1060 |
"transformer.h.48.self_attention.query_key_value.qzeros": "model-00006-of-00010.safetensors",
|
| 1061 |
"transformer.h.48.self_attention.query_key_value.scales": "model-00006-of-00010.safetensors",
|
| 1062 |
-
"transformer.h.49.ln_attn.bias": "model-
|
| 1063 |
-
"transformer.h.49.ln_attn.weight": "model-
|
| 1064 |
-
"transformer.h.49.ln_mlp.bias": "model-
|
| 1065 |
-
"transformer.h.49.ln_mlp.weight": "model-
|
| 1066 |
"transformer.h.49.mlp.dense_4h_to_h.bias": "model-00006-of-00010.safetensors",
|
| 1067 |
"transformer.h.49.mlp.dense_4h_to_h.g_idx": "model-00006-of-00010.safetensors",
|
| 1068 |
"transformer.h.49.mlp.dense_4h_to_h.qweight": "model-00006-of-00010.safetensors",
|
| 1069 |
"transformer.h.49.mlp.dense_4h_to_h.qzeros": "model-00006-of-00010.safetensors",
|
| 1070 |
"transformer.h.49.mlp.dense_4h_to_h.scales": "model-00006-of-00010.safetensors",
|
| 1071 |
-
"transformer.h.49.mlp.dense_h_to_4h.bias": "model-
|
| 1072 |
-
"transformer.h.49.mlp.dense_h_to_4h.g_idx": "model-
|
| 1073 |
-
"transformer.h.49.mlp.dense_h_to_4h.qweight": "model-
|
| 1074 |
-
"transformer.h.49.mlp.dense_h_to_4h.qzeros": "model-
|
| 1075 |
-
"transformer.h.49.mlp.dense_h_to_4h.scales": "model-
|
| 1076 |
"transformer.h.49.self_attention.dense.bias": "model-00006-of-00010.safetensors",
|
| 1077 |
"transformer.h.49.self_attention.dense.g_idx": "model-00006-of-00010.safetensors",
|
| 1078 |
"transformer.h.49.self_attention.dense.qweight": "model-00006-of-00010.safetensors",
|
|
@@ -1107,54 +1107,54 @@
|
|
| 1107 |
"transformer.h.5.self_attention.query_key_value.qweight": "model-00001-of-00010.safetensors",
|
| 1108 |
"transformer.h.5.self_attention.query_key_value.qzeros": "model-00001-of-00010.safetensors",
|
| 1109 |
"transformer.h.5.self_attention.query_key_value.scales": "model-00001-of-00010.safetensors",
|
| 1110 |
-
"transformer.h.50.ln_attn.bias": "model-
|
| 1111 |
-
"transformer.h.50.ln_attn.weight": "model-
|
| 1112 |
-
"transformer.h.50.ln_mlp.bias": "model-
|
| 1113 |
-
"transformer.h.50.ln_mlp.weight": "model-
|
| 1114 |
-
"transformer.h.50.mlp.dense_4h_to_h.bias": "model-
|
| 1115 |
-
"transformer.h.50.mlp.dense_4h_to_h.g_idx": "model-
|
| 1116 |
-
"transformer.h.50.mlp.dense_4h_to_h.qweight": "model-
|
| 1117 |
-
"transformer.h.50.mlp.dense_4h_to_h.qzeros": "model-
|
| 1118 |
-
"transformer.h.50.mlp.dense_4h_to_h.scales": "model-
|
| 1119 |
-
"transformer.h.50.mlp.dense_h_to_4h.bias": "model-
|
| 1120 |
-
"transformer.h.50.mlp.dense_h_to_4h.g_idx": "model-
|
| 1121 |
-
"transformer.h.50.mlp.dense_h_to_4h.qweight": "model-
|
| 1122 |
-
"transformer.h.50.mlp.dense_h_to_4h.qzeros": "model-
|
| 1123 |
-
"transformer.h.50.mlp.dense_h_to_4h.scales": "model-
|
| 1124 |
-
"transformer.h.50.self_attention.dense.bias": "model-
|
| 1125 |
-
"transformer.h.50.self_attention.dense.g_idx": "model-
|
| 1126 |
-
"transformer.h.50.self_attention.dense.qweight": "model-
|
| 1127 |
-
"transformer.h.50.self_attention.dense.qzeros": "model-
|
| 1128 |
-
"transformer.h.50.self_attention.dense.scales": "model-
|
| 1129 |
-
"transformer.h.50.self_attention.query_key_value.bias": "model-
|
| 1130 |
-
"transformer.h.50.self_attention.query_key_value.g_idx": "model-
|
| 1131 |
-
"transformer.h.50.self_attention.query_key_value.qweight": "model-
|
| 1132 |
-
"transformer.h.50.self_attention.query_key_value.qzeros": "model-
|
| 1133 |
-
"transformer.h.50.self_attention.query_key_value.scales": "model-
|
| 1134 |
-
"transformer.h.51.ln_attn.bias": "model-
|
| 1135 |
-
"transformer.h.51.ln_attn.weight": "model-
|
| 1136 |
-
"transformer.h.51.ln_mlp.bias": "model-
|
| 1137 |
-
"transformer.h.51.ln_mlp.weight": "model-
|
| 1138 |
-
"transformer.h.51.mlp.dense_4h_to_h.bias": "model-
|
| 1139 |
-
"transformer.h.51.mlp.dense_4h_to_h.g_idx": "model-
|
| 1140 |
-
"transformer.h.51.mlp.dense_4h_to_h.qweight": "model-
|
| 1141 |
-
"transformer.h.51.mlp.dense_4h_to_h.qzeros": "model-
|
| 1142 |
-
"transformer.h.51.mlp.dense_4h_to_h.scales": "model-
|
| 1143 |
-
"transformer.h.51.mlp.dense_h_to_4h.bias": "model-
|
| 1144 |
-
"transformer.h.51.mlp.dense_h_to_4h.g_idx": "model-
|
| 1145 |
-
"transformer.h.51.mlp.dense_h_to_4h.qweight": "model-
|
| 1146 |
-
"transformer.h.51.mlp.dense_h_to_4h.qzeros": "model-
|
| 1147 |
-
"transformer.h.51.mlp.dense_h_to_4h.scales": "model-
|
| 1148 |
-
"transformer.h.51.self_attention.dense.bias": "model-
|
| 1149 |
-
"transformer.h.51.self_attention.dense.g_idx": "model-
|
| 1150 |
-
"transformer.h.51.self_attention.dense.qweight": "model-
|
| 1151 |
-
"transformer.h.51.self_attention.dense.qzeros": "model-
|
| 1152 |
-
"transformer.h.51.self_attention.dense.scales": "model-
|
| 1153 |
-
"transformer.h.51.self_attention.query_key_value.bias": "model-
|
| 1154 |
-
"transformer.h.51.self_attention.query_key_value.g_idx": "model-
|
| 1155 |
-
"transformer.h.51.self_attention.query_key_value.qweight": "model-
|
| 1156 |
-
"transformer.h.51.self_attention.query_key_value.qzeros": "model-
|
| 1157 |
-
"transformer.h.51.self_attention.query_key_value.scales": "model-
|
| 1158 |
"transformer.h.52.ln_attn.bias": "model-00007-of-00010.safetensors",
|
| 1159 |
"transformer.h.52.ln_attn.weight": "model-00007-of-00010.safetensors",
|
| 1160 |
"transformer.h.52.ln_mlp.bias": "model-00007-of-00010.safetensors",
|
|
@@ -1169,16 +1169,16 @@
|
|
| 1169 |
"transformer.h.52.mlp.dense_h_to_4h.qweight": "model-00007-of-00010.safetensors",
|
| 1170 |
"transformer.h.52.mlp.dense_h_to_4h.qzeros": "model-00007-of-00010.safetensors",
|
| 1171 |
"transformer.h.52.mlp.dense_h_to_4h.scales": "model-00007-of-00010.safetensors",
|
| 1172 |
-
"transformer.h.52.self_attention.dense.bias": "model-
|
| 1173 |
-
"transformer.h.52.self_attention.dense.g_idx": "model-
|
| 1174 |
-
"transformer.h.52.self_attention.dense.qweight": "model-
|
| 1175 |
-
"transformer.h.52.self_attention.dense.qzeros": "model-
|
| 1176 |
-
"transformer.h.52.self_attention.dense.scales": "model-
|
| 1177 |
-
"transformer.h.52.self_attention.query_key_value.bias": "model-
|
| 1178 |
-
"transformer.h.52.self_attention.query_key_value.g_idx": "model-
|
| 1179 |
-
"transformer.h.52.self_attention.query_key_value.qweight": "model-
|
| 1180 |
-
"transformer.h.52.self_attention.query_key_value.qzeros": "model-
|
| 1181 |
-
"transformer.h.52.self_attention.query_key_value.scales": "model-
|
| 1182 |
"transformer.h.53.ln_attn.bias": "model-00007-of-00010.safetensors",
|
| 1183 |
"transformer.h.53.ln_attn.weight": "model-00007-of-00010.safetensors",
|
| 1184 |
"transformer.h.53.ln_mlp.bias": "model-00007-of-00010.safetensors",
|
|
@@ -1299,20 +1299,20 @@
|
|
| 1299 |
"transformer.h.57.self_attention.query_key_value.qweight": "model-00007-of-00010.safetensors",
|
| 1300 |
"transformer.h.57.self_attention.query_key_value.qzeros": "model-00007-of-00010.safetensors",
|
| 1301 |
"transformer.h.57.self_attention.query_key_value.scales": "model-00007-of-00010.safetensors",
|
| 1302 |
-
"transformer.h.58.ln_attn.bias": "model-
|
| 1303 |
-
"transformer.h.58.ln_attn.weight": "model-
|
| 1304 |
-
"transformer.h.58.ln_mlp.bias": "model-
|
| 1305 |
-
"transformer.h.58.ln_mlp.weight": "model-
|
| 1306 |
-
"transformer.h.58.mlp.dense_4h_to_h.bias": "model-
|
| 1307 |
-
"transformer.h.58.mlp.dense_4h_to_h.g_idx": "model-
|
| 1308 |
-
"transformer.h.58.mlp.dense_4h_to_h.qweight": "model-
|
| 1309 |
-
"transformer.h.58.mlp.dense_4h_to_h.qzeros": "model-
|
| 1310 |
-
"transformer.h.58.mlp.dense_4h_to_h.scales": "model-
|
| 1311 |
-
"transformer.h.58.mlp.dense_h_to_4h.bias": "model-
|
| 1312 |
-
"transformer.h.58.mlp.dense_h_to_4h.g_idx": "model-
|
| 1313 |
-
"transformer.h.58.mlp.dense_h_to_4h.qweight": "model-
|
| 1314 |
-
"transformer.h.58.mlp.dense_h_to_4h.qzeros": "model-
|
| 1315 |
-
"transformer.h.58.mlp.dense_h_to_4h.scales": "model-
|
| 1316 |
"transformer.h.58.self_attention.dense.bias": "model-00007-of-00010.safetensors",
|
| 1317 |
"transformer.h.58.self_attention.dense.g_idx": "model-00007-of-00010.safetensors",
|
| 1318 |
"transformer.h.58.self_attention.dense.qweight": "model-00007-of-00010.safetensors",
|
|
@@ -1323,44 +1323,44 @@
|
|
| 1323 |
"transformer.h.58.self_attention.query_key_value.qweight": "model-00007-of-00010.safetensors",
|
| 1324 |
"transformer.h.58.self_attention.query_key_value.qzeros": "model-00007-of-00010.safetensors",
|
| 1325 |
"transformer.h.58.self_attention.query_key_value.scales": "model-00007-of-00010.safetensors",
|
| 1326 |
-
"transformer.h.59.ln_attn.bias": "model-
|
| 1327 |
-
"transformer.h.59.ln_attn.weight": "model-
|
| 1328 |
-
"transformer.h.59.ln_mlp.bias": "model-
|
| 1329 |
-
"transformer.h.59.ln_mlp.weight": "model-
|
| 1330 |
-
"transformer.h.59.mlp.dense_4h_to_h.bias": "model-
|
| 1331 |
-
"transformer.h.59.mlp.dense_4h_to_h.g_idx": "model-
|
| 1332 |
-
"transformer.h.59.mlp.dense_4h_to_h.qweight": "model-
|
| 1333 |
-
"transformer.h.59.mlp.dense_4h_to_h.qzeros": "model-
|
| 1334 |
-
"transformer.h.59.mlp.dense_4h_to_h.scales": "model-
|
| 1335 |
-
"transformer.h.59.mlp.dense_h_to_4h.bias": "model-
|
| 1336 |
-
"transformer.h.59.mlp.dense_h_to_4h.g_idx": "model-
|
| 1337 |
-
"transformer.h.59.mlp.dense_h_to_4h.qweight": "model-
|
| 1338 |
-
"transformer.h.59.mlp.dense_h_to_4h.qzeros": "model-
|
| 1339 |
-
"transformer.h.59.mlp.dense_h_to_4h.scales": "model-
|
| 1340 |
-
"transformer.h.59.self_attention.dense.bias": "model-
|
| 1341 |
-
"transformer.h.59.self_attention.dense.g_idx": "model-
|
| 1342 |
-
"transformer.h.59.self_attention.dense.qweight": "model-
|
| 1343 |
-
"transformer.h.59.self_attention.dense.qzeros": "model-
|
| 1344 |
-
"transformer.h.59.self_attention.dense.scales": "model-
|
| 1345 |
-
"transformer.h.59.self_attention.query_key_value.bias": "model-
|
| 1346 |
-
"transformer.h.59.self_attention.query_key_value.g_idx": "model-
|
| 1347 |
-
"transformer.h.59.self_attention.query_key_value.qweight": "model-
|
| 1348 |
-
"transformer.h.59.self_attention.query_key_value.qzeros": "model-
|
| 1349 |
-
"transformer.h.59.self_attention.query_key_value.scales": "model-
|
| 1350 |
-
"transformer.h.6.ln_attn.bias": "model-
|
| 1351 |
-
"transformer.h.6.ln_attn.weight": "model-
|
| 1352 |
-
"transformer.h.6.ln_mlp.bias": "model-
|
| 1353 |
-
"transformer.h.6.ln_mlp.weight": "model-
|
| 1354 |
"transformer.h.6.mlp.dense_4h_to_h.bias": "model-00001-of-00010.safetensors",
|
| 1355 |
"transformer.h.6.mlp.dense_4h_to_h.g_idx": "model-00001-of-00010.safetensors",
|
| 1356 |
"transformer.h.6.mlp.dense_4h_to_h.qweight": "model-00001-of-00010.safetensors",
|
| 1357 |
"transformer.h.6.mlp.dense_4h_to_h.qzeros": "model-00001-of-00010.safetensors",
|
| 1358 |
"transformer.h.6.mlp.dense_4h_to_h.scales": "model-00001-of-00010.safetensors",
|
| 1359 |
-
"transformer.h.6.mlp.dense_h_to_4h.bias": "model-
|
| 1360 |
-
"transformer.h.6.mlp.dense_h_to_4h.g_idx": "model-
|
| 1361 |
"transformer.h.6.mlp.dense_h_to_4h.qweight": "model-00001-of-00010.safetensors",
|
| 1362 |
"transformer.h.6.mlp.dense_h_to_4h.qzeros": "model-00001-of-00010.safetensors",
|
| 1363 |
-
"transformer.h.6.mlp.dense_h_to_4h.scales": "model-
|
| 1364 |
"transformer.h.6.self_attention.dense.bias": "model-00001-of-00010.safetensors",
|
| 1365 |
"transformer.h.6.self_attention.dense.g_idx": "model-00001-of-00010.safetensors",
|
| 1366 |
"transformer.h.6.self_attention.dense.qweight": "model-00001-of-00010.safetensors",
|
|
@@ -1371,30 +1371,30 @@
|
|
| 1371 |
"transformer.h.6.self_attention.query_key_value.qweight": "model-00001-of-00010.safetensors",
|
| 1372 |
"transformer.h.6.self_attention.query_key_value.qzeros": "model-00001-of-00010.safetensors",
|
| 1373 |
"transformer.h.6.self_attention.query_key_value.scales": "model-00001-of-00010.safetensors",
|
| 1374 |
-
"transformer.h.60.ln_attn.bias": "model-
|
| 1375 |
-
"transformer.h.60.ln_attn.weight": "model-
|
| 1376 |
-
"transformer.h.60.ln_mlp.bias": "model-
|
| 1377 |
-
"transformer.h.60.ln_mlp.weight": "model-
|
| 1378 |
-
"transformer.h.60.mlp.dense_4h_to_h.bias": "model-
|
| 1379 |
-
"transformer.h.60.mlp.dense_4h_to_h.g_idx": "model-
|
| 1380 |
-
"transformer.h.60.mlp.dense_4h_to_h.qweight": "model-
|
| 1381 |
-
"transformer.h.60.mlp.dense_4h_to_h.qzeros": "model-
|
| 1382 |
-
"transformer.h.60.mlp.dense_4h_to_h.scales": "model-
|
| 1383 |
-
"transformer.h.60.mlp.dense_h_to_4h.bias": "model-
|
| 1384 |
-
"transformer.h.60.mlp.dense_h_to_4h.g_idx": "model-
|
| 1385 |
-
"transformer.h.60.mlp.dense_h_to_4h.qweight": "model-
|
| 1386 |
-
"transformer.h.60.mlp.dense_h_to_4h.qzeros": "model-
|
| 1387 |
-
"transformer.h.60.mlp.dense_h_to_4h.scales": "model-
|
| 1388 |
-
"transformer.h.60.self_attention.dense.bias": "model-
|
| 1389 |
-
"transformer.h.60.self_attention.dense.g_idx": "model-
|
| 1390 |
-
"transformer.h.60.self_attention.dense.qweight": "model-
|
| 1391 |
-
"transformer.h.60.self_attention.dense.qzeros": "model-
|
| 1392 |
-
"transformer.h.60.self_attention.dense.scales": "model-
|
| 1393 |
-
"transformer.h.60.self_attention.query_key_value.bias": "model-
|
| 1394 |
-
"transformer.h.60.self_attention.query_key_value.g_idx": "model-
|
| 1395 |
-
"transformer.h.60.self_attention.query_key_value.qweight": "model-
|
| 1396 |
-
"transformer.h.60.self_attention.query_key_value.qzeros": "model-
|
| 1397 |
-
"transformer.h.60.self_attention.query_key_value.scales": "model-
|
| 1398 |
"transformer.h.61.ln_attn.bias": "model-00008-of-00010.safetensors",
|
| 1399 |
"transformer.h.61.ln_attn.weight": "model-00008-of-00010.safetensors",
|
| 1400 |
"transformer.h.61.ln_mlp.bias": "model-00008-of-00010.safetensors",
|
|
@@ -1409,16 +1409,16 @@
|
|
| 1409 |
"transformer.h.61.mlp.dense_h_to_4h.qweight": "model-00008-of-00010.safetensors",
|
| 1410 |
"transformer.h.61.mlp.dense_h_to_4h.qzeros": "model-00008-of-00010.safetensors",
|
| 1411 |
"transformer.h.61.mlp.dense_h_to_4h.scales": "model-00008-of-00010.safetensors",
|
| 1412 |
-
"transformer.h.61.self_attention.dense.bias": "model-
|
| 1413 |
-
"transformer.h.61.self_attention.dense.g_idx": "model-
|
| 1414 |
-
"transformer.h.61.self_attention.dense.qweight": "model-
|
| 1415 |
-
"transformer.h.61.self_attention.dense.qzeros": "model-
|
| 1416 |
-
"transformer.h.61.self_attention.dense.scales": "model-
|
| 1417 |
-
"transformer.h.61.self_attention.query_key_value.bias": "model-
|
| 1418 |
-
"transformer.h.61.self_attention.query_key_value.g_idx": "model-
|
| 1419 |
-
"transformer.h.61.self_attention.query_key_value.qweight": "model-
|
| 1420 |
-
"transformer.h.61.self_attention.query_key_value.qzeros": "model-
|
| 1421 |
-
"transformer.h.61.self_attention.query_key_value.scales": "model-
|
| 1422 |
"transformer.h.62.ln_attn.bias": "model-00008-of-00010.safetensors",
|
| 1423 |
"transformer.h.62.ln_attn.weight": "model-00008-of-00010.safetensors",
|
| 1424 |
"transformer.h.62.ln_mlp.bias": "model-00008-of-00010.safetensors",
|
|
@@ -1515,20 +1515,20 @@
|
|
| 1515 |
"transformer.h.65.self_attention.query_key_value.qweight": "model-00008-of-00010.safetensors",
|
| 1516 |
"transformer.h.65.self_attention.query_key_value.qzeros": "model-00008-of-00010.safetensors",
|
| 1517 |
"transformer.h.65.self_attention.query_key_value.scales": "model-00008-of-00010.safetensors",
|
| 1518 |
-
"transformer.h.66.ln_attn.bias": "model-
|
| 1519 |
-
"transformer.h.66.ln_attn.weight": "model-
|
| 1520 |
-
"transformer.h.66.ln_mlp.bias": "model-
|
| 1521 |
-
"transformer.h.66.ln_mlp.weight": "model-
|
| 1522 |
"transformer.h.66.mlp.dense_4h_to_h.bias": "model-00008-of-00010.safetensors",
|
| 1523 |
"transformer.h.66.mlp.dense_4h_to_h.g_idx": "model-00008-of-00010.safetensors",
|
| 1524 |
"transformer.h.66.mlp.dense_4h_to_h.qweight": "model-00008-of-00010.safetensors",
|
| 1525 |
"transformer.h.66.mlp.dense_4h_to_h.qzeros": "model-00008-of-00010.safetensors",
|
| 1526 |
"transformer.h.66.mlp.dense_4h_to_h.scales": "model-00008-of-00010.safetensors",
|
| 1527 |
-
"transformer.h.66.mlp.dense_h_to_4h.bias": "model-
|
| 1528 |
-
"transformer.h.66.mlp.dense_h_to_4h.g_idx": "model-
|
| 1529 |
-
"transformer.h.66.mlp.dense_h_to_4h.qweight": "model-
|
| 1530 |
-
"transformer.h.66.mlp.dense_h_to_4h.qzeros": "model-
|
| 1531 |
-
"transformer.h.66.mlp.dense_h_to_4h.scales": "model-
|
| 1532 |
"transformer.h.66.self_attention.dense.bias": "model-00008-of-00010.safetensors",
|
| 1533 |
"transformer.h.66.self_attention.dense.g_idx": "model-00008-of-00010.safetensors",
|
| 1534 |
"transformer.h.66.self_attention.dense.qweight": "model-00008-of-00010.safetensors",
|
|
@@ -1539,78 +1539,78 @@
|
|
| 1539 |
"transformer.h.66.self_attention.query_key_value.qweight": "model-00008-of-00010.safetensors",
|
| 1540 |
"transformer.h.66.self_attention.query_key_value.qzeros": "model-00008-of-00010.safetensors",
|
| 1541 |
"transformer.h.66.self_attention.query_key_value.scales": "model-00008-of-00010.safetensors",
|
| 1542 |
-
"transformer.h.67.ln_attn.bias": "model-
|
| 1543 |
-
"transformer.h.67.ln_attn.weight": "model-
|
| 1544 |
-
"transformer.h.67.ln_mlp.bias": "model-
|
| 1545 |
-
"transformer.h.67.ln_mlp.weight": "model-
|
| 1546 |
-
"transformer.h.67.mlp.dense_4h_to_h.bias": "model-
|
| 1547 |
-
"transformer.h.67.mlp.dense_4h_to_h.g_idx": "model-
|
| 1548 |
-
"transformer.h.67.mlp.dense_4h_to_h.qweight": "model-
|
| 1549 |
-
"transformer.h.67.mlp.dense_4h_to_h.qzeros": "model-
|
| 1550 |
-
"transformer.h.67.mlp.dense_4h_to_h.scales": "model-
|
| 1551 |
-
"transformer.h.67.mlp.dense_h_to_4h.bias": "model-
|
| 1552 |
-
"transformer.h.67.mlp.dense_h_to_4h.g_idx": "model-
|
| 1553 |
-
"transformer.h.67.mlp.dense_h_to_4h.qweight": "model-
|
| 1554 |
-
"transformer.h.67.mlp.dense_h_to_4h.qzeros": "model-
|
| 1555 |
-
"transformer.h.67.mlp.dense_h_to_4h.scales": "model-
|
| 1556 |
-
"transformer.h.67.self_attention.dense.bias": "model-
|
| 1557 |
-
"transformer.h.67.self_attention.dense.g_idx": "model-
|
| 1558 |
-
"transformer.h.67.self_attention.dense.qweight": "model-
|
| 1559 |
-
"transformer.h.67.self_attention.dense.qzeros": "model-
|
| 1560 |
-
"transformer.h.67.self_attention.dense.scales": "model-
|
| 1561 |
-
"transformer.h.67.self_attention.query_key_value.bias": "model-
|
| 1562 |
-
"transformer.h.67.self_attention.query_key_value.g_idx": "model-
|
| 1563 |
-
"transformer.h.67.self_attention.query_key_value.qweight": "model-
|
| 1564 |
-
"transformer.h.67.self_attention.query_key_value.qzeros": "model-
|
| 1565 |
-
"transformer.h.67.self_attention.query_key_value.scales": "model-
|
| 1566 |
-
"transformer.h.68.ln_attn.bias": "model-
|
| 1567 |
-
"transformer.h.68.ln_attn.weight": "model-
|
| 1568 |
-
"transformer.h.68.ln_mlp.bias": "model-
|
| 1569 |
-
"transformer.h.68.ln_mlp.weight": "model-
|
| 1570 |
-
"transformer.h.68.mlp.dense_4h_to_h.bias": "model-
|
| 1571 |
-
"transformer.h.68.mlp.dense_4h_to_h.g_idx": "model-
|
| 1572 |
-
"transformer.h.68.mlp.dense_4h_to_h.qweight": "model-
|
| 1573 |
-
"transformer.h.68.mlp.dense_4h_to_h.qzeros": "model-
|
| 1574 |
-
"transformer.h.68.mlp.dense_4h_to_h.scales": "model-
|
| 1575 |
-
"transformer.h.68.mlp.dense_h_to_4h.bias": "model-
|
| 1576 |
-
"transformer.h.68.mlp.dense_h_to_4h.g_idx": "model-
|
| 1577 |
-
"transformer.h.68.mlp.dense_h_to_4h.qweight": "model-
|
| 1578 |
-
"transformer.h.68.mlp.dense_h_to_4h.qzeros": "model-
|
| 1579 |
-
"transformer.h.68.mlp.dense_h_to_4h.scales": "model-
|
| 1580 |
-
"transformer.h.68.self_attention.dense.bias": "model-
|
| 1581 |
-
"transformer.h.68.self_attention.dense.g_idx": "model-
|
| 1582 |
-
"transformer.h.68.self_attention.dense.qweight": "model-
|
| 1583 |
-
"transformer.h.68.self_attention.dense.qzeros": "model-
|
| 1584 |
-
"transformer.h.68.self_attention.dense.scales": "model-
|
| 1585 |
-
"transformer.h.68.self_attention.query_key_value.bias": "model-
|
| 1586 |
-
"transformer.h.68.self_attention.query_key_value.g_idx": "model-
|
| 1587 |
-
"transformer.h.68.self_attention.query_key_value.qweight": "model-
|
| 1588 |
-
"transformer.h.68.self_attention.query_key_value.qzeros": "model-
|
| 1589 |
-
"transformer.h.68.self_attention.query_key_value.scales": "model-
|
| 1590 |
-
"transformer.h.69.ln_attn.bias": "model-
|
| 1591 |
-
"transformer.h.69.ln_attn.weight": "model-
|
| 1592 |
-
"transformer.h.69.ln_mlp.bias": "model-
|
| 1593 |
-
"transformer.h.69.ln_mlp.weight": "model-
|
| 1594 |
-
"transformer.h.69.mlp.dense_4h_to_h.bias": "model-
|
| 1595 |
-
"transformer.h.69.mlp.dense_4h_to_h.g_idx": "model-
|
| 1596 |
-
"transformer.h.69.mlp.dense_4h_to_h.qweight": "model-
|
| 1597 |
-
"transformer.h.69.mlp.dense_4h_to_h.qzeros": "model-
|
| 1598 |
-
"transformer.h.69.mlp.dense_4h_to_h.scales": "model-
|
| 1599 |
-
"transformer.h.69.mlp.dense_h_to_4h.bias": "model-
|
| 1600 |
-
"transformer.h.69.mlp.dense_h_to_4h.g_idx": "model-
|
| 1601 |
-
"transformer.h.69.mlp.dense_h_to_4h.qweight": "model-
|
| 1602 |
-
"transformer.h.69.mlp.dense_h_to_4h.qzeros": "model-
|
| 1603 |
-
"transformer.h.69.mlp.dense_h_to_4h.scales": "model-
|
| 1604 |
-
"transformer.h.69.self_attention.dense.bias": "model-
|
| 1605 |
-
"transformer.h.69.self_attention.dense.g_idx": "model-
|
| 1606 |
-
"transformer.h.69.self_attention.dense.qweight": "model-
|
| 1607 |
-
"transformer.h.69.self_attention.dense.qzeros": "model-
|
| 1608 |
-
"transformer.h.69.self_attention.dense.scales": "model-
|
| 1609 |
-
"transformer.h.69.self_attention.query_key_value.bias": "model-
|
| 1610 |
-
"transformer.h.69.self_attention.query_key_value.g_idx": "model-
|
| 1611 |
-
"transformer.h.69.self_attention.query_key_value.qweight": "model-
|
| 1612 |
-
"transformer.h.69.self_attention.query_key_value.qzeros": "model-
|
| 1613 |
-
"transformer.h.69.self_attention.query_key_value.scales": "model-
|
| 1614 |
"transformer.h.7.ln_attn.bias": "model-00002-of-00010.safetensors",
|
| 1615 |
"transformer.h.7.ln_attn.weight": "model-00002-of-00010.safetensors",
|
| 1616 |
"transformer.h.7.ln_mlp.bias": "model-00002-of-00010.safetensors",
|
|
@@ -1625,16 +1625,16 @@
|
|
| 1625 |
"transformer.h.7.mlp.dense_h_to_4h.qweight": "model-00002-of-00010.safetensors",
|
| 1626 |
"transformer.h.7.mlp.dense_h_to_4h.qzeros": "model-00002-of-00010.safetensors",
|
| 1627 |
"transformer.h.7.mlp.dense_h_to_4h.scales": "model-00002-of-00010.safetensors",
|
| 1628 |
-
"transformer.h.7.self_attention.dense.bias": "model-
|
| 1629 |
-
"transformer.h.7.self_attention.dense.g_idx": "model-
|
| 1630 |
-
"transformer.h.7.self_attention.dense.qweight": "model-
|
| 1631 |
-
"transformer.h.7.self_attention.dense.qzeros": "model-
|
| 1632 |
-
"transformer.h.7.self_attention.dense.scales": "model-
|
| 1633 |
-
"transformer.h.7.self_attention.query_key_value.bias": "model-
|
| 1634 |
-
"transformer.h.7.self_attention.query_key_value.g_idx": "model-
|
| 1635 |
-
"transformer.h.7.self_attention.query_key_value.qweight": "model-
|
| 1636 |
-
"transformer.h.7.self_attention.query_key_value.qzeros": "model-
|
| 1637 |
-
"transformer.h.7.self_attention.query_key_value.scales": "model-
|
| 1638 |
"transformer.h.70.ln_attn.bias": "model-00009-of-00010.safetensors",
|
| 1639 |
"transformer.h.70.ln_attn.weight": "model-00009-of-00010.safetensors",
|
| 1640 |
"transformer.h.70.ln_mlp.bias": "model-00009-of-00010.safetensors",
|
|
@@ -1649,16 +1649,16 @@
|
|
| 1649 |
"transformer.h.70.mlp.dense_h_to_4h.qweight": "model-00009-of-00010.safetensors",
|
| 1650 |
"transformer.h.70.mlp.dense_h_to_4h.qzeros": "model-00009-of-00010.safetensors",
|
| 1651 |
"transformer.h.70.mlp.dense_h_to_4h.scales": "model-00009-of-00010.safetensors",
|
| 1652 |
-
"transformer.h.70.self_attention.dense.bias": "model-
|
| 1653 |
-
"transformer.h.70.self_attention.dense.g_idx": "model-
|
| 1654 |
-
"transformer.h.70.self_attention.dense.qweight": "model-
|
| 1655 |
-
"transformer.h.70.self_attention.dense.qzeros": "model-
|
| 1656 |
-
"transformer.h.70.self_attention.dense.scales": "model-
|
| 1657 |
-
"transformer.h.70.self_attention.query_key_value.bias": "model-
|
| 1658 |
-
"transformer.h.70.self_attention.query_key_value.g_idx": "model-
|
| 1659 |
-
"transformer.h.70.self_attention.query_key_value.qweight": "model-
|
| 1660 |
-
"transformer.h.70.self_attention.query_key_value.qzeros": "model-
|
| 1661 |
-
"transformer.h.70.self_attention.query_key_value.scales": "model-
|
| 1662 |
"transformer.h.71.ln_attn.bias": "model-00009-of-00010.safetensors",
|
| 1663 |
"transformer.h.71.ln_attn.weight": "model-00009-of-00010.safetensors",
|
| 1664 |
"transformer.h.71.ln_mlp.bias": "model-00009-of-00010.safetensors",
|
|
@@ -1755,20 +1755,20 @@
|
|
| 1755 |
"transformer.h.74.self_attention.query_key_value.qweight": "model-00009-of-00010.safetensors",
|
| 1756 |
"transformer.h.74.self_attention.query_key_value.qzeros": "model-00009-of-00010.safetensors",
|
| 1757 |
"transformer.h.74.self_attention.query_key_value.scales": "model-00009-of-00010.safetensors",
|
| 1758 |
-
"transformer.h.75.ln_attn.bias": "model-
|
| 1759 |
-
"transformer.h.75.ln_attn.weight": "model-
|
| 1760 |
-
"transformer.h.75.ln_mlp.bias": "model-
|
| 1761 |
-
"transformer.h.75.ln_mlp.weight": "model-
|
| 1762 |
-
"transformer.h.75.mlp.dense_4h_to_h.bias": "model-
|
| 1763 |
-
"transformer.h.75.mlp.dense_4h_to_h.g_idx": "model-
|
| 1764 |
-
"transformer.h.75.mlp.dense_4h_to_h.qweight": "model-
|
| 1765 |
-
"transformer.h.75.mlp.dense_4h_to_h.qzeros": "model-
|
| 1766 |
-
"transformer.h.75.mlp.dense_4h_to_h.scales": "model-
|
| 1767 |
-
"transformer.h.75.mlp.dense_h_to_4h.bias": "model-
|
| 1768 |
-
"transformer.h.75.mlp.dense_h_to_4h.g_idx": "model-
|
| 1769 |
-
"transformer.h.75.mlp.dense_h_to_4h.qweight": "model-
|
| 1770 |
-
"transformer.h.75.mlp.dense_h_to_4h.qzeros": "model-
|
| 1771 |
-
"transformer.h.75.mlp.dense_h_to_4h.scales": "model-
|
| 1772 |
"transformer.h.75.self_attention.dense.bias": "model-00009-of-00010.safetensors",
|
| 1773 |
"transformer.h.75.self_attention.dense.g_idx": "model-00009-of-00010.safetensors",
|
| 1774 |
"transformer.h.75.self_attention.dense.qweight": "model-00009-of-00010.safetensors",
|
|
@@ -1779,78 +1779,78 @@
|
|
| 1779 |
"transformer.h.75.self_attention.query_key_value.qweight": "model-00009-of-00010.safetensors",
|
| 1780 |
"transformer.h.75.self_attention.query_key_value.qzeros": "model-00009-of-00010.safetensors",
|
| 1781 |
"transformer.h.75.self_attention.query_key_value.scales": "model-00009-of-00010.safetensors",
|
| 1782 |
-
"transformer.h.76.ln_attn.bias": "model-
|
| 1783 |
-
"transformer.h.76.ln_attn.weight": "model-
|
| 1784 |
-
"transformer.h.76.ln_mlp.bias": "model-
|
| 1785 |
-
"transformer.h.76.ln_mlp.weight": "model-
|
| 1786 |
-
"transformer.h.76.mlp.dense_4h_to_h.bias": "model-
|
| 1787 |
-
"transformer.h.76.mlp.dense_4h_to_h.g_idx": "model-
|
| 1788 |
-
"transformer.h.76.mlp.dense_4h_to_h.qweight": "model-
|
| 1789 |
-
"transformer.h.76.mlp.dense_4h_to_h.qzeros": "model-
|
| 1790 |
-
"transformer.h.76.mlp.dense_4h_to_h.scales": "model-
|
| 1791 |
-
"transformer.h.76.mlp.dense_h_to_4h.bias": "model-
|
| 1792 |
-
"transformer.h.76.mlp.dense_h_to_4h.g_idx": "model-
|
| 1793 |
-
"transformer.h.76.mlp.dense_h_to_4h.qweight": "model-
|
| 1794 |
-
"transformer.h.76.mlp.dense_h_to_4h.qzeros": "model-
|
| 1795 |
-
"transformer.h.76.mlp.dense_h_to_4h.scales": "model-
|
| 1796 |
-
"transformer.h.76.self_attention.dense.bias": "model-
|
| 1797 |
-
"transformer.h.76.self_attention.dense.g_idx": "model-
|
| 1798 |
-
"transformer.h.76.self_attention.dense.qweight": "model-
|
| 1799 |
-
"transformer.h.76.self_attention.dense.qzeros": "model-
|
| 1800 |
-
"transformer.h.76.self_attention.dense.scales": "model-
|
| 1801 |
-
"transformer.h.76.self_attention.query_key_value.bias": "model-
|
| 1802 |
-
"transformer.h.76.self_attention.query_key_value.g_idx": "model-
|
| 1803 |
-
"transformer.h.76.self_attention.query_key_value.qweight": "model-
|
| 1804 |
-
"transformer.h.76.self_attention.query_key_value.qzeros": "model-
|
| 1805 |
-
"transformer.h.76.self_attention.query_key_value.scales": "model-
|
| 1806 |
-
"transformer.h.77.ln_attn.bias": "model-
|
| 1807 |
-
"transformer.h.77.ln_attn.weight": "model-
|
| 1808 |
-
"transformer.h.77.ln_mlp.bias": "model-
|
| 1809 |
-
"transformer.h.77.ln_mlp.weight": "model-
|
| 1810 |
-
"transformer.h.77.mlp.dense_4h_to_h.bias": "model-
|
| 1811 |
-
"transformer.h.77.mlp.dense_4h_to_h.g_idx": "model-
|
| 1812 |
-
"transformer.h.77.mlp.dense_4h_to_h.qweight": "model-
|
| 1813 |
-
"transformer.h.77.mlp.dense_4h_to_h.qzeros": "model-
|
| 1814 |
-
"transformer.h.77.mlp.dense_4h_to_h.scales": "model-
|
| 1815 |
-
"transformer.h.77.mlp.dense_h_to_4h.bias": "model-
|
| 1816 |
-
"transformer.h.77.mlp.dense_h_to_4h.g_idx": "model-
|
| 1817 |
-
"transformer.h.77.mlp.dense_h_to_4h.qweight": "model-
|
| 1818 |
-
"transformer.h.77.mlp.dense_h_to_4h.qzeros": "model-
|
| 1819 |
-
"transformer.h.77.mlp.dense_h_to_4h.scales": "model-
|
| 1820 |
-
"transformer.h.77.self_attention.dense.bias": "model-
|
| 1821 |
-
"transformer.h.77.self_attention.dense.g_idx": "model-
|
| 1822 |
-
"transformer.h.77.self_attention.dense.qweight": "model-
|
| 1823 |
-
"transformer.h.77.self_attention.dense.qzeros": "model-
|
| 1824 |
-
"transformer.h.77.self_attention.dense.scales": "model-
|
| 1825 |
-
"transformer.h.77.self_attention.query_key_value.bias": "model-
|
| 1826 |
-
"transformer.h.77.self_attention.query_key_value.g_idx": "model-
|
| 1827 |
-
"transformer.h.77.self_attention.query_key_value.qweight": "model-
|
| 1828 |
-
"transformer.h.77.self_attention.query_key_value.qzeros": "model-
|
| 1829 |
-
"transformer.h.77.self_attention.query_key_value.scales": "model-
|
| 1830 |
-
"transformer.h.78.ln_attn.bias": "model-
|
| 1831 |
-
"transformer.h.78.ln_attn.weight": "model-
|
| 1832 |
-
"transformer.h.78.ln_mlp.bias": "model-
|
| 1833 |
-
"transformer.h.78.ln_mlp.weight": "model-
|
| 1834 |
-
"transformer.h.78.mlp.dense_4h_to_h.bias": "model-
|
| 1835 |
-
"transformer.h.78.mlp.dense_4h_to_h.g_idx": "model-
|
| 1836 |
-
"transformer.h.78.mlp.dense_4h_to_h.qweight": "model-
|
| 1837 |
-
"transformer.h.78.mlp.dense_4h_to_h.qzeros": "model-
|
| 1838 |
-
"transformer.h.78.mlp.dense_4h_to_h.scales": "model-
|
| 1839 |
-
"transformer.h.78.mlp.dense_h_to_4h.bias": "model-
|
| 1840 |
-
"transformer.h.78.mlp.dense_h_to_4h.g_idx": "model-
|
| 1841 |
-
"transformer.h.78.mlp.dense_h_to_4h.qweight": "model-
|
| 1842 |
-
"transformer.h.78.mlp.dense_h_to_4h.qzeros": "model-
|
| 1843 |
-
"transformer.h.78.mlp.dense_h_to_4h.scales": "model-
|
| 1844 |
-
"transformer.h.78.self_attention.dense.bias": "model-
|
| 1845 |
-
"transformer.h.78.self_attention.dense.g_idx": "model-
|
| 1846 |
-
"transformer.h.78.self_attention.dense.qweight": "model-
|
| 1847 |
-
"transformer.h.78.self_attention.dense.qzeros": "model-
|
| 1848 |
-
"transformer.h.78.self_attention.dense.scales": "model-
|
| 1849 |
-
"transformer.h.78.self_attention.query_key_value.bias": "model-
|
| 1850 |
-
"transformer.h.78.self_attention.query_key_value.g_idx": "model-
|
| 1851 |
-
"transformer.h.78.self_attention.query_key_value.qweight": "model-
|
| 1852 |
-
"transformer.h.78.self_attention.query_key_value.qzeros": "model-
|
| 1853 |
-
"transformer.h.78.self_attention.query_key_value.scales": "model-
|
| 1854 |
"transformer.h.79.ln_attn.bias": "model-00010-of-00010.safetensors",
|
| 1855 |
"transformer.h.79.ln_attn.weight": "model-00010-of-00010.safetensors",
|
| 1856 |
"transformer.h.79.ln_mlp.bias": "model-00010-of-00010.safetensors",
|
|
@@ -1865,16 +1865,16 @@
|
|
| 1865 |
"transformer.h.79.mlp.dense_h_to_4h.qweight": "model-00010-of-00010.safetensors",
|
| 1866 |
"transformer.h.79.mlp.dense_h_to_4h.qzeros": "model-00010-of-00010.safetensors",
|
| 1867 |
"transformer.h.79.mlp.dense_h_to_4h.scales": "model-00010-of-00010.safetensors",
|
| 1868 |
-
"transformer.h.79.self_attention.dense.bias": "model-
|
| 1869 |
-
"transformer.h.79.self_attention.dense.g_idx": "model-
|
| 1870 |
-
"transformer.h.79.self_attention.dense.qweight": "model-
|
| 1871 |
-
"transformer.h.79.self_attention.dense.qzeros": "model-
|
| 1872 |
-
"transformer.h.79.self_attention.dense.scales": "model-
|
| 1873 |
-
"transformer.h.79.self_attention.query_key_value.bias": "model-
|
| 1874 |
-
"transformer.h.79.self_attention.query_key_value.g_idx": "model-
|
| 1875 |
-
"transformer.h.79.self_attention.query_key_value.qweight": "model-
|
| 1876 |
-
"transformer.h.79.self_attention.query_key_value.qzeros": "model-
|
| 1877 |
-
"transformer.h.79.self_attention.query_key_value.scales": "model-
|
| 1878 |
"transformer.h.8.ln_attn.bias": "model-00002-of-00010.safetensors",
|
| 1879 |
"transformer.h.8.ln_attn.weight": "model-00002-of-00010.safetensors",
|
| 1880 |
"transformer.h.8.ln_mlp.bias": "model-00002-of-00010.safetensors",
|
|
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
+
"total_size": 94252476416
|
| 4 |
},
|
| 5 |
"weight_map": {
|
| 6 |
"transformer.h.0.ln_attn.bias": "model-00001-of-00010.safetensors",
|
|
|
|
| 171 |
"transformer.h.14.self_attention.query_key_value.qweight": "model-00002-of-00010.safetensors",
|
| 172 |
"transformer.h.14.self_attention.query_key_value.qzeros": "model-00002-of-00010.safetensors",
|
| 173 |
"transformer.h.14.self_attention.query_key_value.scales": "model-00002-of-00010.safetensors",
|
| 174 |
+
"transformer.h.15.ln_attn.bias": "model-00003-of-00010.safetensors",
|
| 175 |
+
"transformer.h.15.ln_attn.weight": "model-00003-of-00010.safetensors",
|
| 176 |
+
"transformer.h.15.ln_mlp.bias": "model-00003-of-00010.safetensors",
|
| 177 |
+
"transformer.h.15.ln_mlp.weight": "model-00003-of-00010.safetensors",
|
| 178 |
"transformer.h.15.mlp.dense_4h_to_h.bias": "model-00002-of-00010.safetensors",
|
| 179 |
"transformer.h.15.mlp.dense_4h_to_h.g_idx": "model-00002-of-00010.safetensors",
|
| 180 |
"transformer.h.15.mlp.dense_4h_to_h.qweight": "model-00002-of-00010.safetensors",
|
| 181 |
"transformer.h.15.mlp.dense_4h_to_h.qzeros": "model-00002-of-00010.safetensors",
|
| 182 |
"transformer.h.15.mlp.dense_4h_to_h.scales": "model-00002-of-00010.safetensors",
|
| 183 |
+
"transformer.h.15.mlp.dense_h_to_4h.bias": "model-00003-of-00010.safetensors",
|
| 184 |
+
"transformer.h.15.mlp.dense_h_to_4h.g_idx": "model-00003-of-00010.safetensors",
|
| 185 |
+
"transformer.h.15.mlp.dense_h_to_4h.qweight": "model-00003-of-00010.safetensors",
|
| 186 |
+
"transformer.h.15.mlp.dense_h_to_4h.qzeros": "model-00003-of-00010.safetensors",
|
| 187 |
+
"transformer.h.15.mlp.dense_h_to_4h.scales": "model-00003-of-00010.safetensors",
|
| 188 |
"transformer.h.15.self_attention.dense.bias": "model-00002-of-00010.safetensors",
|
| 189 |
"transformer.h.15.self_attention.dense.g_idx": "model-00002-of-00010.safetensors",
|
| 190 |
"transformer.h.15.self_attention.dense.qweight": "model-00002-of-00010.safetensors",
|
|
|
|
| 209 |
"transformer.h.16.mlp.dense_h_to_4h.qweight": "model-00003-of-00010.safetensors",
|
| 210 |
"transformer.h.16.mlp.dense_h_to_4h.qzeros": "model-00003-of-00010.safetensors",
|
| 211 |
"transformer.h.16.mlp.dense_h_to_4h.scales": "model-00003-of-00010.safetensors",
|
| 212 |
+
"transformer.h.16.self_attention.dense.bias": "model-00003-of-00010.safetensors",
|
| 213 |
+
"transformer.h.16.self_attention.dense.g_idx": "model-00003-of-00010.safetensors",
|
| 214 |
+
"transformer.h.16.self_attention.dense.qweight": "model-00003-of-00010.safetensors",
|
| 215 |
+
"transformer.h.16.self_attention.dense.qzeros": "model-00003-of-00010.safetensors",
|
| 216 |
+
"transformer.h.16.self_attention.dense.scales": "model-00003-of-00010.safetensors",
|
| 217 |
+
"transformer.h.16.self_attention.query_key_value.bias": "model-00003-of-00010.safetensors",
|
| 218 |
+
"transformer.h.16.self_attention.query_key_value.g_idx": "model-00003-of-00010.safetensors",
|
| 219 |
+
"transformer.h.16.self_attention.query_key_value.qweight": "model-00003-of-00010.safetensors",
|
| 220 |
+
"transformer.h.16.self_attention.query_key_value.qzeros": "model-00003-of-00010.safetensors",
|
| 221 |
+
"transformer.h.16.self_attention.query_key_value.scales": "model-00003-of-00010.safetensors",
|
| 222 |
"transformer.h.17.ln_attn.bias": "model-00003-of-00010.safetensors",
|
| 223 |
"transformer.h.17.ln_attn.weight": "model-00003-of-00010.safetensors",
|
| 224 |
"transformer.h.17.ln_mlp.bias": "model-00003-of-00010.safetensors",
|
|
|
|
| 411 |
"transformer.h.23.self_attention.query_key_value.qweight": "model-00003-of-00010.safetensors",
|
| 412 |
"transformer.h.23.self_attention.query_key_value.qzeros": "model-00003-of-00010.safetensors",
|
| 413 |
"transformer.h.23.self_attention.query_key_value.scales": "model-00003-of-00010.safetensors",
|
| 414 |
+
"transformer.h.24.ln_attn.bias": "model-00004-of-00010.safetensors",
|
| 415 |
+
"transformer.h.24.ln_attn.weight": "model-00004-of-00010.safetensors",
|
| 416 |
+
"transformer.h.24.ln_mlp.bias": "model-00004-of-00010.safetensors",
|
| 417 |
+
"transformer.h.24.ln_mlp.weight": "model-00004-of-00010.safetensors",
|
| 418 |
+
"transformer.h.24.mlp.dense_4h_to_h.bias": "model-00004-of-00010.safetensors",
|
| 419 |
+
"transformer.h.24.mlp.dense_4h_to_h.g_idx": "model-00004-of-00010.safetensors",
|
| 420 |
+
"transformer.h.24.mlp.dense_4h_to_h.qweight": "model-00004-of-00010.safetensors",
|
| 421 |
+
"transformer.h.24.mlp.dense_4h_to_h.qzeros": "model-00004-of-00010.safetensors",
|
| 422 |
+
"transformer.h.24.mlp.dense_4h_to_h.scales": "model-00004-of-00010.safetensors",
|
| 423 |
+
"transformer.h.24.mlp.dense_h_to_4h.bias": "model-00004-of-00010.safetensors",
|
| 424 |
+
"transformer.h.24.mlp.dense_h_to_4h.g_idx": "model-00004-of-00010.safetensors",
|
| 425 |
+
"transformer.h.24.mlp.dense_h_to_4h.qweight": "model-00004-of-00010.safetensors",
|
| 426 |
+
"transformer.h.24.mlp.dense_h_to_4h.qzeros": "model-00004-of-00010.safetensors",
|
| 427 |
+
"transformer.h.24.mlp.dense_h_to_4h.scales": "model-00004-of-00010.safetensors",
|
| 428 |
"transformer.h.24.self_attention.dense.bias": "model-00003-of-00010.safetensors",
|
| 429 |
"transformer.h.24.self_attention.dense.g_idx": "model-00003-of-00010.safetensors",
|
| 430 |
"transformer.h.24.self_attention.dense.qweight": "model-00003-of-00010.safetensors",
|
|
|
|
| 449 |
"transformer.h.25.mlp.dense_h_to_4h.qweight": "model-00004-of-00010.safetensors",
|
| 450 |
"transformer.h.25.mlp.dense_h_to_4h.qzeros": "model-00004-of-00010.safetensors",
|
| 451 |
"transformer.h.25.mlp.dense_h_to_4h.scales": "model-00004-of-00010.safetensors",
|
| 452 |
+
"transformer.h.25.self_attention.dense.bias": "model-00004-of-00010.safetensors",
|
| 453 |
+
"transformer.h.25.self_attention.dense.g_idx": "model-00004-of-00010.safetensors",
|
| 454 |
+
"transformer.h.25.self_attention.dense.qweight": "model-00004-of-00010.safetensors",
|
| 455 |
+
"transformer.h.25.self_attention.dense.qzeros": "model-00004-of-00010.safetensors",
|
| 456 |
+
"transformer.h.25.self_attention.dense.scales": "model-00004-of-00010.safetensors",
|
| 457 |
+
"transformer.h.25.self_attention.query_key_value.bias": "model-00004-of-00010.safetensors",
|
| 458 |
+
"transformer.h.25.self_attention.query_key_value.g_idx": "model-00004-of-00010.safetensors",
|
| 459 |
+
"transformer.h.25.self_attention.query_key_value.qweight": "model-00004-of-00010.safetensors",
|
| 460 |
+
"transformer.h.25.self_attention.query_key_value.qzeros": "model-00004-of-00010.safetensors",
|
| 461 |
+
"transformer.h.25.self_attention.query_key_value.scales": "model-00004-of-00010.safetensors",
|
| 462 |
"transformer.h.26.ln_attn.bias": "model-00004-of-00010.safetensors",
|
| 463 |
"transformer.h.26.ln_attn.weight": "model-00004-of-00010.safetensors",
|
| 464 |
"transformer.h.26.ln_mlp.bias": "model-00004-of-00010.safetensors",
|
|
|
|
| 627 |
"transformer.h.31.self_attention.query_key_value.qweight": "model-00004-of-00010.safetensors",
|
| 628 |
"transformer.h.31.self_attention.query_key_value.qzeros": "model-00004-of-00010.safetensors",
|
| 629 |
"transformer.h.31.self_attention.query_key_value.scales": "model-00004-of-00010.safetensors",
|
| 630 |
+
"transformer.h.32.ln_attn.bias": "model-00005-of-00010.safetensors",
|
| 631 |
+
"transformer.h.32.ln_attn.weight": "model-00005-of-00010.safetensors",
|
| 632 |
+
"transformer.h.32.ln_mlp.bias": "model-00005-of-00010.safetensors",
|
| 633 |
+
"transformer.h.32.ln_mlp.weight": "model-00005-of-00010.safetensors",
|
| 634 |
"transformer.h.32.mlp.dense_4h_to_h.bias": "model-00004-of-00010.safetensors",
|
| 635 |
"transformer.h.32.mlp.dense_4h_to_h.g_idx": "model-00004-of-00010.safetensors",
|
| 636 |
"transformer.h.32.mlp.dense_4h_to_h.qweight": "model-00004-of-00010.safetensors",
|
| 637 |
"transformer.h.32.mlp.dense_4h_to_h.qzeros": "model-00004-of-00010.safetensors",
|
| 638 |
"transformer.h.32.mlp.dense_4h_to_h.scales": "model-00004-of-00010.safetensors",
|
| 639 |
+
"transformer.h.32.mlp.dense_h_to_4h.bias": "model-00005-of-00010.safetensors",
|
| 640 |
+
"transformer.h.32.mlp.dense_h_to_4h.g_idx": "model-00005-of-00010.safetensors",
|
| 641 |
+
"transformer.h.32.mlp.dense_h_to_4h.qweight": "model-00005-of-00010.safetensors",
|
| 642 |
+
"transformer.h.32.mlp.dense_h_to_4h.qzeros": "model-00005-of-00010.safetensors",
|
| 643 |
+
"transformer.h.32.mlp.dense_h_to_4h.scales": "model-00005-of-00010.safetensors",
|
| 644 |
"transformer.h.32.self_attention.dense.bias": "model-00004-of-00010.safetensors",
|
| 645 |
"transformer.h.32.self_attention.dense.g_idx": "model-00004-of-00010.safetensors",
|
| 646 |
"transformer.h.32.self_attention.dense.qweight": "model-00004-of-00010.safetensors",
|
|
|
|
| 651 |
"transformer.h.32.self_attention.query_key_value.qweight": "model-00004-of-00010.safetensors",
|
| 652 |
"transformer.h.32.self_attention.query_key_value.qzeros": "model-00004-of-00010.safetensors",
|
| 653 |
"transformer.h.32.self_attention.query_key_value.scales": "model-00004-of-00010.safetensors",
|
| 654 |
+
"transformer.h.33.ln_attn.bias": "model-00005-of-00010.safetensors",
|
| 655 |
+
"transformer.h.33.ln_attn.weight": "model-00005-of-00010.safetensors",
|
| 656 |
+
"transformer.h.33.ln_mlp.bias": "model-00005-of-00010.safetensors",
|
| 657 |
+
"transformer.h.33.ln_mlp.weight": "model-00005-of-00010.safetensors",
|
| 658 |
+
"transformer.h.33.mlp.dense_4h_to_h.bias": "model-00005-of-00010.safetensors",
|
| 659 |
+
"transformer.h.33.mlp.dense_4h_to_h.g_idx": "model-00005-of-00010.safetensors",
|
| 660 |
+
"transformer.h.33.mlp.dense_4h_to_h.qweight": "model-00005-of-00010.safetensors",
|
| 661 |
+
"transformer.h.33.mlp.dense_4h_to_h.qzeros": "model-00005-of-00010.safetensors",
|
| 662 |
+
"transformer.h.33.mlp.dense_4h_to_h.scales": "model-00005-of-00010.safetensors",
|
| 663 |
+
"transformer.h.33.mlp.dense_h_to_4h.bias": "model-00005-of-00010.safetensors",
|
| 664 |
+
"transformer.h.33.mlp.dense_h_to_4h.g_idx": "model-00005-of-00010.safetensors",
|
| 665 |
+
"transformer.h.33.mlp.dense_h_to_4h.qweight": "model-00005-of-00010.safetensors",
|
| 666 |
+
"transformer.h.33.mlp.dense_h_to_4h.qzeros": "model-00005-of-00010.safetensors",
|
| 667 |
+
"transformer.h.33.mlp.dense_h_to_4h.scales": "model-00005-of-00010.safetensors",
|
| 668 |
+
"transformer.h.33.self_attention.dense.bias": "model-00005-of-00010.safetensors",
|
| 669 |
+
"transformer.h.33.self_attention.dense.g_idx": "model-00005-of-00010.safetensors",
|
| 670 |
+
"transformer.h.33.self_attention.dense.qweight": "model-00005-of-00010.safetensors",
|
| 671 |
+
"transformer.h.33.self_attention.dense.qzeros": "model-00005-of-00010.safetensors",
|
| 672 |
+
"transformer.h.33.self_attention.dense.scales": "model-00005-of-00010.safetensors",
|
| 673 |
+
"transformer.h.33.self_attention.query_key_value.bias": "model-00005-of-00010.safetensors",
|
| 674 |
+
"transformer.h.33.self_attention.query_key_value.g_idx": "model-00005-of-00010.safetensors",
|
| 675 |
+
"transformer.h.33.self_attention.query_key_value.qweight": "model-00005-of-00010.safetensors",
|
| 676 |
+
"transformer.h.33.self_attention.query_key_value.qzeros": "model-00005-of-00010.safetensors",
|
| 677 |
+
"transformer.h.33.self_attention.query_key_value.scales": "model-00005-of-00010.safetensors",
|
| 678 |
"transformer.h.34.ln_attn.bias": "model-00005-of-00010.safetensors",
|
| 679 |
"transformer.h.34.ln_attn.weight": "model-00005-of-00010.safetensors",
|
| 680 |
"transformer.h.34.ln_mlp.bias": "model-00005-of-00010.safetensors",
|
|
|
|
| 689 |
"transformer.h.34.mlp.dense_h_to_4h.qweight": "model-00005-of-00010.safetensors",
|
| 690 |
"transformer.h.34.mlp.dense_h_to_4h.qzeros": "model-00005-of-00010.safetensors",
|
| 691 |
"transformer.h.34.mlp.dense_h_to_4h.scales": "model-00005-of-00010.safetensors",
|
| 692 |
+
"transformer.h.34.self_attention.dense.bias": "model-00005-of-00010.safetensors",
|
| 693 |
+
"transformer.h.34.self_attention.dense.g_idx": "model-00005-of-00010.safetensors",
|
| 694 |
+
"transformer.h.34.self_attention.dense.qweight": "model-00005-of-00010.safetensors",
|
| 695 |
+
"transformer.h.34.self_attention.dense.qzeros": "model-00005-of-00010.safetensors",
|
| 696 |
+
"transformer.h.34.self_attention.dense.scales": "model-00005-of-00010.safetensors",
|
| 697 |
+
"transformer.h.34.self_attention.query_key_value.bias": "model-00005-of-00010.safetensors",
|
| 698 |
+
"transformer.h.34.self_attention.query_key_value.g_idx": "model-00005-of-00010.safetensors",
|
| 699 |
+
"transformer.h.34.self_attention.query_key_value.qweight": "model-00005-of-00010.safetensors",
|
| 700 |
+
"transformer.h.34.self_attention.query_key_value.qzeros": "model-00005-of-00010.safetensors",
|
| 701 |
+
"transformer.h.34.self_attention.query_key_value.scales": "model-00005-of-00010.safetensors",
|
| 702 |
"transformer.h.35.ln_attn.bias": "model-00005-of-00010.safetensors",
|
| 703 |
"transformer.h.35.ln_attn.weight": "model-00005-of-00010.safetensors",
|
| 704 |
"transformer.h.35.ln_mlp.bias": "model-00005-of-00010.safetensors",
|
|
|
|
| 867 |
"transformer.h.40.self_attention.query_key_value.qweight": "model-00005-of-00010.safetensors",
|
| 868 |
"transformer.h.40.self_attention.query_key_value.qzeros": "model-00005-of-00010.safetensors",
|
| 869 |
"transformer.h.40.self_attention.query_key_value.scales": "model-00005-of-00010.safetensors",
|
| 870 |
+
"transformer.h.41.ln_attn.bias": "model-00006-of-00010.safetensors",
|
| 871 |
+
"transformer.h.41.ln_attn.weight": "model-00006-of-00010.safetensors",
|
| 872 |
+
"transformer.h.41.ln_mlp.bias": "model-00006-of-00010.safetensors",
|
| 873 |
+
"transformer.h.41.ln_mlp.weight": "model-00006-of-00010.safetensors",
|
| 874 |
+
"transformer.h.41.mlp.dense_4h_to_h.bias": "model-00006-of-00010.safetensors",
|
| 875 |
+
"transformer.h.41.mlp.dense_4h_to_h.g_idx": "model-00006-of-00010.safetensors",
|
| 876 |
+
"transformer.h.41.mlp.dense_4h_to_h.qweight": "model-00006-of-00010.safetensors",
|
| 877 |
+
"transformer.h.41.mlp.dense_4h_to_h.qzeros": "model-00006-of-00010.safetensors",
|
| 878 |
+
"transformer.h.41.mlp.dense_4h_to_h.scales": "model-00006-of-00010.safetensors",
|
| 879 |
+
"transformer.h.41.mlp.dense_h_to_4h.bias": "model-00006-of-00010.safetensors",
|
| 880 |
+
"transformer.h.41.mlp.dense_h_to_4h.g_idx": "model-00006-of-00010.safetensors",
|
| 881 |
+
"transformer.h.41.mlp.dense_h_to_4h.qweight": "model-00006-of-00010.safetensors",
|
| 882 |
+
"transformer.h.41.mlp.dense_h_to_4h.qzeros": "model-00006-of-00010.safetensors",
|
| 883 |
+
"transformer.h.41.mlp.dense_h_to_4h.scales": "model-00006-of-00010.safetensors",
|
| 884 |
"transformer.h.41.self_attention.dense.bias": "model-00005-of-00010.safetensors",
|
| 885 |
"transformer.h.41.self_attention.dense.g_idx": "model-00005-of-00010.safetensors",
|
| 886 |
"transformer.h.41.self_attention.dense.qweight": "model-00005-of-00010.safetensors",
|
|
|
|
| 891 |
"transformer.h.41.self_attention.query_key_value.qweight": "model-00005-of-00010.safetensors",
|
| 892 |
"transformer.h.41.self_attention.query_key_value.qzeros": "model-00005-of-00010.safetensors",
|
| 893 |
"transformer.h.41.self_attention.query_key_value.scales": "model-00005-of-00010.safetensors",
|
| 894 |
+
"transformer.h.42.ln_attn.bias": "model-00006-of-00010.safetensors",
|
| 895 |
+
"transformer.h.42.ln_attn.weight": "model-00006-of-00010.safetensors",
|
| 896 |
+
"transformer.h.42.ln_mlp.bias": "model-00006-of-00010.safetensors",
|
| 897 |
+
"transformer.h.42.ln_mlp.weight": "model-00006-of-00010.safetensors",
|
| 898 |
+
"transformer.h.42.mlp.dense_4h_to_h.bias": "model-00006-of-00010.safetensors",
|
| 899 |
+
"transformer.h.42.mlp.dense_4h_to_h.g_idx": "model-00006-of-00010.safetensors",
|
| 900 |
+
"transformer.h.42.mlp.dense_4h_to_h.qweight": "model-00006-of-00010.safetensors",
|
| 901 |
+
"transformer.h.42.mlp.dense_4h_to_h.qzeros": "model-00006-of-00010.safetensors",
|
| 902 |
+
"transformer.h.42.mlp.dense_4h_to_h.scales": "model-00006-of-00010.safetensors",
|
| 903 |
+
"transformer.h.42.mlp.dense_h_to_4h.bias": "model-00006-of-00010.safetensors",
|
| 904 |
+
"transformer.h.42.mlp.dense_h_to_4h.g_idx": "model-00006-of-00010.safetensors",
|
| 905 |
+
"transformer.h.42.mlp.dense_h_to_4h.qweight": "model-00006-of-00010.safetensors",
|
| 906 |
+
"transformer.h.42.mlp.dense_h_to_4h.qzeros": "model-00006-of-00010.safetensors",
|
| 907 |
+
"transformer.h.42.mlp.dense_h_to_4h.scales": "model-00006-of-00010.safetensors",
|
| 908 |
+
"transformer.h.42.self_attention.dense.bias": "model-00006-of-00010.safetensors",
|
| 909 |
+
"transformer.h.42.self_attention.dense.g_idx": "model-00006-of-00010.safetensors",
|
| 910 |
+
"transformer.h.42.self_attention.dense.qweight": "model-00006-of-00010.safetensors",
|
| 911 |
+
"transformer.h.42.self_attention.dense.qzeros": "model-00006-of-00010.safetensors",
|
| 912 |
+
"transformer.h.42.self_attention.dense.scales": "model-00006-of-00010.safetensors",
|
| 913 |
+
"transformer.h.42.self_attention.query_key_value.bias": "model-00006-of-00010.safetensors",
|
| 914 |
+
"transformer.h.42.self_attention.query_key_value.g_idx": "model-00006-of-00010.safetensors",
|
| 915 |
+
"transformer.h.42.self_attention.query_key_value.qweight": "model-00006-of-00010.safetensors",
|
| 916 |
+
"transformer.h.42.self_attention.query_key_value.qzeros": "model-00006-of-00010.safetensors",
|
| 917 |
+
"transformer.h.42.self_attention.query_key_value.scales": "model-00006-of-00010.safetensors",
|
| 918 |
"transformer.h.43.ln_attn.bias": "model-00006-of-00010.safetensors",
|
| 919 |
"transformer.h.43.ln_attn.weight": "model-00006-of-00010.safetensors",
|
| 920 |
"transformer.h.43.ln_mlp.bias": "model-00006-of-00010.safetensors",
|
|
|
|
| 929 |
"transformer.h.43.mlp.dense_h_to_4h.qweight": "model-00006-of-00010.safetensors",
|
| 930 |
"transformer.h.43.mlp.dense_h_to_4h.qzeros": "model-00006-of-00010.safetensors",
|
| 931 |
"transformer.h.43.mlp.dense_h_to_4h.scales": "model-00006-of-00010.safetensors",
|
| 932 |
+
"transformer.h.43.self_attention.dense.bias": "model-00006-of-00010.safetensors",
|
| 933 |
+
"transformer.h.43.self_attention.dense.g_idx": "model-00006-of-00010.safetensors",
|
| 934 |
+
"transformer.h.43.self_attention.dense.qweight": "model-00006-of-00010.safetensors",
|
| 935 |
+
"transformer.h.43.self_attention.dense.qzeros": "model-00006-of-00010.safetensors",
|
| 936 |
+
"transformer.h.43.self_attention.dense.scales": "model-00006-of-00010.safetensors",
|
| 937 |
+
"transformer.h.43.self_attention.query_key_value.bias": "model-00006-of-00010.safetensors",
|
| 938 |
+
"transformer.h.43.self_attention.query_key_value.g_idx": "model-00006-of-00010.safetensors",
|
| 939 |
+
"transformer.h.43.self_attention.query_key_value.qweight": "model-00006-of-00010.safetensors",
|
| 940 |
+
"transformer.h.43.self_attention.query_key_value.qzeros": "model-00006-of-00010.safetensors",
|
| 941 |
+
"transformer.h.43.self_attention.query_key_value.scales": "model-00006-of-00010.safetensors",
|
| 942 |
"transformer.h.44.ln_attn.bias": "model-00006-of-00010.safetensors",
|
| 943 |
"transformer.h.44.ln_attn.weight": "model-00006-of-00010.safetensors",
|
| 944 |
"transformer.h.44.ln_mlp.bias": "model-00006-of-00010.safetensors",
|
|
|
|
| 1059 |
"transformer.h.48.self_attention.query_key_value.qweight": "model-00006-of-00010.safetensors",
|
| 1060 |
"transformer.h.48.self_attention.query_key_value.qzeros": "model-00006-of-00010.safetensors",
|
| 1061 |
"transformer.h.48.self_attention.query_key_value.scales": "model-00006-of-00010.safetensors",
|
| 1062 |
+
"transformer.h.49.ln_attn.bias": "model-00007-of-00010.safetensors",
|
| 1063 |
+
"transformer.h.49.ln_attn.weight": "model-00007-of-00010.safetensors",
|
| 1064 |
+
"transformer.h.49.ln_mlp.bias": "model-00007-of-00010.safetensors",
|
| 1065 |
+
"transformer.h.49.ln_mlp.weight": "model-00007-of-00010.safetensors",
|
| 1066 |
"transformer.h.49.mlp.dense_4h_to_h.bias": "model-00006-of-00010.safetensors",
|
| 1067 |
"transformer.h.49.mlp.dense_4h_to_h.g_idx": "model-00006-of-00010.safetensors",
|
| 1068 |
"transformer.h.49.mlp.dense_4h_to_h.qweight": "model-00006-of-00010.safetensors",
|
| 1069 |
"transformer.h.49.mlp.dense_4h_to_h.qzeros": "model-00006-of-00010.safetensors",
|
| 1070 |
"transformer.h.49.mlp.dense_4h_to_h.scales": "model-00006-of-00010.safetensors",
|
| 1071 |
+
"transformer.h.49.mlp.dense_h_to_4h.bias": "model-00007-of-00010.safetensors",
|
| 1072 |
+
"transformer.h.49.mlp.dense_h_to_4h.g_idx": "model-00007-of-00010.safetensors",
|
| 1073 |
+
"transformer.h.49.mlp.dense_h_to_4h.qweight": "model-00007-of-00010.safetensors",
|
| 1074 |
+
"transformer.h.49.mlp.dense_h_to_4h.qzeros": "model-00007-of-00010.safetensors",
|
| 1075 |
+
"transformer.h.49.mlp.dense_h_to_4h.scales": "model-00007-of-00010.safetensors",
|
| 1076 |
"transformer.h.49.self_attention.dense.bias": "model-00006-of-00010.safetensors",
|
| 1077 |
"transformer.h.49.self_attention.dense.g_idx": "model-00006-of-00010.safetensors",
|
| 1078 |
"transformer.h.49.self_attention.dense.qweight": "model-00006-of-00010.safetensors",
|
|
|
|
| 1107 |
"transformer.h.5.self_attention.query_key_value.qweight": "model-00001-of-00010.safetensors",
|
| 1108 |
"transformer.h.5.self_attention.query_key_value.qzeros": "model-00001-of-00010.safetensors",
|
| 1109 |
"transformer.h.5.self_attention.query_key_value.scales": "model-00001-of-00010.safetensors",
|
| 1110 |
+
"transformer.h.50.ln_attn.bias": "model-00007-of-00010.safetensors",
|
| 1111 |
+
"transformer.h.50.ln_attn.weight": "model-00007-of-00010.safetensors",
|
| 1112 |
+
"transformer.h.50.ln_mlp.bias": "model-00007-of-00010.safetensors",
|
| 1113 |
+
"transformer.h.50.ln_mlp.weight": "model-00007-of-00010.safetensors",
|
| 1114 |
+
"transformer.h.50.mlp.dense_4h_to_h.bias": "model-00007-of-00010.safetensors",
|
| 1115 |
+
"transformer.h.50.mlp.dense_4h_to_h.g_idx": "model-00007-of-00010.safetensors",
|
| 1116 |
+
"transformer.h.50.mlp.dense_4h_to_h.qweight": "model-00007-of-00010.safetensors",
|
| 1117 |
+
"transformer.h.50.mlp.dense_4h_to_h.qzeros": "model-00007-of-00010.safetensors",
|
| 1118 |
+
"transformer.h.50.mlp.dense_4h_to_h.scales": "model-00007-of-00010.safetensors",
|
| 1119 |
+
"transformer.h.50.mlp.dense_h_to_4h.bias": "model-00007-of-00010.safetensors",
|
| 1120 |
+
"transformer.h.50.mlp.dense_h_to_4h.g_idx": "model-00007-of-00010.safetensors",
|
| 1121 |
+
"transformer.h.50.mlp.dense_h_to_4h.qweight": "model-00007-of-00010.safetensors",
|
| 1122 |
+
"transformer.h.50.mlp.dense_h_to_4h.qzeros": "model-00007-of-00010.safetensors",
|
| 1123 |
+
"transformer.h.50.mlp.dense_h_to_4h.scales": "model-00007-of-00010.safetensors",
|
| 1124 |
+
"transformer.h.50.self_attention.dense.bias": "model-00007-of-00010.safetensors",
|
| 1125 |
+
"transformer.h.50.self_attention.dense.g_idx": "model-00007-of-00010.safetensors",
|
| 1126 |
+
"transformer.h.50.self_attention.dense.qweight": "model-00007-of-00010.safetensors",
|
| 1127 |
+
"transformer.h.50.self_attention.dense.qzeros": "model-00007-of-00010.safetensors",
|
| 1128 |
+
"transformer.h.50.self_attention.dense.scales": "model-00007-of-00010.safetensors",
|
| 1129 |
+
"transformer.h.50.self_attention.query_key_value.bias": "model-00007-of-00010.safetensors",
|
| 1130 |
+
"transformer.h.50.self_attention.query_key_value.g_idx": "model-00007-of-00010.safetensors",
|
| 1131 |
+
"transformer.h.50.self_attention.query_key_value.qweight": "model-00007-of-00010.safetensors",
|
| 1132 |
+
"transformer.h.50.self_attention.query_key_value.qzeros": "model-00007-of-00010.safetensors",
|
| 1133 |
+
"transformer.h.50.self_attention.query_key_value.scales": "model-00007-of-00010.safetensors",
|
| 1134 |
+
"transformer.h.51.ln_attn.bias": "model-00007-of-00010.safetensors",
|
| 1135 |
+
"transformer.h.51.ln_attn.weight": "model-00007-of-00010.safetensors",
|
| 1136 |
+
"transformer.h.51.ln_mlp.bias": "model-00007-of-00010.safetensors",
|
| 1137 |
+
"transformer.h.51.ln_mlp.weight": "model-00007-of-00010.safetensors",
|
| 1138 |
+
"transformer.h.51.mlp.dense_4h_to_h.bias": "model-00007-of-00010.safetensors",
|
| 1139 |
+
"transformer.h.51.mlp.dense_4h_to_h.g_idx": "model-00007-of-00010.safetensors",
|
| 1140 |
+
"transformer.h.51.mlp.dense_4h_to_h.qweight": "model-00007-of-00010.safetensors",
|
| 1141 |
+
"transformer.h.51.mlp.dense_4h_to_h.qzeros": "model-00007-of-00010.safetensors",
|
| 1142 |
+
"transformer.h.51.mlp.dense_4h_to_h.scales": "model-00007-of-00010.safetensors",
|
| 1143 |
+
"transformer.h.51.mlp.dense_h_to_4h.bias": "model-00007-of-00010.safetensors",
|
| 1144 |
+
"transformer.h.51.mlp.dense_h_to_4h.g_idx": "model-00007-of-00010.safetensors",
|
| 1145 |
+
"transformer.h.51.mlp.dense_h_to_4h.qweight": "model-00007-of-00010.safetensors",
|
| 1146 |
+
"transformer.h.51.mlp.dense_h_to_4h.qzeros": "model-00007-of-00010.safetensors",
|
| 1147 |
+
"transformer.h.51.mlp.dense_h_to_4h.scales": "model-00007-of-00010.safetensors",
|
| 1148 |
+
"transformer.h.51.self_attention.dense.bias": "model-00007-of-00010.safetensors",
|
| 1149 |
+
"transformer.h.51.self_attention.dense.g_idx": "model-00007-of-00010.safetensors",
|
| 1150 |
+
"transformer.h.51.self_attention.dense.qweight": "model-00007-of-00010.safetensors",
|
| 1151 |
+
"transformer.h.51.self_attention.dense.qzeros": "model-00007-of-00010.safetensors",
|
| 1152 |
+
"transformer.h.51.self_attention.dense.scales": "model-00007-of-00010.safetensors",
|
| 1153 |
+
"transformer.h.51.self_attention.query_key_value.bias": "model-00007-of-00010.safetensors",
|
| 1154 |
+
"transformer.h.51.self_attention.query_key_value.g_idx": "model-00007-of-00010.safetensors",
|
| 1155 |
+
"transformer.h.51.self_attention.query_key_value.qweight": "model-00007-of-00010.safetensors",
|
| 1156 |
+
"transformer.h.51.self_attention.query_key_value.qzeros": "model-00007-of-00010.safetensors",
|
| 1157 |
+
"transformer.h.51.self_attention.query_key_value.scales": "model-00007-of-00010.safetensors",
|
| 1158 |
"transformer.h.52.ln_attn.bias": "model-00007-of-00010.safetensors",
|
| 1159 |
"transformer.h.52.ln_attn.weight": "model-00007-of-00010.safetensors",
|
| 1160 |
"transformer.h.52.ln_mlp.bias": "model-00007-of-00010.safetensors",
|
|
|
|
| 1169 |
"transformer.h.52.mlp.dense_h_to_4h.qweight": "model-00007-of-00010.safetensors",
|
| 1170 |
"transformer.h.52.mlp.dense_h_to_4h.qzeros": "model-00007-of-00010.safetensors",
|
| 1171 |
"transformer.h.52.mlp.dense_h_to_4h.scales": "model-00007-of-00010.safetensors",
|
| 1172 |
+
"transformer.h.52.self_attention.dense.bias": "model-00007-of-00010.safetensors",
|
| 1173 |
+
"transformer.h.52.self_attention.dense.g_idx": "model-00007-of-00010.safetensors",
|
| 1174 |
+
"transformer.h.52.self_attention.dense.qweight": "model-00007-of-00010.safetensors",
|
| 1175 |
+
"transformer.h.52.self_attention.dense.qzeros": "model-00007-of-00010.safetensors",
|
| 1176 |
+
"transformer.h.52.self_attention.dense.scales": "model-00007-of-00010.safetensors",
|
| 1177 |
+
"transformer.h.52.self_attention.query_key_value.bias": "model-00007-of-00010.safetensors",
|
| 1178 |
+
"transformer.h.52.self_attention.query_key_value.g_idx": "model-00007-of-00010.safetensors",
|
| 1179 |
+
"transformer.h.52.self_attention.query_key_value.qweight": "model-00007-of-00010.safetensors",
|
| 1180 |
+
"transformer.h.52.self_attention.query_key_value.qzeros": "model-00007-of-00010.safetensors",
|
| 1181 |
+
"transformer.h.52.self_attention.query_key_value.scales": "model-00007-of-00010.safetensors",
|
| 1182 |
"transformer.h.53.ln_attn.bias": "model-00007-of-00010.safetensors",
|
| 1183 |
"transformer.h.53.ln_attn.weight": "model-00007-of-00010.safetensors",
|
| 1184 |
"transformer.h.53.ln_mlp.bias": "model-00007-of-00010.safetensors",
|
|
|
|
| 1299 |
"transformer.h.57.self_attention.query_key_value.qweight": "model-00007-of-00010.safetensors",
|
| 1300 |
"transformer.h.57.self_attention.query_key_value.qzeros": "model-00007-of-00010.safetensors",
|
| 1301 |
"transformer.h.57.self_attention.query_key_value.scales": "model-00007-of-00010.safetensors",
|
| 1302 |
+
"transformer.h.58.ln_attn.bias": "model-00008-of-00010.safetensors",
|
| 1303 |
+
"transformer.h.58.ln_attn.weight": "model-00008-of-00010.safetensors",
|
| 1304 |
+
"transformer.h.58.ln_mlp.bias": "model-00008-of-00010.safetensors",
|
| 1305 |
+
"transformer.h.58.ln_mlp.weight": "model-00008-of-00010.safetensors",
|
| 1306 |
+
"transformer.h.58.mlp.dense_4h_to_h.bias": "model-00008-of-00010.safetensors",
|
| 1307 |
+
"transformer.h.58.mlp.dense_4h_to_h.g_idx": "model-00008-of-00010.safetensors",
|
| 1308 |
+
"transformer.h.58.mlp.dense_4h_to_h.qweight": "model-00008-of-00010.safetensors",
|
| 1309 |
+
"transformer.h.58.mlp.dense_4h_to_h.qzeros": "model-00008-of-00010.safetensors",
|
| 1310 |
+
"transformer.h.58.mlp.dense_4h_to_h.scales": "model-00008-of-00010.safetensors",
|
| 1311 |
+
"transformer.h.58.mlp.dense_h_to_4h.bias": "model-00008-of-00010.safetensors",
|
| 1312 |
+
"transformer.h.58.mlp.dense_h_to_4h.g_idx": "model-00008-of-00010.safetensors",
|
| 1313 |
+
"transformer.h.58.mlp.dense_h_to_4h.qweight": "model-00008-of-00010.safetensors",
|
| 1314 |
+
"transformer.h.58.mlp.dense_h_to_4h.qzeros": "model-00008-of-00010.safetensors",
|
| 1315 |
+
"transformer.h.58.mlp.dense_h_to_4h.scales": "model-00008-of-00010.safetensors",
|
| 1316 |
"transformer.h.58.self_attention.dense.bias": "model-00007-of-00010.safetensors",
|
| 1317 |
"transformer.h.58.self_attention.dense.g_idx": "model-00007-of-00010.safetensors",
|
| 1318 |
"transformer.h.58.self_attention.dense.qweight": "model-00007-of-00010.safetensors",
|
|
|
|
| 1323 |
"transformer.h.58.self_attention.query_key_value.qweight": "model-00007-of-00010.safetensors",
|
| 1324 |
"transformer.h.58.self_attention.query_key_value.qzeros": "model-00007-of-00010.safetensors",
|
| 1325 |
"transformer.h.58.self_attention.query_key_value.scales": "model-00007-of-00010.safetensors",
|
| 1326 |
+
"transformer.h.59.ln_attn.bias": "model-00008-of-00010.safetensors",
|
| 1327 |
+
"transformer.h.59.ln_attn.weight": "model-00008-of-00010.safetensors",
|
| 1328 |
+
"transformer.h.59.ln_mlp.bias": "model-00008-of-00010.safetensors",
|
| 1329 |
+
"transformer.h.59.ln_mlp.weight": "model-00008-of-00010.safetensors",
|
| 1330 |
+
"transformer.h.59.mlp.dense_4h_to_h.bias": "model-00008-of-00010.safetensors",
|
| 1331 |
+
"transformer.h.59.mlp.dense_4h_to_h.g_idx": "model-00008-of-00010.safetensors",
|
| 1332 |
+
"transformer.h.59.mlp.dense_4h_to_h.qweight": "model-00008-of-00010.safetensors",
|
| 1333 |
+
"transformer.h.59.mlp.dense_4h_to_h.qzeros": "model-00008-of-00010.safetensors",
|
| 1334 |
+
"transformer.h.59.mlp.dense_4h_to_h.scales": "model-00008-of-00010.safetensors",
|
| 1335 |
+
"transformer.h.59.mlp.dense_h_to_4h.bias": "model-00008-of-00010.safetensors",
|
| 1336 |
+
"transformer.h.59.mlp.dense_h_to_4h.g_idx": "model-00008-of-00010.safetensors",
|
| 1337 |
+
"transformer.h.59.mlp.dense_h_to_4h.qweight": "model-00008-of-00010.safetensors",
|
| 1338 |
+
"transformer.h.59.mlp.dense_h_to_4h.qzeros": "model-00008-of-00010.safetensors",
|
| 1339 |
+
"transformer.h.59.mlp.dense_h_to_4h.scales": "model-00008-of-00010.safetensors",
|
| 1340 |
+
"transformer.h.59.self_attention.dense.bias": "model-00008-of-00010.safetensors",
|
| 1341 |
+
"transformer.h.59.self_attention.dense.g_idx": "model-00008-of-00010.safetensors",
|
| 1342 |
+
"transformer.h.59.self_attention.dense.qweight": "model-00008-of-00010.safetensors",
|
| 1343 |
+
"transformer.h.59.self_attention.dense.qzeros": "model-00008-of-00010.safetensors",
|
| 1344 |
+
"transformer.h.59.self_attention.dense.scales": "model-00008-of-00010.safetensors",
|
| 1345 |
+
"transformer.h.59.self_attention.query_key_value.bias": "model-00008-of-00010.safetensors",
|
| 1346 |
+
"transformer.h.59.self_attention.query_key_value.g_idx": "model-00008-of-00010.safetensors",
|
| 1347 |
+
"transformer.h.59.self_attention.query_key_value.qweight": "model-00008-of-00010.safetensors",
|
| 1348 |
+
"transformer.h.59.self_attention.query_key_value.qzeros": "model-00008-of-00010.safetensors",
|
| 1349 |
+
"transformer.h.59.self_attention.query_key_value.scales": "model-00008-of-00010.safetensors",
|
| 1350 |
+
"transformer.h.6.ln_attn.bias": "model-00002-of-00010.safetensors",
|
| 1351 |
+
"transformer.h.6.ln_attn.weight": "model-00002-of-00010.safetensors",
|
| 1352 |
+
"transformer.h.6.ln_mlp.bias": "model-00002-of-00010.safetensors",
|
| 1353 |
+
"transformer.h.6.ln_mlp.weight": "model-00002-of-00010.safetensors",
|
| 1354 |
"transformer.h.6.mlp.dense_4h_to_h.bias": "model-00001-of-00010.safetensors",
|
| 1355 |
"transformer.h.6.mlp.dense_4h_to_h.g_idx": "model-00001-of-00010.safetensors",
|
| 1356 |
"transformer.h.6.mlp.dense_4h_to_h.qweight": "model-00001-of-00010.safetensors",
|
| 1357 |
"transformer.h.6.mlp.dense_4h_to_h.qzeros": "model-00001-of-00010.safetensors",
|
| 1358 |
"transformer.h.6.mlp.dense_4h_to_h.scales": "model-00001-of-00010.safetensors",
|
| 1359 |
+
"transformer.h.6.mlp.dense_h_to_4h.bias": "model-00002-of-00010.safetensors",
|
| 1360 |
+
"transformer.h.6.mlp.dense_h_to_4h.g_idx": "model-00002-of-00010.safetensors",
|
| 1361 |
"transformer.h.6.mlp.dense_h_to_4h.qweight": "model-00001-of-00010.safetensors",
|
| 1362 |
"transformer.h.6.mlp.dense_h_to_4h.qzeros": "model-00001-of-00010.safetensors",
|
| 1363 |
+
"transformer.h.6.mlp.dense_h_to_4h.scales": "model-00002-of-00010.safetensors",
|
| 1364 |
"transformer.h.6.self_attention.dense.bias": "model-00001-of-00010.safetensors",
|
| 1365 |
"transformer.h.6.self_attention.dense.g_idx": "model-00001-of-00010.safetensors",
|
| 1366 |
"transformer.h.6.self_attention.dense.qweight": "model-00001-of-00010.safetensors",
|
|
|
|
| 1371 |
"transformer.h.6.self_attention.query_key_value.qweight": "model-00001-of-00010.safetensors",
|
| 1372 |
"transformer.h.6.self_attention.query_key_value.qzeros": "model-00001-of-00010.safetensors",
|
| 1373 |
"transformer.h.6.self_attention.query_key_value.scales": "model-00001-of-00010.safetensors",
|
| 1374 |
+
"transformer.h.60.ln_attn.bias": "model-00008-of-00010.safetensors",
|
| 1375 |
+
"transformer.h.60.ln_attn.weight": "model-00008-of-00010.safetensors",
|
| 1376 |
+
"transformer.h.60.ln_mlp.bias": "model-00008-of-00010.safetensors",
|
| 1377 |
+
"transformer.h.60.ln_mlp.weight": "model-00008-of-00010.safetensors",
|
| 1378 |
+
"transformer.h.60.mlp.dense_4h_to_h.bias": "model-00008-of-00010.safetensors",
|
| 1379 |
+
"transformer.h.60.mlp.dense_4h_to_h.g_idx": "model-00008-of-00010.safetensors",
|
| 1380 |
+
"transformer.h.60.mlp.dense_4h_to_h.qweight": "model-00008-of-00010.safetensors",
|
| 1381 |
+
"transformer.h.60.mlp.dense_4h_to_h.qzeros": "model-00008-of-00010.safetensors",
|
| 1382 |
+
"transformer.h.60.mlp.dense_4h_to_h.scales": "model-00008-of-00010.safetensors",
|
| 1383 |
+
"transformer.h.60.mlp.dense_h_to_4h.bias": "model-00008-of-00010.safetensors",
|
| 1384 |
+
"transformer.h.60.mlp.dense_h_to_4h.g_idx": "model-00008-of-00010.safetensors",
|
| 1385 |
+
"transformer.h.60.mlp.dense_h_to_4h.qweight": "model-00008-of-00010.safetensors",
|
| 1386 |
+
"transformer.h.60.mlp.dense_h_to_4h.qzeros": "model-00008-of-00010.safetensors",
|
| 1387 |
+
"transformer.h.60.mlp.dense_h_to_4h.scales": "model-00008-of-00010.safetensors",
|
| 1388 |
+
"transformer.h.60.self_attention.dense.bias": "model-00008-of-00010.safetensors",
|
| 1389 |
+
"transformer.h.60.self_attention.dense.g_idx": "model-00008-of-00010.safetensors",
|
| 1390 |
+
"transformer.h.60.self_attention.dense.qweight": "model-00008-of-00010.safetensors",
|
| 1391 |
+
"transformer.h.60.self_attention.dense.qzeros": "model-00008-of-00010.safetensors",
|
| 1392 |
+
"transformer.h.60.self_attention.dense.scales": "model-00008-of-00010.safetensors",
|
| 1393 |
+
"transformer.h.60.self_attention.query_key_value.bias": "model-00008-of-00010.safetensors",
|
| 1394 |
+
"transformer.h.60.self_attention.query_key_value.g_idx": "model-00008-of-00010.safetensors",
|
| 1395 |
+
"transformer.h.60.self_attention.query_key_value.qweight": "model-00008-of-00010.safetensors",
|
| 1396 |
+
"transformer.h.60.self_attention.query_key_value.qzeros": "model-00008-of-00010.safetensors",
|
| 1397 |
+
"transformer.h.60.self_attention.query_key_value.scales": "model-00008-of-00010.safetensors",
|
| 1398 |
"transformer.h.61.ln_attn.bias": "model-00008-of-00010.safetensors",
|
| 1399 |
"transformer.h.61.ln_attn.weight": "model-00008-of-00010.safetensors",
|
| 1400 |
"transformer.h.61.ln_mlp.bias": "model-00008-of-00010.safetensors",
|
|
|
|
| 1409 |
"transformer.h.61.mlp.dense_h_to_4h.qweight": "model-00008-of-00010.safetensors",
|
| 1410 |
"transformer.h.61.mlp.dense_h_to_4h.qzeros": "model-00008-of-00010.safetensors",
|
| 1411 |
"transformer.h.61.mlp.dense_h_to_4h.scales": "model-00008-of-00010.safetensors",
|
| 1412 |
+
"transformer.h.61.self_attention.dense.bias": "model-00008-of-00010.safetensors",
|
| 1413 |
+
"transformer.h.61.self_attention.dense.g_idx": "model-00008-of-00010.safetensors",
|
| 1414 |
+
"transformer.h.61.self_attention.dense.qweight": "model-00008-of-00010.safetensors",
|
| 1415 |
+
"transformer.h.61.self_attention.dense.qzeros": "model-00008-of-00010.safetensors",
|
| 1416 |
+
"transformer.h.61.self_attention.dense.scales": "model-00008-of-00010.safetensors",
|
| 1417 |
+
"transformer.h.61.self_attention.query_key_value.bias": "model-00008-of-00010.safetensors",
|
| 1418 |
+
"transformer.h.61.self_attention.query_key_value.g_idx": "model-00008-of-00010.safetensors",
|
| 1419 |
+
"transformer.h.61.self_attention.query_key_value.qweight": "model-00008-of-00010.safetensors",
|
| 1420 |
+
"transformer.h.61.self_attention.query_key_value.qzeros": "model-00008-of-00010.safetensors",
|
| 1421 |
+
"transformer.h.61.self_attention.query_key_value.scales": "model-00008-of-00010.safetensors",
|
| 1422 |
"transformer.h.62.ln_attn.bias": "model-00008-of-00010.safetensors",
|
| 1423 |
"transformer.h.62.ln_attn.weight": "model-00008-of-00010.safetensors",
|
| 1424 |
"transformer.h.62.ln_mlp.bias": "model-00008-of-00010.safetensors",
|
|
|
|
| 1515 |
"transformer.h.65.self_attention.query_key_value.qweight": "model-00008-of-00010.safetensors",
|
| 1516 |
"transformer.h.65.self_attention.query_key_value.qzeros": "model-00008-of-00010.safetensors",
|
| 1517 |
"transformer.h.65.self_attention.query_key_value.scales": "model-00008-of-00010.safetensors",
|
| 1518 |
+
"transformer.h.66.ln_attn.bias": "model-00009-of-00010.safetensors",
|
| 1519 |
+
"transformer.h.66.ln_attn.weight": "model-00009-of-00010.safetensors",
|
| 1520 |
+
"transformer.h.66.ln_mlp.bias": "model-00009-of-00010.safetensors",
|
| 1521 |
+
"transformer.h.66.ln_mlp.weight": "model-00009-of-00010.safetensors",
|
| 1522 |
"transformer.h.66.mlp.dense_4h_to_h.bias": "model-00008-of-00010.safetensors",
|
| 1523 |
"transformer.h.66.mlp.dense_4h_to_h.g_idx": "model-00008-of-00010.safetensors",
|
| 1524 |
"transformer.h.66.mlp.dense_4h_to_h.qweight": "model-00008-of-00010.safetensors",
|
| 1525 |
"transformer.h.66.mlp.dense_4h_to_h.qzeros": "model-00008-of-00010.safetensors",
|
| 1526 |
"transformer.h.66.mlp.dense_4h_to_h.scales": "model-00008-of-00010.safetensors",
|
| 1527 |
+
"transformer.h.66.mlp.dense_h_to_4h.bias": "model-00009-of-00010.safetensors",
|
| 1528 |
+
"transformer.h.66.mlp.dense_h_to_4h.g_idx": "model-00009-of-00010.safetensors",
|
| 1529 |
+
"transformer.h.66.mlp.dense_h_to_4h.qweight": "model-00009-of-00010.safetensors",
|
| 1530 |
+
"transformer.h.66.mlp.dense_h_to_4h.qzeros": "model-00009-of-00010.safetensors",
|
| 1531 |
+
"transformer.h.66.mlp.dense_h_to_4h.scales": "model-00009-of-00010.safetensors",
|
| 1532 |
"transformer.h.66.self_attention.dense.bias": "model-00008-of-00010.safetensors",
|
| 1533 |
"transformer.h.66.self_attention.dense.g_idx": "model-00008-of-00010.safetensors",
|
| 1534 |
"transformer.h.66.self_attention.dense.qweight": "model-00008-of-00010.safetensors",
|
|
|
|
| 1539 |
"transformer.h.66.self_attention.query_key_value.qweight": "model-00008-of-00010.safetensors",
|
| 1540 |
"transformer.h.66.self_attention.query_key_value.qzeros": "model-00008-of-00010.safetensors",
|
| 1541 |
"transformer.h.66.self_attention.query_key_value.scales": "model-00008-of-00010.safetensors",
|
| 1542 |
+
"transformer.h.67.ln_attn.bias": "model-00009-of-00010.safetensors",
|
| 1543 |
+
"transformer.h.67.ln_attn.weight": "model-00009-of-00010.safetensors",
|
| 1544 |
+
"transformer.h.67.ln_mlp.bias": "model-00009-of-00010.safetensors",
|
| 1545 |
+
"transformer.h.67.ln_mlp.weight": "model-00009-of-00010.safetensors",
|
| 1546 |
+
"transformer.h.67.mlp.dense_4h_to_h.bias": "model-00009-of-00010.safetensors",
|
| 1547 |
+
"transformer.h.67.mlp.dense_4h_to_h.g_idx": "model-00009-of-00010.safetensors",
|
| 1548 |
+
"transformer.h.67.mlp.dense_4h_to_h.qweight": "model-00009-of-00010.safetensors",
|
| 1549 |
+
"transformer.h.67.mlp.dense_4h_to_h.qzeros": "model-00009-of-00010.safetensors",
|
| 1550 |
+
"transformer.h.67.mlp.dense_4h_to_h.scales": "model-00009-of-00010.safetensors",
|
| 1551 |
+
"transformer.h.67.mlp.dense_h_to_4h.bias": "model-00009-of-00010.safetensors",
|
| 1552 |
+
"transformer.h.67.mlp.dense_h_to_4h.g_idx": "model-00009-of-00010.safetensors",
|
| 1553 |
+
"transformer.h.67.mlp.dense_h_to_4h.qweight": "model-00009-of-00010.safetensors",
|
| 1554 |
+
"transformer.h.67.mlp.dense_h_to_4h.qzeros": "model-00009-of-00010.safetensors",
|
| 1555 |
+
"transformer.h.67.mlp.dense_h_to_4h.scales": "model-00009-of-00010.safetensors",
|
| 1556 |
+
"transformer.h.67.self_attention.dense.bias": "model-00009-of-00010.safetensors",
|
| 1557 |
+
"transformer.h.67.self_attention.dense.g_idx": "model-00009-of-00010.safetensors",
|
| 1558 |
+
"transformer.h.67.self_attention.dense.qweight": "model-00009-of-00010.safetensors",
|
| 1559 |
+
"transformer.h.67.self_attention.dense.qzeros": "model-00009-of-00010.safetensors",
|
| 1560 |
+
"transformer.h.67.self_attention.dense.scales": "model-00009-of-00010.safetensors",
|
| 1561 |
+
"transformer.h.67.self_attention.query_key_value.bias": "model-00009-of-00010.safetensors",
|
| 1562 |
+
"transformer.h.67.self_attention.query_key_value.g_idx": "model-00009-of-00010.safetensors",
|
| 1563 |
+
"transformer.h.67.self_attention.query_key_value.qweight": "model-00009-of-00010.safetensors",
|
| 1564 |
+
"transformer.h.67.self_attention.query_key_value.qzeros": "model-00009-of-00010.safetensors",
|
| 1565 |
+
"transformer.h.67.self_attention.query_key_value.scales": "model-00009-of-00010.safetensors",
|
| 1566 |
+
"transformer.h.68.ln_attn.bias": "model-00009-of-00010.safetensors",
|
| 1567 |
+
"transformer.h.68.ln_attn.weight": "model-00009-of-00010.safetensors",
|
| 1568 |
+
"transformer.h.68.ln_mlp.bias": "model-00009-of-00010.safetensors",
|
| 1569 |
+
"transformer.h.68.ln_mlp.weight": "model-00009-of-00010.safetensors",
|
| 1570 |
+
"transformer.h.68.mlp.dense_4h_to_h.bias": "model-00009-of-00010.safetensors",
|
| 1571 |
+
"transformer.h.68.mlp.dense_4h_to_h.g_idx": "model-00009-of-00010.safetensors",
|
| 1572 |
+
"transformer.h.68.mlp.dense_4h_to_h.qweight": "model-00009-of-00010.safetensors",
|
| 1573 |
+
"transformer.h.68.mlp.dense_4h_to_h.qzeros": "model-00009-of-00010.safetensors",
|
| 1574 |
+
"transformer.h.68.mlp.dense_4h_to_h.scales": "model-00009-of-00010.safetensors",
|
| 1575 |
+
"transformer.h.68.mlp.dense_h_to_4h.bias": "model-00009-of-00010.safetensors",
|
| 1576 |
+
"transformer.h.68.mlp.dense_h_to_4h.g_idx": "model-00009-of-00010.safetensors",
|
| 1577 |
+
"transformer.h.68.mlp.dense_h_to_4h.qweight": "model-00009-of-00010.safetensors",
|
| 1578 |
+
"transformer.h.68.mlp.dense_h_to_4h.qzeros": "model-00009-of-00010.safetensors",
|
| 1579 |
+
"transformer.h.68.mlp.dense_h_to_4h.scales": "model-00009-of-00010.safetensors",
|
| 1580 |
+
"transformer.h.68.self_attention.dense.bias": "model-00009-of-00010.safetensors",
|
| 1581 |
+
"transformer.h.68.self_attention.dense.g_idx": "model-00009-of-00010.safetensors",
|
| 1582 |
+
"transformer.h.68.self_attention.dense.qweight": "model-00009-of-00010.safetensors",
|
| 1583 |
+
"transformer.h.68.self_attention.dense.qzeros": "model-00009-of-00010.safetensors",
|
| 1584 |
+
"transformer.h.68.self_attention.dense.scales": "model-00009-of-00010.safetensors",
|
| 1585 |
+
"transformer.h.68.self_attention.query_key_value.bias": "model-00009-of-00010.safetensors",
|
| 1586 |
+
"transformer.h.68.self_attention.query_key_value.g_idx": "model-00009-of-00010.safetensors",
|
| 1587 |
+
"transformer.h.68.self_attention.query_key_value.qweight": "model-00009-of-00010.safetensors",
|
| 1588 |
+
"transformer.h.68.self_attention.query_key_value.qzeros": "model-00009-of-00010.safetensors",
|
| 1589 |
+
"transformer.h.68.self_attention.query_key_value.scales": "model-00009-of-00010.safetensors",
|
| 1590 |
+
"transformer.h.69.ln_attn.bias": "model-00009-of-00010.safetensors",
|
| 1591 |
+
"transformer.h.69.ln_attn.weight": "model-00009-of-00010.safetensors",
|
| 1592 |
+
"transformer.h.69.ln_mlp.bias": "model-00009-of-00010.safetensors",
|
| 1593 |
+
"transformer.h.69.ln_mlp.weight": "model-00009-of-00010.safetensors",
|
| 1594 |
+
"transformer.h.69.mlp.dense_4h_to_h.bias": "model-00009-of-00010.safetensors",
|
| 1595 |
+
"transformer.h.69.mlp.dense_4h_to_h.g_idx": "model-00009-of-00010.safetensors",
|
| 1596 |
+
"transformer.h.69.mlp.dense_4h_to_h.qweight": "model-00009-of-00010.safetensors",
|
| 1597 |
+
"transformer.h.69.mlp.dense_4h_to_h.qzeros": "model-00009-of-00010.safetensors",
|
| 1598 |
+
"transformer.h.69.mlp.dense_4h_to_h.scales": "model-00009-of-00010.safetensors",
|
| 1599 |
+
"transformer.h.69.mlp.dense_h_to_4h.bias": "model-00009-of-00010.safetensors",
|
| 1600 |
+
"transformer.h.69.mlp.dense_h_to_4h.g_idx": "model-00009-of-00010.safetensors",
|
| 1601 |
+
"transformer.h.69.mlp.dense_h_to_4h.qweight": "model-00009-of-00010.safetensors",
|
| 1602 |
+
"transformer.h.69.mlp.dense_h_to_4h.qzeros": "model-00009-of-00010.safetensors",
|
| 1603 |
+
"transformer.h.69.mlp.dense_h_to_4h.scales": "model-00009-of-00010.safetensors",
|
| 1604 |
+
"transformer.h.69.self_attention.dense.bias": "model-00009-of-00010.safetensors",
|
| 1605 |
+
"transformer.h.69.self_attention.dense.g_idx": "model-00009-of-00010.safetensors",
|
| 1606 |
+
"transformer.h.69.self_attention.dense.qweight": "model-00009-of-00010.safetensors",
|
| 1607 |
+
"transformer.h.69.self_attention.dense.qzeros": "model-00009-of-00010.safetensors",
|
| 1608 |
+
"transformer.h.69.self_attention.dense.scales": "model-00009-of-00010.safetensors",
|
| 1609 |
+
"transformer.h.69.self_attention.query_key_value.bias": "model-00009-of-00010.safetensors",
|
| 1610 |
+
"transformer.h.69.self_attention.query_key_value.g_idx": "model-00009-of-00010.safetensors",
|
| 1611 |
+
"transformer.h.69.self_attention.query_key_value.qweight": "model-00009-of-00010.safetensors",
|
| 1612 |
+
"transformer.h.69.self_attention.query_key_value.qzeros": "model-00009-of-00010.safetensors",
|
| 1613 |
+
"transformer.h.69.self_attention.query_key_value.scales": "model-00009-of-00010.safetensors",
|
| 1614 |
"transformer.h.7.ln_attn.bias": "model-00002-of-00010.safetensors",
|
| 1615 |
"transformer.h.7.ln_attn.weight": "model-00002-of-00010.safetensors",
|
| 1616 |
"transformer.h.7.ln_mlp.bias": "model-00002-of-00010.safetensors",
|
|
|
|
| 1625 |
"transformer.h.7.mlp.dense_h_to_4h.qweight": "model-00002-of-00010.safetensors",
|
| 1626 |
"transformer.h.7.mlp.dense_h_to_4h.qzeros": "model-00002-of-00010.safetensors",
|
| 1627 |
"transformer.h.7.mlp.dense_h_to_4h.scales": "model-00002-of-00010.safetensors",
|
| 1628 |
+
"transformer.h.7.self_attention.dense.bias": "model-00002-of-00010.safetensors",
|
| 1629 |
+
"transformer.h.7.self_attention.dense.g_idx": "model-00002-of-00010.safetensors",
|
| 1630 |
+
"transformer.h.7.self_attention.dense.qweight": "model-00002-of-00010.safetensors",
|
| 1631 |
+
"transformer.h.7.self_attention.dense.qzeros": "model-00002-of-00010.safetensors",
|
| 1632 |
+
"transformer.h.7.self_attention.dense.scales": "model-00002-of-00010.safetensors",
|
| 1633 |
+
"transformer.h.7.self_attention.query_key_value.bias": "model-00002-of-00010.safetensors",
|
| 1634 |
+
"transformer.h.7.self_attention.query_key_value.g_idx": "model-00002-of-00010.safetensors",
|
| 1635 |
+
"transformer.h.7.self_attention.query_key_value.qweight": "model-00002-of-00010.safetensors",
|
| 1636 |
+
"transformer.h.7.self_attention.query_key_value.qzeros": "model-00002-of-00010.safetensors",
|
| 1637 |
+
"transformer.h.7.self_attention.query_key_value.scales": "model-00002-of-00010.safetensors",
|
| 1638 |
"transformer.h.70.ln_attn.bias": "model-00009-of-00010.safetensors",
|
| 1639 |
"transformer.h.70.ln_attn.weight": "model-00009-of-00010.safetensors",
|
| 1640 |
"transformer.h.70.ln_mlp.bias": "model-00009-of-00010.safetensors",
|
|
|
|
| 1649 |
"transformer.h.70.mlp.dense_h_to_4h.qweight": "model-00009-of-00010.safetensors",
|
| 1650 |
"transformer.h.70.mlp.dense_h_to_4h.qzeros": "model-00009-of-00010.safetensors",
|
| 1651 |
"transformer.h.70.mlp.dense_h_to_4h.scales": "model-00009-of-00010.safetensors",
|
| 1652 |
+
"transformer.h.70.self_attention.dense.bias": "model-00009-of-00010.safetensors",
|
| 1653 |
+
"transformer.h.70.self_attention.dense.g_idx": "model-00009-of-00010.safetensors",
|
| 1654 |
+
"transformer.h.70.self_attention.dense.qweight": "model-00009-of-00010.safetensors",
|
| 1655 |
+
"transformer.h.70.self_attention.dense.qzeros": "model-00009-of-00010.safetensors",
|
| 1656 |
+
"transformer.h.70.self_attention.dense.scales": "model-00009-of-00010.safetensors",
|
| 1657 |
+
"transformer.h.70.self_attention.query_key_value.bias": "model-00009-of-00010.safetensors",
|
| 1658 |
+
"transformer.h.70.self_attention.query_key_value.g_idx": "model-00009-of-00010.safetensors",
|
| 1659 |
+
"transformer.h.70.self_attention.query_key_value.qweight": "model-00009-of-00010.safetensors",
|
| 1660 |
+
"transformer.h.70.self_attention.query_key_value.qzeros": "model-00009-of-00010.safetensors",
|
| 1661 |
+
"transformer.h.70.self_attention.query_key_value.scales": "model-00009-of-00010.safetensors",
|
| 1662 |
"transformer.h.71.ln_attn.bias": "model-00009-of-00010.safetensors",
|
| 1663 |
"transformer.h.71.ln_attn.weight": "model-00009-of-00010.safetensors",
|
| 1664 |
"transformer.h.71.ln_mlp.bias": "model-00009-of-00010.safetensors",
|
|
|
|
| 1755 |
"transformer.h.74.self_attention.query_key_value.qweight": "model-00009-of-00010.safetensors",
|
| 1756 |
"transformer.h.74.self_attention.query_key_value.qzeros": "model-00009-of-00010.safetensors",
|
| 1757 |
"transformer.h.74.self_attention.query_key_value.scales": "model-00009-of-00010.safetensors",
|
| 1758 |
+
"transformer.h.75.ln_attn.bias": "model-00010-of-00010.safetensors",
|
| 1759 |
+
"transformer.h.75.ln_attn.weight": "model-00010-of-00010.safetensors",
|
| 1760 |
+
"transformer.h.75.ln_mlp.bias": "model-00010-of-00010.safetensors",
|
| 1761 |
+
"transformer.h.75.ln_mlp.weight": "model-00010-of-00010.safetensors",
|
| 1762 |
+
"transformer.h.75.mlp.dense_4h_to_h.bias": "model-00010-of-00010.safetensors",
|
| 1763 |
+
"transformer.h.75.mlp.dense_4h_to_h.g_idx": "model-00010-of-00010.safetensors",
|
| 1764 |
+
"transformer.h.75.mlp.dense_4h_to_h.qweight": "model-00010-of-00010.safetensors",
|
| 1765 |
+
"transformer.h.75.mlp.dense_4h_to_h.qzeros": "model-00010-of-00010.safetensors",
|
| 1766 |
+
"transformer.h.75.mlp.dense_4h_to_h.scales": "model-00010-of-00010.safetensors",
|
| 1767 |
+
"transformer.h.75.mlp.dense_h_to_4h.bias": "model-00010-of-00010.safetensors",
|
| 1768 |
+
"transformer.h.75.mlp.dense_h_to_4h.g_idx": "model-00010-of-00010.safetensors",
|
| 1769 |
+
"transformer.h.75.mlp.dense_h_to_4h.qweight": "model-00010-of-00010.safetensors",
|
| 1770 |
+
"transformer.h.75.mlp.dense_h_to_4h.qzeros": "model-00010-of-00010.safetensors",
|
| 1771 |
+
"transformer.h.75.mlp.dense_h_to_4h.scales": "model-00010-of-00010.safetensors",
|
| 1772 |
"transformer.h.75.self_attention.dense.bias": "model-00009-of-00010.safetensors",
|
| 1773 |
"transformer.h.75.self_attention.dense.g_idx": "model-00009-of-00010.safetensors",
|
| 1774 |
"transformer.h.75.self_attention.dense.qweight": "model-00009-of-00010.safetensors",
|
|
|
|
| 1779 |
"transformer.h.75.self_attention.query_key_value.qweight": "model-00009-of-00010.safetensors",
|
| 1780 |
"transformer.h.75.self_attention.query_key_value.qzeros": "model-00009-of-00010.safetensors",
|
| 1781 |
"transformer.h.75.self_attention.query_key_value.scales": "model-00009-of-00010.safetensors",
|
| 1782 |
+
"transformer.h.76.ln_attn.bias": "model-00010-of-00010.safetensors",
|
| 1783 |
+
"transformer.h.76.ln_attn.weight": "model-00010-of-00010.safetensors",
|
| 1784 |
+
"transformer.h.76.ln_mlp.bias": "model-00010-of-00010.safetensors",
|
| 1785 |
+
"transformer.h.76.ln_mlp.weight": "model-00010-of-00010.safetensors",
|
| 1786 |
+
"transformer.h.76.mlp.dense_4h_to_h.bias": "model-00010-of-00010.safetensors",
|
| 1787 |
+
"transformer.h.76.mlp.dense_4h_to_h.g_idx": "model-00010-of-00010.safetensors",
|
| 1788 |
+
"transformer.h.76.mlp.dense_4h_to_h.qweight": "model-00010-of-00010.safetensors",
|
| 1789 |
+
"transformer.h.76.mlp.dense_4h_to_h.qzeros": "model-00010-of-00010.safetensors",
|
| 1790 |
+
"transformer.h.76.mlp.dense_4h_to_h.scales": "model-00010-of-00010.safetensors",
|
| 1791 |
+
"transformer.h.76.mlp.dense_h_to_4h.bias": "model-00010-of-00010.safetensors",
|
| 1792 |
+
"transformer.h.76.mlp.dense_h_to_4h.g_idx": "model-00010-of-00010.safetensors",
|
| 1793 |
+
"transformer.h.76.mlp.dense_h_to_4h.qweight": "model-00010-of-00010.safetensors",
|
| 1794 |
+
"transformer.h.76.mlp.dense_h_to_4h.qzeros": "model-00010-of-00010.safetensors",
|
| 1795 |
+
"transformer.h.76.mlp.dense_h_to_4h.scales": "model-00010-of-00010.safetensors",
|
| 1796 |
+
"transformer.h.76.self_attention.dense.bias": "model-00010-of-00010.safetensors",
|
| 1797 |
+
"transformer.h.76.self_attention.dense.g_idx": "model-00010-of-00010.safetensors",
|
| 1798 |
+
"transformer.h.76.self_attention.dense.qweight": "model-00010-of-00010.safetensors",
|
| 1799 |
+
"transformer.h.76.self_attention.dense.qzeros": "model-00010-of-00010.safetensors",
|
| 1800 |
+
"transformer.h.76.self_attention.dense.scales": "model-00010-of-00010.safetensors",
|
| 1801 |
+
"transformer.h.76.self_attention.query_key_value.bias": "model-00010-of-00010.safetensors",
|
| 1802 |
+
"transformer.h.76.self_attention.query_key_value.g_idx": "model-00010-of-00010.safetensors",
|
| 1803 |
+
"transformer.h.76.self_attention.query_key_value.qweight": "model-00010-of-00010.safetensors",
|
| 1804 |
+
"transformer.h.76.self_attention.query_key_value.qzeros": "model-00010-of-00010.safetensors",
|
| 1805 |
+
"transformer.h.76.self_attention.query_key_value.scales": "model-00010-of-00010.safetensors",
|
| 1806 |
+
"transformer.h.77.ln_attn.bias": "model-00010-of-00010.safetensors",
|
| 1807 |
+
"transformer.h.77.ln_attn.weight": "model-00010-of-00010.safetensors",
|
| 1808 |
+
"transformer.h.77.ln_mlp.bias": "model-00010-of-00010.safetensors",
|
| 1809 |
+
"transformer.h.77.ln_mlp.weight": "model-00010-of-00010.safetensors",
|
| 1810 |
+
"transformer.h.77.mlp.dense_4h_to_h.bias": "model-00010-of-00010.safetensors",
|
| 1811 |
+
"transformer.h.77.mlp.dense_4h_to_h.g_idx": "model-00010-of-00010.safetensors",
|
| 1812 |
+
"transformer.h.77.mlp.dense_4h_to_h.qweight": "model-00010-of-00010.safetensors",
|
| 1813 |
+
"transformer.h.77.mlp.dense_4h_to_h.qzeros": "model-00010-of-00010.safetensors",
|
| 1814 |
+
"transformer.h.77.mlp.dense_4h_to_h.scales": "model-00010-of-00010.safetensors",
|
| 1815 |
+
"transformer.h.77.mlp.dense_h_to_4h.bias": "model-00010-of-00010.safetensors",
|
| 1816 |
+
"transformer.h.77.mlp.dense_h_to_4h.g_idx": "model-00010-of-00010.safetensors",
|
| 1817 |
+
"transformer.h.77.mlp.dense_h_to_4h.qweight": "model-00010-of-00010.safetensors",
|
| 1818 |
+
"transformer.h.77.mlp.dense_h_to_4h.qzeros": "model-00010-of-00010.safetensors",
|
| 1819 |
+
"transformer.h.77.mlp.dense_h_to_4h.scales": "model-00010-of-00010.safetensors",
|
| 1820 |
+
"transformer.h.77.self_attention.dense.bias": "model-00010-of-00010.safetensors",
|
| 1821 |
+
"transformer.h.77.self_attention.dense.g_idx": "model-00010-of-00010.safetensors",
|
| 1822 |
+
"transformer.h.77.self_attention.dense.qweight": "model-00010-of-00010.safetensors",
|
| 1823 |
+
"transformer.h.77.self_attention.dense.qzeros": "model-00010-of-00010.safetensors",
|
| 1824 |
+
"transformer.h.77.self_attention.dense.scales": "model-00010-of-00010.safetensors",
|
| 1825 |
+
"transformer.h.77.self_attention.query_key_value.bias": "model-00010-of-00010.safetensors",
|
| 1826 |
+
"transformer.h.77.self_attention.query_key_value.g_idx": "model-00010-of-00010.safetensors",
|
| 1827 |
+
"transformer.h.77.self_attention.query_key_value.qweight": "model-00010-of-00010.safetensors",
|
| 1828 |
+
"transformer.h.77.self_attention.query_key_value.qzeros": "model-00010-of-00010.safetensors",
|
| 1829 |
+
"transformer.h.77.self_attention.query_key_value.scales": "model-00010-of-00010.safetensors",
|
| 1830 |
+
"transformer.h.78.ln_attn.bias": "model-00010-of-00010.safetensors",
|
| 1831 |
+
"transformer.h.78.ln_attn.weight": "model-00010-of-00010.safetensors",
|
| 1832 |
+
"transformer.h.78.ln_mlp.bias": "model-00010-of-00010.safetensors",
|
| 1833 |
+
"transformer.h.78.ln_mlp.weight": "model-00010-of-00010.safetensors",
|
| 1834 |
+
"transformer.h.78.mlp.dense_4h_to_h.bias": "model-00010-of-00010.safetensors",
|
| 1835 |
+
"transformer.h.78.mlp.dense_4h_to_h.g_idx": "model-00010-of-00010.safetensors",
|
| 1836 |
+
"transformer.h.78.mlp.dense_4h_to_h.qweight": "model-00010-of-00010.safetensors",
|
| 1837 |
+
"transformer.h.78.mlp.dense_4h_to_h.qzeros": "model-00010-of-00010.safetensors",
|
| 1838 |
+
"transformer.h.78.mlp.dense_4h_to_h.scales": "model-00010-of-00010.safetensors",
|
| 1839 |
+
"transformer.h.78.mlp.dense_h_to_4h.bias": "model-00010-of-00010.safetensors",
|
| 1840 |
+
"transformer.h.78.mlp.dense_h_to_4h.g_idx": "model-00010-of-00010.safetensors",
|
| 1841 |
+
"transformer.h.78.mlp.dense_h_to_4h.qweight": "model-00010-of-00010.safetensors",
|
| 1842 |
+
"transformer.h.78.mlp.dense_h_to_4h.qzeros": "model-00010-of-00010.safetensors",
|
| 1843 |
+
"transformer.h.78.mlp.dense_h_to_4h.scales": "model-00010-of-00010.safetensors",
|
| 1844 |
+
"transformer.h.78.self_attention.dense.bias": "model-00010-of-00010.safetensors",
|
| 1845 |
+
"transformer.h.78.self_attention.dense.g_idx": "model-00010-of-00010.safetensors",
|
| 1846 |
+
"transformer.h.78.self_attention.dense.qweight": "model-00010-of-00010.safetensors",
|
| 1847 |
+
"transformer.h.78.self_attention.dense.qzeros": "model-00010-of-00010.safetensors",
|
| 1848 |
+
"transformer.h.78.self_attention.dense.scales": "model-00010-of-00010.safetensors",
|
| 1849 |
+
"transformer.h.78.self_attention.query_key_value.bias": "model-00010-of-00010.safetensors",
|
| 1850 |
+
"transformer.h.78.self_attention.query_key_value.g_idx": "model-00010-of-00010.safetensors",
|
| 1851 |
+
"transformer.h.78.self_attention.query_key_value.qweight": "model-00010-of-00010.safetensors",
|
| 1852 |
+
"transformer.h.78.self_attention.query_key_value.qzeros": "model-00010-of-00010.safetensors",
|
| 1853 |
+
"transformer.h.78.self_attention.query_key_value.scales": "model-00010-of-00010.safetensors",
|
| 1854 |
"transformer.h.79.ln_attn.bias": "model-00010-of-00010.safetensors",
|
| 1855 |
"transformer.h.79.ln_attn.weight": "model-00010-of-00010.safetensors",
|
| 1856 |
"transformer.h.79.ln_mlp.bias": "model-00010-of-00010.safetensors",
|
|
|
|
| 1865 |
"transformer.h.79.mlp.dense_h_to_4h.qweight": "model-00010-of-00010.safetensors",
|
| 1866 |
"transformer.h.79.mlp.dense_h_to_4h.qzeros": "model-00010-of-00010.safetensors",
|
| 1867 |
"transformer.h.79.mlp.dense_h_to_4h.scales": "model-00010-of-00010.safetensors",
|
| 1868 |
+
"transformer.h.79.self_attention.dense.bias": "model-00010-of-00010.safetensors",
|
| 1869 |
+
"transformer.h.79.self_attention.dense.g_idx": "model-00010-of-00010.safetensors",
|
| 1870 |
+
"transformer.h.79.self_attention.dense.qweight": "model-00010-of-00010.safetensors",
|
| 1871 |
+
"transformer.h.79.self_attention.dense.qzeros": "model-00010-of-00010.safetensors",
|
| 1872 |
+
"transformer.h.79.self_attention.dense.scales": "model-00010-of-00010.safetensors",
|
| 1873 |
+
"transformer.h.79.self_attention.query_key_value.bias": "model-00010-of-00010.safetensors",
|
| 1874 |
+
"transformer.h.79.self_attention.query_key_value.g_idx": "model-00010-of-00010.safetensors",
|
| 1875 |
+
"transformer.h.79.self_attention.query_key_value.qweight": "model-00010-of-00010.safetensors",
|
| 1876 |
+
"transformer.h.79.self_attention.query_key_value.qzeros": "model-00010-of-00010.safetensors",
|
| 1877 |
+
"transformer.h.79.self_attention.query_key_value.scales": "model-00010-of-00010.safetensors",
|
| 1878 |
"transformer.h.8.ln_attn.bias": "model-00002-of-00010.safetensors",
|
| 1879 |
"transformer.h.8.ln_attn.weight": "model-00002-of-00010.safetensors",
|
| 1880 |
"transformer.h.8.ln_mlp.bias": "model-00002-of-00010.safetensors",
|
quantize_config.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"bits": 4,
|
| 3 |
-
"group_size":
|
| 4 |
"damp_percent": 0.1,
|
| 5 |
"desc_act": true,
|
| 6 |
"sym": true,
|
| 7 |
"true_sequential": true,
|
| 8 |
"model_name_or_path": null,
|
| 9 |
"model_file_base_name": "model"
|
| 10 |
-
}
|
|
|
|
| 1 |
{
|
| 2 |
"bits": 4,
|
| 3 |
+
"group_size": 128,
|
| 4 |
"damp_percent": 0.1,
|
| 5 |
"desc_act": true,
|
| 6 |
"sym": true,
|
| 7 |
"true_sequential": true,
|
| 8 |
"model_name_or_path": null,
|
| 9 |
"model_file_base_name": "model"
|
| 10 |
+
}
|