metascroy committed
Commit 69f1da8 · verified · 1 Parent(s): c343cac

Delete config.json

Files changed (1)
  1. config.json +0 -189
config.json DELETED
@@ -1,189 +0,0 @@
- {
-   "architectures": [
-     "SmolLM3ForCausalLM"
-   ],
-   "attention_bias": false,
-   "attention_dropout": 0.0,
-   "bos_token_id": 128000,
-   "eos_token_id": 128012,
-   "hidden_act": "silu",
-   "hidden_size": 2048,
-   "initializer_range": 0.02,
-   "intermediate_size": 11008,
-   "layer_types": [
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention",
-     "full_attention"
-   ],
-   "max_position_embeddings": 65536,
-   "max_window_layers": 28,
-   "mlp_bias": false,
-   "model_type": "smollm3",
-   "no_rope_layer_interval": 4,
-   "no_rope_layers": [
-     1,
-     1,
-     1,
-     0,
-     1,
-     1,
-     1,
-     0,
-     1,
-     1,
-     1,
-     0,
-     1,
-     1,
-     1,
-     0,
-     1,
-     1,
-     1,
-     0,
-     1,
-     1,
-     1,
-     0,
-     1,
-     1,
-     1,
-     0,
-     1,
-     1,
-     1,
-     0,
-     1,
-     1,
-     1,
-     0
-   ],
-   "num_attention_heads": 16,
-   "num_hidden_layers": 36,
-   "num_key_value_heads": 4,
-   "pad_token_id": 128004,
-   "pretraining_tp": 2,
-   "quantization_config": {
-     "include_input_output_embeddings": false,
-     "modules_to_not_convert": [],
-     "quant_method": "torchao",
-     "quant_type": {
-       "default": {
-         "_data": {
-           "module_fqn_to_config": {
-             "_default": {
-               "_data": {
-                 "act_mapping_type": {
-                   "_data": "ASYMMETRIC",
-                   "_type": "MappingType"
-                 },
-                 "layout": {
-                   "_data": {},
-                   "_type": "QDQLayout",
-                   "_version": 1
-                 },
-                 "weight_dtype": {
-                   "_data": "int4",
-                   "_type": "torch.dtype"
-                 },
-                 "weight_granularity": {
-                   "_data": {
-                     "group_size": 32
-                   },
-                   "_type": "PerGroup",
-                   "_version": 1
-                 },
-                 "weight_mapping_type": {
-                   "_data": "SYMMETRIC",
-                   "_type": "MappingType"
-                 },
-                 "weight_scale_dtype": {
-                   "_data": "bfloat16",
-                   "_type": "torch.dtype"
-                 }
-               },
-               "_type": "Int8DynamicActivationIntxWeightConfig",
-               "_version": 1
-             },
-             "model.embed_tokens": {
-               "_data": {
-                 "granularity": {
-                   "_data": {
-                     "axis": 0
-                   },
-                   "_type": "PerAxis",
-                   "_version": 1
-                 },
-                 "layout": {
-                   "_data": {},
-                   "_type": "QDQLayout",
-                   "_version": 1
-                 },
-                 "mapping_type": {
-                   "_data": "SYMMETRIC",
-                   "_type": "MappingType"
-                 },
-                 "scale_dtype": null,
-                 "weight_dtype": {
-                   "_data": "int8",
-                   "_type": "torch.dtype"
-                 }
-               },
-               "_type": "IntxWeightOnlyConfig",
-               "_version": 1
-             }
-           }
-         },
-         "_type": "ModuleFqnToConfig",
-         "_version": 1
-       }
-     },
-     "quant_type_kwargs": {
-       "include_embedding": true
-     },
-     "untie_embedding_weights": true
-   },
-   "rms_norm_eps": 1e-06,
-   "rope_scaling": null,
-   "rope_theta": 5000000.0,
-   "sliding_window": null,
-   "tie_word_embeddings": false,
-   "torch_dtype": "float32",
-   "transformers_version": "4.54.0.dev0",
-   "use_cache": false,
-   "use_sliding_window": false,
-   "vocab_size": 128256
- }
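
Side notes on the deleted file: the 36-entry `no_rope_layers` list is the pattern `[1, 1, 1, 0]` repeated nine times, i.e. every fourth layer runs without RoPE, consistent with `"no_rope_layer_interval": 4`. The `quantization_config` block is a serialized torchao `ModuleFqnToConfig`: the default for linear modules is int8 dynamic activations with int4 weights quantized symmetrically per group of 32, while `model.embed_tokens` gets int8 weight-only quantization with one scale per row. A minimal sketch of how a config like this could be built with torchao's Python API follows; class and parameter names mirror the `_type` fields serialized above, but the exact import paths are an assumption and not part of this commit:

```python
# Sketch: rebuilds the deleted quantization_config with torchao's Python API.
# Assumes a recent torchao release; import paths are an assumption.
import torch
from torchao.quantization import (
    Int8DynamicActivationIntxWeightConfig,
    IntxWeightOnlyConfig,
    MappingType,
    ModuleFqnToConfig,
    PerAxis,
    PerGroup,
)

# Default for linear layers: int8 asymmetric dynamic activations,
# int4 symmetric weights in groups of 32 with bfloat16 scales.
linear_config = Int8DynamicActivationIntxWeightConfig(
    weight_dtype=torch.int4,
    weight_granularity=PerGroup(32),
    weight_mapping_type=MappingType.SYMMETRIC,
    weight_scale_dtype=torch.bfloat16,
    act_mapping_type=MappingType.ASYMMETRIC,
)

# Embedding table: int8 symmetric weight-only, one scale per row (axis 0).
embedding_config = IntxWeightOnlyConfig(
    weight_dtype=torch.int8,
    granularity=PerAxis(0),
    mapping_type=MappingType.SYMMETRIC,
)

# Map module fully-qualified names to configs; "_default" covers everything
# not matched by a more specific entry.
quant_config = ModuleFqnToConfig({
    "_default": linear_config,
    "model.embed_tokens": embedding_config,
})
```

On the transformers side, a config like this would typically be wrapped in a `TorchAoConfig` and passed as `quantization_config` to `from_pretrained`; serializing the quantized model is what produces the JSON block (including flags such as `untie_embedding_weights` and `include_embedding`) seen in the deleted file.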