camenduru committed on Jan 10, 2025

Commit

d8452e4

verified ·

1 Parent(s): 280c61a

thanks to fudan-generative-ai ❤

Browse files

Files changed (34) hide show

.gitattributes +1 -0
README.md +259 -0
audio_separator/Kim_Vocal_2.onnx +3 -0
audio_separator/download_checks.json +231 -0
audio_separator/mdx_model_data.json +384 -0
audio_separator/vr_model_data.json +137 -0
cogvideox-5b-i2v-sat/transformer/1/mp_rank_00_model_states.pt +3 -0
cogvideox-5b-i2v-sat/transformer/latest +1 -0
cogvideox-5b-i2v-sat/vae/3d-vae.pt +3 -0
face_analysis/models/1k3d68.onnx +3 -0
face_analysis/models/2d106det.onnx +3 -0
face_analysis/models/buffalo_l.zip +3 -0
face_analysis/models/face_landmarker_v2_with_blendshapes.task +3 -0
face_analysis/models/genderage.onnx +3 -0
face_analysis/models/glintr100.onnx +3 -0
face_analysis/models/scrfd_10g_bnkps.onnx +3 -0
hallo3/latest +1 -0
t5-v1_1-xxl/added_tokens.json +102 -0
t5-v1_1-xxl/config.json +32 -0
t5-v1_1-xxl/model-00001-of-00002.safetensors +3 -0
t5-v1_1-xxl/model-00002-of-00002.safetensors +3 -0
t5-v1_1-xxl/model.safetensors.index.json +226 -0
t5-v1_1-xxl/special_tokens_map.json +125 -0
t5-v1_1-xxl/spiece.model +3 -0
t5-v1_1-xxl/tokenizer_config.json +940 -0
wav2vec/wav2vec2-base-960h/.gitattributes +18 -0
wav2vec/wav2vec2-base-960h/README.md +128 -0
wav2vec/wav2vec2-base-960h/config.json +77 -0
wav2vec/wav2vec2-base-960h/feature_extractor_config.json +8 -0
wav2vec/wav2vec2-base-960h/model.safetensors +3 -0
wav2vec/wav2vec2-base-960h/preprocessor_config.json +8 -0
wav2vec/wav2vec2-base-960h/special_tokens_map.json +1 -0
wav2vec/wav2vec2-base-960h/tokenizer_config.json +1 -0
wav2vec/wav2vec2-base-960h/vocab.json +1 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+face_analysis/models/face_landmarker_v2_with_blendshapes.task filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,259 @@

+---
+license: mit
+---
+<h1 align='center'>Hallo3: Highly Dynamic and Realistic Portrait Image Animation with Diffusion Transformer Networks</h1>
+<div align='center'>
+    <a href='https://github.com/cuijh26' target='_blank'>Jiahao Cui</a><sup>1</sup>&emsp;
+    <a href='https://github.com/crystallee-ai' target='_blank'>Hui Li</a><sup>1</sup>&emsp;
+    <a href='https://github.com/subazinga' target='_blank'>Yun Zhan</a><sup>1</sup>&emsp;
+    <a href='https://github.com/NinoNeumann' target='_blank'>Hanlin Shang</a><sup>1</sup>&emsp;
+    <a href='https://github.com/Kaihui-Cheng' target='_blank'>Kaihui Cheng</a><sup>1</sup>&emsp;
+    <a href='https://github.com/mayuqi7777' target='_blank'>Yuqi Ma</a><sup>1</sup>&emsp;
+    <a href='https://github.com/AricGamma' target='_blank'>Shan Mu</a><sup>1</sup>&emsp;
+</div>
+<div align='center'>
+    <a href='https://hangz-nju-cuhk.github.io/' target='_blank'>Hang Zhou</a><sup>2</sup>&emsp;
+    <a href='https://jingdongwang2017.github.io/' target='_blank'>Jingdong Wang</a><sup>2</sup>&emsp;
+    <a href='https://sites.google.com/site/zhusiyucs/home' target='_blank'>Siyu Zhu</a><sup>1✉️</sup>&emsp;
+</div>
+<div align='center'>
+    <sup>1</sup>Fudan University&emsp; <sup>2</sup>Baidu Inc&emsp;
+</div>
+<br>
+<div align='center'>
+    <a href='https://github.com/fudan-generative-vision/hallo3'><img src='https://img.shields.io/github/stars/fudan-generative-vision/hallo3?style=social'></a>
+</div>
+<br>
+## 📸 Showcase
+<table border="0" style="width: 100%; text-align: left; margin-top: 20px;">
+  <tr>
+      <td>
+          <video src="https://github.com/user-attachments/assets/3fc44086-bdbf-4a54-bfe3-62cfd9dfb191" width="100%" controls autoplay loop></video>
+      </td>
+      <td>
+          <video src="https://github.com/user-attachments/assets/ad5a87cf-b50e-48d6-af35-774e3b1713e7" width="100%" controls autoplay loop></video>
+      </td>
+       <td>
+          <video src="https://github.com/user-attachments/assets/78c7acc3-4fa2-447e-b77d-3462d411c81c" width="100%" controls autoplay loop></video>
+     </td>
+  </tr>
+  <tr>
+      <td>
+          <video src="https://github.com/user-attachments/assets/f62f2b6d-9846-40be-a976-56cc7d5a8a5b" width="100%" controls autoplay loop></video>
+      </td>
+       <td>
+          <video src="https://github.com/user-attachments/assets/42b6968e-c68a-4473-b773-406ccf5d90b1" width="100%" controls autoplay loop></video>
+     </td>
+      <td>
+          <video src="https://github.com/user-attachments/assets/015f1d6d-31a8-4454-b51a-5431d3c953c2" width="100%" controls autoplay loop></video>
+     </td>
+  </tr>
+</table>
+Visit our [project page](https://fudan-generative-vision.github.io/hallo3/#/) to view more cases.
+## ⚙️ Installation
+- System requirements: Ubuntu 20.04/Ubuntu 22.04, CUDA 12.1
+- Tested GPUs: H100
+Download the code:
+```bash
+  git clone https://github.com/fudan-generative-vision/hallo3
+  cd hallo3
+```
+Create conda environment:
+```bash
+  conda create -n hallo python=3.10
+  conda activate hallo
+```
+Install packages with `pip`:
+```bash
+  pip install -r requirements.txt
+```
+In addition, `ffmpeg` is required:
+```bash
+  apt-get install ffmpeg
+```
+### 📥 Download Pretrained Models
+You can easily get all pretrained models required for inference from our [HuggingFace repo](https://huggingface.co/fudan-generative-ai/hallo3).
+Use `huggingface-cli` to download the models:
+```shell
+cd $ProjectRootDir
+pip install -U "huggingface_hub[cli]"
+huggingface-cli download fudan-generative-ai/hallo3 --local-dir ./pretrained_models
+```
+Or you can download them separately from their source repo:
+- [hallo3](https://huggingface.co/fudan-generative-ai/hallo3/tree/main/hallo3): Our checkpoints.
+- [CogVideoX](https://github.com/THUDM/CogVideo): CogVideoX-5b-i2v pretrained model, consisting of the transformer and the 3D VAE.
+- [t5-v1_1-xxl](https://huggingface.co/google/t5-v1_1-xxl): text encoder, you can download from [text_encoder](https://huggingface.co/THUDM/CogVideoX-2b/tree/main/text_encoder) and [tokenizer](https://huggingface.co/THUDM/CogVideoX-2b/tree/main/tokenizer)
+- [audio_separator](https://huggingface.co/huangjackson/Kim_Vocal_2): Kim Vocal_2 MDX-Net vocal removal model.
+- [wav2vec](https://huggingface.co/facebook/wav2vec2-base-960h): wav audio to vector model from [Facebook](https://huggingface.co/facebook/wav2vec2-base-960h).
+- [insightface](https://github.com/deepinsight/insightface/tree/master/python-package#model-zoo): 2D and 3D Face Analysis placed into `pretrained_models/face_analysis/models/`. (_Thanks to deepinsight_)
+- [face landmarker](https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task): Face detection & mesh model from [mediapipe](https://ai.google.dev/edge/mediapipe/solutions/vision/face_landmarker#models) placed into `pretrained_models/face_analysis/models`.
+Finally, these pretrained models should be organized as follows:
+```text
+./pretrained_models/
+|-- audio_separator/
+|   |-- download_checks.json
+|   |-- mdx_model_data.json
+|   |-- vr_model_data.json
+|   `-- Kim_Vocal_2.onnx
+|-- cogvideox-5b-i2v-sat/
+|   |-- transformer/
+|   |   |-- 1/
+|   |   |   `-- mp_rank_00_model_states.pt
+|   |   `-- latest
+|   `-- vae/
+|       `-- 3d-vae.pt
+|-- face_analysis/
+|   `-- models/
+|       |-- face_landmarker_v2_with_blendshapes.task  # face landmarker model from mediapipe
+|       |-- 1k3d68.onnx
+|       |-- 2d106det.onnx
+|       |-- genderage.onnx
+|       |-- glintr100.onnx
+|       `-- scrfd_10g_bnkps.onnx
+|-- hallo3/
+|   |-- 1/
+|   |   `-- mp_rank_00_model_states.pt
+|   `-- latest
+|-- t5-v1_1-xxl/
+|   |-- added_tokens.json
+|   |-- config.json
+|   |-- model-00001-of-00002.safetensors
+|   |-- model-00002-of-00002.safetensors
+|   |-- model.safetensors.index.json
+|   |-- special_tokens_map.json
+|   |-- spiece.model
+|   `-- tokenizer_config.json
+|
+`-- wav2vec/
+    `-- wav2vec2-base-960h/
+        |-- config.json
+        |-- feature_extractor_config.json
+        |-- model.safetensors
+        |-- preprocessor_config.json
+        |-- special_tokens_map.json
+        |-- tokenizer_config.json
+        `-- vocab.json
+```
+### 🛠️ Prepare Inference Data
+Hallo3 has a few simple requirements for the input data of inference:
+1. The reference image must have a 1:1 or 3:2 aspect ratio.
+2. Driving audio must be in WAV format.
+3. Audio must be in English since our training datasets are only in this language.
+4. Ensure the vocals of audio are clear; background music is acceptable.
+### 🎮 Run Inference
+Simply run `scripts/inference_long_batch.sh`:
+```bash
+bash scripts/inference_long_batch.sh ./examples/inference/input.txt ./output
+```
+Animation results will be saved to `./output`. You can find more inference examples in the [examples folder](https://github.com/fudan-generative-vision/hallo3/tree/main/examples).
+## Training
+#### Prepare Data for Training
+Organize your raw videos into the following directory structure:
+```text
+dataset_name/
+|-- videos/
+|   |-- 0001.mp4
+|   |-- 0002.mp4
+|   `-- 0003.mp4
+|-- caption/
+|   |-- 0001.txt
+|   |-- 0002.txt
+|   `-- 0003.txt
+```
+You can use any dataset_name, but ensure the videos directory and caption directory are named as shown above.
+Next, process the videos with the following commands:
+```bash
+bash scripts/data_preprocess.sh {dataset_name} {parallelism} {rank} {output_name}
+```
+#### Training
+Update the data meta path settings in the configuration YAML files, `configs/sft_s1.yaml` and `configs/sft_s2.yaml`:
+```yaml
+#sft_s1.yaml
+train_data: [
+    "./data/output_name.json"
+]
+#sft_s2.yaml
+train_data: [
+    "./data/output_name.json"
+]
+```
+Start training with the following command:
+```bash
+# stage1
+bash scripts/finetune_multi_gpus_s1.sh
+# stage2
+bash scripts/finetune_multi_gpus_s2.sh
+```
+## 📝 Citation
+If you find our work useful for your research, please consider citing the paper:
+```
+@misc{cui2024hallo3,
+	title={Hallo3: Highly Dynamic and Realistic Portrait Image Animation with Diffusion Transformer Networks},
+	author={Jiahao Cui and Hui Li and Yun Zhan and Hanlin Shang and Kaihui Cheng and Yuqi Ma and Shan Mu and Hang Zhou and Jingdong Wang and Siyu Zhu},
+	year={2024},
+	eprint={2412.00733},
+	archivePrefix={arXiv},
+	primaryClass={cs.CV}
+}
+```
+## ⚠️ Social Risks and Mitigations
+The development of portrait image animation technologies driven by audio inputs poses social risks, such as the ethical implications of creating realistic portraits that could be misused for deepfakes. To mitigate these risks, it is crucial to establish ethical guidelines and responsible use practices. Privacy and consent concerns also arise from using individuals' images and voices. Addressing these involves transparent data usage policies, informed consent, and safeguarding privacy rights. By addressing these risks and implementing mitigations, the research aims to ensure the responsible and ethical development of this technology.
+## 🤗 Acknowledgements
+This model is a fine-tuned derivative version based on the **CogVideoX-5B I2V** model. CogVideoX-5B is an open-source video generation model developed by the CogVideoX team. Its original code and model parameters are governed by the [CogVideoX-5B LICENSE](https://huggingface.co/THUDM/CogVideoX-5b/blob/main/LICENSE).
+As a derivative work of CogVideoX-5B, the use, distribution, and modification of this model must comply with the license terms of CogVideoX-5B.
+## 👏 Community Contributors
+Thank you to all the contributors who have helped to make this project better!
+<a href="https://github.com/fudan-generative-vision/hallo3/graphs/contributors">
+  <img src="https://contrib.rocks/image?repo=fudan-generative-vision/hallo3" />
+</a>

audio_separator/Kim_Vocal_2.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ce74ef3b6a6024ce44211a07be9cf8bc6d87728cc852a68ab34eb8e58cde9c8b
+size 66759214

audio_separator/download_checks.json ADDED Viewed

	@@ -0,0 +1,231 @@

+{
+    "current_version": "UVR_Patch_10_6_23_4_27",
+    "current_version_ocl": "UVR_Patch_10_6_23_4_27",
+    "current_version_mac": "UVR_Patch_10_6_23_4_27",
+    "current_version_linux": "UVR_Patch_10_6_23_4_27",
+    "vr_download_list": {
+                        "VR Arch Single Model v5: 1_HP-UVR": "1_HP-UVR.pth",
+                        "VR Arch Single Model v5: 2_HP-UVR": "2_HP-UVR.pth",
+                        "VR Arch Single Model v5: 3_HP-Vocal-UVR": "3_HP-Vocal-UVR.pth",
+                        "VR Arch Single Model v5: 4_HP-Vocal-UVR": "4_HP-Vocal-UVR.pth",
+                        "VR Arch Single Model v5: 5_HP-Karaoke-UVR": "5_HP-Karaoke-UVR.pth",
+                        "VR Arch Single Model v5: 6_HP-Karaoke-UVR": "6_HP-Karaoke-UVR.pth",
+                        "VR Arch Single Model v5: 7_HP2-UVR": "7_HP2-UVR.pth",
+                        "VR Arch Single Model v5: 8_HP2-UVR": "8_HP2-UVR.pth",
+                        "VR Arch Single Model v5: 9_HP2-UVR": "9_HP2-UVR.pth",
+                        "VR Arch Single Model v5: 10_SP-UVR-2B-32000-1": "10_SP-UVR-2B-32000-1.pth",
+                        "VR Arch Single Model v5: 11_SP-UVR-2B-32000-2": "11_SP-UVR-2B-32000-2.pth",
+                        "VR Arch Single Model v5: 12_SP-UVR-3B-44100": "12_SP-UVR-3B-44100.pth",
+                        "VR Arch Single Model v5: 13_SP-UVR-4B-44100-1": "13_SP-UVR-4B-44100-1.pth",
+                        "VR Arch Single Model v5: 14_SP-UVR-4B-44100-2": "14_SP-UVR-4B-44100-2.pth",
+                        "VR Arch Single Model v5: 15_SP-UVR-MID-44100-1": "15_SP-UVR-MID-44100-1.pth",
+                        "VR Arch Single Model v5: 16_SP-UVR-MID-44100-2": "16_SP-UVR-MID-44100-2.pth",
+                        "VR Arch Single Model v5: 17_HP-Wind_Inst-UVR": "17_HP-Wind_Inst-UVR.pth",
+                        "VR Arch Single Model v5: UVR-De-Echo-Aggressive by FoxJoy": "UVR-De-Echo-Aggressive.pth",
+                        "VR Arch Single Model v5: UVR-De-Echo-Normal by FoxJoy": "UVR-De-Echo-Normal.pth",
+                        "VR Arch Single Model v5: UVR-DeEcho-DeReverb by FoxJoy": "UVR-DeEcho-DeReverb.pth",
+                        "VR Arch Single Model v5: UVR-DeNoise-Lite by FoxJoy": "UVR-DeNoise-Lite.pth",
+                        "VR Arch Single Model v5: UVR-DeNoise by FoxJoy": "UVR-DeNoise.pth",
+                        "VR Arch Single Model v5: UVR-BVE-4B_SN-44100-1": "UVR-BVE-4B_SN-44100-1.pth",
+                        "VR Arch Single Model v4: MGM_HIGHEND_v4": "MGM_HIGHEND_v4.pth",
+                        "VR Arch Single Model v4: MGM_LOWEND_A_v4": "MGM_LOWEND_A_v4.pth",
+                        "VR Arch Single Model v4: MGM_LOWEND_B_v4": "MGM_LOWEND_B_v4.pth",
+                        "VR Arch Single Model v4: MGM_MAIN_v4": "MGM_MAIN_v4.pth"
+                },
+    "mdx_download_list": {
+                        "MDX-Net Model: UVR-MDX-NET Inst HQ 1": "UVR-MDX-NET-Inst_HQ_1.onnx",
+                        "MDX-Net Model: UVR-MDX-NET Inst HQ 2": "UVR-MDX-NET-Inst_HQ_2.onnx",
+                        "MDX-Net Model: UVR-MDX-NET Inst HQ 3": "UVR-MDX-NET-Inst_HQ_3.onnx",
+                        "MDX-Net Model: UVR-MDX-NET Inst HQ 4": "UVR-MDX-NET-Inst_HQ_4.onnx",
+                        "MDX-Net Model: UVR-MDX-NET Main": "UVR_MDXNET_Main.onnx",
+                        "MDX-Net Model: UVR-MDX-NET Inst Main": "UVR-MDX-NET-Inst_Main.onnx",
+                        "MDX-Net Model: UVR-MDX-NET 1": "UVR_MDXNET_1_9703.onnx",
+                        "MDX-Net Model: UVR-MDX-NET 2": "UVR_MDXNET_2_9682.onnx",
+                        "MDX-Net Model: UVR-MDX-NET 3": "UVR_MDXNET_3_9662.onnx",
+                        "MDX-Net Model: UVR-MDX-NET Inst 1": "UVR-MDX-NET-Inst_1.onnx",
+                        "MDX-Net Model: UVR-MDX-NET Inst 2": "UVR-MDX-NET-Inst_2.onnx",
+                        "MDX-Net Model: UVR-MDX-NET Inst 3": "UVR-MDX-NET-Inst_3.onnx",
+                        "MDX-Net Model: UVR-MDX-NET Karaoke": "UVR_MDXNET_KARA.onnx",
+                        "MDX-Net Model: UVR-MDX-NET Karaoke 2": "UVR_MDXNET_KARA_2.onnx",
+                        "MDX-Net Model: UVR_MDXNET_9482": "UVR_MDXNET_9482.onnx",
+                        "MDX-Net Model: UVR-MDX-NET Voc FT": "UVR-MDX-NET-Voc_FT.onnx",
+                        "MDX-Net Model: Kim Vocal 1": "Kim_Vocal_1.onnx",
+                        "MDX-Net Model: Kim Vocal 2": "Kim_Vocal_2.onnx",
+                        "MDX-Net Model: Kim Inst": "Kim_Inst.onnx",
+                        "MDX-Net Model: Reverb HQ By FoxJoy": "Reverb_HQ_By_FoxJoy.onnx",
+                        "MDX-Net Model: UVR-MDX-NET Crowd HQ 1 By Aufr33": "UVR-MDX-NET_Crowd_HQ_1.onnx",
+                        "MDX-Net Model: kuielab_a_vocals": "kuielab_a_vocals.onnx",
+                        "MDX-Net Model: kuielab_a_other": "kuielab_a_other.onnx",
+                        "MDX-Net Model: kuielab_a_bass": "kuielab_a_bass.onnx",
+                        "MDX-Net Model: kuielab_a_drums": "kuielab_a_drums.onnx",
+                        "MDX-Net Model: kuielab_b_vocals": "kuielab_b_vocals.onnx",
+                        "MDX-Net Model: kuielab_b_other": "kuielab_b_other.onnx",
+                        "MDX-Net Model: kuielab_b_bass": "kuielab_b_bass.onnx",
+                        "MDX-Net Model: kuielab_b_drums": "kuielab_b_drums.onnx"
+                        },
+    "demucs_download_list":{
+                "Demucs v4: htdemucs_ft":{
+                                "f7e0c4bc-ba3fe64a.th":"https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/f7e0c4bc-ba3fe64a.th",
+                                "d12395a8-e57c48e6.th":"https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/d12395a8-e57c48e6.th",
+                                "92cfc3b6-ef3bcb9c.th":"https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/92cfc3b6-ef3bcb9c.th",
+                                "04573f0d-f3cf25b2.th":"https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/04573f0d-f3cf25b2.th",
+                                "htdemucs_ft.yaml": "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/htdemucs_ft.yaml"
+                                },
+                "Demucs v4: htdemucs":{
+                                "955717e8-8726e21a.th": "https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/955717e8-8726e21a.th",
+                                "htdemucs.yaml": "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/htdemucs.yaml"
+                                },
+                "Demucs v4: hdemucs_mmi":{
+                                "75fc33f5-1941ce65.th": "https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/75fc33f5-1941ce65.th",
+                                "hdemucs_mmi.yaml": "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/hdemucs_mmi.yaml"
+                                },
+                "Demucs v4: htdemucs_6s":{
+                                "5c90dfd2-34c22ccb.th": "https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/5c90dfd2-34c22ccb.th",
+                                "htdemucs_6s.yaml": "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/htdemucs_6s.yaml"
+                                },
+                "Demucs v3: mdx":{
+                                "0d19c1c6-0f06f20e.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/0d19c1c6-0f06f20e.th",
+                                "7ecf8ec1-70f50cc9.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/7ecf8ec1-70f50cc9.th",
+                                "c511e2ab-fe698775.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/c511e2ab-fe698775.th",
+                                "7d865c68-3d5dd56b.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/7d865c68-3d5dd56b.th",
+                                "mdx.yaml": "https://raw.githubusercontent.com/facebookresearch/demucs/main/demucs/remote/mdx.yaml"
+                                },
+                "Demucs v3: mdx_q":{
+                                "6b9c2ca1-3fd82607.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/6b9c2ca1-3fd82607.th",
+                                "b72baf4e-8778635e.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/b72baf4e-8778635e.th",
+                                "42e558d4-196e0e1b.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/42e558d4-196e0e1b.th",
+                                "305bc58f-18378783.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/305bc58f-18378783.th",
+                                "mdx_q.yaml": "https://raw.githubusercontent.com/facebookresearch/demucs/main/demucs/remote/mdx_q.yaml"
+                                },
+                "Demucs v3: mdx_extra":{
+                                "e51eebcc-c1b80bdd.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/e51eebcc-c1b80bdd.th",
+                                "a1d90b5c-ae9d2452.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/a1d90b5c-ae9d2452.th",
+                                "5d2d6c55-db83574e.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/5d2d6c55-db83574e.th",
+                                "cfa93e08-61801ae1.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/cfa93e08-61801ae1.th",
+                                "mdx_extra.yaml": "https://raw.githubusercontent.com/facebookresearch/demucs/main/demucs/remote/mdx_extra.yaml"
+                                },
+                "Demucs v3: mdx_extra_q": {
+                                "83fc094f-4a16d450.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/83fc094f-4a16d450.th",
+                                "464b36d7-e5a9386e.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/464b36d7-e5a9386e.th",
+                                "14fc6a69-a89dd0ee.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/14fc6a69-a89dd0ee.th",
+                                "7fd6ef75-a905dd85.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/7fd6ef75-a905dd85.th",
+                                "mdx_extra_q.yaml": "https://raw.githubusercontent.com/facebookresearch/demucs/main/demucs/remote/mdx_extra_q.yaml"
+                                },
+                "Demucs v3: UVR Model":{
+                                "ebf34a2db.th": "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/ebf34a2db.th",
+                                "UVR_Demucs_Model_1.yaml": "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/UVR_Demucs_Model_1.yaml"
+                                },
+                "Demucs v3: repro_mdx_a":{
+                                "9a6b4851-03af0aa6.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/9a6b4851-03af0aa6.th",
+                                "1ef250f1-592467ce.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/1ef250f1-592467ce.th",
+                                "fa0cb7f9-100d8bf4.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/fa0cb7f9-100d8bf4.th",
+                                "902315c2-b39ce9c9.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/902315c2-b39ce9c9.th",
+                                "repro_mdx_a.yaml": "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/repro_mdx_a.yaml"
+                                },
+                "Demucs v3: repro_mdx_a_time_only":{
+                                "9a6b4851-03af0aa6.th":"https://dl.fbaipublicfiles.com/demucs/mdx_final/9a6b4851-03af0aa6.th",
+                                "1ef250f1-592467ce.th":"https://dl.fbaipublicfiles.com/demucs/mdx_final/1ef250f1-592467ce.th",
+                                "repro_mdx_a_time_only.yaml": "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/repro_mdx_a_time_only.yaml"
+                                },
+                "Demucs v3: repro_mdx_a_hybrid_only":{
+                                "fa0cb7f9-100d8bf4.th":"https://dl.fbaipublicfiles.com/demucs/mdx_final/fa0cb7f9-100d8bf4.th",
+                                "902315c2-b39ce9c9.th":"https://dl.fbaipublicfiles.com/demucs/mdx_final/902315c2-b39ce9c9.th",
+                                "repro_mdx_a_hybrid_only.yaml": "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/repro_mdx_a_hybrid_only.yaml"
+                                },
+                "Demucs v2: demucs": {
+                                "demucs-e07c671f.th": "https://dl.fbaipublicfiles.com/demucs/v3.0/demucs-e07c671f.th"
+                                },
+                "Demucs v2: demucs_extra": {
+                                "demucs_extra-3646af93.th":"https://dl.fbaipublicfiles.com/demucs/v3.0/demucs_extra-3646af93.th"
+                                },
+                "Demucs v2: demucs48_hq": {
+                                "demucs48_hq-28a1282c.th":"https://dl.fbaipublicfiles.com/demucs/v3.0/demucs48_hq-28a1282c.th"
+                                },
+                "Demucs v2: tasnet": {
+                                "tasnet-beb46fac.th":"https://dl.fbaipublicfiles.com/demucs/v3.0/tasnet-beb46fac.th"
+                                },
+                "Demucs v2: tasnet_extra": {
+                                "tasnet_extra-df3777b2.th":"https://dl.fbaipublicfiles.com/demucs/v3.0/tasnet_extra-df3777b2.th"
+                                },
+                "Demucs v2: demucs_unittest": {
+                                "demucs_unittest-09ebc15f.th":"https://dl.fbaipublicfiles.com/demucs/v3.0/demucs_unittest-09ebc15f.th"
+                                },
+                "Demucs v1: demucs": {
+                                "demucs.th":"https://dl.fbaipublicfiles.com/demucs/v2.0/demucs.th"
+                                },
+                "Demucs v1: demucs_extra": {
+                                "demucs_extra.th":"https://dl.fbaipublicfiles.com/demucs/v2.0/demucs_extra.th"
+                                },
+                "Demucs v1: light": {
+                                "light.th":"https://dl.fbaipublicfiles.com/demucs/v2.0/light.th"
+                                },
+                "Demucs v1: light_extra": {
+                                "light_extra.th":"https://dl.fbaipublicfiles.com/demucs/v2.0/light_extra.th"
+                                },
+                "Demucs v1: tasnet": {
+                                "tasnet.th":"https://dl.fbaipublicfiles.com/demucs/v2.0/tasnet.th"
+                                },
+                "Demucs v1: tasnet_extra": {
+                                "tasnet_extra.th":"https://dl.fbaipublicfiles.com/demucs/v2.0/tasnet_extra.th"
+                                }
+                },
+    "mdx_download_vip_list": {
+                "MDX-Net Model VIP: UVR-MDX-NET_Main_340": "UVR-MDX-NET_Main_340.onnx",
+                "MDX-Net Model VIP: UVR-MDX-NET_Main_390": "UVR-MDX-NET_Main_390.onnx",
+                "MDX-Net Model VIP: UVR-MDX-NET_Main_406": "UVR-MDX-NET_Main_406.onnx",
+                "MDX-Net Model VIP: UVR-MDX-NET_Main_427": "UVR-MDX-NET_Main_427.onnx",
+                "MDX-Net Model VIP: UVR-MDX-NET_Main_438": "UVR-MDX-NET_Main_438.onnx",
+                "MDX-Net Model VIP: UVR-MDX-NET_Inst_82_beta": "UVR-MDX-NET_Inst_82_beta.onnx",
+                "MDX-Net Model VIP: UVR-MDX-NET_Inst_90_beta": "UVR-MDX-NET_Inst_90_beta.onnx",
+                "MDX-Net Model VIP: UVR-MDX-NET_Inst_187_beta": "UVR-MDX-NET_Inst_187_beta.onnx",
+                "MDX-Net Model VIP: UVR-MDX-NET-Inst_full_292": "UVR-MDX-NET-Inst_full_292.onnx"
+                },
+    "mdx23_download_list": {
+                "MDX23C Model: MDX23C_D1581": {"MDX23C_D1581.ckpt":"model_2_stem_061321.yaml"}
+                },
+    "mdx23c_download_list": {
+                "MDX23C Model: MDX23C-InstVoc HQ": {"MDX23C-8KFFT-InstVoc_HQ.ckpt":"model_2_stem_full_band_8k.yaml"}
+                },
+    "roformer_download_list": {
+                "Roformer Model: BS-Roformer-Viperx-1297": {"model_bs_roformer_ep_317_sdr_12.9755.ckpt":"model_bs_roformer_ep_317_sdr_12.9755.yaml"},
+                "Roformer Model: BS-Roformer-Viperx-1296": {"model_bs_roformer_ep_368_sdr_12.9628.ckpt":"model_bs_roformer_ep_368_sdr_12.9628.yaml"},
+                "Roformer Model: BS-Roformer-Viperx-1053": {"model_bs_roformer_ep_937_sdr_10.5309.ckpt":"model_bs_roformer_ep_937_sdr_10.5309.yaml"},
+                "Roformer Model: Mel-Roformer-Viperx-1143": {"model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt":"model_mel_band_roformer_ep_3005_sdr_11.4360.yaml"}
+                },
+    "mdx23c_download_vip_list": {
+            "MDX23C Model VIP: MDX23C_D1581": {"MDX23C_D1581.ckpt":"model_2_stem_061321.yaml"},
+            "MDX23C Model VIP: MDX23C-InstVoc HQ 2": {"MDX23C-8KFFT-InstVoc_HQ_2.ckpt":"model_2_stem_full_band_8k.yaml"}
+            },
+    "vr_download_vip_list": [],
+    "demucs_download_vip_list": []
+}

audio_separator/mdx_model_data.json ADDED Viewed

	@@ -0,0 +1,384 @@

+{
+    "0ddfc0eb5792638ad5dc27850236c246": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Vocals"
+    },
+    "26d308f91f3423a67dc69a6d12a8793d": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 9,
+        "mdx_n_fft_scale_set": 8192,
+        "primary_stem": "Other"
+    },
+    "2cdd429caac38f0194b133884160f2c6": {
+        "compensate": 1.045,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "2f5501189a2f6db6349916fabe8c90de": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Vocals",
+        "is_karaoke": true
+    },
+    "398580b6d5d973af3120df54cee6759d": {
+        "compensate": 1.75,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "488b3e6f8bd3717d9d7c428476be2d75": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "4910e7827f335048bdac11fa967772f9": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 7,
+        "mdx_n_fft_scale_set": 4096,
+        "primary_stem": "Drums"
+    },
+    "53c4baf4d12c3e6c3831bb8f5b532b93": {
+        "compensate": 1.043,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "5d343409ef0df48c7d78cce9f0106781": {
+        "compensate": 1.075,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "5f6483271e1efb9bfb59e4a3e6d4d098": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 9,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Vocals"
+    },
+    "65ab5919372a128e4167f5e01a8fda85": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 8192,
+        "primary_stem": "Other"
+    },
+    "6703e39f36f18aa7855ee1047765621d": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 9,
+        "mdx_n_fft_scale_set": 16384,
+        "primary_stem": "Bass"
+    },
+    "6b31de20e84392859a3d09d43f089515": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Vocals"
+    },
+    "867595e9de46f6ab699008295df62798": {
+        "compensate": 1.03,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "a3cd63058945e777505c01d2507daf37": {
+        "compensate": 1.03,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Vocals"
+    },
+    "b33d9b3950b6cbf5fe90a32608924700": {
+        "compensate": 1.03,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "c3b29bdce8c4fa17ec609e16220330ab": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 16384,
+        "primary_stem": "Bass"
+    },
+    "ceed671467c1f64ebdfac8a2490d0d52": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "d2a1376f310e4f7fa37fb9b5774eb701": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "d7bff498db9324db933d913388cba6be": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Vocals"
+    },
+    "d94058f8c7f1fae4164868ae8ae66b20": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Vocals"
+    },
+    "dc41ede5961d50f277eb846db17f5319": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 9,
+        "mdx_n_fft_scale_set": 4096,
+        "primary_stem": "Drums"
+    },
+    "e5572e58abf111f80d8241d2e44e7fa4": {
+        "compensate": 1.028,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "e7324c873b1f615c35c1967f912db92a": {
+        "compensate": 1.03,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "1c56ec0224f1d559c42fd6fd2a67b154": {
+        "compensate": 1.025,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 5120,
+        "primary_stem": "Instrumental"
+    },
+    "f2df6d6863d8f435436d8b561594ff49": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "b06327a00d5e5fbc7d96e1781bbdb596": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental"
+    },
+    "94ff780b977d3ca07c7a343dab2e25dd": {
+        "compensate": 1.039,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental"
+    },
+    "73492b58195c3b52d34590d5474452f6": {
+        "compensate": 1.043,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "970b3f9492014d18fefeedfe4773cb42": {
+        "compensate": 1.009,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "1d64a6d2c30f709b8c9b4ce1366d96ee": {
+        "compensate": 1.065,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 5120,
+        "primary_stem": "Instrumental",
+        "is_karaoke": true
+    },
+    "203f2a3955221b64df85a41af87cf8f0": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental"
+    },
+    "291c2049608edb52648b96e27eb80e95": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental"
+    },
+    "ead8d05dab12ec571d67549b3aab03fc": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental"
+    },
+    "cc63408db3d80b4d85b0287d1d7c9632": {
+        "compensate": 1.033,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental"
+    },
+    "cd5b2989ad863f116c855db1dfe24e39": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 9,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Reverb"
+    },
+    "55657dd70583b0fedfba5f67df11d711": {
+        "compensate": 1.022,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental"
+    },
+    "b6bccda408a436db8500083ef3491e8b": {
+        "compensate": 1.02,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "8a88db95c7fb5dbe6a095ff2ffb428b1": {
+        "compensate": 1.026,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 5120,
+        "primary_stem": "Instrumental"
+    },
+    "b78da4afc6512f98e4756f5977f5c6b9": {
+        "compensate": 1.021,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "77d07b2667ddf05b9e3175941b4454a0": {
+        "compensate": 1.021,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "0f2a6bc5b49d87d64728ee40e23bceb1": {
+        "compensate": 1.019,
+        "mdx_dim_f_set": 2560,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 5120,
+        "primary_stem": "Instrumental"
+    },
+    "b02be2d198d4968a121030cf8950b492": {
+        "compensate": 1.020,
+        "mdx_dim_f_set": 2560,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 5120,
+        "primary_stem": "No Crowd"
+    },
+    "2154254ee89b2945b97a7efed6e88820": {
+        "config_yaml": "model_2_stem_061321.yaml"
+    },
+    "063aadd735d58150722926dcbf5852a9": {
+        "config_yaml": "model_2_stem_061321.yaml"
+    },
+    "c09f714d978b41d718facfe3427e6001": {
+        "config_yaml": "model_2_stem_061321.yaml"
+    },
+    "fe96801369f6a148df2720f5ced88c19": {
+        "config_yaml": "model3.yaml"
+    },
+    "02e8b226f85fb566e5db894b9931c640": {
+        "config_yaml": "model2.yaml"
+    },
+    "e3de6d861635ab9c1d766149edd680d6": {
+        "config_yaml": "model1.yaml"
+    },
+    "3f2936c554ab73ce2e396d54636bd373": {
+        "config_yaml": "modelB.yaml"
+    },
+    "890d0f6f82d7574bca741a9e8bcb8168": {
+        "config_yaml": "modelB.yaml"
+    },
+    "63a3cb8c37c474681049be4ad1ba8815": {
+        "config_yaml": "modelB.yaml"
+    },
+    "a7fc5d719743c7fd6b61bd2b4d48b9f0": {
+        "config_yaml": "modelA.yaml"
+    },
+    "3567f3dee6e77bf366fcb1c7b8bc3745": {
+        "config_yaml": "modelA.yaml"
+    },
+    "a28f4d717bd0d34cd2ff7a3b0a3d065e": {
+        "config_yaml": "modelA.yaml"
+    },
+    "c9971a18da20911822593dc81caa8be9": {
+        "config_yaml": "sndfx.yaml"
+    },
+    "57d94d5ed705460d21c75a5ac829a605": {
+        "config_yaml": "sndfx.yaml"
+    },
+    "e7a25f8764f25a52c1b96c4946e66ba2": {
+        "config_yaml": "sndfx.yaml"
+    },
+    "104081d24e37217086ce5fde09147ee1": {
+        "config_yaml": "model_2_stem_061321.yaml"
+    },
+    "1e6165b601539f38d0a9330f3facffeb": {
+        "config_yaml": "model_2_stem_061321.yaml"
+    },
+    "fe0108464ce0d8271be5ab810891bd7c": {
+        "config_yaml": "model_2_stem_full_band.yaml"
+    },
+    "e9b82ec90ee56c507a3a982f1555714c": {
+        "config_yaml": "model_2_stem_full_band_2.yaml"
+    },
+    "99b6ceaae542265a3b6d657bf9fde79f": {
+        "config_yaml": "model_2_stem_full_band_8k.yaml"
+    },
+    "116f6f9dabb907b53d847ed9f7a9475f": {
+        "config_yaml": "model_2_stem_full_band_8k.yaml"
+    },
+    "53f707017bfcbb56f5e1bfac420d6732": {
+        "config_yaml": "model_bs_roformer_ep_317_sdr_12.9755.yaml",
+        "is_roformer": true
+    },
+    "63e41acc264bf681a73aa9f7e5f606cc": {
+        "config_yaml": "model_mel_band_roformer_ep_3005_sdr_11.4360.yaml",
+        "is_roformer": true
+    },
+    "e733736763234047587931fc35322fd9": {
+        "config_yaml": "model_bs_roformer_ep_937_sdr_10.5309.yaml",
+        "is_roformer": true
+    },
+    "d789065adfd747d6f585b27b495bcdae": {
+        "config_yaml": "model_bs_roformer_ep_368_sdr_12.9628.yaml",
+        "is_roformer": true
+    }
+}

audio_separator/vr_model_data.json ADDED Viewed

	@@ -0,0 +1,137 @@

+{
+    "0d0e6d143046b0eecc41a22e60224582": {
+        "vr_model_param": "3band_44100_mid",
+        "primary_stem": "Instrumental"
+    },
+    "18b52f873021a0af556fb4ecd552bb8e": {
+        "vr_model_param": "2band_32000",
+        "primary_stem": "Instrumental"
+    },
+    "1fc66027c82b499c7d8f55f79e64cadc": {
+        "vr_model_param": "2band_32000",
+        "primary_stem": "Instrumental"
+    },
+    "2aa34fbc01f8e6d2bf509726481e7142": {
+        "vr_model_param": "4band_44100",
+        "primary_stem": "No Piano"
+    },
+    "3e18f639b11abea7361db1a4a91c2559": {
+        "vr_model_param": "4band_44100",
+        "primary_stem": "Instrumental"
+    },
+    "570b5f50054609a17741369a35007ddd": {
+        "vr_model_param": "4band_v3",
+        "primary_stem": "Instrumental"
+    },
+    "5a6e24c1b530f2dab045a522ef89b751": {
+        "vr_model_param": "1band_sr44100_hl512",
+        "primary_stem": "Instrumental"
+    },
+    "6b5916069a49be3fe29d4397ecfd73fa": {
+        "vr_model_param": "3band_44100_msb2",
+        "primary_stem": "Instrumental",
+        "is_karaoke": true
+    },
+    "74b3bc5fa2b69f29baf7839b858bc679": {
+        "vr_model_param": "4band_44100",
+        "primary_stem": "Instrumental"
+    },
+    "827213b316df36b52a1f3d04fec89369": {
+        "vr_model_param": "4band_44100",
+        "primary_stem": "Instrumental"
+    },
+    "911d4048eee7223eca4ee0efb7d29256": {
+        "vr_model_param": "4band_44100",
+        "primary_stem": "Vocals"
+    },
+    "941f3f7f0b0341f12087aacdfef644b1": {
+        "vr_model_param": "4band_v2",
+        "primary_stem": "Instrumental"
+    },
+    "a02827cf69d75781a35c0e8a327f3195": {
+        "vr_model_param": "1band_sr33075_hl384",
+        "primary_stem": "Instrumental"
+    },
+    "b165fbff113c959dba5303b74c6484bc": {
+        "vr_model_param": "3band_44100",
+        "primary_stem": "Instrumental"
+    },
+    "b5f988cd3e891dca7253bf5f0f3427c7": {
+        "vr_model_param": "4band_44100",
+        "primary_stem": "Instrumental"
+    },
+    "b99c35723bc35cb11ed14a4780006a80": {
+        "vr_model_param": "1band_sr44100_hl1024",
+        "primary_stem": "Instrumental"
+    },
+    "ba02fd25b71d620eebbdb49e18e4c336": {
+        "vr_model_param": "3band_44100_mid",
+        "primary_stem": "Instrumental"
+    },
+    "c4476ef424d8cba65f38d8d04e8514e2": {
+        "vr_model_param": "3band_44100_msb2",
+        "primary_stem": "Instrumental"
+    },
+    "da2d37b8be2972e550a409bae08335aa": {
+        "vr_model_param": "4band_44100",
+        "primary_stem": "Vocals"
+    },
+    "db57205d3133e39df8e050b435a78c80": {
+        "vr_model_param": "4band_44100",
+        "primary_stem": "Instrumental"
+    },
+    "ea83b08e32ec2303456fe50659035f69": {
+        "vr_model_param": "4band_v3",
+        "primary_stem": "Instrumental"
+    },
+    "f6ea8473ff86017b5ebd586ccacf156b": {
+        "vr_model_param": "4band_v2_sn",
+        "primary_stem": "Instrumental",
+        "is_karaoke": true
+    },
+    "fd297a61eafc9d829033f8b987c39a3d": {
+        "vr_model_param": "1band_sr32000_hl512",
+        "primary_stem": "Instrumental"
+    },
+    "0ec76fd9e65f81d8b4fbd13af4826ed8": {
+        "vr_model_param": "4band_v3",
+        "primary_stem": "No Woodwinds"
+    },
+    "0fb9249ffe4ffc38d7b16243f394c0ff": {
+        "vr_model_param": "4band_v3",
+        "primary_stem": "No Reverb"
+    },
+    "6857b2972e1754913aad0c9a1678c753": {
+        "vr_model_param": "4band_v3",
+        "primary_stem": "No Echo",
+        "nout": 48,
+        "nout_lstm": 128
+    },
+    "f200a145434efc7dcf0cd093f517ed52": {
+        "vr_model_param": "4band_v3",
+        "primary_stem": "No Echo",
+        "nout": 48,
+        "nout_lstm": 128
+    },
+    "44c55d8b5d2e3edea98c2b2bf93071c7": {
+        "vr_model_param": "4band_v3",
+        "primary_stem": "Noise",
+        "nout": 48,
+        "nout_lstm": 128
+    },
+    "51ea8c43a6928ed3c10ef5cb2707d57b": {
+        "vr_model_param": "1band_sr44100_hl1024",
+        "primary_stem": "Noise",
+        "nout": 16,
+        "nout_lstm": 128
+    },
+    "944950a9c5963a5eb70b445d67b7068a": {
+        "vr_model_param": "4band_v3_sn",
+        "primary_stem": "Vocals",
+        "nout": 64,
+        "nout_lstm": 128,
+        "is_karaoke": false,
+        "is_bv_model": true,
+        "is_bv_model_rebalanced": 0.9
+    }
+}

cogvideox-5b-i2v-sat/transformer/1/mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eea368b16ea2a39a97bde4778b426711da7f971e852d737ea4da13ba57d7858f
+size 11518625392

cogvideox-5b-i2v-sat/transformer/latest ADDED Viewed

	@@ -0,0 +1 @@


1	+ 1

cogvideox-5b-i2v-sat/vae/3d-vae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cdf2683a98192fc35ebb1f86ff0bfd620eb0f8905efa4e8eb818af759e2bc418
+size 1176149148

face_analysis/models/1k3d68.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:df5c06b8a0c12e422b2ed8947b8869faa4105387f199c477af038aa01f9a45cc
+size 143607619

face_analysis/models/2d106det.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f001b856447c413801ef5c42091ed0cd516fcd21f2d6b79635b1e733a7109dbf
+size 5030888

face_analysis/models/buffalo_l.zip ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b320dd731435fb5864ba5aa1ed493ec3f9e471ef84f12e46f25507a79ccad709
+size 1546240

face_analysis/models/face_landmarker_v2_with_blendshapes.task ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:64184e229b263107bc2b804c6625db1341ff2bb731874b0bcc2fe6544e0bc9ff
+size 3758596

face_analysis/models/genderage.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4fde69b1c810857b88c64a335084f1c3fe8f01246c9a191b48c7bb756d6652fb
+size 1322532

face_analysis/models/glintr100.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4ab1d6435d639628a6f3e5008dd4f929edf4c4124b1a7169e1048f9fef534cdf
+size 260665334

face_analysis/models/scrfd_10g_bnkps.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5838f7fe053675b1c7a08b633df49e7af5495cee0493c7dcf6697200b85b5b91
+size 16923827

hallo3/latest ADDED Viewed

	@@ -0,0 +1 @@


1	+ 1

t5-v1_1-xxl/added_tokens.json ADDED Viewed

	@@ -0,0 +1,102 @@

+{
+  "<extra_id_0>": 32099,
+  "<extra_id_10>": 32089,
+  "<extra_id_11>": 32088,
+  "<extra_id_12>": 32087,
+  "<extra_id_13>": 32086,
+  "<extra_id_14>": 32085,
+  "<extra_id_15>": 32084,
+  "<extra_id_16>": 32083,
+  "<extra_id_17>": 32082,
+  "<extra_id_18>": 32081,
+  "<extra_id_19>": 32080,
+  "<extra_id_1>": 32098,
+  "<extra_id_20>": 32079,
+  "<extra_id_21>": 32078,
+  "<extra_id_22>": 32077,
+  "<extra_id_23>": 32076,
+  "<extra_id_24>": 32075,
+  "<extra_id_25>": 32074,
+  "<extra_id_26>": 32073,
+  "<extra_id_27>": 32072,
+  "<extra_id_28>": 32071,
+  "<extra_id_29>": 32070,
+  "<extra_id_2>": 32097,
+  "<extra_id_30>": 32069,
+  "<extra_id_31>": 32068,
+  "<extra_id_32>": 32067,
+  "<extra_id_33>": 32066,
+  "<extra_id_34>": 32065,
+  "<extra_id_35>": 32064,
+  "<extra_id_36>": 32063,
+  "<extra_id_37>": 32062,
+  "<extra_id_38>": 32061,
+  "<extra_id_39>": 32060,
+  "<extra_id_3>": 32096,
+  "<extra_id_40>": 32059,
+  "<extra_id_41>": 32058,
+  "<extra_id_42>": 32057,
+  "<extra_id_43>": 32056,
+  "<extra_id_44>": 32055,
+  "<extra_id_45>": 32054,
+  "<extra_id_46>": 32053,
+  "<extra_id_47>": 32052,
+  "<extra_id_48>": 32051,
+  "<extra_id_49>": 32050,
+  "<extra_id_4>": 32095,
+  "<extra_id_50>": 32049,
+  "<extra_id_51>": 32048,
+  "<extra_id_52>": 32047,
+  "<extra_id_53>": 32046,
+  "<extra_id_54>": 32045,
+  "<extra_id_55>": 32044,
+  "<extra_id_56>": 32043,
+  "<extra_id_57>": 32042,
+  "<extra_id_58>": 32041,
+  "<extra_id_59>": 32040,
+  "<extra_id_5>": 32094,
+  "<extra_id_60>": 32039,
+  "<extra_id_61>": 32038,
+  "<extra_id_62>": 32037,
+  "<extra_id_63>": 32036,
+  "<extra_id_64>": 32035,
+  "<extra_id_65>": 32034,
+  "<extra_id_66>": 32033,
+  "<extra_id_67>": 32032,
+  "<extra_id_68>": 32031,
+  "<extra_id_69>": 32030,
+  "<extra_id_6>": 32093,
+  "<extra_id_70>": 32029,
+  "<extra_id_71>": 32028,
+  "<extra_id_72>": 32027,
+  "<extra_id_73>": 32026,
+  "<extra_id_74>": 32025,
+  "<extra_id_75>": 32024,
+  "<extra_id_76>": 32023,
+  "<extra_id_77>": 32022,
+  "<extra_id_78>": 32021,
+  "<extra_id_79>": 32020,
+  "<extra_id_7>": 32092,
+  "<extra_id_80>": 32019,
+  "<extra_id_81>": 32018,
+  "<extra_id_82>": 32017,
+  "<extra_id_83>": 32016,
+  "<extra_id_84>": 32015,
+  "<extra_id_85>": 32014,
+  "<extra_id_86>": 32013,
+  "<extra_id_87>": 32012,
+  "<extra_id_88>": 32011,
+  "<extra_id_89>": 32010,
+  "<extra_id_8>": 32091,
+  "<extra_id_90>": 32009,
+  "<extra_id_91>": 32008,
+  "<extra_id_92>": 32007,
+  "<extra_id_93>": 32006,
+  "<extra_id_94>": 32005,
+  "<extra_id_95>": 32004,
+  "<extra_id_96>": 32003,
+  "<extra_id_97>": 32002,
+  "<extra_id_98>": 32001,
+  "<extra_id_99>": 32000,
+  "<extra_id_9>": 32090
+}

t5-v1_1-xxl/config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "_name_or_path": "google/t5-v1_1-xxl",
+  "architectures": [
+    "T5EncoderModel"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 10240,
+  "d_kv": 64,
+  "d_model": 4096,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "gelu_new",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "gated-gelu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": true,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "num_decoder_layers": 24,
+  "num_heads": 64,
+  "num_layers": 24,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.44.2",
+  "use_cache": true,
+  "vocab_size": 32128
+}

t5-v1_1-xxl/model-00001-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9162b8ae9152e7a8e3bbebc535c8692783f50aec8cd3bb8ef6a751c432dd6392
+size 4994582224

t5-v1_1-xxl/model-00002-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d3edef29693d52402b1cc7c362f031e052f2e9482ed0c765c6351950434349b0
+size 4530066360

t5-v1_1-xxl/model.safetensors.index.json ADDED Viewed

	@@ -0,0 +1,226 @@

+{
+  "metadata": {
+    "total_size": 9524621312
+  },
+  "weight_map": {
+    "encoder.block.0.layer.0.SelfAttention.k.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.0.layer.0.SelfAttention.o.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.0.layer.0.SelfAttention.q.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.0.layer.0.SelfAttention.v.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.0.layer.0.layer_norm.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.0.layer.1.DenseReluDense.wi_0.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.0.layer.1.DenseReluDense.wi_1.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.0.layer.1.DenseReluDense.wo.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.0.layer.1.layer_norm.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.1.layer.0.SelfAttention.k.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.1.layer.0.SelfAttention.o.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.1.layer.0.SelfAttention.q.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.1.layer.0.SelfAttention.v.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.1.layer.0.layer_norm.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.1.layer.1.DenseReluDense.wi_0.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.1.layer.1.DenseReluDense.wi_1.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.1.layer.1.DenseReluDense.wo.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.1.layer.1.layer_norm.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.10.layer.0.SelfAttention.k.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.10.layer.0.SelfAttention.o.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.10.layer.0.SelfAttention.q.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.10.layer.0.SelfAttention.v.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.10.layer.0.layer_norm.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.10.layer.1.DenseReluDense.wi_0.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.10.layer.1.DenseReluDense.wi_1.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.10.layer.1.DenseReluDense.wo.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.10.layer.1.layer_norm.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.11.layer.0.SelfAttention.k.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.11.layer.0.SelfAttention.o.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.11.layer.0.SelfAttention.q.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.11.layer.0.SelfAttention.v.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.11.layer.0.layer_norm.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.11.layer.1.DenseReluDense.wi_0.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.11.layer.1.DenseReluDense.wi_1.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.11.layer.1.DenseReluDense.wo.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.11.layer.1.layer_norm.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.12.layer.0.SelfAttention.k.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.12.layer.0.SelfAttention.o.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.12.layer.0.SelfAttention.q.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.12.layer.0.SelfAttention.v.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.12.layer.0.layer_norm.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.12.layer.1.DenseReluDense.wi_0.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.12.layer.1.DenseReluDense.wi_1.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.12.layer.1.DenseReluDense.wo.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.12.layer.1.layer_norm.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.13.layer.0.SelfAttention.k.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.13.layer.0.SelfAttention.o.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.13.layer.0.SelfAttention.q.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.13.layer.0.SelfAttention.v.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.13.layer.0.layer_norm.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.13.layer.1.DenseReluDense.wi_0.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.13.layer.1.DenseReluDense.wi_1.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.13.layer.1.DenseReluDense.wo.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.13.layer.1.layer_norm.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.14.layer.0.SelfAttention.k.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.14.layer.0.SelfAttention.o.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.14.layer.0.SelfAttention.q.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.14.layer.0.SelfAttention.v.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.14.layer.0.layer_norm.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.14.layer.1.DenseReluDense.wi_0.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.14.layer.1.DenseReluDense.wi_1.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.14.layer.1.DenseReluDense.wo.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.14.layer.1.layer_norm.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.15.layer.0.SelfAttention.k.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.15.layer.0.SelfAttention.o.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.15.layer.0.SelfAttention.q.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.15.layer.0.SelfAttention.v.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.15.layer.0.layer_norm.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.15.layer.1.DenseReluDense.wi_0.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.15.layer.1.DenseReluDense.wi_1.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.15.layer.1.DenseReluDense.wo.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.15.layer.1.layer_norm.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.16.layer.0.SelfAttention.k.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.16.layer.0.SelfAttention.o.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.16.layer.0.SelfAttention.q.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.16.layer.0.SelfAttention.v.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.16.layer.0.layer_norm.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.16.layer.1.DenseReluDense.wi_0.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.16.layer.1.DenseReluDense.wi_1.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.16.layer.1.DenseReluDense.wo.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.16.layer.1.layer_norm.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.17.layer.0.SelfAttention.k.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.17.layer.0.SelfAttention.o.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.17.layer.0.SelfAttention.q.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.17.layer.0.SelfAttention.v.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.17.layer.0.layer_norm.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.17.layer.1.DenseReluDense.wi_0.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.17.layer.1.DenseReluDense.wi_1.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.17.layer.1.DenseReluDense.wo.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.17.layer.1.layer_norm.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.18.layer.0.SelfAttention.k.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.18.layer.0.SelfAttention.o.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.18.layer.0.SelfAttention.q.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.18.layer.0.SelfAttention.v.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.18.layer.0.layer_norm.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.18.layer.1.DenseReluDense.wi_0.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.18.layer.1.DenseReluDense.wi_1.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.18.layer.1.DenseReluDense.wo.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.18.layer.1.layer_norm.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.19.layer.0.SelfAttention.k.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.19.layer.0.SelfAttention.o.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.19.layer.0.SelfAttention.q.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.19.layer.0.SelfAttention.v.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.19.layer.0.layer_norm.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.19.layer.1.DenseReluDense.wi_0.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.19.layer.1.DenseReluDense.wi_1.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.19.layer.1.DenseReluDense.wo.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.19.layer.1.layer_norm.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.2.layer.0.SelfAttention.k.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.2.layer.0.SelfAttention.o.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.2.layer.0.SelfAttention.q.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.2.layer.0.SelfAttention.v.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.2.layer.0.layer_norm.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.2.layer.1.DenseReluDense.wi_0.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.2.layer.1.DenseReluDense.wi_1.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.2.layer.1.DenseReluDense.wo.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.2.layer.1.layer_norm.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.20.layer.0.SelfAttention.k.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.20.layer.0.SelfAttention.o.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.20.layer.0.SelfAttention.q.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.20.layer.0.SelfAttention.v.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.20.layer.0.layer_norm.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.20.layer.1.DenseReluDense.wi_0.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.20.layer.1.DenseReluDense.wi_1.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.20.layer.1.DenseReluDense.wo.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.20.layer.1.layer_norm.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.21.layer.0.SelfAttention.k.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.21.layer.0.SelfAttention.o.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.21.layer.0.SelfAttention.q.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.21.layer.0.SelfAttention.v.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.21.layer.0.layer_norm.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.21.layer.1.DenseReluDense.wi_0.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.21.layer.1.DenseReluDense.wi_1.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.21.layer.1.DenseReluDense.wo.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.21.layer.1.layer_norm.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.22.layer.0.SelfAttention.k.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.22.layer.0.SelfAttention.o.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.22.layer.0.SelfAttention.q.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.22.layer.0.SelfAttention.v.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.22.layer.0.layer_norm.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.22.layer.1.DenseReluDense.wi_0.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.22.layer.1.DenseReluDense.wi_1.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.22.layer.1.DenseReluDense.wo.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.22.layer.1.layer_norm.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.23.layer.0.SelfAttention.k.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.23.layer.0.SelfAttention.o.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.23.layer.0.SelfAttention.q.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.23.layer.0.SelfAttention.v.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.23.layer.0.layer_norm.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.23.layer.1.DenseReluDense.wi_0.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.23.layer.1.DenseReluDense.wi_1.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.23.layer.1.DenseReluDense.wo.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.23.layer.1.layer_norm.weight": "model-00002-of-00002.safetensors",
+    "encoder.block.3.layer.0.SelfAttention.k.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.3.layer.0.SelfAttention.o.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.3.layer.0.SelfAttention.q.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.3.layer.0.SelfAttention.v.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.3.layer.0.layer_norm.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.3.layer.1.DenseReluDense.wi_0.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.3.layer.1.DenseReluDense.wi_1.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.3.layer.1.DenseReluDense.wo.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.3.layer.1.layer_norm.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.4.layer.0.SelfAttention.k.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.4.layer.0.SelfAttention.o.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.4.layer.0.SelfAttention.q.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.4.layer.0.SelfAttention.v.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.4.layer.0.layer_norm.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.4.layer.1.DenseReluDense.wi_0.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.4.layer.1.DenseReluDense.wi_1.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.4.layer.1.DenseReluDense.wo.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.4.layer.1.layer_norm.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.5.layer.0.SelfAttention.k.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.5.layer.0.SelfAttention.o.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.5.layer.0.SelfAttention.q.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.5.layer.0.SelfAttention.v.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.5.layer.0.layer_norm.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.5.layer.1.DenseReluDense.wi_0.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.5.layer.1.DenseReluDense.wi_1.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.5.layer.1.DenseReluDense.wo.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.5.layer.1.layer_norm.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.6.layer.0.SelfAttention.k.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.6.layer.0.SelfAttention.o.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.6.layer.0.SelfAttention.q.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.6.layer.0.SelfAttention.v.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.6.layer.0.layer_norm.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.6.layer.1.DenseReluDense.wi_0.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.6.layer.1.DenseReluDense.wi_1.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.6.layer.1.DenseReluDense.wo.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.6.layer.1.layer_norm.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.7.layer.0.SelfAttention.k.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.7.layer.0.SelfAttention.o.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.7.layer.0.SelfAttention.q.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.7.layer.0.SelfAttention.v.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.7.layer.0.layer_norm.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.7.layer.1.DenseReluDense.wi_0.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.7.layer.1.DenseReluDense.wi_1.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.7.layer.1.DenseReluDense.wo.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.7.layer.1.layer_norm.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.8.layer.0.SelfAttention.k.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.8.layer.0.SelfAttention.o.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.8.layer.0.SelfAttention.q.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.8.layer.0.SelfAttention.v.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.8.layer.0.layer_norm.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.8.layer.1.DenseReluDense.wi_0.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.8.layer.1.DenseReluDense.wi_1.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.8.layer.1.DenseReluDense.wo.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.8.layer.1.layer_norm.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.9.layer.0.SelfAttention.k.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.9.layer.0.SelfAttention.o.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.9.layer.0.SelfAttention.q.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.9.layer.0.SelfAttention.v.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.9.layer.0.layer_norm.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.9.layer.1.DenseReluDense.wi_0.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.9.layer.1.DenseReluDense.wi_1.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.9.layer.1.DenseReluDense.wo.weight": "model-00001-of-00002.safetensors",
+    "encoder.block.9.layer.1.layer_norm.weight": "model-00001-of-00002.safetensors",
+    "encoder.final_layer_norm.weight": "model-00002-of-00002.safetensors",
+    "shared.weight": "model-00001-of-00002.safetensors"
+  }
+}

t5-v1_1-xxl/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,125 @@

+{
+  "additional_special_tokens": [
+    "<extra_id_0>",
+    "<extra_id_1>",
+    "<extra_id_2>",
+    "<extra_id_3>",
+    "<extra_id_4>",
+    "<extra_id_5>",
+    "<extra_id_6>",
+    "<extra_id_7>",
+    "<extra_id_8>",
+    "<extra_id_9>",
+    "<extra_id_10>",
+    "<extra_id_11>",
+    "<extra_id_12>",
+    "<extra_id_13>",
+    "<extra_id_14>",
+    "<extra_id_15>",
+    "<extra_id_16>",
+    "<extra_id_17>",
+    "<extra_id_18>",
+    "<extra_id_19>",
+    "<extra_id_20>",
+    "<extra_id_21>",
+    "<extra_id_22>",
+    "<extra_id_23>",
+    "<extra_id_24>",
+    "<extra_id_25>",
+    "<extra_id_26>",
+    "<extra_id_27>",
+    "<extra_id_28>",
+    "<extra_id_29>",
+    "<extra_id_30>",
+    "<extra_id_31>",
+    "<extra_id_32>",
+    "<extra_id_33>",
+    "<extra_id_34>",
+    "<extra_id_35>",
+    "<extra_id_36>",
+    "<extra_id_37>",
+    "<extra_id_38>",
+    "<extra_id_39>",
+    "<extra_id_40>",
+    "<extra_id_41>",
+    "<extra_id_42>",
+    "<extra_id_43>",
+    "<extra_id_44>",
+    "<extra_id_45>",
+    "<extra_id_46>",
+    "<extra_id_47>",
+    "<extra_id_48>",
+    "<extra_id_49>",
+    "<extra_id_50>",
+    "<extra_id_51>",
+    "<extra_id_52>",
+    "<extra_id_53>",
+    "<extra_id_54>",
+    "<extra_id_55>",
+    "<extra_id_56>",
+    "<extra_id_57>",
+    "<extra_id_58>",
+    "<extra_id_59>",
+    "<extra_id_60>",
+    "<extra_id_61>",
+    "<extra_id_62>",
+    "<extra_id_63>",
+    "<extra_id_64>",
+    "<extra_id_65>",
+    "<extra_id_66>",
+    "<extra_id_67>",
+    "<extra_id_68>",
+    "<extra_id_69>",
+    "<extra_id_70>",
+    "<extra_id_71>",
+    "<extra_id_72>",
+    "<extra_id_73>",
+    "<extra_id_74>",
+    "<extra_id_75>",
+    "<extra_id_76>",
+    "<extra_id_77>",
+    "<extra_id_78>",
+    "<extra_id_79>",
+    "<extra_id_80>",
+    "<extra_id_81>",
+    "<extra_id_82>",
+    "<extra_id_83>",
+    "<extra_id_84>",
+    "<extra_id_85>",
+    "<extra_id_86>",
+    "<extra_id_87>",
+    "<extra_id_88>",
+    "<extra_id_89>",
+    "<extra_id_90>",
+    "<extra_id_91>",
+    "<extra_id_92>",
+    "<extra_id_93>",
+    "<extra_id_94>",
+    "<extra_id_95>",
+    "<extra_id_96>",
+    "<extra_id_97>",
+    "<extra_id_98>",
+    "<extra_id_99>"
+  ],
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

t5-v1_1-xxl/spiece.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
+size 791656

t5-v1_1-xxl/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,940 @@

+{
+  "add_prefix_space": true,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32000": {
+      "content": "<extra_id_99>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32001": {
+      "content": "<extra_id_98>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32002": {
+      "content": "<extra_id_97>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32003": {
+      "content": "<extra_id_96>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32004": {
+      "content": "<extra_id_95>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32005": {
+      "content": "<extra_id_94>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32006": {
+      "content": "<extra_id_93>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32007": {
+      "content": "<extra_id_92>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32008": {
+      "content": "<extra_id_91>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32009": {
+      "content": "<extra_id_90>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32010": {
+      "content": "<extra_id_89>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32011": {
+      "content": "<extra_id_88>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32012": {
+      "content": "<extra_id_87>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32013": {
+      "content": "<extra_id_86>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32014": {
+      "content": "<extra_id_85>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32015": {
+      "content": "<extra_id_84>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32016": {
+      "content": "<extra_id_83>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32017": {
+      "content": "<extra_id_82>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32018": {
+      "content": "<extra_id_81>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32019": {
+      "content": "<extra_id_80>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32020": {
+      "content": "<extra_id_79>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32021": {
+      "content": "<extra_id_78>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32022": {
+      "content": "<extra_id_77>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32023": {
+      "content": "<extra_id_76>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32024": {
+      "content": "<extra_id_75>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32025": {
+      "content": "<extra_id_74>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32026": {
+      "content": "<extra_id_73>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32027": {
+      "content": "<extra_id_72>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32028": {
+      "content": "<extra_id_71>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32029": {
+      "content": "<extra_id_70>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32030": {
+      "content": "<extra_id_69>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32031": {
+      "content": "<extra_id_68>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32032": {
+      "content": "<extra_id_67>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32033": {
+      "content": "<extra_id_66>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32034": {
+      "content": "<extra_id_65>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32035": {
+      "content": "<extra_id_64>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32036": {
+      "content": "<extra_id_63>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32037": {
+      "content": "<extra_id_62>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32038": {
+      "content": "<extra_id_61>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32039": {
+      "content": "<extra_id_60>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32040": {
+      "content": "<extra_id_59>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32041": {
+      "content": "<extra_id_58>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32042": {
+      "content": "<extra_id_57>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32043": {
+      "content": "<extra_id_56>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32044": {
+      "content": "<extra_id_55>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32045": {
+      "content": "<extra_id_54>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32046": {
+      "content": "<extra_id_53>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32047": {
+      "content": "<extra_id_52>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32048": {
+      "content": "<extra_id_51>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32049": {
+      "content": "<extra_id_50>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32050": {
+      "content": "<extra_id_49>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32051": {
+      "content": "<extra_id_48>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32052": {
+      "content": "<extra_id_47>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32053": {
+      "content": "<extra_id_46>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32054": {
+      "content": "<extra_id_45>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32055": {
+      "content": "<extra_id_44>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32056": {
+      "content": "<extra_id_43>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32057": {
+      "content": "<extra_id_42>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32058": {
+      "content": "<extra_id_41>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32059": {
+      "content": "<extra_id_40>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32060": {
+      "content": "<extra_id_39>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32061": {
+      "content": "<extra_id_38>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32062": {
+      "content": "<extra_id_37>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32063": {
+      "content": "<extra_id_36>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32064": {
+      "content": "<extra_id_35>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32065": {
+      "content": "<extra_id_34>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32066": {
+      "content": "<extra_id_33>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32067": {
+      "content": "<extra_id_32>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32068": {
+      "content": "<extra_id_31>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32069": {
+      "content": "<extra_id_30>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32070": {
+      "content": "<extra_id_29>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32071": {
+      "content": "<extra_id_28>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32072": {
+      "content": "<extra_id_27>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32073": {
+      "content": "<extra_id_26>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32074": {
+      "content": "<extra_id_25>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32075": {
+      "content": "<extra_id_24>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32076": {
+      "content": "<extra_id_23>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32077": {
+      "content": "<extra_id_22>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32078": {
+      "content": "<extra_id_21>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32079": {
+      "content": "<extra_id_20>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32080": {
+      "content": "<extra_id_19>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32081": {
+      "content": "<extra_id_18>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32082": {
+      "content": "<extra_id_17>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32083": {
+      "content": "<extra_id_16>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32084": {
+      "content": "<extra_id_15>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32085": {
+      "content": "<extra_id_14>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32086": {
+      "content": "<extra_id_13>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32087": {
+      "content": "<extra_id_12>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32088": {
+      "content": "<extra_id_11>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32089": {
+      "content": "<extra_id_10>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32090": {
+      "content": "<extra_id_9>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32091": {
+      "content": "<extra_id_8>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32092": {
+      "content": "<extra_id_7>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32093": {
+      "content": "<extra_id_6>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32094": {
+      "content": "<extra_id_5>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32095": {
+      "content": "<extra_id_4>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32096": {
+      "content": "<extra_id_3>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32097": {
+      "content": "<extra_id_2>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32098": {
+      "content": "<extra_id_1>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    },
+    "32099": {
+      "content": "<extra_id_0>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<extra_id_0>",
+    "<extra_id_1>",
+    "<extra_id_2>",
+    "<extra_id_3>",
+    "<extra_id_4>",
+    "<extra_id_5>",
+    "<extra_id_6>",
+    "<extra_id_7>",
+    "<extra_id_8>",
+    "<extra_id_9>",
+    "<extra_id_10>",
+    "<extra_id_11>",
+    "<extra_id_12>",
+    "<extra_id_13>",
+    "<extra_id_14>",
+    "<extra_id_15>",
+    "<extra_id_16>",
+    "<extra_id_17>",
+    "<extra_id_18>",
+    "<extra_id_19>",
+    "<extra_id_20>",
+    "<extra_id_21>",
+    "<extra_id_22>",
+    "<extra_id_23>",
+    "<extra_id_24>",
+    "<extra_id_25>",
+    "<extra_id_26>",
+    "<extra_id_27>",
+    "<extra_id_28>",
+    "<extra_id_29>",
+    "<extra_id_30>",
+    "<extra_id_31>",
+    "<extra_id_32>",
+    "<extra_id_33>",
+    "<extra_id_34>",
+    "<extra_id_35>",
+    "<extra_id_36>",
+    "<extra_id_37>",
+    "<extra_id_38>",
+    "<extra_id_39>",
+    "<extra_id_40>",
+    "<extra_id_41>",
+    "<extra_id_42>",
+    "<extra_id_43>",
+    "<extra_id_44>",
+    "<extra_id_45>",
+    "<extra_id_46>",
+    "<extra_id_47>",
+    "<extra_id_48>",
+    "<extra_id_49>",
+    "<extra_id_50>",
+    "<extra_id_51>",
+    "<extra_id_52>",
+    "<extra_id_53>",
+    "<extra_id_54>",
+    "<extra_id_55>",
+    "<extra_id_56>",
+    "<extra_id_57>",
+    "<extra_id_58>",
+    "<extra_id_59>",
+    "<extra_id_60>",
+    "<extra_id_61>",
+    "<extra_id_62>",
+    "<extra_id_63>",
+    "<extra_id_64>",
+    "<extra_id_65>",
+    "<extra_id_66>",
+    "<extra_id_67>",
+    "<extra_id_68>",
+    "<extra_id_69>",
+    "<extra_id_70>",
+    "<extra_id_71>",
+    "<extra_id_72>",
+    "<extra_id_73>",
+    "<extra_id_74>",
+    "<extra_id_75>",
+    "<extra_id_76>",
+    "<extra_id_77>",
+    "<extra_id_78>",
+    "<extra_id_79>",
+    "<extra_id_80>",
+    "<extra_id_81>",
+    "<extra_id_82>",
+    "<extra_id_83>",
+    "<extra_id_84>",
+    "<extra_id_85>",
+    "<extra_id_86>",
+    "<extra_id_87>",
+    "<extra_id_88>",
+    "<extra_id_89>",
+    "<extra_id_90>",
+    "<extra_id_91>",
+    "<extra_id_92>",
+    "<extra_id_93>",
+    "<extra_id_94>",
+    "<extra_id_95>",
+    "<extra_id_96>",
+    "<extra_id_97>",
+    "<extra_id_98>",
+    "<extra_id_99>"
+  ],
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "</s>",
+  "extra_ids": 100,
+  "legacy": true,
+  "model_max_length": 226,
+  "pad_token": "<pad>",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "T5Tokenizer",
+  "unk_token": "<unk>"
+}

wav2vec/wav2vec2-base-960h/.gitattributes ADDED Viewed

	@@ -0,0 +1,18 @@

+*.bin.* filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tar.gz filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+model.safetensors filter=lfs diff=lfs merge=lfs -text

wav2vec/wav2vec2-base-960h/README.md ADDED Viewed

	@@ -0,0 +1,128 @@

+---
+language: en
+datasets:
+- librispeech_asr
+tags:
+- audio
+- automatic-speech-recognition
+- hf-asr-leaderboard
+license: apache-2.0
+widget:
+- example_title: Librispeech sample 1
+  src: https://cdn-media.huggingface.co/speech_samples/sample1.flac
+- example_title: Librispeech sample 2
+  src: https://cdn-media.huggingface.co/speech_samples/sample2.flac
+model-index:
+- name: wav2vec2-base-960h
+  results:
+  - task:
+      name: Automatic Speech Recognition
+      type: automatic-speech-recognition
+    dataset:
+      name: LibriSpeech (clean)
+      type: librispeech_asr
+      config: clean
+      split: test
+      args:
+        language: en
+    metrics:
+    - name: Test WER
+      type: wer
+      value: 3.4
+  - task:
+      name: Automatic Speech Recognition
+      type: automatic-speech-recognition
+    dataset:
+      name: LibriSpeech (other)
+      type: librispeech_asr
+      config: other
+      split: test
+      args:
+        language: en
+    metrics:
+    - name: Test WER
+      type: wer
+      value: 8.6
+---
+# Wav2Vec2-Base-960h
+[Facebook's Wav2Vec2](https://ai.facebook.com/blog/wav2vec-20-learning-the-structure-of-speech-from-raw-audio/)
+The base model pretrained and fine-tuned on 960 hours of Librispeech on 16kHz sampled speech audio. When using the model
+make sure that your speech input is also sampled at 16kHz.
+[Paper](https://arxiv.org/abs/2006.11477)
+Authors: Alexei Baevski, Henry Zhou, Abdelrahman Mohamed, Michael Auli
+**Abstract**
+We show for the first time that learning powerful representations from speech audio alone followed by fine-tuning on transcribed speech can outperform the best semi-supervised methods while being conceptually simpler. wav2vec 2.0 masks the speech input in the latent space and solves a contrastive task defined over a quantization of the latent representations which are jointly learned. Experiments using all labeled data of Librispeech achieve 1.8/3.3 WER on the clean/other test sets. When lowering the amount of labeled data to one hour, wav2vec 2.0 outperforms the previous state of the art on the 100 hour subset while using 100 times less labeled data. Using just ten minutes of labeled data and pre-training on 53k hours of unlabeled data still achieves 4.8/8.2 WER. This demonstrates the feasibility of speech recognition with limited amounts of labeled data.
+The original model can be found under https://github.com/pytorch/fairseq/tree/master/examples/wav2vec#wav2vec-20.
+# Usage
+To transcribe audio files the model can be used as a standalone acoustic model as follows:
+```python
+ from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
+ from datasets import load_dataset
+ import torch
+ # load model and tokenizer
+ processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
+ model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
+ # load dummy dataset and read soundfiles
+ ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
+ # tokenize
+ input_values = processor(ds[0]["audio"]["array"], return_tensors="pt", padding="longest").input_values  # Batch size 1
+ # retrieve logits
+ logits = model(input_values).logits
+ # take argmax and decode
+ predicted_ids = torch.argmax(logits, dim=-1)
+ transcription = processor.batch_decode(predicted_ids)
+ ```
+ ## Evaluation
+ This code snippet shows how to evaluate **facebook/wav2vec2-base-960h** on LibriSpeech's "clean" and "other" test data.
+```python
+from datasets import load_dataset
+from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
+import torch
+from jiwer import wer
+librispeech_eval = load_dataset("librispeech_asr", "clean", split="test")
+model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h").to("cuda")
+processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
+def map_to_pred(batch):
+    input_values = processor(batch["audio"][0]["array"], return_tensors="pt", padding="longest").input_values
+    with torch.no_grad():
+        logits = model(input_values.to("cuda")).logits
+    predicted_ids = torch.argmax(logits, dim=-1)
+    transcription = processor.batch_decode(predicted_ids)
+    batch["transcription"] = transcription
+    return batch
+result = librispeech_eval.map(map_to_pred, batched=True, batch_size=1, remove_columns=["audio"])
+print("WER:", wer(result["text"], result["transcription"]))
+```
+*Result (WER)*:
+| "clean" | "other" |
+|---|---|
+| 3.4 | 8.6 |

wav2vec/wav2vec2-base-960h/config.json ADDED Viewed

	@@ -0,0 +1,77 @@

+{
+  "_name_or_path": "facebook/wav2vec2-base-960h",
+  "activation_dropout": 0.1,
+  "apply_spec_augment": true,
+  "architectures": [
+    "Wav2Vec2ForCTC"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 1,
+  "codevector_dim": 256,
+  "contrastive_logits_temperature": 0.1,
+  "conv_bias": false,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "ctc_loss_reduction": "sum",
+  "ctc_zero_infinity": false,
+  "diversity_loss_weight": 0.1,
+  "do_stable_layer_norm": false,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_dropout": 0.0,
+  "feat_extract_norm": "group",
+  "feat_proj_dropout": 0.1,
+  "feat_quantizer_dropout": 0.0,
+  "final_dropout": 0.1,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.1,
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layerdrop": 0.1,
+  "mask_feature_length": 10,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_prob": 0.05,
+  "model_type": "wav2vec2",
+  "num_attention_heads": 12,
+  "num_codevector_groups": 2,
+  "num_codevectors_per_group": 320,
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_hidden_layers": 12,
+  "num_negatives": 100,
+  "pad_token_id": 0,
+  "proj_codevector_dim": 256,
+  "transformers_version": "4.7.0.dev0",
+  "vocab_size": 32
+}

wav2vec/wav2vec2-base-960h/feature_extractor_config.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+  "do_normalize": true,
+  "feature_dim": 1,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "return_attention_mask": false,
+  "sampling_rate": 16000
+}

wav2vec/wav2vec2-base-960h/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8aa76ab2243c81747a1f832954586bc566090c83a0ac167df6f31f0fa917d74a
+size 377607901

wav2vec/wav2vec2-base-960h/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+  "do_normalize": true,
+  "feature_size": 1,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "return_attention_mask": false,
+  "sampling_rate": 16000
+}

wav2vec/wav2vec2-base-960h/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}

wav2vec/wav2vec2-base-960h/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "<pad>", "do_lower_case": false, "return_attention_mask": false, "do_normalize": true}

wav2vec/wav2vec2-base-960h/vocab.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"<pad>": 0, "<s>": 1, "</s>": 2, "<unk>": 3, "|": 4, "E": 5, "T": 6, "A": 7, "O": 8, "N": 9, "I": 10, "H": 11, "S": 12, "R": 13, "D": 14, "L": 15, "U": 16, "M": 17, "W": 18, "C": 19, "F": 20, "G": 21, "Y": 22, "P": 23, "B": 24, "V": 25, "K": 26, "'": 27, "X": 28, "J": 29, "Q": 30, "Z": 31}