Fix Whisper transcription for stereo audio by converting to mono
Browse files
- RunWhisper.cs +19 -1
RunWhisper.cs
CHANGED
|
@@ -103,8 +103,26 @@ public class RunWhisper : MonoBehaviour
|
|
| 103 |
{
|
| 104 |
numSamples = audioClip.samples;
|
| 105 |
var data = new float[maxSamples];
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
numSamples = maxSamples;
|
| 107 |
-
audioClip.GetData(data, 0);
|
| 108 |
audioInput = new Tensor<float>(new TensorShape(1, numSamples), data);
|
| 109 |
}
|
| 110 |
|
|
|
|
| 103 |
{
|
| 104 |
numSamples = audioClip.samples;
|
| 105 |
var data = new float[maxSamples];
|
| 106 |
+
|
| 107 |
+
// Handle stereo to mono conversion
|
| 108 |
+
if (audioClip.channels == 2)
|
| 109 |
+
{
|
| 110 |
+
var stereoData = new float[numSamples * 2];
|
| 111 |
+
audioClip.GetData(stereoData, 0);
|
| 112 |
+
|
| 113 |
+
int monoSamples = Mathf.Min(numSamples, maxSamples);
|
| 114 |
+
for (int i = 0; i < monoSamples; i++)
|
| 115 |
+
{
|
| 116 |
+
data[i] = (stereoData[i * 2] + stereoData[i * 2 + 1]) / 2f;
|
| 117 |
+
}
|
| 118 |
+
}
|
| 119 |
+
else
|
| 120 |
+
{
|
| 121 |
+
numSamples = Mathf.Min(numSamples, maxSamples);
|
| 122 |
+
audioClip.GetData(data, 0);
|
| 123 |
+
}
|
| 124 |
+
|
| 125 |
numSamples = maxSamples;
|
|
|
|
| 126 |
audioInput = new Tensor<float>(new TensorShape(1, numSamples), data);
|
| 127 |
}
|
| 128 |
|