8000 GitHub - DimQ1/SileroVad
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

DimQ1/SileroVad

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

44 Commits
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

Voice Activity Detection for .Net

Will be updated soon.

Quick Start

using NAudio.Wave;
using NAudio.Wave.SampleProviders;
using SileroVad;

 /// <summary>
 /// Quick-start helper: loads a WAV or MP3 file, runs voice activity detection on it,
 /// and writes the detected speech samples to a new WAV file next to the input.
 /// </summary>
 public static class FileReader
    {
        // Sample rate (Hz) used for the VAD input buffer and for the output WAV file.
        private const int SAMPLE_RATE = 16000;
        private static readonly Vad vad = new Vad();

        /// <summary>
        /// Runs voice activity detection over <paramref name="filePath"/> and writes the
        /// speech samples to "&lt;name&gt;.speech.wav" in the same directory.
        /// </summary>
        /// <param name="filePath">Path to a ".wav" or ".mp3" file.</param>
        /// <exception cref="ArgumentNullException"><paramref name="filePath"/> is null.</exception>
        /// <exception cref="NotSupportedException">The file extension is neither ".wav" nor ".mp3".</exception>
        public static void VadFile(string filePath)
        {
            if (filePath == null)
            {
                throw new ArgumentNullException(nameof(filePath));
            }

            // Using declaration: the reader is released even if reading, detection or writing throws.
            using var waveFileReader = OpenReader(filePath);

            var totalTime = waveFileReader.TotalTime;

            ISampleProvider sampleProvider = waveFileReader.ToSampleProvider();

            if (sampleProvider.WaveFormat.SampleRate != SAMPLE_RATE)
            {
                sampleProvider = new WdlResamplingSampleProvider(sampleProvider, SAMPLE_RATE);
            }

            // Always down-mix: the buffer below and the output file are both mono, so a
            // stereo file that is already at the target rate must be converted as well.
            sampleProvider = sampleProvider.ToMono();

            var array = new float[CountSamples(totalTime)];

            // A single Read call may deliver fewer samples than requested, so keep reading
            // until the buffer is full or the source is exhausted. Any unfilled tail stays
            // at zero, exactly as it would after one short read.
            var filled = 0;
            while (filled < array.Length)
            {
                var read = sampleProvider.Read(array, filled, array.Length - filled);
                if (read <= 0)
                {
                    break;
                }

                filled += read;
            }

            List<VadSpeech> resul = vad.GetSpeechTimestamps(array, min_silence_duration_ms: 500, threshold: 0.5f);

            var audioSpeech = VadHelper.GetSpeechSamples(array, resul);

            // "input.mp3" -> "input.speech.wav"
            var fileTrim = Path.ChangeExtension(filePath, "speech") + ".wav";

            using var fileWriter = new WaveFileWriter(fileTrim, new WaveFormat(SAMPLE_RATE, 1));
            foreach (var sample in audioSpeech)
            {
                fileWriter.WriteSample(sample);
            }
            fileWriter.Flush();
        }

        /// <summary>
        /// Opens the reader that matches the file extension (compared case-insensitively).
        /// </summary>
        /// <exception cref="NotSupportedException">The extension is neither ".wav" nor ".mp3".</exception>
        private static WaveStream OpenReader(string filePath)
        {
            // Invariant lowering: extension matching must not depend on the current culture.
            var ext = Path.GetExtension(filePath).ToLowerInvariant();

            switch (ext)
            {
                case ".wav":
                    return new WaveFileReader(filePath);
                case ".mp3":
                    return new Mp3FileReader(filePath);
                default:
                    throw new NotSupportedException($"not supported {ext}");
            }
        }

        /// <summary>
        /// Number of mono samples at <see cref="SAMPLE_RATE"/> needed to hold <paramref name="time"/> of audio.
        /// </summary>
        private static int CountSamples(TimeSpan time)
        {
            WaveFormat waveFormat = new WaveFormat(SAMPLE_RATE, 1);

            return TimeSpanToSamples(time, waveFormat);
        }

        /// <summary>
        /// Converts a duration to a sample count for the given format:
        /// whole sample frames (truncated) multiplied by the channel count.
        /// </summary>
        private static int TimeSpanToSamples(TimeSpan time, WaveFormat waveFormat)
        {
            return (int)(time.TotalSeconds * (double)waveFormat.SampleRate) * waveFormat.Channels;
        }
    }

References

[1] Silero Team. (2021). Silero VAD: pre-trained enterprise-grade Voice Activity Detector (VAD), Number Detector and Language Classifier. GitHub, GitHub repository, https://github.com/snakers4/silero-vad, hello@silero.ai.

About

No description, website, or topics provided.

Resources

Stars

Watchers

Forks

Packages

No packages published

Languages

0