// AI.cs
using System.Management.Automation;
using NAudio.Wave.SampleProviders;
using NAudio.Wave;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Whisper.net;
using Whisper.net.Ggml;
using Whisper.net.Logger;
using Org.BouncyCastle.Crypto.IO;
using System.CodeDom;
using System.Runtime.Intrinsics.X86;
using System.Drawing.Drawing2D;
using System.IO;
using SpotifyAPI.Web;
using Org.BouncyCastle.Utilities.Zlib;
using System.Management;
using Microsoft.AspNetCore.Components.Forms;
using System.Linq.Expressions;

namespace GenXdev.Helpers
{
    /// <summary>
    /// Implements the <c>Get-SpeechToText</c> cmdlet: transcribes either an existing
    /// wave file or audio recorded from the default microphone using a Whisper model.
    /// </summary>
    /// <remarks>
    /// By default a single string containing the text of all segments (each followed
    /// by a space) is written to the pipeline. With <c>-Passthru</c> the individual
    /// segment objects are written instead.
    /// </remarks>
    [Cmdlet(VerbsCommon.Get, "SpeechToText")]
    public class GetSpeechToText : Cmdlet
    {
        /// <summary>Whisper model variant that is loaded (and downloaded when missing).</summary>
        private const GgmlType ModelType = GgmlType.LargeV3Turbo;

        /// <summary>File name of the model inside <see cref="ModelFilePath"/>.</summary>
        private const string ModelFileName = "ggml-largeV3Turbo.bin";

        /// <summary>Microphone capture sample rate in Hz (16 kHz).</summary>
        private const int SampleRate = 16000;

        /// <summary>Microphone capture channel count (mono).</summary>
        private const int ChannelCount = 1;

        /// <summary>Delay between checks for the "stop recording" key press.</summary>
        private const int KeyPollIntervalMilliseconds = 100;

        /// <summary>Directory that contains, or will receive, the Whisper model file.</summary>
        [Parameter(Position = 0, Mandatory = true)]
        public string ModelFilePath { get; set; } = null;

        /// <summary>
        /// Path of a wave file to transcribe. When omitted, audio is recorded from the
        /// default microphone until a key is pressed.
        /// </summary>
        [Parameter(Position = 1, Mandatory = false)]
        public string WaveFile { get; set; } = null;

        /// <summary>Language passed to the Whisper processor; defaults to <c>auto</c>.</summary>
        [Parameter(Position = 2, Mandatory = false, HelpMessage = "Sets the language to detect, defaults to 'auto'")]
        public string Language { get; set; } = "auto";

        /// <summary>When set, segment objects are written instead of one combined string.</summary>
        [Parameter(Position = 3, Mandatory = false, HelpMessage = "Returns objects instead of strings")]
        public SwitchParameter Passthru { get; set; }

        /// <inheritdoc/>
        protected override void BeginProcessing()
        {
            base.BeginProcessing();
        }

        /// <summary>
        /// Loads the model (downloading it first when it is not on disk), transcribes
        /// the selected audio source and writes the result to the pipeline.
        /// </summary>
        protected override void ProcessRecord()
        {
            base.ProcessRecord();

            var transcript = new StringBuilder();
            var segments = new List<object>();
            var gate = new object();

            var modelFileName = Path.GetFullPath(Path.Combine(ModelFilePath, ModelFileName));

            // Download the model on first use. A cmdlet's ProcessRecord is synchronous,
            // so the task is awaited by blocking; GetAwaiter().GetResult() surfaces the
            // original exception rather than an AggregateException.
            if (!File.Exists(modelFileName))
            {
                DownloadModel(modelFileName, ModelType).GetAwaiter().GetResult();
            }

            using var whisperFactory = WhisperFactory.FromPath(modelFileName);

            using var processor = whisperFactory.CreateBuilder()
                .WithLanguage(Language)
                .WithSegmentEventHandler(segment =>
                {
                    // Defensive lock: keeps the two collections consistent with each
                    // other regardless of which thread raises the callback.
                    lock (gate)
                    {
                        transcript.Append($"{segment.Text} ");
                        segments.Add(segment);
                    }
                })
                .Build();

            if (WaveFile == null)
            {
                using var recording = RecordFromMicrophone();
                processor.Process(recording);
            }
            else
            {
                using var stream = File.OpenRead(WaveFile);
                processor.Process(stream);
            }

            // All pipeline output happens here, on the thread that invoked
            // ProcessRecord, after transcription has finished.
            if (Passthru)
            {
                foreach (var segment in segments)
                {
                    WriteObject(segment);
                }

                return;
            }

            WriteObject(transcript.ToString());
        }

        /// <inheritdoc/>
        protected override void EndProcessing()
        {
            base.EndProcessing();
        }

        /// <summary>
        /// Records 16 kHz mono audio from the default input device until a key is
        /// pressed and returns it as an in-memory wave file positioned at its start.
        /// </summary>
        /// <returns>A read-only stream holding the complete wave file; the caller disposes it.</returns>
        private static MemoryStream RecordFromMicrophone()
        {
            using var waveIn = new WaveInEvent();
            waveIn.WaveFormat = new WaveFormat(SampleRate, ChannelCount);

            var gate = new object();
            var capturing = true;
            using var samples = new MemoryStream();

            waveIn.DataAvailable += (sender, args) =>
            {
                // The flag is checked under the same lock that guards the buffer so a
                // late callback can never write after capture has been stopped.
                lock (gate)
                {
                    if (!capturing)
                    {
                        return;
                    }

                    samples.Write(args.Buffer, 0, args.BytesRecorded);
                }
            };

            waveIn.StartRecording();

            Console.WriteLine("Press any key to stop recording...");

            // Ignore keys typed before the prompt, wait for a fresh key press, then
            // consume it so it does not leak into the host's input.
            DiscardPendingKeys();
            while (!Console.KeyAvailable)
            {
                System.Threading.Thread.Sleep(KeyPollIntervalMilliseconds);
            }
            DiscardPendingKeys();

            lock (gate)
            {
                capturing = false;
            }

            try
            {
                waveIn.StopRecording();
            }
            catch (Exception)
            {
                // Best effort: the audio captured so far is already buffered, so a
                // failure to stop the device must not discard the recording.
            }

            Console.WriteLine("recording stopped, processing...");

            lock (gate)
            {
                var waveData = new MemoryStream();

                // Disposing the writer finalizes the wave header. The bytes are copied
                // out afterwards because ToArray remains valid on a closed MemoryStream.
                using (var waveFileWriter = new WaveFileWriter(waveData, waveIn.WaveFormat))
                {
                    samples.Position = 0;
                    samples.CopyTo(waveFileWriter);
                }

                return new MemoryStream(waveData.ToArray(), writable: false);
            }
        }

        /// <summary>Reads and discards every key currently waiting in the console input buffer.</summary>
        private static void DiscardPendingKeys()
        {
            while (Console.KeyAvailable)
            {
                Console.ReadKey();
            }
        }

        /// <summary>
        /// Downloads the requested Whisper model and stores it at <paramref name="fileName"/>.
        /// </summary>
        /// <remarks>
        /// The data is written to a temporary <c>.partial</c> file that is moved into
        /// place only after the download completed, so an interrupted download never
        /// leaves a truncated file that a later <c>File.Exists</c> check would accept.
        /// </remarks>
        /// <param name="fileName">Full path of the model file to create.</param>
        /// <param name="ggmlType">Model variant to download.</param>
        private static async Task DownloadModel(string fileName, GgmlType ggmlType)
        {
            Console.WriteLine($"Downloading Model {fileName}");

            var directory = Path.GetDirectoryName(fileName);
            if (!string.IsNullOrEmpty(directory))
            {
                Directory.CreateDirectory(directory);
            }

            var partialFileName = fileName + ".partial";

            try
            {
                using (var modelStream = await WhisperGgmlDownloader.GetGgmlModelAsync(ggmlType).ConfigureAwait(false))
                using (var fileWriter = File.Create(partialFileName))
                {
                    await modelStream.CopyToAsync(fileWriter).ConfigureAwait(false);
                }

                File.Move(partialFileName, fileName, overwrite: true);
            }
            catch
            {
                try
                {
                    File.Delete(partialFileName);
                }
                catch (System.IO.IOException)
                {
                    // Cleanup is best effort; the original failure is rethrown below.
                }
                catch (UnauthorizedAccessException)
                {
                    // Cleanup is best effort; the original failure is rethrown below.
                }

                throw;
            }
        }
    }
}