// ReceiveRealTimeSpeechToText.cs

using System.Management.Automation;
using NAudio.Wave;
using Whisper.net;
using Whisper.net.Ggml;
using System.Management;
using System.Collections.Concurrent;
[Cmdlet(VerbsCommunications.Receive, "RealTimeSpeechToText")]
public class ReceiveRealTimeSpeechToText : PSCmdlet
{
    #region Cmdlet Parameters
    [Parameter(Mandatory = false, HelpMessage = "Path to the model file")]
    public string ModelFileDirectoryPath { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "Whether to use desktop audio capture instead of microphone")]
    public SwitchParameter UseDesktopAudioCapture { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "Whether to use both desktop audio capture and recording device simultaneously")]
    public SwitchParameter UseDesktopAndRecordingDevice { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "Use both desktop and recording device")]
    public string AudioDevice { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "Returns objects instead of strings")]
    public SwitchParameter Passthru { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "Whether to include token timestamps")]
    public SwitchParameter WithTokenTimestamps { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "Sum threshold for token timestamps, defaults to 0.5")]
    public float TokenTimestampsSumThreshold { get; set; } = 0.5f;
    [Parameter(Mandatory = false, HelpMessage = "Whether to split on word boundaries")]
    public SwitchParameter SplitOnWord { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "Maximum number of tokens per segment")]
    public int? MaxTokensPerSegment { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "Whether to ignore silence (will mess up timestamps)")]
    public SwitchParameter IgnoreSilence { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "Maximum duration of silence before automatically stopping recording")]
    public TimeSpan? MaxDurationOfSilence { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "Silence detect threshold (0..32767 defaults to 30)")]
    [ValidateRange(0, 32767)]
    public int? SilenceThreshold { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "Sets the input language to detect, defaults to 'en'")]
    public string LanguageIn { get; set; } = "en";
    [Parameter(Mandatory = false, HelpMessage = "Sets the output language")]
    public int CpuThreads { get; set; } = 0;
    [Parameter(Mandatory = false, HelpMessage = "Temperature for speech detection")]
    [ValidateRange(0, 1)]
    public float? Temperature { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "Temperature increment")]
    [ValidateRange(0, 1)]
    public float? TemperatureInc { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "Whether to translate the output")]
    public SwitchParameter WithTranslate { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "Prompt to use for the model")]
    public string Prompt { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "Regex to suppress tokens from the output")]
    public string SuppressRegex { get; set; } = null;
    [Parameter(Mandatory = false, HelpMessage = "Whether to show progress")]
    public SwitchParameter WithProgress { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "Size of the audio context")]
    public int? AudioContextSize { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "Whether to NOT suppress blank lines")]
    public SwitchParameter DontSuppressBlank { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "Maximum duration of the audio")]
    public TimeSpan? MaxDuration { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "Offset for the audio")]
    public TimeSpan? Offset { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "Maximum number of last text tokens")]
    public int? MaxLastTextTokens { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "Whether to use single segment only")]
    public SwitchParameter SingleSegmentOnly { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "Whether to print special tokens")]
    public SwitchParameter PrintSpecialTokens { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "Maximum segment length")]
    public int? MaxSegmentLength { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "Start timestamps at this moment")]
    public TimeSpan? MaxInitialTimestamp { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "Length penalty")]
    [ValidateRange(0, 1)]
    public float? LengthPenalty { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "Entropy threshold")]
    [ValidateRange(0, 1)]
    public float? EntropyThreshold { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "Log probability threshold")]
    [ValidateRange(0, 1)]
    public float? LogProbThreshold { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "No speech threshold")]
    [ValidateRange(0, 1)]
    public float? NoSpeechThreshold { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "Don't use context")]
    public SwitchParameter NoContext { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "Use beam search sampling strategy")]
    public SwitchParameter WithBeamSearchSamplingStrategy { get; set; }
    [Parameter(Mandatory = false, HelpMessage = "Whisper model type to use, defaults to Small")]
    public GgmlType ModelType { get; set; } = GgmlType.Small;
    #endregion
    // Recognised segments waiting to be written to the pipeline by the main loop.
    private readonly ConcurrentQueue<SegmentData> _results = new();
    // Captured (or mixed) audio chunks queued by the capture/mixing code for processing.
    private readonly ConcurrentQueue<byte[]> _bufferQueue = new();
    // Errors and verbose messages queued for the main loop, which writes them with
    // WriteError/WriteVerbose.
    private readonly ConcurrentQueue<ErrorRecord> _errorQueue = new();
    private readonly ConcurrentQueue<string> _verboseQueue = new();
    // Created in BeginProcessing; cancelled when recording should stop (Q key, MaxDuration,
    // or the silence timeout).
    private CancellationTokenSource _cts;
    private WhisperProcessor _processor;
    private WhisperFactory _whisperFactory; // Keep reference for proper disposal
    // Starts out true; set to false by whichever code path decides recording must stop.
    private bool _isRecordingStarted = true;
    // Checked by the audio callbacks and the mixing loop; where it is set is outside this view.
    private bool _isDisposed = false;
    private readonly object _disposeLock = new object();
    // Task running ProcessAudioBuffer, started when audio processing begins.
    private Task _processingTask;
    // Fields for dual audio stream support
    private IWaveIn _primaryWaveIn;
    private IWaveIn _secondaryWaveIn;
    // Raw buffers from the primary / secondary device, consumed by MixAudioBuffers.
    private readonly ConcurrentQueue<byte[]> _primaryQueue = new();
    private readonly ConcurrentQueue<byte[]> _secondaryQueue = new();
    private Task _mixingTask;
    // Held by the audio callbacks and the mixing loop while they touch the silence-detection
    // state and the chunk accumulator below.
    private readonly object _audioMixingLock = new object();
    // Counters incremented per audio callback / per queued chunk.
    private int _audioCallbackCount = 0;
    private int _bufferQueueCount = 0;
    // ---- Silence-detection state (reset at the start of each capture session) ----
    // hadAudio: the current chunk has exceeded the threshold at least once.
    private bool hadAudio;
    // everHadAudio: some audio above the threshold has been seen since capture started.
    private bool everHadAudio;
    // Accumulated duration of consecutive silent chunks, in seconds.
    private double totalSilenceSeconds;
    // Duration, sum of squared samples and sample count of the chunk being accumulated.
    private double seconds;
    private double sumSq;
    private long count;
    // RMS level that separates silence from audio (SilenceThreshold, or 30 when unset).
    private int threshold;
    // Accumulates PCM bytes until the current chunk is queued or discarded.
    private MemoryStream wavBufferStream;
    // Set once the single-input path has fallen back to desktop audio, so it only happens once.
    private bool _isFallback = false;
    protected override void BeginProcessing()
    {
        base.BeginProcessing();
        if (string.IsNullOrEmpty(ModelFileDirectoryPath) || !Directory.Exists(ModelFileDirectoryPath))
        {
            var localAppData = System.Environment.GetEnvironmentVariable("LOCALAPPDATA");
            if (!string.IsNullOrEmpty(localAppData))
            {
                ModelFileDirectoryPath = Path.Combine(localAppData, "GenXdev.PowerShell");
            }
            if (!Directory.Exists(ModelFileDirectoryPath))
            {
                try
                {
                    Directory.CreateDirectory(ModelFileDirectoryPath);
                }
                catch (Exception ex)
                {
                    ThrowTerminatingError(new ErrorRecord(ex, "ModelPathCreationFailed", ErrorCategory.ResourceUnavailable, ModelFileDirectoryPath));
                }
            }
        }
        if (!MyInvocation.BoundParameters.ContainsKey("LanguageIn"))
        {
            LanguageIn = "auto";
        }
        // Only log parameters that were actually set by the user
        WriteVerbose($"ModelFileDirectoryPath: {ModelFileDirectoryPath}");
        if (MyInvocation.BoundParameters.ContainsKey("UseDesktopAudioCapture"))
            WriteVerbose($"UseDesktopAudioCapture: {UseDesktopAudioCapture}");
        if (MyInvocation.BoundParameters.ContainsKey("UseDesktopAndRecordingDevice"))
            WriteVerbose($"UseDesktopAndRecordingDevice: {UseDesktopAndRecordingDevice}");
        if (MyInvocation.BoundParameters.ContainsKey("AudioDevice"))
            WriteVerbose($"AudioDevice: {AudioDevice}");
        if (MyInvocation.BoundParameters.ContainsKey("Passthru"))
            WriteVerbose($"Passthru: {Passthru}");
        if (MyInvocation.BoundParameters.ContainsKey("WithTokenTimestamps"))
            WriteVerbose($"WithTokenTimestamps: {WithTokenTimestamps}");
        if (MyInvocation.BoundParameters.ContainsKey("TokenTimestampsSumThreshold"))
            WriteVerbose($"TokenTimestampsSumThreshold: {TokenTimestampsSumThreshold}");
        if (MyInvocation.BoundParameters.ContainsKey("SplitOnWord"))
            WriteVerbose($"SplitOnWord: {SplitOnWord}");
        if (MyInvocation.BoundParameters.ContainsKey("MaxTokensPerSegment"))
            WriteVerbose($"MaxTokensPerSegment: {MaxTokensPerSegment}");
        if (MyInvocation.BoundParameters.ContainsKey("IgnoreSilence"))
            WriteVerbose($"IgnoreSilence: {IgnoreSilence}");
        if (MyInvocation.BoundParameters.ContainsKey("MaxDurationOfSilence"))
            WriteVerbose($"MaxDurationOfSilence: {MaxDurationOfSilence}");
        if (MyInvocation.BoundParameters.ContainsKey("SilenceThreshold"))
            WriteVerbose($"SilenceThreshold: {SilenceThreshold}");
        if (MyInvocation.BoundParameters.ContainsKey("LanguageIn"))
            WriteVerbose($"LanguageIn: {LanguageIn}");
        if (MyInvocation.BoundParameters.ContainsKey("CpuThreads"))
            WriteVerbose($"CpuThreads: {CpuThreads}");
        if (MyInvocation.BoundParameters.ContainsKey("Temperature"))
            WriteVerbose($"Temperature: {Temperature}");
        if (MyInvocation.BoundParameters.ContainsKey("TemperatureInc"))
            WriteVerbose($"TemperatureInc: {TemperatureInc}");
        if (MyInvocation.BoundParameters.ContainsKey("WithTranslate"))
            WriteVerbose($"WithTranslate: {WithTranslate}");
        if (MyInvocation.BoundParameters.ContainsKey("Prompt"))
            WriteVerbose($"Prompt: {Prompt}");
        if (MyInvocation.BoundParameters.ContainsKey("SuppressRegex"))
            WriteVerbose($"SuppressRegex: {SuppressRegex}");
        if (MyInvocation.BoundParameters.ContainsKey("WithProgress"))
            WriteVerbose($"WithProgress: {WithProgress}");
        if (MyInvocation.BoundParameters.ContainsKey("AudioContextSize"))
            WriteVerbose($"AudioContextSize: {AudioContextSize}");
        if (MyInvocation.BoundParameters.ContainsKey("DontSuppressBlank"))
            WriteVerbose($"DontSuppressBlank: {DontSuppressBlank}");
        if (MyInvocation.BoundParameters.ContainsKey("MaxDuration"))
            WriteVerbose($"MaxDuration: {MaxDuration}");
        if (MyInvocation.BoundParameters.ContainsKey("Offset"))
            WriteVerbose($"Offset: {Offset}");
        if (MyInvocation.BoundParameters.ContainsKey("MaxLastTextTokens"))
            WriteVerbose($"MaxLastTextTokens: {MaxLastTextTokens}");
        if (MyInvocation.BoundParameters.ContainsKey("SingleSegmentOnly"))
            WriteVerbose($"SingleSegmentOnly: {SingleSegmentOnly}");
        if (MyInvocation.BoundParameters.ContainsKey("PrintSpecialTokens"))
            WriteVerbose($"PrintSpecialTokens: {PrintSpecialTokens}");
        if (MyInvocation.BoundParameters.ContainsKey("MaxSegmentLength"))
            WriteVerbose($"MaxSegmentLength: {MaxSegmentLength}");
        if (MyInvocation.BoundParameters.ContainsKey("MaxInitialTimestamp"))
            WriteVerbose($"MaxInitialTimestamp: {MaxInitialTimestamp}");
        if (MyInvocation.BoundParameters.ContainsKey("LengthPenalty"))
            WriteVerbose($"LengthPenalty: {LengthPenalty}");
        if (MyInvocation.BoundParameters.ContainsKey("EntropyThreshold"))
            WriteVerbose($"EntropyThreshold: {EntropyThreshold}");
        if (MyInvocation.BoundParameters.ContainsKey("LogProbThreshold"))
            WriteVerbose($"LogProbThreshold: {LogProbThreshold}");
        if (MyInvocation.BoundParameters.ContainsKey("NoSpeechThreshold"))
            WriteVerbose($"NoSpeechThreshold: {NoSpeechThreshold}");
        if (MyInvocation.BoundParameters.ContainsKey("NoContext"))
            WriteVerbose($"NoContext: {NoContext}");
        if (MyInvocation.BoundParameters.ContainsKey("WithBeamSearchSamplingStrategy"))
            WriteVerbose($"WithBeamSearchSamplingStrategy: {WithBeamSearchSamplingStrategy}");
        if (MyInvocation.BoundParameters.ContainsKey("ModelType"))
            WriteVerbose($"ModelType: {ModelType}");
        _cts = new CancellationTokenSource();
    }
    protected override void ProcessRecord()
    {
        base.ProcessRecord();
        // Initialize Whisper
        var ggmlType = ModelType;
        var modelFileName = Path.GetFullPath(Path.Combine(ModelFileDirectoryPath, GetModelFileName(ModelType)));
        if (!File.Exists(modelFileName))
        {
            DownloadModel(modelFileName, ggmlType).GetAwaiter().GetResult();
        }
        _whisperFactory = WhisperFactory.FromPath(modelFileName);
        var builder = ConfigureWhisperBuilder(_whisperFactory.CreateBuilder());
        _processor = builder.Build();
        // Create audio input(s) based on parameters
        if (UseDesktopAndRecordingDevice.IsPresent)
        {
            CreateDualAudioInputs();
        }
        else
        {
            _primaryWaveIn = CreateAudioInput();
        }
        // Handle single or dual audio inputs
        if (_secondaryWaveIn != null)
        {
            ProcessDualAudioInputs();
        }
        else
        {
            ProcessSingleAudioInput(_primaryWaveIn);
        }
    }
    private void CreateDualAudioInputs()
    {
        WriteVerbose("Setting up dual audio inputs: desktop audio capture and recording device");
        try
        {
            // Create desktop audio capture
            _primaryWaveIn = new WasapiLoopbackCapture();
            WriteVerbose("Desktop audio capture initialized");
            // Create recording device input
            _secondaryWaveIn = CreateRecordingDeviceInput();
            WriteVerbose("Recording device input initialized");
        }
        catch (Exception ex)
        {
            WriteError(new ErrorRecord(ex, "DualAudioSetupError", ErrorCategory.DeviceError, null));
            // Fallback to single input
            _primaryWaveIn?.Dispose();
            _secondaryWaveIn?.Dispose();
            _primaryWaveIn = CreateAudioInput();
            _secondaryWaveIn = null;
        }
    }
    private IWaveIn CreateRecordingDeviceInput()
    {
        if (!string.IsNullOrWhiteSpace(AudioDevice))
        {
            // Find microphone device by name/GUID with wildcard support
            WriteVerbose($"Looking for microphone device matching: {AudioDevice}");
            for (int i = 0; i < WaveIn.DeviceCount; i++)
            {
                try
                {
                    var deviceInfo = WaveIn.GetCapabilities(i);
                    if (IsDeviceMatch(deviceInfo.ProductName, AudioDevice) ||
                        IsDeviceMatch(deviceInfo.ProductGuid.ToString(), AudioDevice))
                    {
                        WriteVerbose($"Selected microphone device: {deviceInfo.ProductName}");
                        var waveIn = new WaveInEvent { DeviceNumber = i };
                        return waveIn;
                    }
                }
                catch (Exception ex)
                {
                    WriteVerbose($"Could not check device {i}: {ex.Message}");
                }
            }
            WriteWarning($"Microphone device '{AudioDevice}' not found, using default");
        }
        return new WaveInEvent();
    }
    private void ProcessDualAudioInputs()
    {
        _primaryWaveIn.WaveFormat = new WaveFormat(16000, 1);
        _secondaryWaveIn.WaveFormat = new WaveFormat(16000, 1);
        var processingTask = Task.Run(() => ProcessAudioBuffer());
        _processingTask = processingTask;
        _mixingTask = Task.Run(() => MixAudioBuffers());
        // Variables for silence detection
        hadAudio = false;
        everHadAudio = false;
        totalSilenceSeconds = 0;
        seconds = 0;
        sumSq = 0;
        count = 0;
        threshold = SilenceThreshold.HasValue ? SilenceThreshold.Value : 30;
        wavBufferStream = new MemoryStream();
        // Set up event handlers for both audio inputs
        _primaryWaveIn.DataAvailable += (sender, args) =>
        {
            if (!_isRecordingStarted || _isDisposed) return;
            lock (_audioMixingLock)
            {
                if (args.BytesRecorded > 0)
                {
                    var buffer = new byte[args.BytesRecorded];
                    Array.Copy(args.Buffer, buffer, args.BytesRecorded);
                    _primaryQueue.Enqueue(buffer);
                    _audioCallbackCount++;
                }
            }
        };
        _secondaryWaveIn.DataAvailable += (sender, args) =>
        {
            if (!_isRecordingStarted || _isDisposed) return;
            lock (_audioMixingLock)
            {
                if (args.BytesRecorded > 0)
                {
                    var buffer = new byte[args.BytesRecorded];
                    Array.Copy(args.Buffer, buffer, args.BytesRecorded);
                    _secondaryQueue.Enqueue(buffer);
                    _audioCallbackCount++;
                }
            }
        };
        try
        {
            _primaryWaveIn.StartRecording();
            _secondaryWaveIn.StartRecording();
            ProcessMainLoop();
        }
        finally
        {
            try
            {
                _primaryWaveIn?.StopRecording();
                _secondaryWaveIn?.StopRecording();
            }
            catch (Exception ex)
            {
                WriteVerbose($"Error stopping dual audio inputs: {ex.Message}");
            }
            WaitForProcessingCompletion();
        }
    }
    private async Task MixAudioBuffers()
    {
        while (!_cts.IsCancellationRequested && !_isDisposed)
        {
            byte[] primaryBuffer = null;
            byte[] secondaryBuffer = null;
            // Try to dequeue from both, with timeout
            var dequeueTasks = new[]
            {
                    Task.Run(() => _primaryQueue.TryDequeue(out primaryBuffer)),
                    Task.Run(() => _secondaryQueue.TryDequeue(out secondaryBuffer))
                };
            await Task.WhenAny(Task.WhenAll(dequeueTasks), Task.Delay(100, _cts.Token));
            if (primaryBuffer == null && secondaryBuffer == null)
            {
                await Task.Delay(50, _cts.Token);
                continue;
            }
            // If only one is available, create a zero buffer for the other
            int targetLength = Math.Max(primaryBuffer?.Length ?? 0, secondaryBuffer?.Length ?? 0);
            if (primaryBuffer == null)
            {
                primaryBuffer = new byte[targetLength];
            }
            else if (secondaryBuffer == null)
            {
                secondaryBuffer = new byte[targetLength];
            }
            else if (primaryBuffer.Length != secondaryBuffer.Length)
            {
                // Resize to max and pad with zeros
                if (primaryBuffer.Length < targetLength)
                {
                    Array.Resize(ref primaryBuffer, targetLength);
                }
                else if (secondaryBuffer.Length < targetLength)
                {
                    Array.Resize(ref secondaryBuffer, targetLength);
                }
            }
            // Mix
            var mixedBuffer = MixAudioStreams(primaryBuffer, secondaryBuffer);
            WriteVerbose($"Mixed buffer length: {mixedBuffer.Length}, primary: {primaryBuffer.Length}, secondary: {secondaryBuffer.Length}");
            // Process the mixed buffer
            lock (_audioMixingLock)
            {
                if (!_isRecordingStarted || _isDisposed) return;
                if (MaxDurationOfSilence.HasValue || IgnoreSilence.IsPresent)
                {
                    seconds += mixedBuffer.Length / 32000d;
                    count += mixedBuffer.Length / 2;
                    unsafe
                    {
                        fixed (byte* buffer = mixedBuffer)
                        {
                            var floatBuffer = (Int16*)buffer;
                            var sampleCount = mixedBuffer.Length / 2;
                            for (var i = 0; i < sampleCount; i++)
                            {
                                sumSq += floatBuffer[i] * floatBuffer[i];
                            }
                        }
                    }
                    wavBufferStream.Write(mixedBuffer, 0, mixedBuffer.Length);
                    wavBufferStream.Flush();
                    var rms = Math.Sqrt(sumSq / count);
                    if (rms > threshold)
                    {
                        hadAudio = true;
                        totalSilenceSeconds = 0;
                        everHadAudio = true;
                    }
                    if (seconds > 0.85)
                    {
                        if (!_isRecordingStarted) return;
                        if (rms < threshold)
                        {
                            totalSilenceSeconds += seconds;
                            if (everHadAudio && MaxDurationOfSilence.HasValue && (totalSilenceSeconds > MaxDurationOfSilence.Value.TotalSeconds))
                            {
                                _isRecordingStarted = false;
                                _cts.Cancel();
                                return;
                            }
                            if (IgnoreSilence.IsPresent && !hadAudio)
                            {
                                count = 0;
                                sumSq = 0;
                                seconds = 0;
                                hadAudio = false;
                                wavBufferStream.Position = 0;
                                wavBufferStream.SetLength(0);
                                return;
                            }
                            hadAudio = false;
                        }
                        // Add buffer to queue for processing
                        wavBufferStream.Position = 0;
                        var buffer = new byte[wavBufferStream.Length];
                        wavBufferStream.Read(buffer, 0, buffer.Length);
                        _bufferQueue.Enqueue(buffer);
                        _bufferQueueCount++;
                        wavBufferStream.Position = 0;
                        wavBufferStream.SetLength(0);
                        count = 0;
                        sumSq = 0;
                        seconds = 0;
                    }
                }
                else
                {
                    // When not using silence detection, directly add to buffer queue
                    if (mixedBuffer.Length > 0)
                    {
                        _bufferQueue.Enqueue(mixedBuffer);
                        _bufferQueueCount++;
                    }
                }
            }
        }
    }
    private byte[] MixAudioStreams(byte[] primaryBuffer, byte[] secondaryBuffer)
    {
        int length = Math.Min(primaryBuffer.Length, secondaryBuffer.Length);
        var mixedBuffer = new byte[length];
        unsafe
        {
            fixed (byte* primaryPtr = primaryBuffer)
            fixed (byte* secondaryPtr = secondaryBuffer)
            fixed (byte* mixedPtr = mixedBuffer)
            {
                var primarySamples = (Int16*)primaryPtr;
                var secondarySamples = (Int16*)secondaryPtr;
                var mixedSamples = (Int16*)mixedPtr;
                int sampleCount = length / 2;
                for (int i = 0; i < sampleCount; i++)
                {
                    int mixed = (int)primarySamples[i] + (int)secondarySamples[i];
                    // Clamp to 16-bit range
                    if (mixed > Int16.MaxValue) mixed = Int16.MaxValue;
                    if (mixed < Int16.MinValue) mixed = Int16.MinValue;
                    mixedSamples[i] = (Int16)mixed;
                }
            }
        }
        // Simple volume normalization: compute RMS and scale if too loud/quiet
        double rmsSum = 0;
        unsafe
        {
            fixed (byte* mixedPtr = mixedBuffer)
            {
                var samples = (Int16*)mixedPtr;
                for (int i = 0; i < length / 2; i++)
                {
                    rmsSum += samples[i] * samples[i];
                }
            }
        }
        double rms = Math.Sqrt(rmsSum / (length / 2));
        double targetRms = 10000; // Arbitrary target, ~30% of max amplitude
        if (rms > 0)
        {
            double gain = targetRms / rms;
            if (gain < 1.0 || gain > 2.0) // Limit gain to avoid amplifying noise too much
            {
                gain = Math.Clamp(gain, 0.5, 2.0);
                unsafe
                {
                    fixed (byte* mixedPtr = mixedBuffer)
                    {
                        var samples = (Int16*)mixedPtr;
                        for (int i = 0; i < length / 2; i++)
                        {
                            int adjusted = (int)(samples[i] * gain);
                            if (adjusted > Int16.MaxValue) adjusted = Int16.MaxValue;
                            if (adjusted < Int16.MinValue) adjusted = Int16.MinValue;
                            samples[i] = (Int16)adjusted;
                        }
                    }
                }
            }
        }
        return mixedBuffer;
    }
    /// <summary>
    /// Runs a capture session on a single input: configures it for 16 kHz mono, starts the
    /// audio-processing task, attaches the DataAvailable handler, starts recording and runs the
    /// main loop. On exit the input is stopped and processing is awaited.
    /// </summary>
    /// <param name="waveIn">The input to record from; it is disposed when this method returns.</param>
    /// <remarks>
    /// If an exception occurs while no fallback has happened yet, desktop capture was not
    /// requested and no AudioDevice was given, the method retries once with desktop loopback
    /// capture. Any other failure is reported with WriteError and rethrown.
    /// </remarks>
    private void ProcessSingleAudioInput(IWaveIn waveIn)
    {
        using (waveIn)
        {
            waveIn.WaveFormat = new WaveFormat(16000, 1);
            var processingTask = Task.Run(() => ProcessAudioBuffer());
            _processingTask = processingTask;
            // Variables for silence detection
            hadAudio = false;
            everHadAudio = false;
            totalSilenceSeconds = 0;
            seconds = 0;
            sumSq = 0;
            count = 0;
            threshold = SilenceThreshold.HasValue ? SilenceThreshold.Value : 30;
            wavBufferStream = new MemoryStream();
            // Invoked by the audio device for every recorded buffer.
            waveIn.DataAvailable += (sender, args) =>
            {
                if (!_isRecordingStarted || _isDisposed) return;
                _audioCallbackCount++;
                lock (_audioMixingLock)
                {
                    // Re-check under the lock: recording may have been stopped while waiting.
                    if (!_isRecordingStarted || _isDisposed) return;
                    // Validate buffer bounds before processing
                    if (args.Buffer == null || args.BytesRecorded <= 0 || args.BytesRecorded > args.Buffer.Length)
                    {
                        return;
                    }
                    if (MaxDurationOfSilence.HasValue || IgnoreSilence.IsPresent)
                    {
                        // 16 kHz * 2 bytes per 16-bit mono sample = 32000 bytes per second.
                        seconds += args.BytesRecorded / 32000d;
                        count += args.BytesRecorded / 2;
                        unsafe
                        {
                            fixed (byte* buffer = args.Buffer)
                            {
                                // Despite the name, this views the bytes as 16-bit integer samples.
                                var floatBuffer = (Int16*)buffer;
                                var sampleCount = Math.Min(args.BytesRecorded / 2, args.Buffer.Length / 2);
                                for (var i = 0; i < sampleCount; i++)
                                {
                                    sumSq += floatBuffer[i] * floatBuffer[i];
                                }
                            }
                        }
                        wavBufferStream.Write(args.Buffer, 0, args.BytesRecorded);
                        wavBufferStream.Flush();
                        // RMS over everything accumulated since the last chunk reset.
                        var rms = Math.Sqrt(sumSq / count);
                        if (rms > threshold)
                        {
                            hadAudio = true;
                            totalSilenceSeconds = 0;
                            everHadAudio = true;
                        }
                        // A chunk is evaluated once more than 0.85 s has been accumulated.
                        if (seconds > 0.85)
                        {
                            if (!_isRecordingStarted) return;
                            if (rms < threshold)
                            {
                                totalSilenceSeconds += seconds;
                                // Silence timeout after some audio was heard: stop recording.
                                if (everHadAudio && MaxDurationOfSilence.HasValue && (totalSilenceSeconds > MaxDurationOfSilence.Value.TotalSeconds))
                                {
                                    _isRecordingStarted = false;
                                    _cts.Cancel();
                                    return;
                                }
                                if (IgnoreSilence.IsPresent && !hadAudio)
                                {
                                    // Ignoring silence: discard the accumulated chunk and wait
                                    // for the next callback.
                                    count = 0;
                                    sumSq = 0;
                                    seconds = 0;
                                    hadAudio = false;
                                    wavBufferStream.Position = 0;
                                    wavBufferStream.SetLength(0);
                                    return;
                                }
                                hadAudio = false;
                            }
                            // Add buffer to queue for processing
                            wavBufferStream.Position = 0;
                            var buffer = new byte[wavBufferStream.Length];
                            wavBufferStream.Read(buffer, 0, buffer.Length);
                            _bufferQueue.Enqueue(buffer);
                            _bufferQueueCount++;
                            // Reset the accumulator for the next chunk.
                            wavBufferStream.Position = 0;
                            wavBufferStream.SetLength(0);
                            count = 0;
                            sumSq = 0;
                            seconds = 0;
                        }
                    }
                    else
                    {
                        // When not using silence detection, directly add to buffer queue
                        if (args.BytesRecorded > 0 && args.Buffer != null)
                        {
                            // Copy: only the first BytesRecorded bytes of args.Buffer are queued.
                            var buffer = new byte[args.BytesRecorded];
                            Array.Copy(args.Buffer, buffer, args.BytesRecorded);
                            _bufferQueue.Enqueue(buffer);
                            _bufferQueueCount++;
                        }
                    }
                }
            };
            try
            {
                waveIn.StartRecording();
                ProcessMainLoop();
            }
            catch (Exception ex)
            {
                // NOTE(review): this also catches exceptions thrown by ProcessMainLoop, not only
                // a failed StartRecording, so the warning text may not match the real cause.
                if (!_isFallback && !UseDesktopAudioCapture.IsPresent && string.IsNullOrWhiteSpace(AudioDevice))
                {
                    WriteWarning("No standard input device was found and switching to desktop audio.");
                    waveIn.Dispose();
                    _isFallback = true;
                    // NOTE(review): the recursive call starts a second ProcessAudioBuffer task and
                    // overwrites _processingTask — confirm the first task is still awaited.
                    var desktopAudio = new WasapiLoopbackCapture();
                    ProcessSingleAudioInput(desktopAudio);
                    return;
                }
                WriteError(new ErrorRecord(ex, "AudioInputError", ErrorCategory.DeviceError, null));
                throw;
            }
            finally
            {
                // Runs for the fallback path too, i.e. after waveIn has already been disposed
                // above; failures from StopRecording are only logged.
                try
                {
                    waveIn.StopRecording();
                }
                catch (Exception ex)
                {
                    WriteVerbose($"Error stopping wave input: {ex.Message}");
                }
                WaitForProcessingCompletion();
            }
        }
    }
    private void ProcessMainLoop()
    {
        Console.WriteLine("Recording started. Press Q to stop...");
        var startTime = System.DateTime.UtcNow;
        while (!_cts.IsCancellationRequested && _isRecordingStarted)
        {
            try
            {
                if (Console.KeyAvailable)
                {
                    var keyInfo = Console.ReadKey(true);
                    if (keyInfo.Key == ConsoleKey.Q)
                    {
                        _isRecordingStarted = false;
                        _cts.Cancel();
                        break;
                    }
                }
                if (MaxDuration.HasValue && (System.DateTime.UtcNow - startTime) > MaxDuration.Value)
                {
                    Console.WriteLine($"Max recording time of {MaxDuration.Value.TotalSeconds} seconds reached.");
                    _isRecordingStarted = false;
                    _cts.Cancel();
                    break;
                }
                // Process all queued messages in the main thread
                while (_errorQueue.TryDequeue(out var errorRecord))
                {
                    WriteError(errorRecord);
                }
                while (_verboseQueue.TryDequeue(out var verboseMessage))
                {
                    WriteVerbose(verboseMessage);
                }
                while (_results.TryDequeue(out var segment))
                {
                    WriteObject(Passthru ? segment : segment.Text.Trim());
                }
                Thread.Sleep(100);
            }
            catch (InvalidOperationException)
            {
                // Console input not available, continue
                Thread.Sleep(100);
            }
            catch (Exception ex)
            {
                WriteError(new ErrorRecord(ex, "MainLoopError", ErrorCategory.OperationStopped, null));
                break;
            }
        }
    }
    private void WaitForProcessingCompletion()
    {
        // Wait for processing to complete with proper timeout and error handling
        bool taskCompleted = false;
        try
        {
            // First try to wait gracefully
            taskCompleted = _processingTask.Wait(TimeSpan.FromSeconds(5));
            if (!taskCompleted)
            {
                // If task doesn't complete, cancel and wait a bit more
                _cts.Cancel();
                taskCompleted = _processingTask.Wait(TimeSpan.FromSeconds(3));
            }
        }
        catch (AggregateException ex)
        {
            // Handle task exceptions
            foreach (var innerEx in ex.InnerExceptions)
            {
                if (!(innerEx is OperationCanceledException))
                {
                    WriteVerbose($"Processing task error: {innerEx.Message}");
                }
            }
        }
        catch (Exception ex)
        {
            WriteVerbose($"Error waiting for processing task: {ex.Message}");
        }
        // Wait for mixing task if exists
        if (_mixingTask != null)
        {
            try
            {
                _mixingTask.Wait(TimeSpan.FromSeconds(5));
            }
            catch (Exception ex)
            {
                WriteVerbose($"Error waiting for mixing task: {ex.Message}");
            }
        }
        // Process any remaining results regardless of task completion
        int timeout = 0;
        while (timeout < 50) // Max 5 seconds
        {
            bool hasResults = false;
            // Process all queued messages in the main thread during cleanup
            while (_errorQueue.TryDequeue(out var errorRecord))
            {
                WriteError(errorRecord);
                hasResults = true;
            }
            while (_verboseQueue.TryDequeue(out var verboseMessage))
            {
                hasResults = true;
            }
            while (_results.TryDequeue(out var segment))
            {
                WriteObject(Passthru ? segment : segment.Text.Trim());
                hasResults = true;
            }
            if (!hasResults && taskCompleted)
            {
                break; // No more results and task is done
            }
            Thread.Sleep(100);
            timeout++;
        }
    }
    private IWaveIn CreateAudioInput()
    {
        if (UseDesktopAudioCapture.IsPresent)
        {
            if (!string.IsNullOrWhiteSpace(AudioDevice))
            {
                WriteVerbose($"Looking for desktop audio device matching: {AudioDevice}");
                // For desktop audio capture, we use the default device but can log the attempt
                WriteWarning($"Desktop audio device selection by name is not supported in this NAudio version. Using default desktop audio capture.");
            }
            return new WasapiLoopbackCapture();
        }
        else
        {
            if (!string.IsNullOrWhiteSpace(AudioDevice))
            {
                // Find microphone device by name/GUID with wildcard support
                WriteVerbose($"Looking for microphone device matching: {AudioDevice}");
                for (int i = 0; i < WaveIn.DeviceCount; i++)
                {
                    try
                    {
                        var deviceInfo = WaveIn.GetCapabilities(i);
                        if (IsDeviceMatch(deviceInfo.ProductName, AudioDevice) ||
                            IsDeviceMatch(deviceInfo.ProductGuid.ToString(), AudioDevice))
                        {
                            WriteVerbose($"Selected microphone device: {deviceInfo.ProductName}");
                            var waveIn = new WaveInEvent { DeviceNumber = i };
                            return waveIn;
                        }
                    }
                    catch (Exception ex)
                    {
                        WriteVerbose($"Could not check device {i}: {ex.Message}");
                    }
                }
                WriteWarning($"Microphone device '{AudioDevice}' not found, using default");
            }
            return new WaveInEvent();
        }
    }
    private bool IsDeviceMatch(string deviceName, string pattern)
    {
        if (string.IsNullOrWhiteSpace(deviceName) || string.IsNullOrWhiteSpace(pattern))
            return false;
        // Convert wildcards to regex pattern
        string regexPattern = "^" + System.Text.RegularExpressions.Regex.Escape(pattern)
            .Replace("\\*", ".*")
            .Replace("\\?", ".") + "$";
        return System.Text.RegularExpressions.Regex.IsMatch(deviceName, regexPattern,
            System.Text.RegularExpressions.RegexOptions.IgnoreCase);
    }
    private WhisperProcessorBuilder ConfigureWhisperBuilder(WhisperProcessorBuilder builder)
    {
        int physicalCoreCount = 0;
        var searcher = new ManagementObjectSearcher("select NumberOfCores from Win32_Processor");
        foreach (var item in searcher.Get())
        {
            physicalCoreCount += Convert.ToInt32(item["NumberOfCores"]);
        }
        builder.WithLanguage(LanguageIn)
               .WithThreads(CpuThreads > 0 ? CpuThreads : physicalCoreCount);
        // Check for LanguageIn to enable WithTranslate
        if (MyInvocation.BoundParameters.ContainsKey("LanguageIn"))
        {
            builder.WithTranslate();
        }
        // Improved speech detection settings
        if (Temperature.HasValue)
        {
            builder.WithTemperature(Temperature.Value);
        }
        else
        {
            builder.WithTemperature(0.0f); // Lower temperature for more consistent results
        }
        if (TemperatureInc.HasValue) builder.WithTemperatureInc(TemperatureInc.Value);
        if (WithTokenTimestamps.IsPresent) builder.WithTokenTimestamps().WithTokenTimestampsSumThreshold(TokenTimestampsSumThreshold);
        if (WithTranslate.IsPresent) builder.WithTranslate();
        if (!string.IsNullOrWhiteSpace(Prompt)) builder.WithPrompt(Prompt);
        if (!string.IsNullOrWhiteSpace(SuppressRegex)) builder.WithSuppressRegex(SuppressRegex);
        if (WithProgress.IsPresent)
        {
            builder.WithProgressHandler(progress => WriteProgress(new ProgressRecord(1, "Processing", $"Progress: {progress}%") { PercentComplete = progress }));
        }
        if (SplitOnWord.IsPresent) builder.SplitOnWord();
        if (MaxTokensPerSegment.HasValue) builder.WithMaxTokensPerSegment(MaxTokensPerSegment.Value);
        // Improved silence/speech detection
        if (IgnoreSilence.IsPresent)
        {
            builder.WithNoSpeechThreshold(0.4f); // More sensitive to speech (lower = more sensitive)
        }
        else if (NoSpeechThreshold.HasValue)
        {
            builder.WithNoSpeechThreshold(NoSpeechThreshold.Value);
        }
        else
        {
            builder.WithNoSpeechThreshold(0.6f); // Default Whisper threshold
        }
        if (AudioContextSize.HasValue) builder.WithAudioContextSize(AudioContextSize.Value);
        if (DontSuppressBlank.IsPresent) builder.WithoutSuppressBlank();
        if (MaxDuration.HasValue) builder.WithDuration(MaxDuration.Value);
        if (Offset.HasValue) builder.WithOffset(Offset.Value);
        if (MaxLastTextTokens.HasValue) builder.WithMaxLastTextTokens(MaxLastTextTokens.Value);
        if (SingleSegmentOnly.IsPresent) builder.WithSingleSegment();
        if (PrintSpecialTokens.IsPresent) builder.WithPrintSpecialTokens();
        if (MaxSegmentLength.HasValue) builder.WithMaxSegmentLength(MaxSegmentLength.Value);
        if (MaxInitialTimestamp.HasValue) builder.WithMaxInitialTs((int)MaxInitialTimestamp.Value.TotalSeconds);
        if (LengthPenalty.HasValue) builder.WithLengthPenalty(LengthPenalty.Value);
        if (EntropyThreshold.HasValue) builder.WithEntropyThreshold(EntropyThreshold.Value);
        if (LogProbThreshold.HasValue) builder.WithLogProbThreshold(LogProbThreshold.Value);
        if (NoContext.IsPresent) builder.WithNoContext();
        if (WithBeamSearchSamplingStrategy.IsPresent) builder.WithBeamSearchSamplingStrategy();
        return builder;
    }
    private async Task ProcessAudioBuffer()
    {
        using var processingStream = new MemoryStream();
        bool isProcessing = false;
        try
        {
            while ((!_cts.IsCancellationRequested || _bufferQueue.Count > 0) && !_isDisposed)
            {
                try
                {
                    if (_bufferQueue.TryDequeue(out var buffer))
                    {
                        // Validate buffer before processing
                        if (buffer == null || buffer.Length == 0)
                        {
                            continue;
                        }
                        processingStream.Write(buffer, 0, buffer.Length);
                        // Increased threshold for better speech recognition
                        // 48000 bytes = ~3 seconds of audio (16kHz * 1 channel * 2 bytes * 3 seconds)
                        if (!isProcessing && processingStream.Length >= 48000)
                        {
                            var audioDurationSeconds = processingStream.Length / 32000.0; // 16kHz * 2 bytes
                            isProcessing = true;
                            // Convert raw PCM data to WAV format that Whisper can understand
                            using var wavStream = ConvertPcmToWav(processingStream.ToArray(), 16000, 1, 16);
                            wavStream.Position = 0;
                            try
                            {
                                int segmentCount = 0;
                                await foreach (var segment in _processor.ProcessAsync(wavStream, _cts.Token))
                                {
                                    if (_cts.IsCancellationRequested || _isDisposed)
                                    {
                                        break;
                                    }
                                    segmentCount++;
                                    if (!string.IsNullOrWhiteSpace(segment.Text))
                                    {
                                        if (!(segment.Text.Trim("\r\n\t ".ToCharArray()) == "[BLANK_AUDIO]"))
                                        {
                                            _results.Enqueue(segment);
                                        }
                                    }
                                }
                            }
                            catch (OperationCanceledException)
                            {
                                break;
                            }
                            catch (Exception ex) when (!_isDisposed)
                            {
                                // Queue error for main thread to process
                                _errorQueue.Enqueue(new ErrorRecord(ex, "WhisperProcessingError", ErrorCategory.OperationStopped, null));
                            }
                            processingStream.SetLength(0);
                            isProcessing = false;
                        }
                    }
                    else
                    {
                        // If we have data but not enough for a full segment, process it anyway when stopping
                        if (!_isRecordingStarted && processingStream.Length > 0 && !isProcessing && !_isDisposed)
                        {
                            var audioDurationSeconds = processingStream.Length / 32000.0;
                            isProcessing = true;
                            // Convert raw PCM data to WAV format for final processing
                            using var wavStream = ConvertPcmToWav(processingStream.ToArray(), 16000, 1, 16);
                            wavStream.Position = 0;
                            try
                            {
                                int segmentCount = 0;
                                await foreach (var segment in _processor.ProcessAsync(wavStream, _cts.Token))
                                {
                                    if (_cts.IsCancellationRequested || _isDisposed)
                                    {
                                        break;
                                    }
                                    segmentCount++;
                                    if (!string.IsNullOrWhiteSpace(segment.Text))
                                    {
                                        _results.Enqueue(segment);
                                    }
                                }
                            }
                            catch (OperationCanceledException)
                            {
                                break;
                            }
                            catch (Exception ex) when (!_isDisposed)
                            {
                                // Queue error for main thread to process
                                _errorQueue.Enqueue(new ErrorRecord(ex, "FinalProcessingError", ErrorCategory.OperationStopped, null));
                            }
                            processingStream.SetLength(0);
                            isProcessing = false;
                        }
                        await Task.Delay(50, _cts.Token);
                    }
                }
                catch (OperationCanceledException)
                {
                    break;
                }
                catch (Exception ex) when (!(ex is OperationCanceledException) && !_isDisposed)
                {
                    // Queue error for main thread to process instead of calling WriteError directly
                    _errorQueue.Enqueue(new ErrorRecord(ex, "ProcessingError", ErrorCategory.OperationStopped, null));
                    break;
                }
            }
        }
        catch when (!_isDisposed)
        {
        }
    }
    private MemoryStream ConvertPcmToWav(byte[] pcmData, int sampleRate, int channels, int bitsPerSample)
    {
        var wavStream = new MemoryStream();
        int bytesPerSample = bitsPerSample / 8;
        int byteRate = sampleRate * channels * bytesPerSample;
        int blockAlign = channels * bytesPerSample;
        // Write WAV header
        // "RIFF" chunk descriptor
        wavStream.Write(System.Text.Encoding.ASCII.GetBytes("RIFF"), 0, 4);
        wavStream.Write(BitConverter.GetBytes(36 + pcmData.Length), 0, 4); // File size - 8
        wavStream.Write(System.Text.Encoding.ASCII.GetBytes("WAVE"), 0, 4);
        // "fmt " sub-chunk
        wavStream.Write(System.Text.Encoding.ASCII.GetBytes("fmt "), 0, 4);
        wavStream.Write(BitConverter.GetBytes(16), 0, 4); // Sub-chunk size
        wavStream.Write(BitConverter.GetBytes((short)1), 0, 2); // Audio format (1 = PCM)
        wavStream.Write(BitConverter.GetBytes((short)channels), 0, 2); // Number of channels
        wavStream.Write(BitConverter.GetBytes(sampleRate), 0, 4); // Sample rate
        wavStream.Write(BitConverter.GetBytes(byteRate), 0, 4); // Byte rate
        wavStream.Write(BitConverter.GetBytes((short)blockAlign), 0, 2); // Block align
        wavStream.Write(BitConverter.GetBytes((short)bitsPerSample), 0, 2); // Bits per sample
                                                                            // "data" sub-chunk
        wavStream.Write(System.Text.Encoding.ASCII.GetBytes("data"), 0, 4);
        wavStream.Write(BitConverter.GetBytes(pcmData.Length), 0, 4); // Data size
        wavStream.Write(pcmData, 0, pcmData.Length); // The actual audio data
        return wavStream;
    }
    protected override void EndProcessing()
    {
        lock (_disposeLock)
        {
            if (_isDisposed) return;
            _isDisposed = true;
        }
        try
        {
            // Stop recording first to prevent new audio data
            _isRecordingStarted = false;
            // Cancel any ongoing operations
            if (_cts != null && !_cts.IsCancellationRequested)
            {
                _cts.Cancel();
            }
            // Wait for processing task to complete with timeout
            if (_processingTask != null && !_processingTask.IsCompleted)
            {
                try
                {
                    var completed = _processingTask.Wait(TimeSpan.FromSeconds(50));
                    if (!completed)
                    {
                        WriteVerbose("Processing task did not complete within timeout");
                    }
                }
                catch (AggregateException ex)
                {
                    // Expected when task is cancelled
                    WriteVerbose($"Processing task cancelled: {ex.InnerExceptions.FirstOrDefault()?.Message}");
                }
                catch (Exception ex)
                {
                    WriteVerbose($"Error waiting for processing task: {ex.Message}");
                }
            }
            // Wait for mixing task if exists
            if (_mixingTask != null && !_mixingTask.IsCompleted)
            {
                try
                {
                    _mixingTask.Wait(TimeSpan.FromSeconds(50));
                }
                catch (AggregateException ex)
                {
                    WriteVerbose($"Mixing task cancelled: {ex.InnerExceptions.FirstOrDefault()?.Message}");
                }
                catch (Exception ex)
                {
                    WriteVerbose($"Error waiting for mixing task: {ex.Message}");
                }
            }
            // Dispose audio inputs
            try
            {
                _primaryWaveIn?.Dispose();
                _secondaryWaveIn?.Dispose();
            }
            catch (Exception ex)
            {
                WriteVerbose($"Error disposing audio inputs: {ex.Message}");
            }
            // Dispose wavBufferStream
            try
            {
                wavBufferStream?.Dispose();
            }
            catch (Exception ex)
            {
                WriteVerbose($"Error disposing wavBufferStream: {ex.Message}");
            }
            // Dispose Whisper processor first (it uses the factory)
            if (_processor != null)
            {
                try
                {
                    // Properly dispose async resources
                    if (_processor is IAsyncDisposable asyncDisposable)
                    {
                        // Wait for async disposal to complete
                        asyncDisposable.DisposeAsync().AsTask().Wait(TimeSpan.FromSeconds(50));
                    }
                    else if (_processor is IDisposable disposable)
                    {
                        disposable.Dispose();
                    }
                }
                catch (Exception ex)
                {
                    WriteVerbose($"Error disposing Whisper processor: {ex.Message}");
                }
                finally
                {
                    _processor = null;
                }
            }
            // Then dispose Whisper factory
            if (_whisperFactory != null)
            {
                try
                {
                    _whisperFactory.Dispose();
                }
                catch (Exception ex)
                {
                    WriteVerbose($"Error disposing Whisper factory: {ex.Message}");
                }
                finally
                {
                    _whisperFactory = null;
                }
            }
            // Finally dispose cancellation token source
            if (_cts != null)
            {
                try
                {
                    _cts.Dispose();
                }
                catch (Exception ex)
                {
                    WriteVerbose($"Error disposing cancellation token source: {ex.Message}");
                }
                finally
                {
                    _cts = null;
                }
            }
        }
        catch (Exception ex)
        {
            WriteVerbose($"Error in EndProcessing: {ex.Message}");
        }
        base.EndProcessing();
    }
    private static async Task DownloadModel(string fileName, GgmlType ggmlType)
    {
        Console.WriteLine($"Downloading Model {fileName}");
        using var modelStream = await WhisperGgmlDownloader.GetGgmlModelAsync(ggmlType);
        using var fileWriter = File.OpenWrite(fileName);
        await modelStream.CopyToAsync(fileWriter);
    }
    private static string GetModelFileName(GgmlType modelType)
    {
        return $"ggml-{modelType}.bin";
    }
}