Functions/GenXdev.AI.Queries/Start-AudioTranscription.ps1

<##############################################################################
Part of PowerShell module : GenXdev.AI.Queries
Original cmdlet filename : Start-AudioTranscription.ps1
Original author : René Vaessen / GenXdev
Version : 3.24.2026
################################################################################
Copyright (c) René Vaessen / GenXdev
 
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
 
    http://www.apache.org/licenses/LICENSE-2.0
 
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
################################################################################>

###############################################################################
<#
.SYNOPSIS
Transcribes an audio file, video file, or a recording device to text
 
.DESCRIPTION
Transcribes an audio file, video file, or a recording device to text using
the Whisper AI model. The function can handle various audio and video formats,
convert them to the appropriate format for transcription, and optionally
translate the output to a different language. Supports SRT subtitle format
output and various audio processing parameters for fine-tuning the
transcription quality.
 
.PARAMETER Input
The file path of the audio or video file to transcribe. Accepts FileInfo
objects or file paths from pipeline. If not provided, records from microphone.
 
.PARAMETER AudioDevice
Audio device name or GUID (supports wildcards, picks first match).
 
.PARAMETER LanguageIn
The language to expect in the audio. E.g. "English", "French", "German",
"Dutch"
 
.PARAMETER LanguageOut
The language to translate to. E.g. "french", "german", "dutch"
 
.PARAMETER WithTokenTimestamps
Whether to include token timestamps in the output.
 
.PARAMETER TokenTimestampsSumThreshold
Sum threshold for token timestamps, defaults to 0.5.
 
.PARAMETER SplitOnWord
Whether to split on word boundaries.
 
.PARAMETER MaxTokensPerSegment
Maximum number of tokens per segment.
 
.PARAMETER IgnoreSilence
Whether to ignore silence (will mess up timestamps).
 
.PARAMETER MaxDurationOfSilence
Maximum duration of silence before automatically stopping recording.
 
.PARAMETER SilenceThreshold
Silence detection threshold (0..32767, defaults to 30).
 
.PARAMETER CpuThreads
Number of CPU threads to use, defaults to 0 (auto).
 
.PARAMETER Temperature
Temperature for speech recognition.
 
.PARAMETER TemperatureInc
Temperature increment.
 
.PARAMETER Prompt
Prompt to use for the model.
 
.PARAMETER SuppressRegex
Regex to suppress tokens from the output.
 
.PARAMETER WithProgress
Whether to show progress.
 
.PARAMETER AudioContextSize
Size of the audio context.
 
.PARAMETER DontSuppressBlank
Whether to NOT suppress blank lines.
 
.PARAMETER MaxDuration
Maximum duration of the audio.
 
.PARAMETER Offset
Offset for the audio.
 
.PARAMETER MaxLastTextTokens
Maximum number of last text tokens.
 
.PARAMETER SingleSegmentOnly
Whether to use single segment only.
 
.PARAMETER PrintSpecialTokens
Whether to print special tokens.
 
.PARAMETER MaxSegmentLength
Maximum segment length.
 
.PARAMETER MaxInitialTimestamp
Start timestamps at this moment.
 
.PARAMETER LengthPenalty
Length penalty.
 
.PARAMETER EntropyThreshold
Entropy threshold.
 
.PARAMETER LogProbThreshold
Log probability threshold.
 
.PARAMETER NoSpeechThreshold
No speech threshold.
 
.PARAMETER NoContext
Don't use context.
 
.PARAMETER WithBeamSearchSamplingStrategy
Use beam search sampling strategy.
 
.PARAMETER ModelType
Whisper model type to use, defaults to LargeV3Turbo.
 
.PARAMETER SRT
Output in SRT format.
 
.PARAMETER PassThru
Returns objects instead of strings.
 
.PARAMETER UseDesktopAudioCapture
Whether to use desktop audio capture instead of microphone input.
 
.PARAMETER UseDesktopAndRecordingDevice
Use both desktop and recording device.
 
.PARAMETER SessionOnly
Use alternative settings stored in session for AI preferences like Language,
Image collections, etc.
 
.PARAMETER ClearSession
Clear alternative settings stored in session for AI preferences like Language,
Image collections, etc.
 
.PARAMETER PreferencesDatabasePath
Database path for preference data files.
 
.PARAMETER SkipSession
Don't use alternative settings stored in session for AI preferences like
Language, Image collections, etc.
 
.PARAMETER VOX
Use silence detection to automatically stop recording.
 
.EXAMPLE
Start-AudioTranscription -Input "C:\path\to\audio.wav" `
    -LanguageIn "English" -LanguageOut "French" -SRT
 
.EXAMPLE
transcribefile "C:\video.mp4" "English"
 
.EXAMPLE
Get-ChildItem "*.mp4" | Start-AudioTranscription -LanguageIn "English"
 
.EXAMPLE
Start-AudioTranscription # Records from microphone when no file specified
###############################################################################>

function Start-AudioTranscription {

    [CmdletBinding(SupportsShouldProcess = $true)]
    [Alias('transcribefile', 'transcribe')]

    param(
        ###########################################################################
        [Alias("WaveFile", "FilePath", "MediaFile")]
        [Parameter(
            Mandatory = $false,
            Position = 0,
            ValueFromPipeline = $true,
            HelpMessage = 'The file path of the audio or video file to transcribe. If not provided, records from microphone.'
        )]
        [object] $Input,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Audio device name or GUID (supports wildcards, picks first match)'
        )]
        [string] $AudioDevice,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            Position = 1,
            HelpMessage = 'The language to expect in the audio.'
        )]
        [ValidateSet(
            'Afrikaans',
            'Akan',
            'Albanian',
            'Amharic',
            'Arabic',
            'Armenian',
            'Azerbaijani',
            'Basque',
            'Belarusian',
            'Bemba',
            'Bengali',
            'Bihari',
            'Bork, bork, bork!',
            'Bosnian',
            'Breton',
            'Bulgarian',
            'Cambodian',
            'Catalan',
            'Cherokee',
            'Chichewa',
            'Chinese (Simplified)',
            'Chinese (Traditional)',
            'Corsican',
            'Croatian',
            'Czech',
            'Danish',
            'Dutch',
            'Elmer Fudd',
            'English',
            'Esperanto',
            'Estonian',
            'Ewe',
            'Faroese',
            'Filipino',
            'Finnish',
            'French',
            'Frisian',
            'Ga',
            'Galician',
            'Georgian',
            'German',
            'Greek',
            'Guarani',
            'Gujarati',
            'Hacker',
            'Haitian Creole',
            'Hausa',
            'Hawaiian',
            'Hebrew',
            'Hindi',
            'Hungarian',
            'Icelandic',
            'Igbo',
            'Indonesian',
            'Interlingua',
            'Irish',
            'Italian',
            'Japanese',
            'Javanese',
            'Kannada',
            'Kazakh',
            'Kinyarwanda',
            'Kirundi',
            'Klingon',
            'Kongo',
            'Korean',
            'Krio (Sierra Leone)',
            'Kurdish',
            'Kurdish (Soranî)',
            'Kyrgyz',
            'Laothian',
            'Latin',
            'Latvian',
            'Lingala',
            'Lithuanian',
            'Lozi',
            'Luganda',
            'Luo',
            'Macedonian',
            'Malagasy',
            'Malay',
            'Malayalam',
            'Maltese',
            'Maori',
            'Marathi',
            'Mauritian Creole',
            'Moldavian',
            'Mongolian',
            'Montenegrin',
            'Nepali',
            'Nigerian Pidgin',
            'Northern Sotho',
            'Norwegian',
            'Norwegian (Nynorsk)',
            'Occitan',
            'Oriya',
            'Oromo',
            'Pashto',
            'Persian',
            'Pirate',
            'Polish',
            'Portuguese (Brazil)',
            'Portuguese (Portugal)',
            'Punjabi',
            'Quechua',
            'Romanian',
            'Romansh',
            'Runyakitara',
            'Russian',
            'Scots Gaelic',
            'Serbian',
            'Serbo-Croatian',
            'Sesotho',
            'Setswana',
            'Seychellois Creole',
            'Shona',
            'Sindhi',
            'Sinhalese',
            'Slovak',
            'Slovenian',
            'Somali',
            'Spanish',
            'Spanish (Latin American)',
            'Sundanese',
            'Swahili',
            'Swedish',
            'Tajik',
            'Tamil',
            'Tatar',
            'Telugu',
            'Thai',
            'Tigrinya',
            'Tonga',
            'Tshiluba',
            'Tumbuka',
            'Turkish',
            'Turkmen',
            'Twi',
            'Uighur',
            'Ukrainian',
            'Urdu',
            'Uzbek',
            'Vietnamese',
            'Welsh',
            'Wolof',
            'Xhosa',
            'Yiddish',
            'Yoruba',
            'Zulu'
        )]
        [string] $LanguageIn,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            Position = 2,
            HelpMessage = 'Sets the language to translate to.'
        )]
        [ValidateSet(
            'Afrikaans',
            'Akan',
            'Albanian',
            'Amharic',
            'Arabic',
            'Armenian',
            'Azerbaijani',
            'Basque',
            'Belarusian',
            'Bemba',
            'Bengali',
            'Bihari',
            'Bork, bork, bork!',
            'Bosnian',
            'Breton',
            'Bulgarian',
            'Cambodian',
            'Catalan',
            'Cherokee',
            'Chichewa',
            'Chinese (Simplified)',
            'Chinese (Traditional)',
            'Corsican',
            'Croatian',
            'Czech',
            'Danish',
            'Dutch',
            'Elmer Fudd',
            'English',
            'Esperanto',
            'Estonian',
            'Ewe',
            'Faroese',
            'Filipino',
            'Finnish',
            'French',
            'Frisian',
            'Ga',
            'Galician',
            'Georgian',
            'German',
            'Greek',
            'Guarani',
            'Gujarati',
            'Hacker',
            'Haitian Creole',
            'Hausa',
            'Hawaiian',
            'Hebrew',
            'Hindi',
            'Hungarian',
            'Icelandic',
            'Igbo',
            'Indonesian',
            'Interlingua',
            'Irish',
            'Italian',
            'Japanese',
            'Javanese',
            'Kannada',
            'Kazakh',
            'Kinyarwanda',
            'Kirundi',
            'Klingon',
            'Kongo',
            'Korean',
            'Krio (Sierra Leone)',
            'Kurdish',
            'Kurdish (Soranî)',
            'Kyrgyz',
            'Laothian',
            'Latin',
            'Latvian',
            'Lingala',
            'Lithuanian',
            'Lozi',
            'Luganda',
            'Luo',
            'Macedonian',
            'Malagasy',
            'Malay',
            'Malayalam',
            'Maltese',
            'Maori',
            'Marathi',
            'Mauritian Creole',
            'Moldavian',
            'Mongolian',
            'Montenegrin',
            'Nepali',
            'Nigerian Pidgin',
            'Northern Sotho',
            'Norwegian',
            'Norwegian (Nynorsk)',
            'Occitan',
            'Oriya',
            'Oromo',
            'Pashto',
            'Persian',
            'Pirate',
            'Polish',
            'Portuguese (Brazil)',
            'Portuguese (Portugal)',
            'Punjabi',
            'Quechua',
            'Romanian',
            'Romansh',
            'Runyakitara',
            'Russian',
            'Scots Gaelic',
            'Serbian',
            'Serbo-Croatian',
            'Sesotho',
            'Setswana',
            'Seychellois Creole',
            'Shona',
            'Sindhi',
            'Sinhalese',
            'Slovak',
            'Slovenian',
            'Somali',
            'Spanish',
            'Spanish (Latin American)',
            'Sundanese',
            'Swahili',
            'Swedish',
            'Tajik',
            'Tamil',
            'Tatar',
            'Telugu',
            'Thai',
            'Tigrinya',
            'Tonga',
            'Tshiluba',
            'Tumbuka',
            'Turkish',
            'Turkmen',
            'Twi',
            'Uighur',
            'Ukrainian',
            'Urdu',
            'Uzbek',
            'Vietnamese',
            'Welsh',
            'Wolof',
            'Xhosa',
            'Yiddish',
            'Yoruba',
            'Zulu'
        )]
        [string] $LanguageOut = $null,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Sum threshold for token timestamps, defaults to 0.5'
        )]
        [float] $TokenTimestampsSumThreshold = 0.5,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Maximum number of tokens per segment'
        )]
        [int] $MaxTokensPerSegment,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = ('Maximum duration of silence before automatically ' +
                'stopping recording')
        )]
        [object] $MaxDurationOfSilence,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Silence detect threshold (0..32767 defaults to 30)'
        )]
        [ValidateRange(0, 32767)]
        [int] $SilenceThreshold,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Number of CPU threads to use, defaults to 0 (auto)'
        )]
        [int] $CpuThreads = 0,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Temperature for speech recognition'
        )]
        [ValidateRange(0, 1)]
        [float] $Temperature = 0.5,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Temperature increment'
        )]
        [ValidateRange(0, 1)]
        [float] $TemperatureInc,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Prompt to use for the model'
        )]
        [string] $Prompt,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Regex to suppress tokens from the output'
        )]
        [string] $SuppressRegex = $null,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Size of the audio context'
        )]
        [int] $AudioContextSize,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Maximum duration of the audio'
        )]
        [object] $MaxDuration,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Offset for the audio'
        )]
        [object] $Offset,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Maximum number of last text tokens'
        )]
        [int] $MaxLastTextTokens,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Maximum segment length'
        )]
        [int] $MaxSegmentLength,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Start timestamps at this moment'
        )]
        [object] $MaxInitialTimestamp,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Length penalty'
        )]
        [ValidateRange(0, 1)]
        [float] $LengthPenalty,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Entropy threshold'
        )]
        [ValidateRange(0, 1)]
        [float] $EntropyThreshold,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Log probability threshold'
        )]
        [ValidateRange(0, 1)]
        [float] $LogProbThreshold,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'No speech threshold'
        )]
        [ValidateRange(0, 1)]
        [float] $NoSpeechThreshold,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Database path for preference data files'
        )]
        [Alias('DatabasePath')]
        [string] $PreferencesDatabasePath,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Whether to include token timestamps in the output'
        )]
        [switch] $WithTokenTimestamps,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Whether to split on word boundaries'
        )]
        [switch] $SplitOnWord,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Whether to ignore silence (will mess up timestamps)'
        )]
        [switch] $IgnoreSilence,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Whether to show progress'
        )]
        [switch] $WithProgress,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Whether to NOT suppress blank lines'
        )]
        [switch] $DontSuppressBlank,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Whether to use single segment only'
        )]
        [switch] $SingleSegmentOnly,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Whether to print special tokens'
        )]
        [switch] $PrintSpecialTokens,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Don't use context"
        )]
        [switch] $NoContext,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Use beam search sampling strategy'
        )]
        [switch] $WithBeamSearchSamplingStrategy,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Whisper model type to use, defaults to LargeV3Turbo'
        )]
        [ValidateSet('Tiny', 'TinyEn', 'Base', 'BaseEn', 'Small', 'SmallEn', 'Medium', 'MediumEn', 'LargeV1', 'LargeV2', 'LargeV3', 'LargeV3Turbo')]
        [string] $ModelType,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Output in SRT format.'
        )]
        [switch] $SRT,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Returns objects instead of strings'
        )]
        [Alias('pt')]
        [switch]$PassThru,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = ('Whether to use desktop audio capture instead of ' +
                'microphone input')
        )]
        [switch] $UseDesktopAudioCapture,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Use both desktop and recording device"
        )]
        [switch] $UseDesktopAndRecordingDevice,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = ('Use alternative settings stored in session for AI ' +
                'preferences like Language, Image collections, etc')
        )]
        [switch] $SessionOnly,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = ('Clear alternative settings stored in session for ' +
                'AI preferences like Language, Image collections, etc')
        )]
        [switch] $ClearSession,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = ('Dont use alternative settings stored in session ' +
                'for AI preferences like Language, Image ' +
                'collections, etc')
        )]
        [Alias('FromPreferences')]
        [switch] $SkipSession,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = 'Use silence detection to automatically stop recording'
        )]
        [switch] $VOX
    )

    begin {

        # store PSBoundParameters in a variable to avoid nested function issues
        $myPSBoundParameters = $PSBoundParameters

        # configure voice activation detection (VOX) settings
        if ($VOX -eq $true) {

            # output vox configuration message for verbose logging
            Microsoft.PowerShell.Utility\Write-Verbose (
                'Configuring VOX settings'
            )

            # set default max duration of silence for vox
            if (-not $myPSBoundParameters.ContainsKey('MaxDurationOfSilence')) {

                $null = $myPSBoundParameters.Add(
                    'MaxDurationOfSilence',
                    [System.TimeSpan]::FromSeconds(4)
                )
            }
            else {
                $myPSBoundParameters['MaxDurationOfSilence'] = (
                    [System.TimeSpan]::FromSeconds(4)
                )
            }

            # # enable ignore silence for vox mode
            # if (-not $myPSBoundParameters.ContainsKey('IgnoreSilence')) {

            # $null = $myPSBoundParameters.Add('IgnoreSilence', $true)
            # }
            # else {
            # $myPSBoundParameters['IgnoreSilence'] = $true
            # }

            # remove vox parameter as it's processed
            if ($myPSBoundParameters.ContainsKey('VOX')) {

                $null = $myPSBoundParameters.Remove('VOX')
            }
        }

        # determine if translation should be performed based on user intent
        # only translate when languageout parameter is explicitly provided by user
        # and it's different from languagein
        if ($PSBoundParameters.ContainsKey('LanguageOut') -and `
            ($LanguageIn -ne $LanguageOut)) {

            $skipTranslation = $false
            Microsoft.PowerShell.Utility\Write-Verbose (
                'Translation enabled: LanguageOut parameter provided and ' +
                'differs from LanguageIn'
            )
        }
        else {

            $skipTranslation = $true
            if ($PSBoundParameters.ContainsKey('LanguageOut')) {

                Microsoft.PowerShell.Utility\Write-Verbose (
                    "Translation skipped: LanguageIn and LanguageOut are " +
                    "identical ('${LanguageIn}')"
                )
            }
            else {

                Microsoft.PowerShell.Utility\Write-Verbose (
                    'Translation skipped: LanguageOut parameter not provided'
                )
            }
        }

        # copy identical parameter values for ai meta language helper function
        $params = GenXdev.FileSystem\Copy-IdenticalParamValues `
            -BoundParameters $myPSBoundParameters `
            -FunctionName 'GenXdev.AI\Get-AIMetaLanguage' `
            -DefaultValues (Microsoft.PowerShell.Utility\Get-Variable `
                -Scope Local -ErrorAction SilentlyContinue)

        # resolve the input language to a standard format
        if (-not [string]::IsNullOrWhiteSpace($LanguageIn)) {

            $LanguageIn = GenXdev.AI\Get-AIMetaLanguage @params `
                -Language $LanguageIn
        }
        else {

            $LanguageIn = GenXdev.AI\Get-AIMetaLanguage @params
        }

        # resolve the output language to a standard format (only if translation is needed)
        if (-not $skipTranslation) {

            if (-not [string]::IsNullOrWhiteSpace($LanguageOut)) {

                try {

                    $LanguageOut = GenXdev.AI\Get-AIMetaLanguage @params `
                        -Language $LanguageOut
                }
                catch {

                    Microsoft.PowerShell.Utility\Write-Verbose (
                        "Failed to resolve LanguageOut '$LanguageOut': $PSItem"
                    )
                    $skipTranslation = $true
                }
            }
        }

        # convert maxdurationofsilence to timespan if it's not already
        if ($myPSBoundParameters.ContainsKey('MaxDurationOfSilence') -and `
            (-not ($MaxDurationOfSilence -is [System.TimeSpan]))) {

            $MaxDurationOfSilence = [System.TimeSpan]::FromSeconds(
                $MaxDurationOfSilence
            )
            $myPSBoundParameters['MaxDurationOfSilence'] = $MaxDurationOfSilence
        }

        # convert maxduration to timespan if it's not already
        if ($myPSBoundParameters.ContainsKey('MaxDuration') -and `
            (-not ($MaxDuration -is [System.TimeSpan]))) {

            $MaxDuration = [System.TimeSpan]::FromSeconds($MaxDuration)
            $myPSBoundParameters['MaxDuration'] = $MaxDuration
        }

        # convert offset to timespan if it's not already
        if ($myPSBoundParameters.ContainsKey('Offset') -and `
            (-not ($Offset -is [System.TimeSpan]))) {

            $Offset = [System.TimeSpan]::FromSeconds($Offset)
            $myPSBoundParameters['Offset'] = $Offset
        }

        # convert maxinitialtimestamp to timespan if it's not already
        if ($myPSBoundParameters.ContainsKey('MaxInitialTimestamp') -and `
            (-not ($MaxInitialTimestamp -is [System.TimeSpan]))) {

            $MaxInitialTimestamp = [System.TimeSpan]::FromSeconds(
                $MaxInitialTimestamp
            )
            $myPSBoundParameters['MaxInitialTimestamp'] = $MaxInitialTimestamp
        }

        # locate the ffmpeg executable path in winget installation directory
        # try multiple possible locations for ffmpeg
        $ffmpegPath = $null

        # first try the symlink location (fastest approach)
        $symlinkPath = "${env:LOCALAPPDATA}\Microsoft\WinGet\Links\ffmpeg.exe"
        if ([System.IO.File]::Exists($symlinkPath)) {

            $ffmpegPath = $symlinkPath
        }

        # fallback to recursive search in winget directory
        if ([string]::IsNullOrEmpty($ffmpegPath)) {

            $ffmpegPath = (Microsoft.PowerShell.Management\Get-ChildItem `
                    -LiteralPath "${env:LOCALAPPDATA}\Microsoft\WinGet" `
                    -Filter "ffmpeg.exe" `
                    -Recurse -ErrorAction SilentlyContinue |
                    Microsoft.PowerShell.Utility\Select-Object -First 1 |
                    Microsoft.PowerShell.Core\ForEach-Object FullName)
        }

        # initialize script-scope variables for input tracking
        $script:InputProvided = $false

        # add language parameter if languagein was specified
        if ($myPSBoundParameters.ContainsKey('LanguageIn')) {

            $null = $myPSBoundParameters.Add('Language', $LanguageIn)
        }

        # remove withtranslate parameter if it exists (legacy cleanup)
        if ($myPSBoundParameters.ContainsKey('WithTranslate')) {

            $null = $myPSBoundParameters.Remove('WithTranslate', $true)
        }

        # handle srt format parameter dependencies
        if (($SRT -eq $true) -and `
            (-not $myPSBoundParameters.ContainsKey('PassThru'))) {

            $null = $myPSBoundParameters.Add('PassThru', $true)
        }
        else {

            if ((-not $SRT) -and $myPSBoundParameters.ContainsKey('PassThru')) {

                $null = $myPSBoundParameters.Remove('PassThru')
            }
        }

        # ensure error action is set to stop for proper error handling
        # if (-not $myPSBoundParameters.ContainsKey('ErrorAction')) {

        # $null = $myPSBoundParameters.Add('ErrorAction', 'Stop')
        # }

        # set cpu threads if not specified by user
        if (-not $myPSBoundParameters.ContainsKey('CpuThreads')) {

            $null = $myPSBoundParameters.Add('CpuThreads', `
                (GenXdev.AI\Get-NumberOfCpuCores))
        }
    }

    process {

        # collect input items from both parameter and pipeline
        $inputItems = @()

        # first, check if input parameter was provided
        if ($myPSBoundParameters.ContainsKey('Input')) {

            $inputItems += $myPSBoundParameters['Input']
        }

        # then, collect any pipeline input
        $Input |
            Microsoft.PowerShell.Core\ForEach-Object {

                $inputItems += $PSItem
            }

        # process each input item
        $inputItems |
            Microsoft.PowerShell.Core\ForEach-Object {

                $currentInput = $PSItem

                # convert input to file path based on object type
                $filePathString = if ($currentInput -is [string] -and `
                        -not [string]::IsNullOrWhiteSpace($currentInput)) {

                    GenXdev.FileSystem\Expand-Path $currentInput
                }
                elseif ($currentInput -is [System.IO.FileInfo]) {

                    $currentInput.FullName
                }
                elseif ($currentInput -and `
                        $currentInput.PSObject.Properties['FullName']) {

                    $currentInput.FullName
                }
                elseif ($currentInput -and `
                        $currentInput.PSObject.Properties['Path']) {

                    GenXdev.FileSystem\Expand-Path $currentInput.Path
                }
                else {

                    $null
                }

                # skip if no valid file path
                if ([string]::IsNullOrWhiteSpace($filePathString)) {

                    return
                }

                # mark that we have input, user does not want to record
                $script:InputProvided = $true

                # check if file exists
                if (-not [System.IO.File]::Exists($filePathString)) {

                    Microsoft.PowerShell.Utility\Write-Warning (
                        "File not found: '${filePathString}'"
                    )
                    return
                }

                # define helper function to check if winget powershell client is installed
                function IsWinGetInstalled {

                    # try to import the winget client module
                    Microsoft.PowerShell.Core\Import-Module `
                        'Microsoft.WinGet.Client' `
                        -ErrorAction SilentlyContinue

                    # check if the module was successfully loaded
                    $moduleObj = Microsoft.PowerShell.Core\Get-Module `
                        'Microsoft.WinGet.Client' -ErrorAction SilentlyContinue

                    if ($null -eq $moduleObj) {

                        return $false
                    }

                    return $true
                }

                # define helper function to install winget powershell client
                function InstallWinGet {

                    Microsoft.PowerShell.Utility\Write-Verbose `
                        'Installing WinGet PowerShell client..'

                    # check for installation consent before proceeding
                    $consent = GenXdev.FileSystem\Confirm-InstallationConsent `
                        -ApplicationName 'Microsoft.WinGet.Client' `
                        -Source 'PowerShell Gallery' `
                        -Description 'PowerShell module for WinGet package management, required for automated FFmpeg installation' `
                        -Publisher 'Microsoft'

                    if (-not $consent) {
                        throw 'Installation consent denied for Microsoft.WinGet.Client module. Cannot proceed with automatic package installation.'
                    }

                    # install the winget client module
                    PowerShellGet\Install-Module 'Microsoft.WinGet.Client' `
                        -Force -AllowClobber

                    # import the newly installed module
                    Microsoft.PowerShell.Core\Import-Module `
                        'Microsoft.WinGet.Client'
                }

                # define helper function to install ffmpeg using winget
                function InstallFFmpeg {

                    # check if ffmpeg is already installed
                    if ([System.IO.File]::Exists($ffmpegPath)) {

                        return
                    }

                    # ensure winget is installed before proceeding
                    if (-not (IsWinGetInstalled)) {

                        InstallWinGet
                    }

                    # define the ffmpeg package identifier
                    $ffmpeg = 'Gyan.FFmpeg'

                    # check if ffmpeg package is available
                    $ffmpegPackage = Microsoft.WinGet.Client\Get-WinGetPackage `
                        -Id $ffmpeg

                    # install ffmpeg if not found
                    if ($null -eq $ffmpegPackage) {

                        Microsoft.PowerShell.Utility\Write-Verbose `
                            'Installing ffmpeg..'

                        # check for installation consent before proceeding
                        $consent = GenXdev.FileSystem\Confirm-InstallationConsent `
                            -ApplicationName 'FFmpeg' `
                            -Source 'WinGet' `
                            -Description 'Audio/video processing library required for converting media files to formats compatible with speech recognition' `
                            -Publisher 'Gyan'

                        if (-not $consent) {
                            throw 'Installation consent denied for FFmpeg. Cannot proceed with media file conversion for transcription.'
                        }

                        try {

                            # attempt to install using winget client module
                            Microsoft.WinGet.Client\Install-WinGetPackage `
                                -Id $ffmpeg `
                                -Force
                        }
                        catch {

                            # fallback to winget command line tool
                            winget install $ffmpeg
                        }

                        # update the ffmpeg path after installation
                        # try multiple possible locations for ffmpeg
                        $ffmpegPath = $null

                        # first try the symlink location (fastest)
                        $symlinkPath = (
                            "${env:LOCALAPPDATA}\Microsoft\WinGet\Links\ffmpeg.exe"
                        )
                        if ([System.IO.File]::Exists($symlinkPath)) {

                            $ffmpegPath = $symlinkPath
                        }

                        # fallback to recursive search in winget directory
                        if ([string]::IsNullOrEmpty($ffmpegPath)) {

                            $ffmpegPath = (
                                Microsoft.PowerShell.Management\Get-ChildItem `
                                    -Path "${env:LOCALAPPDATA}\Microsoft\WinGet" `
                                    -Filter "ffmpeg.exe" `
                                    -Recurse -ErrorAction SilentlyContinue |
                                    Microsoft.PowerShell.Utility\Select-Object `
                                        -First 1 |
                                    Microsoft.PowerShell.Core\ForEach-Object `
                                        FullName
                                )
                            }
                        }
                    }

                    # ensure ffmpeg is installed before proceeding
                    $null = InstallFFmpeg

                    # expand the input file path to absolute path
                    $inputFile = GenXdev.FileSystem\Expand-Path $filePathString

                    # create a temporary wav file for conversion
                    $outputFile = [System.IO.Path]::GetTempFileName() + '.wav'

                    # inform user about the conversion process
                    Microsoft.PowerShell.Utility\Write-Verbose (
                        "Converting the file '$inputFile' to WAV format.."
                    )

                    # locate ffmpeg path in case it's not passed correctly
                    # try multiple possible locations for ffmpeg
                    if ([string]::IsNullOrEmpty($ffmpegPath)) {

                        # first try the symlink location (fastest)
                        $symlinkPath = (
                            "${env:LOCALAPPDATA}\Microsoft\WinGet\Links\ffmpeg.exe"
                        )
                        if ([System.IO.File]::Exists($symlinkPath)) {

                            $ffmpegPath = $symlinkPath
                        }

                        # fallback to recursive search in winget directory
                        if ([string]::IsNullOrEmpty($ffmpegPath)) {

                            $ffmpegPath = (
                                Microsoft.PowerShell.Management\Get-ChildItem `
                                    -Path "${env:LOCALAPPDATA}\Microsoft\WinGet" `
                                    -Filter "ffmpeg.exe" `
                                    -Recurse -ErrorAction SilentlyContinue |
                                    Microsoft.PowerShell.Utility\Select-Object `
                                        -First 1 |
                                    Microsoft.PowerShell.Core\ForEach-Object `
                                        FullName
                                )
                            }
                        }

                        # convert file to wav with specific audio parameters for whisper
                        & $ffmpegPath -i "$inputFile" -ac 1 -ar 16000 `
                            -sample_fmt s16 "$outputFile" -loglevel quiet -y

                        # check if the conversion was successful
                        $success = $LASTEXITCODE -eq 0

                        # handle conversion failure
                        if (-not $success) {

                            Microsoft.PowerShell.Utility\Write-Verbose (
                                "Failed to convert the file '$inputFile' to WAV format."
                            )

                            # clean up the temporary file if it exists
                            if ([System.IO.File]::Exists($outputFile)) {

                                $null = Microsoft.PowerShell.Management\Remove-Item `
                                    -LiteralPath $outputFile -Force
                            }

                            return
                        }

                        # inform user about the transcription process
                        Microsoft.PowerShell.Utility\Write-Verbose (
                            "Processing audio file: " +
                            "$(GenXdev.FileSystem\Find-Item $inputFile -NoRecurse)"
                        )

                        # check if modeltype was not explicitly set by user
                        if (-not $myPSBoundParameters.ContainsKey("ModelType")) {

                            # use most accurate model for batch processing
                            $ModelType = 'LargeV3Turbo'

                            # add modeltype to bound parameters for downstream functions
                            $null = $myPSBoundParameters.Add('ModelType', $ModelType)
                        }

                        # set the input file for the transcription engine
                        $invocationArguments = GenXdev.FileSystem\Copy-IdenticalParamValues `
                            -BoundParameters $myPSBoundParameters `
                            -FunctionName 'GenXdev.Helpers\Get-SpeechToText'

                        # set language using the resolved language format
                        if ($PSBoundParameters.ContainsKey("LanguageIn")) {
                            $invocationArguments.LanguageIn = (
                                GenXdev.Helpers\Get-WebLanguageDictionary
                            )[$LanguageIn]
                        }
                        else {
                            if ($invocationArguments.ContainsKey("LanguageIn")) {
                                $null = $invocationArguments.Remove("LanguageIn")
                            }
                        }

                        if ($LanguageOut -eq "English") {

                            $invocationArguments.WithTranslate = $true;
                            $skipTranslation = $true;
                        }

                        $invocationArguments.Input = $outputFile

                        try {
                            # add shouldprocess check before executing the operation
                            if (-not $PSCmdlet.ShouldProcess(
                                    "Start processing file '$outputFile'", 'Start'
                                )) {

                                continue
                            }

                            # check if output should be in srt subtitle format
                            if ($SRT) {

                                # initialize subtitle counter for srt format
                                $i = 1

                                GenXdev.Helpers\Get-SpeechToText `
                                    @invocationArguments |
                                    Microsoft.PowerShell.Core\ForEach-Object {

                                        $result = $PSItem

                                        # check if translation to output language is required
                                        if (-not [string]::IsNullOrWhiteSpace($LanguageOut) `
                                                -and -not $skipTranslation) {

                                            Microsoft.PowerShell.Utility\Write-Verbose (
                                                "Translating text to $LanguageOut for: " +
                                                "`"$($result.Text)`".."
                                            )

                                            try {

                                                # prepare parameters for text translation
                                                $translateParams = `
                                                    GenXdev.FileSystem\Copy-IdenticalParamValues `
                                                    -BoundParameters $myPSBoundParameters `
                                                    -FunctionName `
                                                    'GenXdev.AI\Get-TextTranslation'

                                                if ($translateParams.ContainsKey('Prompt')) {
                                                    $null = $translateParams.Remove('Prompt')
                                                }
                                                if ($translateParams.ContainsKey('Instructions')) {
                                                    $null = $translateParams.Remove('Instructions')
                                                }

                                                # create new result with translated text
                                                $result = @{
                                                    Text  = (GenXdev.AI\Get-TextTranslation `
                                                            @translateParams `
                                                            -Text:($result.Text) `
                                                            -Language:$LanguageOut
                                                    )
                                                    Start = $result.Start
                                                    End   = $result.End
                                                }

                                                Microsoft.PowerShell.Utility\Write-Verbose (
                                                    "Text translated to: " +
                                                    "`"$($result.Text)`".."
                                                )
                                            }
                                            catch {

                                                Microsoft.PowerShell.Utility\Write-Verbose (
                                                    "Translating text to $LanguageOut, " +
                                                    "failed: $PSItem"
                                                )
                                            }
                                        }

                                        # format timestamps for srt output
                                        $start = $result.Start.ToString(
                                            'hh\:mm\:ss\,fff',
                                            [System.Globalization.CultureInfo]::InvariantCulture
                                        )
                                        $end = $result.end.ToString(
                                            'hh\:mm\:ss\,fff',
                                            [System.Globalization.CultureInfo]::InvariantCulture
                                        )

                                        # return srt formatted subtitle entry to pipeline
                                        Microsoft.PowerShell.Utility\Write-Output "$i`r`n$start --> $end`r`n$($result.Text)`r`n`r`n"

                                        # increment subtitle counter
                                        $i++
                                    }

                                    # exit early for srt format processing
                                    return
                                }

                                # check if translation is needed for non-srt output
                                if (-not [string]::IsNullOrWhiteSpace($LanguageOut) `
                                        -and -not $skipTranslation) {

                                    # transcribe the audio file to get raw text
                                    $results = GenXdev.Helpers\Get-SpeechToText @invocationArguments

                                    # prepare parameters for text translation
                                    $translateParams = `
                                        GenXdev.FileSystem\Copy-IdenticalParamValues `
                                        -BoundParameters $myPSBoundParameters `
                                        -FunctionName 'GenXdev.AI\Get-TextTranslation'

                                    if ($translateParams.ContainsKey('Prompt')) {
                                        $null = $translateParams.Remove('Prompt')
                                    }
                                    if ($translateParams.ContainsKey('Instructions')) {
                                        $null = $translateParams.Remove('Instructions')
                                    }

                                    # translate the complete transcribed text and return result
                                    $translationResult = GenXdev.AI\Get-TextTranslation `
                                        @translateParams `
                                        -Text "$results" -Language $LanguageOut

                                    # return the translation result
                                    return $translationResult
                                }

                                GenXdev.Helpers\Get-SpeechToText @invocationArguments
                            }
                            catch {

                                # only show error if it's not a user abort
                                if ($PSItem.Exception.Message -notlike '*aborted*') {

                                    Microsoft.PowerShell.Utility\Write-Error $PSItem
                                }
                            }
                            finally {

                                # always clean up temporary files
                                if ([System.IO.File]::Exists($outputFile)) {

                                    $null = Microsoft.PowerShell.Management\Remove-Item `
                                        -LiteralPath $outputFile -Force -ErrorAction SilentlyContinue
                                }
                            }
                        }
    }

    end {

        # if no input was provided, start recording from microphone
        if (-not $script:InputProvided) {

            # add shouldprocess check before executing the operation
            if (-not $PSCmdlet.ShouldProcess(
                    "Start recording", 'Start'
                )) {

                continue
            }

            if (-not $myPSBoundParameters.ContainsKey("ModelType")) {

                # use most accurate model for batch processing
                $ModelType = 'tiny'

                # add modeltype to bound parameters for downstream functions
                $null = $myPSBoundParameters.Add('ModelType', $ModelType)
            }

            $invocationArguments = GenXdev.FileSystem\Copy-IdenticalParamValues `
                -BoundParameters $myPSBoundParameters `
                -FunctionName 'GenXdev.Helpers\Receive-RealTimeSpeechToText'

            # set language using the resolved language format
            if ($PSBoundParameters.ContainsKey("LanguageIn")) {
                $invocationArguments.LanguageIn = (
                    GenXdev.Helpers\Get-WebLanguageDictionary
                )[$LanguageIn]
            }
            else {
                if ($invocationArguments.ContainsKey("LanguageIn")) {
                    $null = $invocationArguments.Remove("LanguageIn")
                }
            }

            if ($LanguageOut -eq "English") {

                $invocationArguments.WithTranslate = $true;
                $skipTranslation = $true;
            }

            # check if output should be in srt subtitle format
            if ($SRT) {

                # initialize subtitle counter for srt format
                $i = 1

                GenXdev.Helpers\Receive-RealTimeSpeechToText `
                    @invocationArguments |
                    Microsoft.PowerShell.Core\ForEach-Object {

                        $result = $PSItem

                        # check if translation to output language is required
                        if (-not [string]::IsNullOrWhiteSpace($LanguageOut) `
                                -and -not $skipTranslation) {

                            Microsoft.PowerShell.Utility\Write-Verbose (
                                "Translating text to $LanguageOut for: " +
                                "`"$($result.Text)`".."
                            )

                            try {

                                # prepare parameters for text translation
                                $translateParams = `
                                    GenXdev.FileSystem\Copy-IdenticalParamValues `
                                    -BoundParameters $myPSBoundParameters `
                                    -FunctionName `
                                    'GenXdev.AI\Get-TextTranslation'

                                if ($translateParams.ContainsKey('Prompt')) {
                                    $null = $translateParams.Remove('Prompt')
                                }
                                if ($translateParams.ContainsKey('Instructions')) {
                                    $null = $translateParams.Remove('Instructions')
                                }

                                # create new result with translated text
                                $result = @{
                                    Text  = (GenXdev.AI\Get-TextTranslation `
                                            @translateParams `
                                            -Text:($result.Text) `
                                            -Language:$LanguageOut
                                    )
                                    Start = $result.Start
                                    End   = $result.End
                                }

                                Microsoft.PowerShell.Utility\Write-Verbose (
                                    "Text translated to: " +
                                    "`"$($result.Text)`".."
                                )
                            }
                            catch {

                                Microsoft.PowerShell.Utility\Write-Verbose (
                                    "Translating text to $LanguageOut, " +
                                    "failed: $PSItem"
                                )
                            }
                        }

                        # format timestamps for srt output
                        $start = $result.Start.ToString(
                            'hh\:mm\:ss\,fff',
                            [System.Globalization.CultureInfo]::InvariantCulture
                        )
                        $end = $result.end.ToString(
                            'hh\:mm\:ss\,fff',
                            [System.Globalization.CultureInfo]::InvariantCulture
                        )

                        # return srt formatted subtitle entry to pipeline
                        Microsoft.PowerShell.Utility\Write-Output "$i`r`n$start --> $end`r`n$($result.Text)`r`n`r`n"

                        # increment subtitle counter
                        $i++
                    }

                # exit early for srt format processing
                return
            }

            # check if translation is needed for microphone recording output
            if (-not [string]::IsNullOrWhiteSpace($LanguageOut) `
                    -and -not $skipTranslation) {

                # prepare parameters for text translation
                $translateParams = GenXdev.FileSystem\Copy-IdenticalParamValues `
                    -BoundParameters $myPSBoundParameters `
                    -FunctionName 'GenXdev.AI\Get-TextTranslation'

                if ($translateParams.ContainsKey('Prompt')) {
                    $null = $translateParams.Remove('Prompt')
                }
                if ($translateParams.ContainsKey('Instructions')) {
                    $null = $translateParams.Remove('Instructions')
                }

                # transcribe the microphone recording to get raw text
                GenXdev.Helpers\Receive-RealTimeSpeechToText @invocationArguments | Microsoft.PowerShell.Core\ForEach-Object {

                    # translate the complete transcribed text and return result
                    GenXdev.AI\Get-TextTranslation `
                        @translateParams `
                        -Text "$PSItem" -Language $LanguageOut
                }
                return
            }

            GenXdev.Helpers\Receive-RealTimeSpeechToText @invocationArguments
        }
    }
}
###############################################################################