Functions/GenXdev.AI.Queries/Get-MediaFileAudioTranscription.ps1

###############################################################################
<#
.SYNOPSIS
Transcribes an audio or video file to text.
 
.DESCRIPTION
Transcribes an audio or video file to text using the Whisper AI model. The
function can handle various audio and video formats, convert them to the
appropriate format for transcription, and optionally translate the output
to a different language. Supports SRT subtitle format output and various
audio processing parameters for fine-tuning the transcription quality.
 
.PARAMETER FilePath
The file path of the audio or video file to transcribe.
 
.PARAMETER LanguageIn
The language to expect in the audio. E.g. "English", "French", "German", "Dutch"
 
.PARAMETER LanguageOut
The language to translate to. E.g. "french", "german", "dutch"
 
.PARAMETER SRT
Output in SRT format.
 
.PARAMETER PassThru
Returns objects instead of strings.
 
.PARAMETER UseDesktopAudioCapture
Whether to use desktop audio capture instead of microphone input
 
.PARAMETER WithTokenTimestamps
Whether to include token timestamps in the output.
 
.PARAMETER TokenTimestampsSumThreshold
Sum threshold for token timestamps, defaults to 0.5.
 
.PARAMETER SplitOnWord
Whether to split on word boundaries.
 
.PARAMETER MaxTokensPerSegment
Maximum number of tokens per segment.
 
.PARAMETER IgnoreSilence
Whether to ignore silence (will mess up timestamps).
 
.PARAMETER MaxDurationOfSilence
Maximum duration of silence before automatically stopping recording.
 
.PARAMETER SilenceThreshold
Silence detection threshold (0..32767, defaults to 30).
 
.PARAMETER CpuThreads
Number of CPU threads to use, defaults to 0 (auto).
 
.PARAMETER Temperature
Temperature for speech recognition.
 
.PARAMETER TemperatureInc
Temperature increment.
 
.PARAMETER Prompt
Prompt to use for the model.
 
.PARAMETER SuppressRegex
Regex to suppress tokens from the output.
 
.PARAMETER WithProgress
Whether to show progress.
 
.PARAMETER AudioContextSize
Size of the audio context.
 
.PARAMETER DontSuppressBlank
Whether to NOT suppress blank lines.
 
.PARAMETER MaxDuration
Maximum duration of the audio.
 
.PARAMETER Offset
Offset for the audio.
 
.PARAMETER MaxLastTextTokens
Maximum number of last text tokens.
 
.PARAMETER SingleSegmentOnly
Whether to use single segment only.
 
.PARAMETER PrintSpecialTokens
Whether to print special tokens.
 
.PARAMETER MaxSegmentLength
Maximum segment length.
 
.PARAMETER MaxInitialTimestamp
Start timestamps at this moment.
 
.PARAMETER LengthPenalty
Length penalty.
 
.PARAMETER EntropyThreshold
Entropy threshold.
 
.PARAMETER LogProbThreshold
Log probability threshold.
 
.PARAMETER NoSpeechThreshold
No speech threshold.
 
.PARAMETER NoContext
Don't use context.
 
.PARAMETER WithBeamSearchSamplingStrategy
Use beam search sampling strategy.
 
.PARAMETER SessionOnly
Use alternative settings stored in session for AI preferences like Language,
Image collections, etc.
 
.PARAMETER ClearSession
Clear alternative settings stored in session for AI preferences like Language,
Image collections, etc.
 
.PARAMETER PreferencesDatabasePath
Database path for preference data files.
 
.PARAMETER SkipSession
Don't use alternative settings stored in session for AI preferences like
Language, Image collections, etc.
 
.EXAMPLE
Get-MediaFileAudioTranscription -FilePath "C:\path\to\audio.wav" `
    -LanguageIn "English" -LanguageOut "French" -SRT
 
.EXAMPLE
transcribefile "C:\video.mp4" "English"
###############################################################################>

function Get-MediaFileAudioTranscription {

    [CmdletBinding()]
    [Alias("transcribefile")]
    param (
        ###########################################################################
        [Parameter(
            Mandatory = $true,
            Position = 0,
            HelpMessage = "The file path of the audio or video file to transcribe."
        )]
        [string] $FilePath,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            Position = 1,
            HelpMessage = "The language to expect in the audio."
        )]
        [PSDefaultValue(Value = "English")]
        [ValidateSet(
            "Afrikaans",
            "Akan",
            "Albanian",
            "Amharic",
            "Arabic",
            "Armenian",
            "Azerbaijani",
            "Basque",
            "Belarusian",
            "Bemba",
            "Bengali",
            "Bihari",
            "Bork, bork, bork!",
            "Bosnian",
            "Breton",
            "Bulgarian",
            "Cambodian",
            "Catalan",
            "Cherokee",
            "Chichewa",
            "Chinese (Simplified)",
            "Chinese (Traditional)",
            "Corsican",
            "Croatian",
            "Czech",
            "Danish",
            "Dutch",
            "Elmer Fudd",
            "English",
            "Esperanto",
            "Estonian",
            "Ewe",
            "Faroese",
            "Filipino",
            "Finnish",
            "French",
            "Frisian",
            "Ga",
            "Galician",
            "Georgian",
            "German",
            "Greek",
            "Guarani",
            "Gujarati",
            "Hacker",
            "Haitian Creole",
            "Hausa",
            "Hawaiian",
            "Hebrew",
            "Hindi",
            "Hungarian",
            "Icelandic",
            "Igbo",
            "Indonesian",
            "Interlingua",
            "Irish",
            "Italian",
            "Japanese",
            "Javanese",
            "Kannada",
            "Kazakh",
            "Kinyarwanda",
            "Kirundi",
            "Klingon",
            "Kongo",
            "Korean",
            "Krio (Sierra Leone)",
            "Kurdish",
            "Kurdish (Soranî)",
            "Kyrgyz",
            "Laothian",
            "Latin",
            "Latvian",
            "Lingala",
            "Lithuanian",
            "Lozi",
            "Luganda",
            "Luo",
            "Macedonian",
            "Malagasy",
            "Malay",
            "Malayalam",
            "Maltese",
            "Maori",
            "Marathi",
            "Mauritian Creole",
            "Moldavian",
            "Mongolian",
            "Montenegrin",
            "Nepali",
            "Nigerian Pidgin",
            "Northern Sotho",
            "Norwegian",
            "Norwegian (Nynorsk)",
            "Occitan",
            "Oriya",
            "Oromo",
            "Pashto",
            "Persian",
            "Pirate",
            "Polish",
            "Portuguese (Brazil)",
            "Portuguese (Portugal)",
            "Punjabi",
            "Quechua",
            "Romanian",
            "Romansh",
            "Runyakitara",
            "Russian",
            "Scots Gaelic",
            "Serbian",
            "Serbo-Croatian",
            "Sesotho",
            "Setswana",
            "Seychellois Creole",
            "Shona",
            "Sindhi",
            "Sinhalese",
            "Slovak",
            "Slovenian",
            "Somali",
            "Spanish",
            "Spanish (Latin American)",
            "Sundanese",
            "Swahili",
            "Swedish",
            "Tajik",
            "Tamil",
            "Tatar",
            "Telugu",
            "Thai",
            "Tigrinya",
            "Tonga",
            "Tshiluba",
            "Tumbuka",
            "Turkish",
            "Turkmen",
            "Twi",
            "Uighur",
            "Ukrainian",
            "Urdu",
            "Uzbek",
            "Vietnamese",
            "Welsh",
            "Wolof",
            "Xhosa",
            "Yiddish",
            "Yoruba",
            "Zulu")]
        [string] $LanguageIn,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            Position = 2,
            HelpMessage = "Sets the language to translate to."
        )]
        [ValidateSet(
            "Afrikaans",
            "Akan",
            "Albanian",
            "Amharic",
            "Arabic",
            "Armenian",
            "Azerbaijani",
            "Basque",
            "Belarusian",
            "Bemba",
            "Bengali",
            "Bihari",
            "Bork, bork, bork!",
            "Bosnian",
            "Breton",
            "Bulgarian",
            "Cambodian",
            "Catalan",
            "Cherokee",
            "Chichewa",
            "Chinese (Simplified)",
            "Chinese (Traditional)",
            "Corsican",
            "Croatian",
            "Czech",
            "Danish",
            "Dutch",
            "Elmer Fudd",
            "English",
            "Esperanto",
            "Estonian",
            "Ewe",
            "Faroese",
            "Filipino",
            "Finnish",
            "French",
            "Frisian",
            "Ga",
            "Galician",
            "Georgian",
            "German",
            "Greek",
            "Guarani",
            "Gujarati",
            "Hacker",
            "Haitian Creole",
            "Hausa",
            "Hawaiian",
            "Hebrew",
            "Hindi",
            "Hungarian",
            "Icelandic",
            "Igbo",
            "Indonesian",
            "Interlingua",
            "Irish",
            "Italian",
            "Japanese",
            "Javanese",
            "Kannada",
            "Kazakh",
            "Kinyarwanda",
            "Kirundi",
            "Klingon",
            "Kongo",
            "Korean",
            "Krio (Sierra Leone)",
            "Kurdish",
            "Kurdish (Soranî)",
            "Kyrgyz",
            "Laothian",
            "Latin",
            "Latvian",
            "Lingala",
            "Lithuanian",
            "Lozi",
            "Luganda",
            "Luo",
            "Macedonian",
            "Malagasy",
            "Malay",
            "Malayalam",
            "Maltese",
            "Maori",
            "Marathi",
            "Mauritian Creole",
            "Moldavian",
            "Mongolian",
            "Montenegrin",
            "Nepali",
            "Nigerian Pidgin",
            "Northern Sotho",
            "Norwegian",
            "Norwegian (Nynorsk)",
            "Occitan",
            "Oriya",
            "Oromo",
            "Pashto",
            "Persian",
            "Pirate",
            "Polish",
            "Portuguese (Brazil)",
            "Portuguese (Portugal)",
            "Punjabi",
            "Quechua",
            "Romanian",
            "Romansh",
            "Runyakitara",
            "Russian",
            "Scots Gaelic",
            "Serbian",
            "Serbo-Croatian",
            "Sesotho",
            "Setswana",
            "Seychellois Creole",
            "Shona",
            "Sindhi",
            "Sinhalese",
            "Slovak",
            "Slovenian",
            "Somali",
            "Spanish",
            "Spanish (Latin American)",
            "Sundanese",
            "Swahili",
            "Swedish",
            "Tajik",
            "Tamil",
            "Tatar",
            "Telugu",
            "Thai",
            "Tigrinya",
            "Tonga",
            "Tshiluba",
            "Tumbuka",
            "Turkish",
            "Turkmen",
            "Twi",
            "Uighur",
            "Ukrainian",
            "Urdu",
            "Uzbek",
            "Vietnamese",
            "Welsh",
            "Wolof",
            "Xhosa",
            "Yiddish",
            "Yoruba",
            "Zulu")]
        [string] $LanguageOut = $null,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Whether to include token timestamps in the output"
        )]
        [switch] $WithTokenTimestamps,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Sum threshold for token timestamps, defaults to 0.5"
        )]
        [float] $TokenTimestampsSumThreshold = 0.5,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Whether to split on word boundaries"
        )]
        [switch] $SplitOnWord,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Maximum number of tokens per segment"
        )]
        [int] $MaxTokensPerSegment,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Whether to ignore silence (will mess up timestamps)"
        )]
        [switch] $IgnoreSilence,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = ("Maximum duration of silence before automatically " +
                           "stopping recording")
        )]
        [object] $MaxDurationOfSilence,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Silence detect threshold (0..32767 defaults to 30)"
        )]
        [ValidateRange(0, 32767)]
        [int] $SilenceThreshold,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Number of CPU threads to use, defaults to 0 (auto)"
        )]
        [int] $CpuThreads = 0,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Temperature for speech recognition"
        )]
        [ValidateRange(0, 100)]
        [float] $Temperature = 0.01,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Temperature increment"
        )]
        [ValidateRange(0, 1)]
        [float] $TemperatureInc,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Prompt to use for the model"
        )]
        [string] $Prompt,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Regex to suppress tokens from the output"
        )]
        [string] $SuppressRegex = $null,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Whether to show progress"
        )]
        [switch] $WithProgress,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Size of the audio context"
        )]
        [int] $AudioContextSize,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Whether to NOT suppress blank lines"
        )]
        [switch] $DontSuppressBlank,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Maximum duration of the audio"
        )]
        [object] $MaxDuration,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Offset for the audio"
        )]
        [object] $Offset,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Maximum number of last text tokens"
        )]
        [int] $MaxLastTextTokens,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Whether to use single segment only"
        )]
        [switch] $SingleSegmentOnly,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Whether to print special tokens"
        )]
        [switch] $PrintSpecialTokens,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Maximum segment length"
        )]
        [int] $MaxSegmentLength,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Start timestamps at this moment"
        )]
        [object] $MaxInitialTimestamp,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Length penalty"
        )]
        [ValidateRange(0, 1)]
        [float] $LengthPenalty,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Entropy threshold"
        )]
        [ValidateRange(0, 1)]
        [float] $EntropyThreshold,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Log probability threshold"
        )]
        [ValidateRange(0, 1)]
        [float] $LogProbThreshold,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "No speech threshold"
        )]
        [ValidateRange(0, 1)]
        [float] $NoSpeechThreshold,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Don't use context"
        )]
        [switch] $NoContext,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Use beam search sampling strategy"
        )]
        [switch] $WithBeamSearchSamplingStrategy,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Output in SRT format."
        )]
        [switch] $SRT,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Returns objects instead of strings"
        )]
        [switch] $PassThru,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = ("Whether to use desktop audio capture instead of " +
                           "microphone input")
        )]
        [switch] $UseDesktopAudioCapture,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = ("Use alternative settings stored in session for AI " +
                           "preferences like Language, Image collections, etc")
        )]
        [switch] $SessionOnly,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = ("Clear alternative settings stored in session for " +
                           "AI preferences like Language, Image collections, etc")
        )]
        [switch] $ClearSession,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = "Database path for preference data files"
        )]
        [string] $PreferencesDatabasePath,
        ###########################################################################
        [Parameter(
            Mandatory = $false,
            HelpMessage = ("Dont use alternative settings stored in session " +
                           "for AI preferences like Language, Image " +
                           "collections, etc")
        )]
        [Alias("FromPreferences")]
        [switch] $SkipSession
        ###########################################################################
    )

    begin {

        # copy identical parameter values for ai meta language helper function
        $params = GenXdev.Helpers\Copy-IdenticalParamValues `
            -BoundParameters $PSBoundParameters `
            -FunctionName "GenXdev.AI\Get-AIMetaLanguage" `
            -DefaultValues (Microsoft.PowerShell.Utility\Get-Variable `
                -Scope Local -ErrorAction SilentlyContinue)

        # resolve the input language to a standard format
        $LanguageIn = GenXdev.AI\Get-AIMetaLanguage @params -Language $LanguageIn

        # resolve the output language to a standard format
        $LanguageOut = GenXdev.AI\Get-AIMetaLanguage @params -Language $LanguageOut

        # convert maxdurationofsilence to timespan if it's not already
        if ($PSBoundParameters.ContainsKey("MaxDurationOfSilence") -and `
            (-not ($MaxDurationOfSilence -is [System.TimeSpan]))) {

            $MaxDurationOfSilence = [System.TimeSpan]::FromSeconds(`
                $MaxDurationOfSilence)
            $PSBoundParameters["MaxDurationOfSilence"] = $MaxDurationOfSilence
        }

        # convert maxduration to timespan if it's not already
        if ($PSBoundParameters.ContainsKey("MaxDuration") -and `
            (-not ($MaxDuration -is [System.TimeSpan]))) {

            $MaxDuration = [System.TimeSpan]::FromSeconds($MaxDuration)
            $PSBoundParameters["MaxDuration"] = $MaxDuration
        }

        # convert offset to timespan if it's not already
        if ($PSBoundParameters.ContainsKey("Offset") -and `
            (-not ($Offset -is [System.TimeSpan]))) {

            $Offset = [System.TimeSpan]::FromSeconds($Offset)
            $PSBoundParameters["Offset"] = $Offset
        }

        # convert maxinitialtimestamp to timespan if it's not already
        if ($PSBoundParameters.ContainsKey("MaxInitialTimestamp") -and `
            (-not ($MaxInitialTimestamp -is [System.TimeSpan]))) {

            $MaxInitialTimestamp = [System.TimeSpan]::FromSeconds(`
                $MaxInitialTimestamp)
            $PSBoundParameters["MaxInitialTimestamp"] = $MaxInitialTimestamp
        }

        # locate the ffmpeg executable path in winget installation directory
        $ffmpegPath = (Microsoft.PowerShell.Management\Get-ChildItem `
            "${env:LOCALAPPDATA}\Microsoft\WinGet\ffmpeg.exe" `
            -File -rec -ErrorAction SilentlyContinue | `
            Microsoft.PowerShell.Utility\Select-Object -First 1 | `
            Microsoft.PowerShell.Core\ForEach-Object FullName)
    }


process {

        # ensure maxsrtchars is between 20 and 200 characters
        $MaxSrtChars = [System.Math]::Min(200, [System.Math]::Max(20, `
            $MaxSrtChars))

        # helper function to check if winget powershell client is installed
        function IsWinGetInstalled {

            # try to import the winget client module
            Microsoft.PowerShell.Core\Import-Module "Microsoft.WinGet.Client" `
                -ErrorAction SilentlyContinue

            # check if the module was successfully loaded
            $module = Microsoft.PowerShell.Core\Get-Module "Microsoft.WinGet.Client" `
                -ErrorAction SilentlyContinue

            if ($null -eq $module) {

                return $false
            }

            return $true
        }

        # helper function to install winget powershell client
        function InstallWinGet {

            Microsoft.PowerShell.Utility\Write-Verbose `
                "Installing WinGet PowerShell client.."

            # install the winget client module
            PowerShellGet\Install-Module "Microsoft.WinGet.Client" `
                -Force -AllowClobber

            # import the newly installed module
            Microsoft.PowerShell.Core\Import-Module "Microsoft.WinGet.Client"
        }

        # helper function to install ffmpeg using winget
        function Installffmpeg {

            # check if ffmpeg is already installed
            if ([IO.File]::Exists($ffmpegPath)) {

                return
            }

            # ensure winget is installed before proceeding
            if (-not (IsWinGetInstalled)) {

                InstallWinGet
            }

            # define the ffmpeg package identifier
            $ffmpeg = "Gyan.FFmpeg"

            # check if ffmpeg package is available
            $ffmpegPackage = Microsoft.WinGet.Client\Get-WinGetPackage `
                -Id $ffmpeg

            # install ffmpeg if not found
            if ($null -eq $ffmpegPackage) {

                Microsoft.PowerShell.Utility\Write-Verbose "Installing ffmpeg.."

                try {
                    # attempt to install using winget client module
                    Microsoft.WinGet.Client\Install-WinGetPackage -Id $ffmpeg `
                        -Force
                }
                catch {
                    # fallback to winget command line tool
                    winget install $ffmpeg
                }

                # update the ffmpeg path after installation
                $ffmpegPath = (Microsoft.PowerShell.Management\Get-ChildItem `
                    "${env:LOCALAPPDATA}\Microsoft\WinGet\ffmpeg.exe" `
                    -File -rec -ErrorAction SilentlyContinue | `
                    Microsoft.PowerShell.Utility\Select-Object -First 1).FullName
            }
        }

        # ensure ffmpeg is installed before proceeding
        $null = Installffmpeg

        # expand the input file path to absolute path
        $inputFile = GenXdev.FileSystem\Expand-Path $FilePath

        # create a temporary wav file for conversion
        $outputFile = [IO.Path]::GetTempFileName() + ".wav"

        # inform user about the conversion process
        Microsoft.PowerShell.Utility\Write-Verbose `
            ("Converting the file '$inputFile' to WAV format..")

        # start background job to convert media file to wav format
        $job = Microsoft.PowerShell.Core\Start-Job `
            -ArgumentList $ffmpegPath, $inputFile, $outputFile -ScriptBlock {

            param($ffmpegPath, $inputFile, $outputFile)

            # locate ffmpeg path in case it's not passed correctly
            $ffmpegPath = (Microsoft.PowerShell.Management\Get-ChildItem `
                "${env:LOCALAPPDATA}\Microsoft\WinGet\ffmpeg.exe" `
                -File -rec -ErrorAction SilentlyContinue | `
                Microsoft.PowerShell.Utility\Select-Object -First 1 | `
                Microsoft.PowerShell.Core\ForEach-Object FullName)

            try {
                # convert file to wav with specific audio parameters for whisper
                & $ffmpegPath -i "$inputFile" -ac 1 -ar 16000 `
                    -sample_fmt s16 "$outputFile" -loglevel quiet -y | `
                    Microsoft.PowerShell.Core\Out-Null
            }
            finally {
                # clear the terminal line to remove ffmpeg output
                [System.Console]::Write("`e[1A`e[2K")
            }

            # return the exit code for success/failure checking
            return $LASTEXITCODE
        }

        # wait for the conversion job to complete
        $job | Microsoft.PowerShell.Core\Wait-Job | `
            Microsoft.PowerShell.Core\Out-Null

        # check if the conversion was successful
        $success = ($job | Microsoft.PowerShell.Core\Receive-Job) -eq 0

        # clean up the completed job
        Microsoft.PowerShell.Core\Remove-Job -Job $job | `
            Microsoft.PowerShell.Core\Out-Null

        # handle conversion failure
        if (-not $success) {

            Microsoft.PowerShell.Utility\Write-Warning `
                ("Failed to convert the file '$inputFile' to WAV format.")

            # clean up the temporary file if it exists
            if ([IO.File]::Exists($outputFile)) {

                $null = Microsoft.PowerShell.Management\Remove-Item `
                    -Path $outputFile -Force
            }

            return
        }

        # inform user about the transcription process
        Microsoft.PowerShell.Utility\Write-Verbose `
            ("Transcribing the audio file '$inputFile'..")

        # add language parameter if languagein was specified
        if ($PSBoundParameters.ContainsKey("LanguageIn")) {

            $null = $PSBoundParameters.Add("Language", $LanguageIn)
        }

        # remove withtranslate parameter if it exists (legacy cleanup)
        if ($PSBoundParameters.ContainsKey("WithTranslate")) {

            $null = $PSBoundParameters.Remove("WithTranslate", $true)
        }

        # handle srt format parameter dependencies
        if (($SRT -eq $true) -and `
            (-not $PSBoundParameters.ContainsKey("PassThru"))) {

            $null = $PSBoundParameters.Add("PassThru", $true)
        }
        else {

            if ((-not $SRT) -and $PSBoundParameters.ContainsKey("PassThru")) {

                $null = $PSBoundParameters.Remove("PassThru")
            }
        }

        # add the converted wav file path to parameters
        if (-not $PSBoundParameters.ContainsKey("WaveFile")) {

            $null = $PSBoundParameters.Add("WaveFile", $outputFile)
        }

        # ensure error action is set to stop for proper error handling
        if (-not $PSBoundParameters.ContainsKey("ErrorAction")) {

            $null = $PSBoundParameters.Add("ErrorAction", "Stop")
        }

        # handle model file path parameter
        if (-not $PSBoundParameters.ContainsKey("ModelFilePath")) {

            $null = $PSBoundParameters.Add("ModelFilePath", $ModelFilePath)
        }
        else {

            $PSBoundParameters["ModelFilePath"] = $ModelFilePath
        }

        # optimize cpu thread usage based on gpu availability
        if (-not (GenXdev.AI\Get-HasCapableGpu)) {

            if (-not $PSBoundParameters.ContainsKey("CpuThreads")) {

                $null = $PSBoundParameters.Add("CpuThreads", `
                    (GenXdev.AI\Get-NumberOfCpuCores))
            }
        }

        try {

            # check if output should be in srt subtitle format
            if ($SRT) {

                # initialize subtitle counter for srt format
                $i = 1

                # copy parameters for audio transcription function
                $invocationArguments = GenXdev.Helpers\Copy-IdenticalParamValues `
                    -BoundParameters $PSBoundParameters `
                    -FunctionName "GenXdev.AI\Start-AudioTranscription"

                # process each transcription segment for srt output
                GenXdev.AI\Start-AudioTranscription @invocationArguments | `
                    Microsoft.PowerShell.Core\ForEach-Object {

                    $result = $PSItem

                    # check if translation to output language is required
                    if (-not [string]::IsNullOrWhiteSpace($LanguageOut)) {

                        Microsoft.PowerShell.Utility\Write-Verbose `
                            ("Translating text to $LanguageOut for: " +
                             "`"$($result.Text)`"..")

                        try {
                            # prepare parameters for text translation
                            $translateParams = `
                                GenXdev.Helpers\Copy-IdenticalParamValues `
                                -BoundParameters $PSBoundParameters `
                                -FunctionName "GenXdev.AI\Get-TextTranslation" `
                                -DefaultValues `
                                (Microsoft.PowerShell.Utility\Get-Variable `
                                -Scope Local -ErrorAction SilentlyContinue)

                            # create new result with translated text
                            $result = @{
                                Text  = (GenXdev.AI\Get-TextTranslation `
                                    @translateParams -Text:($result.Text) `
                                    -Language:$LanguageOut `
                                    -Instructions ("Translate this partial " +
                                        "subtitle text, into the [Language] " +
                                        "language. ommit only the translation " +
                                        "no yapping or chatting. return in " +
                                        "json format like so: " +
                                        "{`"Translation`":`"Translated text " +
                                        "here`"}") | `
                                    Microsoft.PowerShell.Utility\ConvertFrom-Json).Translation
                                Start = $result.Start
                                End   = $result.End
                            }

                            Microsoft.PowerShell.Utility\Write-Verbose `
                                ("Text translated to: `"$($result.Text)`"..")
                        }
                        catch {

                            Microsoft.PowerShell.Utility\Write-Verbose `
                                ("Translating text to $LanguageOut, " +
                                 "failed: $PSItem")
                        }
                    }

                    # format timestamps for srt output
                    $start = $result.Start.ToString("hh\:mm\:ss\,fff", `
                        [CultureInfo]::InvariantCulture)
                    $end = $result.end.ToString("hh\:mm\:ss\,fff", `
                        [CultureInfo]::InvariantCulture)

                    # output srt formatted subtitle entry
                    "$i`r`n$start --> $end`r`n$($result.Text)`r`n`r`n"

                    # increment subtitle counter
                    $i++
                }

                # exit early for srt format processing
                return
            }

            # check if translation is needed for non-srt output
            if (-not [string]::IsNullOrWhiteSpace($LanguageOut)) {

                # copy parameters for audio transcription function
                $invocationArguments = GenXdev.Helpers\Copy-IdenticalParamValues `
                    -BoundParameters $PSBoundParameters `
                    -FunctionName "GenXdev.AI\Start-AudioTranscription"

                # transcribe the audio file to get raw text
                $results = GenXdev.AI\Start-AudioTranscription `
                    @invocationArguments

                # prepare parameters for text translation
                $translateParams = GenXdev.Helpers\Copy-IdenticalParamValues `
                    -BoundParameters $PSBoundParameters `
                    -FunctionName "GenXdev.AI\Get-TextTranslation" `
                    -DefaultValues (Microsoft.PowerShell.Utility\Get-Variable `
                        -Scope Local -ErrorAction SilentlyContinue)

                # translate the complete transcribed text
                GenXdev.AI\Get-TextTranslation @translateParams `
                    -Text "$results" -Language $LanguageOut

                # exit early for translation processing
                return
            }

            # handle standard transcription without translation
            $invocationArguments = GenXdev.Helpers\Copy-IdenticalParamValues `
                -BoundParameters $PSBoundParameters `
                -FunctionName "GenXdev.AI\Start-AudioTranscription"

            # return transcribed text without translation
            GenXdev.AI\Start-AudioTranscription @invocationArguments
        }
        catch {

            # only show error if it's not a user abort
            if ("$PSItem" -notlike "*aborted*") {

                Microsoft.PowerShell.Utility\Write-Error $PSItem
            }
        }
        finally {

            # always clean up temporary files
            if ([IO.File]::Exists($outputFile)) {

                Microsoft.PowerShell.Management\Remove-Item -Path $outputFile `
                    -Force
            }
        }
    }

    end {
        # intentionally empty: no end-of-pipeline work is performed here
    }
}
###############################################################################