Functions/GenXdev.AI/New-LLMAudioChat.ps1
################################################################################ <# .SYNOPSIS Creates an interactive audio chat session with an LLM model. .DESCRIPTION Initiates a voice-based conversation with a language model, supporting audio input and output. The function handles audio recording, transcription, model queries, and text-to-speech responses. Supports multiple language models and various configuration options. .PARAMETER Query Initial text query to send to the model. Can be empty to start with voice input. .PARAMETER Model The model name/path to use. Supports -like pattern matching. Default: "*-tool-use" .PARAMETER ModelLMSGetIdentifier Model identifier for LM Studio. Default: "llama-3-groq-8b-tool-use" .PARAMETER Instructions System instructions/prompt to guide the model's behavior. .PARAMETER Attachments Array of file paths to attach to the conversation for context. .PARAMETER AudioTemperature Temperature setting for audio input recognition. Range: 0.0-1.0. Default: 0.0 .PARAMETER Temperature Temperature for response randomness. Range: 0.0-1.0. Default: 0.0 .PARAMETER MaxToken Maximum tokens in model response. Default: 8192 .PARAMETER ShowWindow Switch to show the LM Studio window during operation. .PARAMETER TTLSeconds Time-to-live in seconds for models loaded via API requests. Default: -1 .PARAMETER Gpu GPU offloading configuration. -2=Auto, -1=LM Studio decides, 0-1=fraction of layers Default: -1 .PARAMETER Force Switch to force stop LM Studio before initialization. .PARAMETER ImageDetail Image detail level setting. Options: "low", "medium", "high". Default: "low" .PARAMETER IncludeThoughts Switch to include model's thought process in output. .PARAMETER ContinueLast Switch to continue from last conversation context. .PARAMETER ExposedCmdLets Array of PowerShell command definitions available as tools to the model. .PARAMETER DontSpeak Switch to disable text-to-speech for AI responses. .PARAMETER DontSpeakThoughts Switch to disable text-to-speech for AI thought responses. .PARAMETER NoVOX Switch to disable silence detection for automatic recording stop. .PARAMETER UseDesktopAudioCapture Switch to use desktop audio capture instead of microphone input. .PARAMETER TemperatureResponse Temperature for controlling response randomness. Range: 0.0-1.0. Default: 0.01 .PARAMETER Language Language to detect in audio input. Default: "English" .PARAMETER CpuThreads Number of CPU threads to use. 0=auto. Default: 0 .PARAMETER SuppressRegex Regex pattern to suppress tokens from output. .PARAMETER AudioContextSize Size of the audio context window. .PARAMETER SilenceThreshold Threshold for silence detection. Range: 0.0-1.0. Default: 0.3 .PARAMETER LengthPenalty Penalty factor for response length. Range: 0-1 .PARAMETER EntropyThreshold Threshold for entropy in responses. Range: 0-1 .PARAMETER LogProbThreshold Threshold for log probability in responses. Range: 0-1 .PARAMETER NoSpeechThreshold Threshold for no-speech detection. Range: 0-1. Default: 0.1 .PARAMETER NoContext Switch to disable context usage in conversation. .PARAMETER WithBeamSearchSamplingStrategy Switch to enable beam search sampling strategy. .PARAMETER OnlyResponses Switch to suppress recognized text in output. .PARAMETER NoSessionCaching Switch to disable session caching. .PARAMETER ApiEndpoint API endpoint URL. Default: http://localhost:1234/v1/chat/completions .PARAMETER ApiKey API key for authentication. .EXAMPLE New-LLMAudioChat -Query "Tell me about PowerShell" ` -Model "*-tool-use" ` -Temperature 0.7 ` -MaxToken 4096 .EXAMPLE llmaudiochat "What's the weather?" -DontSpeak #> function New-LLMAudioChat { [CmdletBinding()] [Alias("llmaudiochat")] param( ######################################################################## [Parameter( ValueFromPipeline = $true, Mandatory = $false, Position = 0, HelpMessage = "Initial query text to send to the model" )] [AllowEmptyString()] [string] $query = "", ######################################################################## [Parameter( Mandatory = $false, Position = 1, HelpMessage = "The LM-Studio model to use" )] [string] $Model = "*-tool-use", ######################################################################## [Parameter( Mandatory = $false, Position = 2, HelpMessage = "The LM-Studio model identifier" )] [string] $ModelLMSGetIdentifier = "llama-3-groq-8b-tool-use", ######################################################################## [Parameter( Mandatory = $false, Position = 3, HelpMessage = "System instructions for the model")] [string] $Instructions, ######################################################################## [Parameter( Position = 4, Mandatory = $false, HelpMessage = "Array of file paths to attach")] [string[]] $Attachments = @(), ######################################################################## [Parameter( Mandatory = $false, HelpMessage = "Temperature for audio input recognition (0.0-1.0)")] [ValidateRange(0.0, 1.0)] [double] $AudioTemperature = 0.0, ######################################################################## [Parameter( Mandatory = $false, HelpMessage = "Temperature for response randomness (0.0-1.0)")] [ValidateRange(0.0, 1.0)] [double] $Temperature = 0.0, ######################################################################## [Parameter( Mandatory = $false, HelpMessage = "Maximum tokens in response (-1 for default)")] [Alias("MaxTokens")] [int] $MaxToken = 8192, ######################################################################## [Parameter( Mandatory = $false, HelpMessage = "Show the LM Studio window")] [switch] $ShowWindow, ######################################################################## [Alias("ttl")] [Parameter( Mandatory = $false, HelpMessage = "Set a TTL (in seconds) for models loaded via API requests")] [int] $TTLSeconds = -1, ######################################################################## [Parameter( Mandatory = $false, HelpMessage = "How much to offload to the GPU. If `"off`", GPU offloading is disabled. If `"max`", all layers are offloaded to GPU. If a number between 0 and 1, that fraction of layers will be offloaded to the GPU. -1 = LM Studio will decide how much to offload to the GPU. -2 = Auto " )] [int]$Gpu = -1, ######################################################################## [Parameter( Mandatory = $false, HelpMessage = "Force stop LM Studio before initialization" )] [switch]$Force, ######################################################################## [Parameter( Mandatory = $false, HelpMessage = "Image detail level")] [ValidateSet("low", "medium", "high")] [string] $ImageDetail = "low", ######################################################################## [Parameter( Mandatory = $false, HelpMessage = "Include model's thoughts in output")] [switch] $IncludeThoughts, ######################################################################## [Parameter( Mandatory = $false, HelpMessage = "Continue from last conversation")] [switch] $ContinueLast, ######################################################################## [Parameter( Mandatory = $false, HelpMessage = "Array of PowerShell command definitions to use as tools")] [GenXdev.Helpers.ExposedCmdletDefinition[]] $ExposedCmdLets = @(), ########################################################################### [Parameter( HelpMessage = "Disable text-to-speech for AI responses", Mandatory = $false )] [switch] $DontSpeak, ########################################################################### [Parameter( HelpMessage = "Disable text-to-speech for AI thought responses", Mandatory = $false )] [switch] $DontSpeakThoughts, ################################################################################ [Parameter( Mandatory = $false, HelpMessage = "Don't use silence detection to automatically stop recording." )] [switch] $NoVOX, ################################################################################ [Parameter(Mandatory = $false, HelpMessage = "Whether to use desktop audio capture instead of microphone input")] [switch] $UseDesktopAudioCapture, ################################################################################ [Parameter( Mandatory = $false, HelpMessage = "The temperature parameter for controlling the randomness of the response." )] [ValidateRange(0.0, 1.0)] [double] $TemperatureResponse = 0.01, ################################################################################ [Parameter(Mandatory = $false, HelpMessage = "Sets the language to detect, defaults to 'English'")] [ValidateSet( "Afrikaans", "Akan", "Albanian", "Amharic", "Arabic", "Armenian", "Azerbaijani", "Basque", "Belarusian", "Bemba", "Bengali", "Bihari", "Bork, bork, bork!", "Bosnian", "Breton", "Bulgarian", "Cambodian", "Catalan", "Cherokee", "Chichewa", "Chinese (Simplified)", "Chinese (Traditional)", "Corsican", "Croatian", "Czech", "Danish", "Dutch", "Elmer Fudd", "English", "Esperanto", "Estonian", "Ewe", "Faroese", "Filipino", "Finnish", "French", "Frisian", "Ga", "Galician", "Georgian", "German", "Greek", "Guarani", "Gujarati", "Hacker", "Haitian Creole", "Hausa", "Hawaiian", "Hebrew", "Hindi", "Hungarian", "Icelandic", "Igbo", "Indonesian", "Interlingua", "Irish", "Italian", "Japanese", "Javanese", "Kannada", "Kazakh", "Kinyarwanda", "Kirundi", "Klingon", "Kongo", "Korean", "Krio (Sierra Leone)", "Kurdish", "Kurdish (Soranî)", "Kyrgyz", "Laothian", "Latin", "Latvian", "Lingala", "Lithuanian", "Lozi", "Luganda", "Luo", "Macedonian", "Malagasy", "Malay", "Malayalam", "Maltese", "Maori", "Marathi", "Mauritian Creole", "Moldavian", "Mongolian", "Montenegrin", "Nepali", "Nigerian Pidgin", "Northern Sotho", "Norwegian", "Norwegian (Nynorsk)", "Occitan", "Oriya", "Oromo", "Pashto", "Persian", "Pirate", "Polish", "Portuguese (Brazil)", "Portuguese (Portugal)", "Punjabi", "Quechua", "Romanian", "Romansh", "Runyakitara", "Russian", "Scots Gaelic", "Serbian", "Serbo-Croatian", "Sesotho", "Setswana", "Seychellois Creole", "Shona", "Sindhi", "Sinhalese", "Slovak", "Slovenian", "Somali", "Spanish", "Spanish (Latin American)", "Sundanese", "Swahili", "Swedish", "Tajik", "Tamil", "Tatar", "Telugu", "Thai", "Tigrinya", "Tonga", "Tshiluba", "Tumbuka", "Turkish", "Turkmen", "Twi", "Uighur", "Ukrainian", "Urdu", "Uzbek", "Vietnamese", "Welsh", "Wolof", "Xhosa", "Yiddish", "Yoruba", "Zulu")] [string] $Language = "English", ################################################################################ [Parameter(Mandatory = $false, HelpMessage = "Number of CPU threads to use, defaults to 0 (auto)")] [int] $CpuThreads = 0, ################################################################################ [Parameter(Mandatory = $false, HelpMessage = "Regex to suppress tokens from the output")] [string] $SuppressRegex = $null, ################################################################################ [Parameter(Mandatory = $false, HelpMessage = "Size of the audio context")] [int] $AudioContextSize, ################################################################################ [Parameter(Mandatory = $false, HelpMessage = "Maximum duration of the audio")] [ValidateRange(0.0, 1.0)] [float] $SilenceThreshold = 0.3, ################################################################################ [Parameter(Mandatory = $false, HelpMessage = "Length penalty")] [ValidateRange(0, 1)] [float] $LengthPenalty, ################################################################################ [Parameter(Mandatory = $false, HelpMessage = "Entropy threshold")] [ValidateRange(0, 1)] [float] $EntropyThreshold, ################################################################################ [Parameter(Mandatory = $false, HelpMessage = "Log probability threshold")] [ValidateRange(0, 1)] [float] $LogProbThreshold, ################################################################################ [Parameter(Mandatory = $false, HelpMessage = "No speech threshold")] [ValidateRange(0, 1)] [float] $NoSpeechThreshold = 0.1, ################################################################################ [Parameter(Mandatory = $false, HelpMessage = "Don't use context")] [switch] $NoContext, ################################################################################ [Parameter(Mandatory = $false, HelpMessage = "Use beam search sampling strategy")] [switch] $WithBeamSearchSamplingStrategy, ################################################################################ [Parameter(Mandatory = $false, HelpMessage = "Whether to suppress reconized text in the output")] [switch] $OnlyResponses, ######################################################################## [Parameter( Mandatory = $false, HelpMessage = "Don't store session in session cache")] [switch] $NoSessionCaching, ######################################################################## [Parameter( Mandatory = $false, HelpMessage = "Api endpoint url, defaults to http://localhost:1234/v1/chat/completions")] [string] $ApiEndpoint = $null, ######################################################################## [Parameter( Mandatory = $false, HelpMessage = "The API key to use for the request")] [string] $ApiKey = $null ######################################################################## ) begin { # initialize stopping flag for chat loop $stopping = $false Write-Verbose "Starting new audio LLM chat session with model $Model" # handle exposed cmdlets configuration Write-Verbose "Configuring exposed cmdlets..." if ($null -eq $ExposedCmdLets) { if ($ContinueLast -and $Global:LMStudioGlobalExposedCmdlets) { Write-Verbose "Using existing exposed cmdlets from last session" $ExposedCmdLets = $Global:LMStudioGlobalExposedCmdlets } else { Write-Verbose "Initializing default exposed cmdlets" # initialize default allowed PowerShell cmdlets $ExposedCmdLets = @( @{ Name = "Get-ChildItem" AllowedParams = @("Path=string", "Recurse=boolean", "Filter=array", "Include=array", "Exclude=array", "Force") OutputText = $false Confirm = $false JsonDepth = 3 }, @{ Name = "Find-Item" AllowedParams = @("SearchMask", "Pattern", "PassThru") OutputText = $false Confirm = $false JsonDepth = 3 }, @{ Name = "Get-Content" AllowedParams = @("Path=string") OutputText = $false Confirm = $false JsonDepth = 2 }, @{ Name = "Approve-NewTextFileContent" AllowedParams = @("ContentPath", "NewContent") OutputText = $false Confirm = $true JsonDepth = 2 }, @{ Name = "Invoke-WebRequest" AllowedParams = @("Uri=string", "Method=string", "Body", "ContentType=string", "Method=string", "UserAgent=string") OutputText = $false Confirm = $false JsonDepth = 4 }, @{ Name = "Invoke-RestMethod" AllowedParams = @("Uri=string", "Method=string", "Body", "ContentType=string", "Method=string", "UserAgent=string") OutputText = $false Confirm = $false JsonDepth = 99 }, @{ Name = "UTCNow" OutputText = $true Confirm = $false }, @{ Name = "Get-LMStudioModelList" OutputText = $false Confirm = $false JsonDepth = 2 }, @{ Name = "Get-LMStudioLoadedModelList" OutputText = $false Confirm = $false JsonDepth = 2 }, @{ Name = "Invoke-LLMQuery" AllowedParams = @("Query", "Model", "Instructions", "Attachments", "IncludeThoughts") ForcedParams = @(@{Name = "NoSessionCaching"; Value = $true }) OutputText = $false Confirm = $false JsonDepth = 99 } ) } } # cache exposed cmdlets if session caching is enabled if (-not $NoSessionCaching) { Write-Verbose "Caching exposed cmdlets for future sessions" $Global:LMStudioGlobalExposedCmdlets = $ExposedCmdLets } Write-Verbose "Successfully initialized with $($ExposedCmdLets.Count) exposed cmdlets" # ensure required parameters are properly set Write-Verbose "Validating and setting required parameters" # ensure required parameters exist if (-not $PSBoundParameters.ContainsKey("Model")) { $null = $PSBoundParameters.Add("Model", $Model) } if (-not $PSBoundParameters.ContainsKey("ModelLMSGetIdentifier") -and $PSBoundParameters.ContainsKey("Model")) { $null = $PSBoundParameters.Add("ModelLMSGetIdentifier", $ModelLMSGetIdentifier) } if (-not $PSBoundParameters.ContainsKey("ContinueLast")) { $null = $PSBoundParameters.Add("ContinueLast", $ContinueLast) } if ([string]::IsNullOrWhiteSpace($ApiEndpoint) -or $ApiEndpoint.Contains("localhost")) { $initializationParams = Copy-IdenticalParamValues -BoundParameters $PSBoundParameters ` -FunctionName 'GenXdev.AI\Initialize-LMStudioModel' ` -DefaultValues (Get-Variable -Scope Local -Name * -ErrorAction SilentlyContinue) $modelInfo = Initialize-LMStudioModel @initializationParams $Model = $modelInfo.identifier } if ($PSBoundParameters.ContainsKey("Force")) { $null = $PSBoundParameters.Remove("Force") $Force = $false } if ($PSBoundParameters.ContainsKey("ShowWindow")) { $null = $PSBoundParameters.Remove("ShowWindow") $ShowWindow = $false } if ($PSBoundParameters.ContainsKey("ChatOnce")) { $null = $PSBoundParameters.Remove("ChatOnce") } if (-not $PSBoundParameters.ContainsKey("ExposedCmdLets")) { $null = $PSBoundParameters.Add("ExposedCmdLets", $ExposedCmdLets); } $hadAQuery = -not [string]::IsNullOrEmpty($query) } process { [string] $recognizedText = $query while (-not $stopping) { # handle initial query vs subsequent voice input if ($hadAQuery) { Write-Verbose "Processing initial query: $query" $hadAQuery = $false $query = [string]::Empty if ($PSBoundParameters.ContainsKey("Query")) { $null = $PSBoundParameters.Remove("Query") } } else { Write-Host "Press any key to start recording or Q to quit" try { # prepare audio transcription parameters Write-Verbose "Preparing audio transcription parameters" $audioParams = Copy-IdenticalParamValues ` -BoundParameters $PSBoundParameters ` -FunctionName "GenXdev.AI\Start-AudioTranscription" ` -DefaultValues (Get-Variable -Scope Local -Name * ` -ErrorAction SilentlyContinue) # configure and execute audio recording Write-Verbose "Configuring audio settings" $audioParams.VOX = -not $NoVOX $audioParams.Temperature = $AudioTemperature $audioParams.ModelFilePath = Expand-Path "..\..\..\..\GenXdev.Local\" ` -CreateDirectory # process text input or start recording $recognizedText = $query ? $query.Trim() : [string]::Empty if ([string]::IsNullOrWhiteSpace($recognizedText)) { Write-Verbose "Starting audio recording and transcription" $recognizedText = Start-AudioTranscription @audioParams } } catch { # handle audio recording errors if ("$PSItem" -notlike "*aborted*") { Write-Error $PSItem } Write-Verbose "Audio transcription failed or was aborted" $query = [string]::Empty $recognizedText = [string]::Empty continue } } # process recognized input if not empty if (-not [string]::IsNullOrWhiteSpace($recognizedText)) { $question = $recognizedText Write-Verbose "Processing recognized input: $question" # prepare LM Studio query parameters Write-Verbose "Preparing LM Studio parameters" $invokeLMStudioParams = Copy-IdenticalParamValues ` -BoundParameters $PSBoundParameters ` -FunctionName "GenXdev.AI\New-LLMTextChat" ` -DefaultValues (Get-Variable -Scope Local -Name * ` -ErrorAction SilentlyContinue) # configure and execute LM Studio query Write-Verbose "Configuring LM Studio query parameters" $invokeLMStudioParams.Query = $question $invokeLMSTudioParams.Speak = -not $DontSpeak $invokeLMStudioParams.SpeakThoughts = -not $DontSpeakThoughts $invokeLMStudioParams.ChatOnce = $true Write-Verbose "Executing LM Studio query" $answer = New-LLMTextChat @invokeLMStudioParams # display formatted response if ($OnlyResponses) { Write-Host "$answer" -ForegroundColor Green } else { Write-Host "<< $answer" -ForegroundColor Green } Write-Host "Press any key to interrupt and start recording or Q to quit" } else { Write-Host "Too short or only silence recorded`r`n" } # monitor for key presses during speech output Write-Verbose "Monitoring for key presses during speech output" $continueWaiting = $true while ($continueWaiting -and (Get-IsSpeaking)) { while ([Console]::KeyAvailable) { $key = [Console]::ReadKey().Key [System.Console]::Write("`e[1G`e[2K") if ($key -eq [ConsoleKey]::Q) { sst Write-Host "---------------" $continueWaiting = $false $stopping = $true return } else { $continueWaiting = $false break } } $null = Start-Sleep -Milliseconds 100 } # clear previous prompt [System.Console]::Write("`e[1A`e[2K") sst Write-Host "---------------" } } end { Write-Verbose "Audio chat session completed" } } ################################################################################ |