# SpeechToText_Example.ps1
<#PSScriptInfo

.VERSION 1.0.0

.GUID fab4bdf6-b198-4bf4-9eac-87c18122fdac

.AUTHOR mikko@lavento.com

.COMPANYNAME

.COPYRIGHT

.TAGS Google, Speech, GoogleCloud, API, Speech-to-text

.LICENSEURI

.PROJECTURI

.ICONURI

.EXTERNALMODULEDEPENDENCIES

.REQUIREDSCRIPTS

.EXTERNALSCRIPTDEPENDENCIES

.RELEASENOTES

#>

<#
.DESCRIPTION
 Speech to Text API example using Google's cloud.

 Prerequisites (assumed to be done already):
   https://cloud.google.com/speech-to-text/docs/quickstart-client-libraries

 API for audio shorter than a minute (not used in this example). Strangely, while
 experimenting, audio under 30 seconds was accepted but 30-60 second audio was not.
   https://speech.googleapis.com/v1/speech

 API for audio files longer than a minute:
   https://speech.googleapis.com/v1/speech:longrunningrecognize

 xxxxxxx gcloud commands for debugging xxxxxxx

 Problem: when using the REST API via PowerShell, the errors you get tell you
 nothing: 400 bad request. Debugging with the gcloud console gives exact errors,
 such as no access to the bucket.

 List the account you are using; it should basically be the same as the one in
 your .json key file:
   gcloud auth list

 Set the account to use (speechtotext@speechtotext-<numbers>.iam.gserviceaccount.com):
   gcloud auth activate-service-account speechtotext@speechtotext-<numbers>.iam.gserviceaccount.com --key-file="C:\Skriptit\SpeechToText\speechtotext-<numbers>-72583c2223fe.json"

 Testing the API; helpful site:
   https://cloud.google.com/speech-to-text/docs/async-recognize speech-async-recognize-gcs-gcloud

   gcloud ml speech recognize-long-running gs://<yourbucket>_bucket/Test.flac --language-code=en-US --async

 Getting the status of the work:
   gcloud ml speech operations describe 591271978XXXX801384
   gcloud ml speech operations wait 1727881XXXX9022087

.PARAMETER outputfile
 Path of the text file that receives the recognition results.
 Defaults to the location the script has always used.
#>
Param(
    [string]$outputfile = "C:\Skriptit\SpeechToText\outputtext.txt"
)

# Speech to Text example using Google's Speech to Text API
# 18.9.2019 M.Lavento

# Credentials for the project and bucket
# Service-account key file; set before asking gcloud for the application-default token.
$env:GOOGLE_APPLICATION_CREDENTIALS = "C:\Skriptit\SpeechToText\speechtotext-<numbers>-72583c2223fe.json"

# Seconds to wait between two status queries of the recognition job.
$pollIntervalSeconds = 60

# Sends one request to the Speech API and returns the web response object.
# Any HTTP failure is turned into a script-terminating error that carries the
# server's detail text when PowerShell exposes it, so the script never continues
# with an empty response.
function Invoke-SpeechRequest {
    param(
        [string]$Method,
        [string]$Uri,
        [hashtable]$Headers,
        [string]$Body
    )

    $request = @{
        Method          = $Method
        Uri             = $Uri
        Headers         = $Headers
        ContentType     = "application/json; charset=utf-8"
        # The responses are JSON; no HTML/DOM parsing is needed.
        UseBasicParsing = $true
    }
    if ($Body) {
        $request.Body = $Body
    }

    try {
        Invoke-WebRequest @request
    }
    catch {
        $detail = if ($_.ErrorDetails) { $_.ErrorDetails.Message } else { $_.Exception.Message }
        throw "Speech API request ($Method $Uri) failed: $detail"
    }
}

# NOTE(review): the token is fetched once; presumably it can expire while a very
# long job is still being polled - confirm and refresh inside the loop if needed.
$cred = gcloud auth application-default print-access-token
if ($LASTEXITCODE -ne 0 -or -not $cred) {
    throw "Could not get an access token from gcloud (exit code: $LASTEXITCODE)."
}

$headers = @{ Authorization = "Bearer $cred" }

$body = @{
    audio  = @{ uri = 'gs://<your_bucket>/audio.flac' }
    config = @{ languageCode = 'en-US' }
}

# Build JSON body for the request
$jbody = ConvertTo-Json ($body)

$result = Invoke-SpeechRequest -Method Post -Headers $headers -Body $jbody -Uri "https://speech.googleapis.com/v1/speech:longrunningrecognize"
Write-Host "Processing.....Webrequest code:" $result.StatusDescription

# The answer is JSON describing the started operation.
$JobNameFromJSON = $result.Content | ConvertFrom-Json

# The operation name is needed to query the status of the job.
$jobnumber = $JobNameFromJSON.name
if (-not $jobnumber) {
    throw "The Speech API response did not contain an operation name."
}
$joburi = "https://speech.googleapis.com/v1/operations/$jobnumber"

# Loop until the audio has been processed.
do {
    # Wait between status queries.
    Start-Sleep -Seconds $pollIntervalSeconds

    $Jobstatus = Invoke-SpeechRequest -Method Get -Headers $headers -Uri $joburi

    # The answer is JSON describing the operation's current state.
    $JobStatusFromJSON = $Jobstatus.Content | ConvertFrom-Json

    # Percent complete (informational only).
    $JobStatusPercentage = $JobStatusFromJSON.metadata.progressPercent
    Write-Host "Job progress percentage: $JobStatusPercentage"

    # 'done' is the completion flag of the operation; unlike the progress
    # percentage it also ends the loop when the job finishes with an error.
} until ($JobStatusFromJSON.done)

if ($JobStatusFromJSON.error) {
    throw "Speech-to-Text job failed: $($JobStatusFromJSON.error.message)"
}

# We are interested in the response part of the finished operation; reuse the
# object parsed in the last loop iteration instead of converting it again.
$response = $JobStatusFromJSON
$response.response.results | Out-File $outputfile -Force
Invoke-Item $outputfile