TisaneWeb.psm1
#Region '.\Private\Save-TisaneSettings.ps1' 0
<#
.SYNOPSIS
    Persists a module setting as a user-scoped environment variable.
.PARAMETER settingName
    Name of the environment variable to write.
.PARAMETER settingValue
    Value to store. Reset-TisaneSettings passes $null here to clear a setting
    (per the .NET documentation, a null/empty value deletes the variable).
#>
Function Save-TisaneSettings([String] $settingName, $settingValue){
    [Environment]::SetEnvironmentVariable($settingName, $settingValue, 'User')
}
#EndRegion '.\Private\Save-TisaneSettings.ps1' 4
#Region '.\Public\Excel-LawWeb.ps1' 0
<#
.SYNOPSIS
    Sends the text of every spreadsheet row to the Tisane /parse API and writes
    the detected abuse and entities back into the same row.
.DESCRIPTION
    Opens "<path>/<filename>" through Excel COM automation and walks rows 2..1000
    of the first sheet. For each row whose column 1 is not empty the text is
    posted to https://api.tisane.ai/parse and the result is written to columns
    2-16 of that row:
      2 criminal activity, 3 crime domain, 4 personal attacks, 5 hate speech,
      6 sexual advances, 7 external contacts, 8 people, 9 organizations,
      10 software, 11 locations, 12 time ranges, 13 dates, 14 times, 15 files,
      16 phones.
    The workbook is then saved as "<path>/out_<filename>". Excel is always shut
    down, even when an API call or the save fails.
.PARAMETER APIkey
    Tisane subscription key, sent in the Ocp-Apim-Subscription-Key header.
.PARAMETER languageCode
    Language code sent with every request (the sheet is assumed monolingual).
.PARAMETER path
    Folder holding the input workbook; also receives the output workbook.
.PARAMETER filename
    File name of the input workbook inside that folder.
#>
function Excel-LawWeb{
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, valueFromPipeline=$true, HelpMessage="Tisane API key: ")][String] $APIkey, # from https://dev.tisane.ai/developer
        [Parameter(Mandatory = $true, valueFromPipeline=$true, HelpMessage="Language code: ")][String] $languageCode, # assuming the spreadsheet is monolingual
        [Parameter(Mandatory = $true, valueFromPipeline=$true, HelpMessage="Spreadsheet path: ")][String] $path, # for both the input and the output
        [Parameter(Mandatory = $true, valueFromPipeline=$true, HelpMessage="Spreadsheet local name: ")][String] $filename # input spreadsheet
    )

    # Replace backslashes with forward slashes if they exist
    $path = $path -replace '\\', '/'
    # Add a forward slash to the end of the path if it's missing.
    # -like (wildcard) is required here: '*/' is not a valid regex, so the
    # previous -match test raised a parsing error instead of testing the suffix.
    if (-not ($path -like '*/')) {
        $path = $path + '/'
    }

    $SPREADSHEET_PATHNAME = "$path$filename"
    $outFilename = $path + 'out_' + $filename
    $ROW_COUNT_IN_SPREADSHEET = 1000

    $header = @{}
    $header.Add('Ocp-Apim-Subscription-Key', "$APIkey")

    # Appends $name to a ' / '-separated list; an empty list becomes just $name.
    $appendName = {
        param($list, $name)
        if ($list) { $list + ' / ' + $name } else { $name }
    }

    $Excel = New-Object -ComObject Excel.Application
    $Workbook = $null
    try {
        $Workbook = $Excel.Workbooks.Open($SPREADSHEET_PATHNAME)
        $srcSheet = $Workbook.Sheets.Item(1)
        $startLine = 2
        $endLine = $ROW_COUNT_IN_SPREADSHEET

        For ($i = $startLine; $i -le $endLine; $i++) {
            $content = $srcSheet.Cells.Item($i,1).Text
            if (-not $content) {continue}

            $pct = [int]((($i - $startLine) / ($endLine - $startLine)) * 100)
            Write-Progress -Activity "[$i] $content" -Status "$pct% complete" -PercentComplete $pct

            # Build the request with ConvertTo-Json so quotes, backslashes and
            # line breaks inside the cell text are escaped instead of breaking
            # the JSON document.
            $inJsonBody = [ordered]@{
                language = $languageCode
                content  = [string]$content
                settings = [ordered]@{
                    deterministic   = $true
                    format          = 'dialogue'
                    sentiment       = $false
                    snippets        = $true
                    entities        = $true
                    topic_standard  = 'native'
                    optimize_topics = $true
                }
            } | ConvertTo-Json -Compress
            # Send explicit UTF-8 bytes so non-Latin text is not re-encoded.
            $inJsonBytes = [System.Text.Encoding]::UTF8.GetBytes($inJsonBody)

            # NOTE(review): -SkipCertificateCheck turns off TLS certificate
            # validation while the API key is being transmitted. It is kept to
            # preserve existing behaviour (see
            # https://stackoverflow.com/questions/11696944/powershell-v3-invoke-webrequest-https-error)
            # but should be dropped if the endpoint's certificate validates.
            $parsedTisane = Invoke-RestMethod -Uri "https://api.tisane.ai/parse" -Method POST -Headers $header -Body $inJsonBytes -ContentType 'application/json; charset=utf-8' -UseBasicParsing -SkipCertificateCheck

            # Per-row result columns.
            $crimeDomain = ''
            $criminalActivity = ''
            $personalAttacks = ''
            $hateSpeech = ''
            $contacts = ''
            $contactDetails = ''
            $sexualAdvances = ''
            $people = ''
            $locations = ''
            $time_ranges = ''
            $dates = ''
            $times = ''
            $files = ''
            $phones = ''
            $orgs = ''
            $software = ''

            if ($parsedTisane.abuse) {
                foreach ($abuse in $parsedTisane.abuse) {
                    $abuseText = $abuse.text
                    $abuseTags = $abuse.tags
                    switch ($abuse.type) {
                        'criminal_activity' {
                            $crimePrefix = ''
                            if ($abuseTags) {
                                foreach ($tag in $abuseTags) {
                                    # Skip the two non-descriptive tags. This must be
                                    # -and: with -or the test was true for every tag.
                                    if ($tag -ne 'addressee' -and $tag -ne 'quantitative') {
                                        if ($crimePrefix) {
                                            $crimePrefix = $crimePrefix + '/' + $tag
                                        } else {
                                            $crimePrefix = $tag
                                        }
                                        switch ($tag) {
                                            'scam' { $crimeDomain = "fraud 🤥" }
                                            'soft_drug' { $crimeDomain = "drugs 🌿" }
                                            'hard_drug' { $crimeDomain = "drugs 💉" }
                                            'medication' { $crimeDomain = "drugs 💊" }
                                            'death' { $crimeDomain = "death 💀" }
                                            'data' { $crimeDomain = "identity and data theft 💳" }
                                        }
                                    }
                                }
                            }
                            # No domain from the tags: fall back to the topics.
                            if (-not $crimeDomain -and $parsedTisane.topics) {
                                foreach ($topic in $parsedTisane.topics) {
                                    switch ($topic) {
                                        'narcotic' { $crimeDomain = "drugs 💉" }
                                        'drug' { $crimeDomain = "drugs 💉" }
                                        'soft drug' { $crimeDomain = "drugs 🌿" }
                                        'hard drug' { $crimeDomain = "drugs 💉" }
                                        'medication' { $crimeDomain = "drugs 💊" }
                                        'threat' { $crimeDomain = "threat 👿" }
                                        'planning' { $crimeDomain = "planning ✍" }
                                        'sourcing' { $crimeDomain = "procurement 📰" }
                                        'promotion' { $crimeDomain = "promotion 📢" }
                                        'child abuse' { $crimeDomain = "child abuse 🚸" }
                                        'animal' { $crimeDomain = "wildlife and poaching 🦏" }
                                        'identity theft' { $crimeDomain = "identity and data theft 💳" }
                                        'credit card' { $crimeDomain = "identity and data theft 💳" }
                                        'firearm' { $crimeDomain = "firearms 🔫" }
                                        'cryptocurrency' { $crimeDomain = "cryptocurrency ₿" }
                                        'fraud' { $crimeDomain = "fraud 🤥" }
                                        'explosive' { $crimeDomain = "explosives 💣" }
                                        'explosive device' { $crimeDomain = "explosives 💣" }
                                    }
                                }
                            }
                            # Print the tag prefix only the first time it occurs in the cell.
                            if ($crimePrefix -and $criminalActivity.IndexOf($crimePrefix) -lt 0) {
                                $criminalActivity = $criminalActivity + ' [' + $crimePrefix + '] ' + $abuseText
                            } else {
                                $criminalActivity = $criminalActivity + ' ' + $abuseText
                            }
                        }
                        'data' {
                            $criminalActivity = $criminalActivity + ' ' + $abuseText
                            $crimeDomain = $crimeDomain + " identity and data theft 💳"
                        }
                        'personal_attack' { $personalAttacks = $personalAttacks + " " + $abuseText }
                        'bigotry' { $hateSpeech = $hateSpeech + " " + $abuseText }
                        'sexual_advances' { $sexualAdvances = $sexualAdvances + " " + $abuseText }
                        'external_contacts' { $contacts = $contacts + " " + $abuseText }
                    }
                }
            }

            if ($parsedTisane.entities_summary) {
                foreach ($entity in $parsedTisane.entities_summary) {
                    # NOTE(review): -and / -or share one precedence level in
                    # PowerShell and are evaluated strictly left to right; the
                    # expression is kept token-for-token so the classification
                    # does not change.
                    if ($entity.type -eq 'software' -or $entity.type[0] -eq 'software' -and $entity.type[1] -ne 'website' -and $entity.type[2] -ne 'website' -or $entity.type[1] -eq 'software' -and $entity.type[0] -ne 'website' -and $entity.type[2] -ne 'website') {
                        $software = & $appendName $software $entity.name
                    }
                    elseif ($entity.type -eq 'place' -or $entity.type[0] -eq 'place' -or $entity.type[1] -eq 'place') {
                        $locations = & $appendName $locations $entity.name
                    }
                    elseif ($entity.type -eq 'organization' -or $entity.type[0] -eq 'organization' -or $entity.type[1] -eq 'organization') {
                        $orgs = & $appendName $orgs $entity.name
                    }
                    elseif ($entity.type -eq 'person' -or $entity.type -eq 'username') {
                        $people = & $appendName $people $entity.name
                    }
                    elseif ($entity.type -eq 'email' -or $entity.type -eq 'username') {
                        # NOTE(review): 'username' is already claimed by the people
                        # branch above, so only 'email' can reach this point.
                        # $contactDetails is collected but not written to the sheet.
                        $contactDetails = & $appendName $contactDetails $entity.name
                    }
                    else {
                        $name = $entity.name
                        switch ($entity.type) {
                            'crypto' {
                                if (-not($crimeDomain)) {
                                    $crimeDomain = "cryptocurrency ₿"
                                }
                            }
                            'time_range' { $time_ranges = & $appendName $time_ranges $name }
                            'date' { $dates = & $appendName $dates $name }
                            'time' { $times = & $appendName $times $name }
                            'file' { $files = & $appendName $files $name }
                            'phone' { $phones = & $appendName $phones $name }
                        }
                    }
                }
            }

            $srcSheet.Cells.Item($i,2).Value = $criminalActivity
            $srcSheet.Cells.Item($i,3).Value = $crimeDomain
            $srcSheet.Cells.Item($i,4).Value = $personalAttacks
            $srcSheet.Cells.Item($i,5).Value = $hateSpeech
            $srcSheet.Cells.Item($i,6).Value = $sexualAdvances
            $srcSheet.Cells.Item($i,7).Value = $contacts
            $srcSheet.Cells.Item($i,8).Value = $people
            $srcSheet.Cells.Item($i,9).Value = $orgs
            $srcSheet.Cells.Item($i,10).Value = $software
            $srcSheet.Cells.Item($i,11).Value = $locations
            $srcSheet.Cells.Item($i,12).Value = $time_ranges
            $srcSheet.Cells.Item($i,13).Value = $dates
            $srcSheet.Cells.Item($i,14).Value = $times
            $srcSheet.Cells.Item($i,15).Value = $files
            $srcSheet.Cells.Item($i,16).Value = $phones
        }

        Write-Progress -Activity "Almost done" -Status "Saving the spreadsheet"
        $Workbook.SaveAs($outFilename)
    }
    finally {
        # Always shut Excel down; otherwise a failed request or save leaves a
        # hidden EXCEL.EXE process holding the workbook open.
        if ($Workbook) {
            $Workbook.Close($false)
        }
        $Excel.Quit()
        [void][System.Runtime.InteropServices.Marshal]::ReleaseComObject([System.__ComObject]$Excel)
        [gc]::Collect()
        [gc]::WaitForPendingFinalizers()
        Remove-Variable excel -ErrorAction SilentlyContinue
    }
}
#EndRegion '.\Public\Excel-LawWeb.ps1' 280
#Region '.\Public\Generate-FamilyCleanupTestCSV.ps1' 0
<#
.SYNOPSIS
    Builds one test CSV per language listing each family's lexemes with and
    without lexemes borrowed from a randomly chosen "noise" family.
.DESCRIPTION
    For every language the function downloads the inflections of each noise
    family, then for every family ID downloads the family record and its
    inflections. Each family yields one row: ID, description, definition, its
    own lexemes, and the same lexemes followed by those of one random noise
    family. The four GenAI/score columns are written empty. Rows are exported to
    "TisaneFamilyCleanupTest_<language>.csv" (relative path, UTF-8).
.PARAMETER familyIDs
    Family IDs that become the rows of the CSV.
.PARAMETER languages
    Language codes; one CSV is written per code.
.PARAMETER noiseFamilyIDs
    Family IDs whose lexemes are used as noise.
.PARAMETER baseUrl
    Root URL of the Tisane API.
.EXAMPLE
    Generate-FamilyCleanupTestCSV -familyIDs 43344,55783,113986,115619,60106,112360,125501,109151 -languages 'hi','en','fr' -noiseFamilyIDs 9644,96951,51562,22924
#>
Function Generate-FamilyCleanupTestCSV{
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="comma-delimited list of family IDs: ")][int[]] $familyIDs,
        [Parameter(Mandatory = $true, HelpMessage="comma-delimited list of language codes: ")][String[]] $languages,
        [Parameter(Mandatory = $true, HelpMessage="comma-delimited list of noise family IDs: ")][int[]] $noiseFamilyIDs,
        [Parameter(Mandatory = $false, HelpMessage="base url to tisane default (https://api.tisane.ai)")][String] $baseUrl = "https://api.tisane.ai"
    )

    # Validate the key against the same endpoint the requests below will use.
    $tisaneApiKey = Set-TisaneApiKey -baseUrl $baseUrl

    $headers = @{
        "Content-Type" = "application/json;charset=utf-8";
        "Ocp-Apim-Subscription-Key" = $tisaneApiKey
    }

    foreach ($language in $languages) {
        $csvObjects = [System.Collections.Generic.List[object]]::new()
        $allNoiseLexemes = @{}
        Write-Host "Fetching data for $language families" -ForegroundColor Green

        # Cache the lexemes of every noise family, keyed by family ID.
        foreach ($noiseFamilyID in $noiseFamilyIDs) {
            $noiseUrl = "$baseUrl/lm/inflections?language=$language&family=$noiseFamilyID"
            $noiseResponse = Invoke-WebRequest -Uri $noiseUrl -Headers $headers -ContentType 'application/json; charset=utf-8'
            $noiseResponse = [System.Text.Encoding]::UTF8.GetString($noiseResponse.Content)
            $noiseLexemes = @(($noiseResponse | ConvertFrom-Json) | ForEach-Object { $_[0].lemma })
            $allNoiseLexemes[$noiseFamilyID] = $noiseLexemes
        }

        $i = 0
        foreach ($familyID in $familyIDs) {
            $pct = [int]($i / $familyIDs.Length * 100)
            Write-Progress -Activity "Processing family [$familyID]" -Status "$pct%" -PercentComplete $pct
            $i++

            $familyUrl = "$baseUrl/lm/family?id=$familyID"
            $familyResponse = Invoke-WebRequest -Uri $familyUrl -Headers $headers -ContentType 'application/json; charset=utf-8'
            $familyResponse = [System.Text.Encoding]::UTF8.GetString($familyResponse.Content) | ConvertFrom-Json

            $lexemesUrl = "$baseUrl/lm/inflections?language=$language&family=$familyID"
            $lexemesResponse = Invoke-WebRequest -Uri $lexemesUrl -Headers $headers -ContentType 'application/json; charset=utf-8'
            $lexemesResponse = [System.Text.Encoding]::UTF8.GetString($lexemesResponse.Content) | ConvertFrom-Json
            $lexemes = @($lexemesResponse | ForEach-Object { $_.lemma })

            # Own lexemes first, then those of one randomly chosen noise family.
            $randomNoiseFamilyID = Get-Random -InputObject $noiseFamilyIDs
            $noisyLexemes = $lexemes + $allNoiseLexemes[$randomNoiseFamilyID]

            $csvObjects.Add([PSCustomObject]@{
                "Family_ID" = $familyID
                "Family_Description" = $familyResponse.description
                "Family_Definition" = $familyResponse.definition
                "Lexemes" = $lexemes -join ', '
                "Noisy_List_of_Lexemes" = $noisyLexemes -join ', '
                "GenAI_Response" = $null
                "Cleaned_GenAI_Response" = $null
                "Score_for_Original_Input" = $null
                "Score_for_Noisy_Input" = $null
            })
        }
        Write-Progress -Activity "Processing family" -Completed

        $csvObjects | Export-Csv -Path "TisaneFamilyCleanupTest_$language.csv" -NoTypeInformation -Encoding UTF8
    }
}
# Generate-FamilyCleanupTestCSV -familyIDs 43344,55783,113986,115619,60106,112360,125501,109151 -languages 'hi','en','fr' -noiseFamilyIDs 9644,96951,51562,22924
#EndRegion '.\Public\Generate-FamilyCleanupTestCSV.ps1' 64
#Region '.\Public\Rest-TisaneSettings.ps1' 0
# Function Get-TisaneSettings($settingName, $defaultValue) {
#     $settingValue = [Environment]::GetEnvironmentVariable($settingName, 'User')
#     if ([string]::IsNullOrEmpty($settingValue)) {
#         return $defaultValue
#     } else {
#         return $settingValue
#     }
# }

<#
.SYNOPSIS
    Clears the stored Tisane API key by saving $null under 'TisaneApiKey'.
#>
Function Reset-TisaneSettings(){
    Save-TisaneSettings -settingName 'TisaneApiKey' -settingValue $null
}
#EndRegion '.\Public\Rest-TisaneSettings.ps1' 14
#Region '.\Public\Set-TisaneApiKey.ps1' 0
<#
.SYNOPSIS
    Returns a validated Tisane API key, prompting for and storing one if needed.
.DESCRIPTION
    Reads the user-scoped environment variable 'TisaneApiKey'. When it is empty
    the key is requested with Read-Host. The key is checked with Validate-ApiKey
    (which throws on failure); a newly entered key is stored only after it has
    passed validation.
.PARAMETER baseUrl
    Root URL of the Tisane API used for validation.
.OUTPUTS
    The API key as a string.
#>
Function Set-TisaneApiKey([String] $baseUrl = "https://api.tisane.ai"){
    $TisaneApiKey = [Environment]::GetEnvironmentVariable('TisaneApiKey', 'User')
    $isNewKey = [string]::IsNullOrEmpty($TisaneApiKey)
    if ($isNewKey) {
        $TisaneApiKey = Read-Host -Prompt 'Please enter your Tisane API Key'
    }

    Validate-ApiKey -apiKey $TisaneApiKey -baseUrl $baseUrl

    if ($isNewKey) {
        Save-TisaneSettings -settingName 'TisaneApiKey' -settingValue $TisaneApiKey
    }
    return $TisaneApiKey
}
#EndRegion '.\Public\Set-TisaneApiKey.ps1' 13
#Region '.\Public\temp.ps1' 0
# NOTE(review): this scratch file held a second, reduced copy of
# Generate-FamilyCleanupTestCSV (which silently replaced the definition above)
# followed by a top-level call to it, so merely importing the module prompted
# for an API key and issued network requests. Both were removed; run the
# function explicitly instead (see its .EXAMPLE).
#EndRegion '.\Public\temp.ps1' 61
#Region '.\Public\Tisane-Web.ps1' 0
## =============================================================================
##
## This script's purpose is to send a request to Tisane API
##
## =============================================================================
<#
.SYNOPSIS
    Posts one piece of text to the Tisane /parse endpoint and returns the raw
    web response.
.PARAMETER APIkey
    Tisane subscription key. Defaults to the user-scoped environment variable
    'TisaneApiKey' (the value stored by Set-TisaneApiKey).
.PARAMETER languageCode
    Language code of the text.
.PARAMETER content
    Text to analyse.
.OUTPUTS
    The response object produced by Invoke-WebRequest.
#>
function Tisane-Web{
    Param(
        [String] $APIkey = [Environment]::GetEnvironmentVariable('TisaneApiKey', 'User'),
        [String] $languageCode = 'en',
        [String] $content = 'buy ice'
    )

    # The subscription key used to be hard-coded here; secrets must not live in
    # source control, so it now comes from the caller or the stored setting.
    if ([string]::IsNullOrEmpty($APIkey)) {
        Throw "No Tisane API key available. Pass -APIkey or store one with Set-TisaneApiKey."
    }

    $header = @{}
    $header.Add('Ocp-Apim-Subscription-Key', $APIkey)

    $requestJson = [ordered]@{
        language = $languageCode
        content  = $content
        settings = [ordered]@{
            deterministic   = $true
            format          = 'dialogue'
            parses          = $false
            sentiment       = $false
            words           = $false
            snippets        = $true
            entities        = $true
            topic_standard  = 'native'
            optimize_topics = $true
        }
    } | ConvertTo-Json -Compress
    $requestBytes = [System.Text.Encoding]::UTF8.GetBytes($requestJson)

    # NOTE(review): -SkipCertificateCheck disables TLS certificate validation
    # (workaround from
    # https://stackoverflow.com/questions/11696944/powershell-v3-invoke-webrequest-https-error);
    # kept for compatibility, remove it if the certificate validates.
    $tisaneResponse = Invoke-WebRequest -Uri "https://api.tisane.ai/parse" -Method POST -Headers $header -Body $requestBytes -ContentType 'application/json; charset=utf-8' -UseBasicParsing -SkipCertificateCheck
    $tisaneResponse
}
#EndRegion '.\Public\Tisane-Web.ps1' 34
#Region '.\Public\Validate-ApiKey.ps1' 0
<#
.SYNOPSIS
    Checks that an API key is accepted by the Tisane API; throws when it is not.
.DESCRIPTION
    Issues two GET requests with the key: "<baseUrl>/languages" and
    "<baseUrl>/lm/family?id=43344". A request that raises an error or answers
    with a status other than 200 fails validation. The function writes nothing
    to the pipeline.
.PARAMETER apiKey
    Key to test, sent in the Ocp-Apim-Subscription-Key header.
.PARAMETER baseUrl
    Root URL of the Tisane API.
#>
Function Validate-ApiKey {
    Param(
        [Parameter(Mandatory = $true)][String] $apiKey,
        [Parameter(Mandatory = $true)][String] $baseUrl
    )

    $headers = @{
        "Content-Type" = "application/json;charset=utf-8";
        "Ocp-Apim-Subscription-Key" = $apiKey
    }

    # Each probe pairs a URL with the message prefix reported when it fails.
    $probes = @(
        @{ Url = "$baseUrl/languages";          Prefix = "Invalid API key." },
        @{ Url = "$baseUrl/lm/family?id=43344"; Prefix = "API key does not have access to LLMS." }
    )

    foreach ($probe in $probes) {
        $failed = $false
        $detail = ''
        try {
            $response = Invoke-WebRequest -Uri $probe.Url -Headers $headers -Method Get
            if ($response.StatusCode -ne 200) {
                $failed = $true
                $detail = $response.Content
            }
        }
        catch {
            $failed = $true
            $detail = $_.Exception.Message
        }
        # Throw outside the try block so the message is not caught and wrapped
        # a second time, and so both probes report which check failed.
        if ($failed) {
            Throw "$($probe.Prefix) Error message: $detail"
        }
    }
}
#EndRegion '.\Public\Validate-ApiKey.ps1' 33