#Region '.\Private\Add-Emoji.ps1' 0
Function Add-Emoji {
    <#
    .SYNOPSIS
    Imports an emoji and its short name into LaMP through the /import endpoint.

    .DESCRIPTION
    Reduces the emoji's long name to a reference word using a fixed sequence of heuristics
    (gender prefix, text after ':' or 'with', filler prefixes and suffixes, colour names,
    trailing 'ing'), then sends two POST requests to "$global:lampHost/import":
    one with the emoji itself as the lexeme and one with the short name as the lexeme.
    The second request carries the additional assignment 51=EXPL.

    .PARAMETER emoji
    The emoji character(s) to import.

    .PARAMETER longName
    The descriptive name of the emoji; used to derive the reference word.

    .PARAMETER shortName
    The short code of the emoji (names matching ':flag-*' are treated as proper nouns).

    .OUTPUTS
    System.String. The URI of the second import request is written to the pipeline.

    .NOTES
    $source is not defined in this function; it is read from an enclosing scope.
    #>
    Param(
        [Parameter(Mandatory=$true, HelpMessage="Specify the emoji")][string]$emoji,
        [Parameter(Mandatory=$false, HelpMessage="Specify the long name")][string]$longName,
        [Parameter(Mandatory=$false, HelpMessage="Specify the short name")][string]$shortName
    )

    # -like is case-insensitive, so every IndexOf below must be as well; otherwise a
    # successful -like could be followed by IndexOf returning -1 and Substring throwing.
    $ignoreCase = [System.StringComparison]::OrdinalIgnoreCase

    $properNoun = ''
    $hypernym = 0
    # Initialised explicitly so that a stray $assign in a parent scope cannot leak in.
    $assign = ''

    if ($shortName -like ':flag-*') {
        $longName = [cultureinfo]::GetCultureInfo("en-US").TextInfo.ToTitleCase($longName)
        $properNoun = '1'
        $hypernym = 54389   # hard-coded hypernym ID sent for flags
    }

    # Gender prefix: remember it as an assignment and strip only the leading word.
    if ($longName -like 'woman*') {
        $assign = "5=F"
        $longName = $longName -replace '^woman'
    }
    if ($longName -like 'man*') {
        $assign = "5=M"
        $longName = $longName -replace '^man'
    }

    # Keep only what precedes the first ':'.
    $cut = $longName.IndexOf(':')
    if ($cut -ge 0) {
        $longName = $longName.Substring(0, $cut)
    }

    # Anchored replacements: only the trailing/leading filler word is removed,
    # never an occurrence in the middle of another word.
    foreach ($suffix in 'symbol', 'sign') {
        $longName = $longName -replace ($suffix + '$')
    }

    # Keep only what precedes the first 'with'.
    $cut = $longName.IndexOf('with', $ignoreCase)
    if ($cut -ge 0) {
        $longName = $longName.Substring(0, $cut)
    }

    foreach ($prefix in 'squared', 'open', 'closed', 'oncoming', 'large', 'small') {
        $longName = $longName -replace ('^' + $prefix)
    }

    # Drop everything before a colour name.
    foreach ($colour in 'orange', 'yellow', 'green', 'purple', 'brown') {
        $cut = $longName.IndexOf($colour, $ignoreCase)
        if ($cut -ge 0) {
            $longName = $longName.Substring($cut)
        }
    }

    foreach ($suffix in 'face', 'hand') {
        $longName = $longName -replace ($suffix + '$')
    }

    # Strip a trailing 'ing' only (e.g. "singing" -> "sing").
    $longName = $longName -replace 'ing$'

    $longName = [System.Web.HttpUtility]::UrlEncode($longName.Trim())
    $shortName = [System.Web.HttpUtility]::UrlEncode($shortName.Trim())
    # The emoji is encoded too: keycap emoji contain '#' or '*', which would otherwise
    # be interpreted as URL syntax.
    $encodedEmoji = [System.Web.HttpUtility]::UrlEncode($emoji.Trim())
    $emptyBody = [System.Text.Encoding]::UTF8.GetBytes('')

    # Request 1: the emoji itself as the lexeme.
    $encodedAssign = [System.Web.HttpUtility]::UrlEncode($assign)
    $uri = "$global:lampHost/import?lexeme=$encodedEmoji&reference=$longName&proper=$properNoun&hypernym=$hypernym&source=$source&orgId=$shortName&assign=$encodedAssign"
    $null = Invoke-WebRequest -Uri $uri -Method POST -Headers $global:authorizationToken -Body $emptyBody -UseBasicParsing

    # Request 2: the short name as the lexeme, with 51=EXPL prepended to the assignments.
    if ($assign) {
        $assign = "51=EXPL,$assign"
    } else {
        $assign = "51=EXPL"
    }
    $encodedAssign = [System.Web.HttpUtility]::UrlEncode($assign)
    $uri = "$global:lampHost/import?lexeme=$shortName&reference=$longName&proper=$properNoun&hypernym=$hypernym&source=$source&orgId=$shortName&assign=$encodedAssign"
    $uri
    $null = Invoke-WebRequest -Uri $uri -Method POST -Headers $global:authorizationToken -Body $emptyBody -UseBasicParsing
}
#EndRegion '.\Private\Add-Emoji.ps1' 63
#Region '.\Private\Add-Lexeme.ps1' 0
Function Add-Lexeme {
    <#
    .SYNOPSIS
    Creates a lexeme, links it to a family, tags it and stores the tagging result.

    .DESCRIPTION
    1. POSTs a new lexeme (lemma and stem both set to $word) to "$global:lampHost/lexeme".
    2. Links the new lexeme to $familyId through /lexemeFamilies.
    3. Sends the lexeme to /tagLemma.
    4. Copies grammar and stem from the tagging result, fetches the stored lexeme to obtain
       its requestId, and PUTs the updated lexeme back.
    Steps 2-4 run only when the insert response reports success and an id.

    .PARAMETER familyId
    ID of the family the new lexeme is linked to.

    .PARAMETER word
    The word used as lemma and initial stem.
    #>
    Param(
        [Parameter(Mandatory=$true, HelpMessage="Specify the family ID")][int]$familyId,
        [Parameter(Mandatory=$true, HelpMessage="Specify the word")][string]$word
    )
    Write-Host "Creating lexeme $word and linking it to the newly created family $familyId`r`n" -ForegroundColor Green

    # 1. Insert a new lexeme
    $newLexeme = @{
        id = 0
        lemma = $word
        stem = $word
    }
    $lexemeJson = ConvertTo-Json -InputObject $newLexeme
    $response = Invoke-WebRequest -Uri "$global:lampHost/lexeme" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes($lexemeJson)) -UseBasicParsing
    #'Lexeme insert response: ' + $response
    $parsedResponse = ConvertFrom-Json -InputObject $response
    if (-not ($parsedResponse.success -and $parsedResponse.id)) {
        # Previously this case was skipped without any feedback.
        Write-Warning "Lexeme '$word' was not created; the insert response did not report success and an id."
        return
    }
    $lexemeId = $parsedResponse.id

    # 2. Link to the family
    $null = Invoke-WebRequest -Uri "$global:lampHost/lexemeFamilies?lexeme=$lexemeId&families=$familyId" -Method POST -Headers $global:authorizationToken -UseBasicParsing

    # 3. Tag
    $newLexeme.id = $lexemeId
    $lexemeJson = ConvertTo-Json -InputObject $newLexeme
    #"Tagging: " + $lexemeJson
    $taggedLexeme = Invoke-RestMethod -Uri "$global:lampHost/tagLemma" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes($lexemeJson)) -UseBasicParsing
    #'Tagged lexeme: ' + $taggedLexeme

    # 4. Update from the tag results
    $newLexeme.grammar = $taggedLexeme.grammar
    $newLexeme.stem = $taggedLexeme.stem
    $storedLexeme = Invoke-RestMethod -Uri "$global:lampHost/lexeme?id=$lexemeId" -Method GET -Headers $global:authorizationToken -UseBasicParsing
    #'Updating lexeme: ' + $storedLexeme
    $newLexeme.requestId = $storedLexeme.requestId
    # -Depth is raised because ConvertTo-Json serialises only two levels by default and
    # flattens anything nested deeper inside the grammar returned by the tagger.
    $lexemeJson = ConvertTo-Json -InputObject $newLexeme -Depth 30
    $null = Invoke-RestMethod -Uri "$global:lampHost/lexeme" -Method PUT -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes($lexemeJson)) -UseBasicParsing
}
#EndRegion '.\Private\Add-Lexeme.ps1' 42
#Region '.\Private\Convert-UposToPenn.ps1' 0
function Convert-UposToPenn {
    <#
    .SYNOPSIS
    Maps a comma-separated list of universal POS tags to Penn-style tags.

    .DESCRIPTION
    Each tag is trimmed and looked up in a fixed table; tags that are not in the table are
    passed through unchanged. The result is joined with commas and trailing commas are removed.

    .PARAMETER tags
    A comma-separated string of tags (or an array of such strings).

    .OUTPUTS
    System.String. The converted, comma-separated tag list.
    #>
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Comma sepereated string of tags")][System.Object[]]$tags
    )
    $UniversalToPenn = @{
        "ADJ" = "JJ"
        "ADP" = "IN"
        "ADV" = "RB"
        "CONJ" = "CC"
        "DET" = "DT"
        "NOUN" = "NN"
        "NUM" = "CD"
        "PRT" = "RP"
        "PRON" = "PRP"
        "VERB" = "VB"
        "." = "."
        "X" = "NN"
        "PART" = "RP"
        "SCONJ" = "IN"
        "V" = "VB"
        "VM" = "VB"
        "INTJ" = "UH"
    }

    $converted = foreach ($tag in $tags.Split(",")) {
        $name = $tag.ToString().Trim()
        if ($UniversalToPenn.ContainsKey($name)) {
            $UniversalToPenn[$name]
        } else {
            $name
        }
    }
    # TrimEnd keeps the historical behaviour of dropping empty trailing entries.
    return (@($converted) -join ",").TrimEnd(",")
}
# $tags = "NOUN,NOUN,VERB,PUNCT"
# Convert-UposToPenn -tags $tags
#EndRegion '.\Private\Convert-UposToPenn.ps1' 40
#Region '.\Private\Make-OneRowOfCsv.ps1' 0
function Make-OneRowOfCsv {
    <#
    .SYNOPSIS
    Builds one output row describing a family and a suggested expression.

    .DESCRIPTION
    Requests the family from "$global:lampHost/knowledgeGraph" and returns an object with the
    family's representativeLemma and definition. When the service answers with "[]", both
    fields are set to "<family not found>".

    .PARAMETER familyId
    ID of the family to look up.

    .PARAMETER suggestedExpression
    The expression to put in the suggested_expression column.

    .OUTPUTS
    PSCustomObject with the properties description, definition, familyId, suggested_expression.
    #>
    Param(
        [Parameter(Mandatory=$true, HelpMessage="Family Id ")][string]$familyId,
        [Parameter(Mandatory=$true, HelpMessage="Suggested Expression ")][string]$suggestedExpression
    )
    $description = "<family not found>"
    $definition = "<family not found>"

    $response = Invoke-WebRequest -Uri "$global:lampHost/knowledgeGraph?type=id&arg=$familyId&basic=true" -Method GET -Headers $global:authorizationToken -UseBasicParsing
    if (-not ($response.Content -eq "[]")) {
        $family = ConvertFrom-Json $response.Content
        $description = $family.representativeLemma
        $definition = $family.definition
    }

    return [PSCustomObject]@{
        "description" = $description
        "definition" = $definition
        "familyId" = $familyId
        "suggested_expression" = $suggestedExpression
    }
}
# $c= Make-OneRowOfCsv -familyId 347 -suggestedExpression "how you"
# Write-Host $c
#EndRegion '.\Private\Make-OneRowOfCsv.ps1' 30
#Region '.\Private\Remove-Punctuations.ps1' 0
function Remove-Punctuations {
    <#
    .SYNOPSIS
    Normalises a generated phrase: strips leading non-letters and sentence punctuation.

    .DESCRIPTION
    Drops everything before the first character whose Unicode category is UppercaseLetter,
    LowercaseLetter or OtherLetter, removes the characters ? ! . … (and |), trims the result
    and lower-cases its first character. Returns an empty string when the input contains no
    such letter.

    .PARAMETER rawString
    The text to clean.

    .OUTPUTS
    System.String.
    #>
    param(
        [string]$rawString
    )
    $letterCategories = "UppercaseLetter", "LowercaseLetter", "OtherLetter"

    $result = ""
    for ($i = 0; $i -lt $rawString.Length; $i++) {
        $category = [System.Globalization.CharUnicodeInfo]::GetUnicodeCategory($rawString[$i])
        if ($letterCategories -contains "$category") {
            $result = $rawString.Substring($i)
            break
        }
    }
    # $result = $rawString -Replace '^[^a-zA-Z0-9]+', ''
    # $result = $result + $rawString.Substring($result.Length)
    # Inside a character class '|' is a literal, so pipe characters are removed as well.
    $result = $result -Replace '[?|!|.|…]',''
    # $result = $result -replace "^[^a-zA-Z]*", ""
    $result = $result.Trim()

    # Nothing left (no letters, or punctuation only): Substring(0,1) would raise an error.
    if ($result.Length -eq 0) {
        return ""
    }
    return $result.Substring(0,1).ToLower() + $result.Substring(1)
}
# $result = Remove-Punctuations -rawString "Give me the book."
# Write-Host $result
#EndRegion '.\Private\Remove-Punctuations.ps1' 26
#Region '.\Public\Check-EnglishTranslation.ps1' 0
function Check-EnglishTranslation {
    <#
    .SYNOPSIS
    Checks the English translations in a CSV file against the family each lexeme is linked to.

    .DESCRIPTION
    For every row of the input CSV that has a non-empty englishTranslation, the function
    retrieves the definition of the row's family from LaMP, sends the translated sentence to
    "<tisaneWebService>/parse" and collects the families of the words in the response. Rows
    whose familyId does not occur among those families are reported as bad lexemes and
    written to the output CSV (word, lexemeID, familyId, definition).

    .PARAMETER inputCsv
    The path to the CSV file containing the translations. The columns word, lexemeID,
    familyId and englishTranslation are read.

    .PARAMETER outputCsv
    The path to the output CSV file where the results will be saved.

    .PARAMETER tisaneWebService
    The base URL of the Tisane web service; "/parse" is appended to it.

    .PARAMETER language
    (Optional) The ISO code of the language sent to the Tisane web service with each
    sentence. The default value is "en" for English.

    .EXAMPLE
    Check-EnglishTranslation -inputCsv "C:\translations.csv" -outputCsv "C:\validation_results.csv" -tisaneWebService "https://api.tisane.ai"

    Checks the translations in "C:\translations.csv" by posting them to
    "https://api.tisane.ai/parse" with the default language "en". The rows that failed the
    check are saved to "C:\validation_results.csv".

    .NOTES
    - Make sure you have the necessary modules and permissions to execute the cmdlets used in this function.
    - This function requires an active internet connection to communicate with the Tisane web service.
    - Refer to the Tisane web service documentation for more information on its usage and limitations.
    #>
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Path to csv with transated column : ")][String] $inputCsv,
        [Parameter(Mandatory = $true, HelpMessage="Path to output csv file : ")][String]$outputCsv,
        [Parameter(Mandatory = $true, HelpMessage="ip for Tisane web service : ")][String]$tisaneWebService,
        [Parameter(Mandatory = $false, HelpMessage="iso code of traget language from where to verify families, default is en")][String]$language="en"
    )
    Login-Lamp
    Set-LampLanguage -languageId 7

    $df = @(Import-Csv $inputCsv -Encoding UTF8)
    $totalRows = $df.Count
    $csvObjects = @()
    $i = 0
    foreach ($row in $df) {
        # Read the row first so that the progress bar names the lexeme being processed
        # (it used to show the ID of the previous row).
        $lexemeId = $row.lexemeID
        $familyId = $row.familyId
        $lexeme = $row.word

        $pct = $i / $totalRows * 100
        Write-Progress -Activity "Checking Lexeme [$lexemeId]" -Status "$pct%" -PercentComplete $pct
        $i++

        # Interpolation turns a missing column ($null) into "" instead of failing on .Trim().
        $translatedSentence = "$($row.englishTranslation)".Trim()
        if ([string]::IsNullOrEmpty($translatedSentence)) {
            continue
        }

        $definitionResponse = Invoke-RestMethod -Uri "$global:lampHost/knowledgeGraph?type=id&arg=$familyId&basic=true" -Method GET -Headers $global:authorizationToken -UseBasicParsing
        $definitionResponse = $definitionResponse.definition

        # NOTE(review): the key used to be spelled "deteministic"; it is assumed that the
        # Tisane setting "deterministic" was meant - confirm against the Tisane settings docs.
        $body = @{
            "language" = $language
            "content" = $translatedSentence
            "settings" = @{
                "words" = $true
                "deterministic" = $false
            }
        } | ConvertTo-Json

        $response = Invoke-RestMethod -Uri "$tisaneWebService/parse" -Method POST -ContentType 'application/json' -Body $body -UseBasicParsing
        # Always an array, even when the response holds no family or a single one.
        $familyList = @($response.sentence_list.words.family)
        Write-Host $familyList

        # -notcontains compares by value across numeric types; Array.Contains([int]) never
        # matches when the JSON numbers were deserialised as Int64.
        if ($familyList -notcontains [int]$familyId) {
            Write-Host "bad lexeme : $lexemeId" -ForegroundColor Green
            $csvObjects += [PSCustomObject]@{
                "word" = $lexeme
                "lexemeID" = $lexemeId
                "familyId" = $familyId
                "definition" = $definitionResponse
            }
        }
    }
    # Write-Host $csvObjects
    $csvObjects | Export-Csv $outputCsv -NoTypeInformation -Encoding UTF8
}
# Check-EnglishTranslation -inputCsv "tsample.csv" -outputCsv "otsample.csv" -tisaneWebService
#EndRegion '.\Public\Check-EnglishTranslation.ps1' 86
#Region '.\Public\Complete-Family.ps1' 0
Function Complete-Family {
    <#
    .SYNOPSIS
    Runs detectMissingFamilyLinks for one family in every language known to LaMP.

    .DESCRIPTION
    Retrieves the language list from "$global:lampHost/languages", and for each language
    selects it with Set-LampLanguage and POSTs to /detectMissingFamilyLinks for the given
    family. The response of every call is written to the pipeline; failures are reported on
    the host and do not stop the remaining languages.

    .PARAMETER familyId
    Family Id to process.

    .PARAMETER allowUnverified
    Passed to the service as the allowUnverified query argument. Default: $false.
    #>
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Family Id to process")][int] $familyId,
        [Parameter(Mandatory = $false, HelpMessage="Allow unverified family links")][bool] $allowUnverified = $false
    )
    Login-Lamp

    $languageJSON = Invoke-RestMethod -Uri "$global:lampHost/languages" -Method GET -UseBasicParsing
    $languages = @($languageJSON)
    $languageCount = $languages.Count
    $i = 1
    foreach ($languageEntry in $languages) {
        $languageId = $languageEntry.id
        $languageName = $languageEntry.englishName

        $pct = $i / $languageCount * 100
        # The status used to interpolate $lexemeId, which is never defined in this function.
        Write-Progress -Activity "Processing $languageName" -Status "$pct%" -PercentComplete $pct
        $i++

        Set-LampLanguage -languageId $languageId
        Write-Host $languageName -ForegroundColor Magenta
        try {
            $response = Invoke-RestMethod -Uri "$global:lampHost/detectMissingFamilyLinks?family=$familyId&allowUnverified=$allowUnverified" -Method POST -Headers $global:authorizationToken -UseBasicParsing
            $response
        } catch {
            Write-Host "Family [$familyID] was not processed for language [$languageName]" -ForegroundColor RED
            Write-Host "$_" -ForegroundColor RED
        }
    }
}
# Complete-Family -familyId 125501 -allowUnverified $true
# Complete-Family -familyId 98203
#EndRegion '.\Public\Complete-Family.ps1' 32
#Region '.\Public\Copy-InflectionPatterns.ps1' 0
## =============================================================================
##
## Copies a set of inflection patterns, replacing advanced criteria and / or strings
##
## =============================================================================
function Copy-InflectionPatterns {
    <#
    .SYNOPSIS
    Copies inflection patterns based on specified criteria and replaces text in the patterns.

    .DESCRIPTION
    The Copy-InflectionPatterns function retrieves the inflection rules of a range, applies the
    requested replacements (advanced criteria IDs, affix text, trigger) to each of them and
    POSTs every pattern that was changed as a new inflection pattern (id 0). Patterns that
    are not affected by any replacement are not copied.

    .PARAMETER languageId
    Specifies the language ID for which inflection patterns should be copied. This parameter is mandatory.

    .PARAMETER range
    Specifies the range ID for which inflection patterns should be copied. This parameter is mandatory.

    .PARAMETER oldCriteria
    Specifies an array of advanced criteria IDs to be replaced in the inflection patterns. This parameter is optional.

    .PARAMETER newCriteria
    Specifies an array of new advanced criteria IDs to replace the old criteria in the inflection patterns.
    Must have the same number of elements as oldCriteria.

    .PARAMETER replaceRegexes
    Specifies an array of regular expressions to match and replace in the affix text of the inflection patterns. This parameter is optional.

    .PARAMETER replaceWith
    Specifies an array of replacement strings to be used for replacing the matched regexes in the affix text.
    Must have the same number of elements as replaceRegexes.

    .PARAMETER oldTrigger
    Specifies the old trigger ID to be replaced in the inflection patterns. Only used when it is
    greater than 0 and newTriggers is supplied.

    .PARAMETER newTriggers
    Specifies an array of new trigger IDs; a pattern using oldTrigger is copied once per new trigger.

    .EXAMPLE
    Copy-InflectionPatterns -languageId 1 -range 2 -oldCriteria 10,20 -newCriteria 100,200

    This example copies inflection patterns for language ID 1 and range ID 2, replacing advanced criteria IDs 10 and 20 with 100 and 200 respectively.
    #>
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Language ID: ")][int] $languageId,
        [Parameter(Mandatory = $true, HelpMessage="Range ID: ")][int] $range,
        [Parameter(Mandatory = $false, HelpMessage="Advanced criteria to replace: ")][int[]] $oldCriteria,
        [Parameter(Mandatory = $false, HelpMessage="Advanced criteria to replace with: ")][int[]] $newCriteria,
        [Parameter(Mandatory = $false, HelpMessage="Replace strings: ")][String[]] $replaceRegexes, # an array of Tisane feature values
        [Parameter(Mandatory = $false, HelpMessage="Replace with: ")][String[]] $replaceWith,
        [Parameter(Mandatory = $false, HelpMessage="Old trigger: ")][int] $oldTrigger,
        [Parameter(Mandatory = $false, HelpMessage="New trigger: ")][int[]] $newTriggers
    )

    # Validation exits with 'return'. 'break' outside of a loop is not scoped to the
    # function: it unwinds into the caller and terminates the caller's loop or script.
    $replaceTrigger = ($oldTrigger -gt 0 -and $newTriggers)
    if (-not($replaceRegexes -or $oldCriteria -or $replaceTrigger)) {
        Write-Host "Nothing to replace. Either supply an array of regexes/strings to replace (under replaceRegexes parameter, array of strings) or an array of criteria (under oldCriteria parameter, array of integers), or new triggers" -ForegroundColor Red
        return
    }
    if ($replaceRegexes -and (-not($replaceWith) -or $replaceRegexes.Length -ne $replaceWith.Length)) {
        Write-Host "replaceRegexes and replaceWith must have the same number of elements" -ForegroundColor Red
        return
    }
    if ($oldCriteria -and (-not($newCriteria) -or $oldCriteria.Length -ne $newCriteria.Length)) {
        Write-Host "oldCriteria and newCriteria must have the same number of elements" -ForegroundColor Red
        return
    }

    Login-Lamp
    # The parameter is $languageId; the undefined $language used to be passed here.
    Set-LampLanguage -languageId $languageId
    $affixList = Invoke-RestMethod -Uri "$global:lampHost/affixList?tagging=false&arg=$range&type=range" -Headers $global:authorizationToken -UseBasicParsing

    ## walk over the inflection IDs to retrieve and copy
    foreach ($affixEntry in $affixList) {
        foreach ($rule in $affixEntry.inflectionRules) {
            $currentInflectionPatternId = $rule.inflectionRuleId
            #"Passsing $currentInflectionPatternId"
            $inflectionPattern = Invoke-RestMethod -Uri "$global:lampHost/inflectionRule?id=$currentInflectionPatternId" -Headers $global:authorizationToken -UseBasicParsing
            $changed = $false

            # By default the copy keeps the pattern's own trigger. The trigger is replaced
            # only when a real old trigger and new triggers were supplied; otherwise the
            # default $oldTrigger of 0 would match every pattern without a trigger.
            $targetTriggers = @($inflectionPattern.triggerId)
            if ($replaceTrigger -and $oldTrigger -eq $inflectionPattern.triggerId) {
                $inflectionPattern.triggerId = 0
                $targetTriggers = $newTriggers
                $changed = $true
            }

            $originalPatternId = $inflectionPattern.id
            foreach ($affix in $inflectionPattern.affixes) {
                if ($oldCriteria) {
                    $indexInLabelArray = [array]::IndexOf($oldCriteria, $affix.advancedCriteriaId)
                    #"Index: $indexInLabelArray"
                    #"Advanced criteria: " + $affix.advancedCriteriaId
                    if ($indexInLabelArray -gt -1) {
                        $affix.advancedCriteriaId = $newCriteria[$indexInLabelArray]
                        $changedAdvancedCriteriaId = $affix.advancedCriteriaId
                        $changed = $true
                        Write-Host "New advanced criteria in inflection pattern $originalPatternId : $changedAdvancedCriteriaId" -ForegroundColor Green
                    }
                }
                if ($replaceRegexes) {
                    for ($i = 0; $i -lt $replaceRegexes.Length; $i++) {
                        $currentRegex = $replaceRegexes[$i]
                        $currentReplacement = $replaceWith[$i]
                        $affixText = $affix.affix
                        #"Replacing $currentRegex with $currentReplacement in $affixText"
                        $affix.affix = $affixText -replace $currentRegex, $currentReplacement
                        if ($affixText -ne $affix.affix) {
                            $affixText = $affix.affix
                            $changed = $true
                            Write-Host "New affix text in inflection pattern $originalPatternId : $affixText" -ForegroundColor Green
                        }
                    }
                }
            }

            if ($changed) {
                "New pattern will be created based on pattern $originalPatternId"
                $inflectionPattern.id = 0 ## when adding new inflection patterns, it has to be set to 0
                foreach ($currentTrigger in $targetTriggers) {
                    $inflectionPattern.triggerId = $currentTrigger
                    if ($newTriggers) {
                        Write-Host "Copying to trigger $currentTrigger" -ForegroundColor Green
                        $inflectionPattern
                    }
                    ##$inflectionPattern | Get-Member
                    $outJson = $inflectionPattern | ConvertTo-Json -Depth 30
                    #$outJson
                    ##Read-Host -Prompt "Press Enter to continue"
                    ## Non-ASCII strings have issues. Workaround from here: https://stackoverflow.com/questions/15290185/invoke-webrequest-issue-with-special-characters-in-json
                    $null = Invoke-WebRequest -Uri "$global:lampHost/inflectionRule" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes($outJson)) -UseBasicParsing
                }
            }
        }
    }
}
#EndRegion '.\Public\Copy-InflectionPatterns.ps1' 137
#Region '.\Public\Edits-2CSV.ps1' 0
function Edits-2CSV {
    <#
    .SYNOPSIS
    Exports a list of edits to a CSV file.

    .DESCRIPTION
    The Edits-2CSV function retrieves the list of edits of a target user from
    "$global:lampHost/editList" and writes one line per edit to a CSV file. Each line contains
    the transaction ID, action, data table, date/time and, when the edit's request body is
    JSON with a lemma property, the lemma.

    .PARAMETER csv
    Specifies the filename of the CSV file to be generated.

    .PARAMETER targetUser
    Specifies the target user for whom the edits should be retrieved.

    .EXAMPLE
    PS C:\> Edits-2CSV -csv "C:\Path\To\Output.csv" -targetUser "JohnDoe"

    Exports the edits for the user "JohnDoe" to the file "C:\Path\To\Output.csv".

    .NOTES
    This function requires the Login-Lamp and Set-LampLanguage functions to be available in the current session.
    The values are written without CSV quoting, so a lemma containing a comma spills into an extra column.
    #>
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Filename")][String] $csv,
        [Parameter(Mandatory = $true, HelpMessage="Target user")][String] $targetUser
    )
    Login-Lamp
    # NOTE(review): $languageId is not a parameter of this function; the value comes from an
    # enclosing scope (or is empty). Confirm which language is intended here.
    Set-LampLanguage -languageId $languageId

    # Escape the user name so that characters such as '&', '+' or spaces cannot alter the query.
    $encodedUser = [uri]::EscapeDataString($targetUser)
    $edits = Invoke-RestMethod -Uri "$global:lampHost/editList?arg=1176&type=range&user=$encodedUser" -Method GET -Headers $global:authorizationToken -UseBasicParsing

    $lines = New-Object 'System.Collections.Generic.List[string]'
    $lines.Add('Transaction ID, Action, Data Table, Date/time, Lemma')
    foreach ($edit in $edits) {
        $lemma = ''
        if ($edit.requestBody) {
            try {
                $parsedRequestBody = ConvertFrom-Json -InputObject $edit.requestBody
                if ($parsedRequestBody.lemma) {
                    $lemma = $parsedRequestBody.lemma
                }
            } catch {
                # A request body that is not JSON simply has no lemma; keep exporting.
                $lemma = ''
            }
        }
        $lines.Add("$($edit.transaction),$($edit.action),$($edit.table),$($edit.datetime), $lemma")
    }
    $lines | Set-Content $csv -encoding UTF8
    # [gc]::Collect()
    # [gc]::WaitForPendingFinalizers()
}
#EndRegion '.\Public\Edits-2CSV.ps1' 52
#Region '.\Public\Find-RedundantInflections.ps1' 0
function Find-RedundantInflections {
    <#
    .SYNOPSIS
    Find redundant inflection patterns in a language.

    .DESCRIPTION
    The Find-RedundantInflections function requests the unused forms of every given lexeme
    from "$global:lampHost/unusedForms" and reports the inflection patterns (generatedBy
    entries) that are unused in all of them. Entries are compared by their id together with
    their propagatedId and spawnedFrom values.

    .PARAMETER languageId
    The ID of the language in which the inflection patterns are searched. This parameter is mandatory.

    .PARAMETER lexeme
    An array of lexeme IDs (integer). The patterns of the first lexeme are the starting set,
    which is narrowed down by every further lexeme. This parameter is mandatory.

    .NOTES
    - This process may take a long time to complete.
    - The function calls Login-Lamp and Set-LampLanguage itself.

    .EXAMPLE
    Find-RedundantInflections -languageId 7 -lexeme 123, 456, 789

    This example finds the inflection patterns that are unused for all of the lexeme IDs 123, 456 and 789 in the language with ID 7.

    .INPUTS
    None. You cannot pipe input to this cmdlet.

    .OUTPUTS
    None. The function writes the unmatched inflection pattern IDs to the console.
    #>
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Lamp Language Id")][String] $languageId,
        [Parameter(Mandatory = $true, HelpMessage="Array of lexeme Id's (Integer)")][Array] $lexeme
    )

    # Builds the comparison key of a generatedBy entry. The parts are separated so that,
    # for example, id 12 + propagatedId 3 cannot collide with id 1 + propagatedId 23.
    function Get-GeneratedByKey {
        param($generatedBy)
        return "$($generatedBy.id)|$($generatedBy.propagatedId)|$($generatedBy.spawnedFrom)"
    }

    Write-Host -ForegroundColor Yellow "WARNING: This process will take long time to complete"
    Login-Lamp
    Set-LampLanguage -languageId $languageId

    $first, $lexemes = $lexeme
    $remainingLexemes = if ($null -eq $lexemes) { @() } else { @($lexemes) }

    $firstResponse = Invoke-WebRequest -Uri "$global:lampHost/unusedForms?lexeme=$first" -Method GET -Headers $global:authorizationToken -UseBasicParsing | ConvertFrom-Json
    # $firstResponse = Get-Content "$first.json" | ConvertFrom-Json

    # key -> pattern id of every pattern that is still unused in all lexemes seen so far
    $candidates = [ordered]@{}
    foreach ($generatedByObj in $firstResponse.generatedBy) {
        if ($generatedByObj) {
            $candidates[(Get-GeneratedByKey $generatedByObj)] = $generatedByObj.id
        }
    }

    $i = 0   # counts processed lexemes; must live outside the loop for the progress bar
    foreach ($lexemeId in $remainingLexemes) {
        $pct = $i / $remainingLexemes.Count * 100
        Write-Progress -Activity "Processing lexeme Id" -Status "$pct% $lexemeId" -PercentComplete $pct
        # $Response = Get-Content "$lexemeId.json" | ConvertFrom-Json
        $Response = Invoke-WebRequest -Uri "$global:lampHost/unusedForms?lexeme=$lexemeId" -Method GET -Headers $global:authorizationToken -UseBasicParsing | ConvertFrom-Json

        if (-not ($Response.generatedBy -match "id")) {
            # A lexeme without unused patterns empties the intersection. 'return' is used
            # instead of 'exit', which would terminate the whole PowerShell session.
            Write-Host "No inflection patterns (unused in all the provided lexemes) found"
            return
        }

        # Keep only the candidates that are unused in this lexeme as well.
        $stillUnused = [ordered]@{}
        foreach ($generatedByObj in $Response.generatedBy) {
            if ($generatedByObj) {
                $stringId = Get-GeneratedByKey $generatedByObj
                if ($candidates.Contains($stringId)) {
                    $stillUnused[$stringId] = $generatedByObj.id
                }
            }
        }
        $candidates = $stillUnused
        $i++
    }

    $unMatchedIds = @($candidates.Values | Select-Object -Unique)
    if ($unMatchedIds.Count -gt 0) {
        Write-Host "Following inflection patterns were Unmatched in all the lexemes given"
        Write-Host($unMatchedIds -join ", ") -ForegroundColor Yellow
    } else {
        Write-Host "No inflection patterns (unused in all the provided lexemes) found"
    }
}
###############################################################################################################################################################
# #
# END OF SCRIPT
# ###############################################################################################################################################################
# try {
# foreach ($unMatchedId in $firstIds) {
# # Write-Host $unMatchedId -ForegroundColor Yellow
# ($firstResponse.generatedBy.Where({$_.id -eq $unMatchedId}))
# }
# }
# catch{
# $firstResponse.generatedBy
# }
# ($rep.generatedBy | Get-Member id).Definition -eq ("int id=$id")
# ($rep.generatedBy.Where({$_.id -eq 32692}))
# @(32497, 32692, 32499, 31749, 31778, 31769, 31772)
#hindi lexeme
# 3414989
# 3266941
# 3415040
#EndRegion
# '.\Public\Find-RedundantInflections.ps1' 145   (continuation of the #EndRegion marker on the previous line)
#Region '.\Public\Generate-FormulaicExpressionSynonyms.ps1' 0
function Generate-FormulaicExpressionSynonyms {
    <#
    .SYNOPSIS
    Asks an OpenAI model for phrases similar to the known expressions of each family.

    .DESCRIPTION
    Reads the input CSV, groups its rows by FamilyId and, for every family, builds a prompt
    from up to inputSample MainLemma values. The prompt is sent through Invoke-OpenAiApiCall;
    every non-trivial line of the answer is cleaned with Remove-Punctuations and, unless it is
    empty or already one of the family's MainLemma values, turned into an output row with
    Make-OneRowOfCsv. All rows are exported to outputFile.

    .PARAMETER language
    English name of the language; used to build the default end of the prompt.

    .PARAMETER inputFile
    Path to the CSV file to process (columns FamilyId and MainLemma are read).

    .PARAMETER apiKey
    OpenAI API key. When supplied it is saved with Save-LampSetting, otherwise it is read with Get-LampSetting.

    .PARAMETER outputFile
    Path of the CSV file the result is written to.

    .PARAMETER endPrompt
    End of the prompt. Default: "in <language>,1 per line".

    .PARAMETER startPrompt
    Start of the prompt. Default: "similar phrases to".

    .PARAMETER inputSample
    Maximum number of known phrases of a family to put into the prompt. Default: 10.

    .PARAMETER model
    .PARAMETER topP
    .PARAMETER temperature
    .PARAMETER maxTokens
    .PARAMETER frequencyPenalty
    .PARAMETER presencePenalty
    Passed unchanged to Invoke-OpenAiApiCall.
    #>
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory=$true, HelpMessage="English language name : ")][string]$language,
        [Parameter(Mandatory=$true, HelpMessage="Path to CSV file to process")][string]$inputFile,
        [Parameter(Mandatory=$false, HelpMessage="Open ai api key")][string]$apiKey,
        [Parameter(Mandatory=$true, HelpMessage="Path to CSV file to save the result (example 'C:\\tisane\output.csv')")][string]$outputFile,
        [Parameter(Mandatory=$false, HelpMessage="end of the prompt for query. Default : 'in <language>'")][string]$endPrompt,
        [Parameter(Mandatory=$false, HelpMessage="start of the prompt for query. Default : 'similar phrases to'")][string]$startPrompt = "similar phrases to",
        [Parameter(Mandatory=$false, HelpMessage="Number to input phrase sample to use when generating similar phrases. Default : 10")][decimal]$inputSample = 10,
        [Parameter(Mandatory=$false, HelpMessage="The model to use for the API call. default text-davinci-003")][string]$model = "text-davinci-003",
        [Parameter(Mandatory=$false, HelpMessage="The value of the top_p parameter. Defaults to 1.")][decimal]$topP = 1,
        [Parameter(Mandatory=$false, HelpMessage="The value of the temperature to use for the API call. Defaults to 0.5.")][decimal]$temperature = 0.5,
        [Parameter(Mandatory=$false, HelpMessage="The maximum number of tokens to generate. Defaults to 128.")][int]$maxTokens = 128,
        [Parameter(Mandatory=$false, HelpMessage="The value of the frequency_penalty parameter. Defaults to 0.")][decimal]$frequencyPenalty = 0,
        [Parameter(Mandatory=$false, HelpMessage="The value of the presence_penalty parameter. Defaults to 0.")][decimal]$presencePenalty = 0
    )
    Login-Lamp
    if (-not [string]::IsNullOrEmpty($apiKey)) {
        Save-LampSetting -settingName 'OpenAiApiKey' -settingValue $apiKey
    } else {
        $apiKey = Get-LampSetting -settingName 'OpenAiApiKey' -defaultValue $apiKey
    }

    $englishLanguageName = $language
    if (-not $endPrompt) {
        $endPrompt = "in $englishLanguageName,1 per line"
    }

    $csvObjects = @()
    $df = Import-Csv $inputFile -Encoding UTF8
    $groupedDf = @($df | Group-Object -Property FamilyId)
    $index = 0
    foreach ($familyGroup in $groupedDf) {
        $familyId = $familyGroup.Name
        # Always an array, also for a family with a single row.
        $knownLemmas = @($familyGroup.Group.MainLemma)
        # Exactly $inputSample phrases ([0..$inputSample] used to take one more).
        $samplePhrases = @($knownLemmas | Select-Object -First ([int]$inputSample)) -join "','"

        $prompt = "{0} '{1}' {2}" -f $startPrompt,$samplePhrases,$endPrompt
        Write-Host "Generating similar phrases to [$familyId] $samplePhrases" -ForegroundColor Green
        $response = Invoke-OpenAiApiCall -prompt $prompt -model $model -apiKey $apiKey -temperature $temperature -maxTokens $maxTokens -topP $topP -frequencyPenalty $frequencyPenalty -presencePenalty $presencePenalty
        $response_1 = $response.Content | ConvertFrom-Json
        # Re-interprets the characters of the answer as UTF-8 bytes.
        # NOTE(review): presumably a workaround for a response decoded with a single-byte
        # encoding - confirm before changing; at most the first 10001 characters are used.
        $response = [System.Text.Encoding]::UTF8.GetString($response_1.choices[0].text[0..10000])

        $FESynonyms = $response -split "\n"
        foreach ($FESynonym in $FESynonyms) {
            if (-not ($FESynonym.Length -gt 1)) {
                continue
            }
            Write-Host $FESynonym
            $cleanSynonym = Remove-Punctuations -rawString $FESynonym
            # A line without letters cleans to "", which Make-OneRowOfCsv cannot accept.
            if ([string]::IsNullOrEmpty($cleanSynonym)) {
                continue
            }
            # Exact, case-sensitive membership test. String.Contains on a single-row family
            # used to perform a substring search instead.
            if ($knownLemmas -ccontains $cleanSynonym) {
                continue
            }
            $csvObjects += Make-OneRowOfCsv -familyId $familyId -suggestedExpression $cleanSynonym
        }

        $index++
        $pct = $index / $groupedDf.Count * 100
        Write-Progress -Activity "Generating similar phrases to [$familyId] $samplePhrases " -Status "$pct%" -PercentComplete $pct
    }
    $csvObjects | Export-Csv $outputFile -NoTypeInformation -Encoding UTF8
}
# Runs when the file is loaded: use UTF-8 for piped output and for console input/output.
$OutputEncoding = [console]::InputEncoding = [console]::OutputEncoding = New-Object System.Text.UTF8Encoding
# Generate-FormulaicExpressionSynonyms -language "russian" -inputFile "TisaneLampClient\RussianFormulaicExpressions.csv" -outputFile "TisaneLampClient\aug_RussianFormulaicExpressions.csv"
#EndRegion '.\Public\Generate-FormulaicExpressionSynonyms.ps1' 71
#Region '.\Public\Generate-PhrasesWithLexemes.ps1' 0
function Generate-PhrasesWithLexemes {
    <#
    .SYNOPSIS
    Collects corpus sentences that contain each lexeme of a CSV file.

    .DESCRIPTION
    For every row of inputCsv the function selects the corpus rows whose originalSentence
    contains the row's word, drops the sentences that contain more than one terminal
    punctuation character, and writes one output row per lexeme with the matching original
    and translated sentences joined by commas.

    .PARAMETER languageId
    Language Id. Not used by the current implementation.

    .PARAMETER inputCsv
    Path to the CSV file with the lexemes (columns word, lexemeID, familyId).

    .PARAMETER outputCsv
    Path of the CSV file to write.

    .PARAMETER corpusPath
    Path to a tab-separated, header-less corpus with the columns
    originalSentenceId, originalSentence, translatedSentenceId, translatedSentence.

    .PARAMETER punctuations
    The characters counted as terminal punctuation. Default: ".!?". An empty string disables the filter.
    #>
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Language Id: ")][int]$languageId,
        [Parameter(Mandatory = $true, HelpMessage="Path to csv file : ")][String]$inputCsv,
        [Parameter(Mandatory = $true, HelpMessage="Path to output csv file : ")][String]$outputCsv,
        [Parameter(Mandatory = $true, HelpMessage="Path to corpus of the same language : ")][String]$corpusPath,
        [Parameter(Mandatory = $false, HelpMessage="string of punctuations : ")][String]$punctuations = ".!?"
    )
    # Kept in its own variable: $punctuations is typed [String], so assigning the split
    # result back to it would silently turn the array into one space-joined string.
    $terminalChars = $punctuations.ToCharArray()

    $df = @(Import-Csv $inputCsv -Encoding UTF8)
    $corpusDf = Import-Csv -Path $corpusPath -Delimiter "`t" -Header "originalSentenceId","originalSentence","translatedSentenceId","translatedSentence"

    $csvObjects = @()
    $i = 0
    foreach ($row in $df) {
        $lexeme = $row.word
        $pct = $i / $df.Count * 100
        Write-Progress -Activity "Generating sentences for" -Status "$pct% $lexeme" -PercentComplete $pct
        $i += 1

        # Escape the lexeme so that characters such as '*', '?' or '[' are matched literally.
        $containsLexeme = '*' + [System.Management.Automation.WildcardPattern]::Escape($lexeme) + '*'
        $sentences = @($corpusDf | Where-Object { $_.originalSentence -like $containsLexeme })

        # removing rows with more than one terminal punctuation
        if ($terminalChars.Length -gt 0) {
            $sentences = @($sentences | Where-Object {
                # Splitting on the punctuation characters yields (occurrences + 1) parts.
                ($_.originalSentence.Split($terminalChars).Length - 1) -le 1
            })
        }

        $csvObjects += [PSCustomObject]@{
            "word" = $lexeme
            "lexemeID" = $row.lexemeID
            "familyId" = $row.familyId
            "originalSentence" = $sentences.originalSentence -join ","
            "englishTranslation" = $sentences.translatedSentence -join ","
        }
    }
    $csvObjects | Export-Csv $outputCsv -NoTypeInformation -Encoding UTF8
}
# Generate-PhrasesWithLexemes -languageId 36 -inputCsv "sample.csv" -outputCsv "tsample.csv" -corpusPath "hindi-english.tsv"
#EndRegion '.\Public\Generate-PhrasesWithLexemes.ps1' 42
#Region '.\Public\Get-CommonsensecueUseStats.ps1' 0
function Get-CommonsensecueUseStats {
    <#
    .SYNOPSIS
    Exports commonsense cue usage statistics to a CSV file.

    .DESCRIPTION
    Requests "$global:LampHost/commonsenseStats" with the given filter, keeps the entries
    that have a falsePositiveCount property, sorts them by falsePositiveCount in descending
    order and exports entityId (as commonsenseCueId), falsePositiveCount and
    truePositiveCount to the output file.

    .PARAMETER arg
    A filter argument.

    .PARAMETER argType
    Type of the filter argument.

    .PARAMETER howManyDaysBack
    How many days to go back; sent as the daysAgo query argument.

    .PARAMETER outputFilename
    Full pathname of the output CSV.
    #>
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="A filter argument")][String] $arg,
        [Parameter(Mandatory = $true, HelpMessage="Type of the filter argument")][String] $argType,
        [Parameter(Mandatory = $false, HelpMessage="How many days to go back")][int] $howManyDaysBack,
        [Parameter(Mandatory = $true, HelpMessage="Full pathname of the output CSV")][String] $outputFilename
    )
    Login-Lamp

    $statsUri = "$global:LampHost/commonsenseStats?arg=$arg&type=$argType&daysAgo=$howManyDaysBack"
    $stats = Invoke-RestMethod -Uri $statsUri -Method GET -Headers $global:authorizationToken -UseBasicParsing

    # Only entries that actually carry a falsePositiveCount are exported.
    $withFalsePositives = $stats | Where-Object { $_.PSObject.Properties.Name -contains 'falsePositiveCount' }
    $worstFirst = $withFalsePositives | Sort-Object -Property falsePositiveCount -Descending

    # $response | ForEach-Object {
    # if (-not $_.PSObject.Properties.Name -contains 'falsePositiveCount') {
    # Add-Member -InputObject $_ -NotePropertyName 'falsePositiveCount' -NotePropertyValue 0
    # }
    # }
    # $sortedResponse = $response | Sort-Object -Property falsePositiveCount -Descending
    # $sortedResponse | Select-Object entityId, falsePositiveCount, truePositiveCount | Export-Csv -Path $outputFilename -NoTypeInformation -Encoding UTF8

    # entityId is exported under the column name commonsenseCueId.
    $cueIdColumn = @{Name='commonsenseCueId'; Expression={$_.entityId}}
    $worstFirst |
        Select-Object $cueIdColumn, falsePositiveCount, truePositiveCount |
        Export-Csv -Path $outputFilename -NoTypeInformation
}
# Get-CommonsensecueUseStats -arg "1241" -argType "range" -howManyDaysBack 24 -outputFilename "output.csv"
#EndRegion '.\Public\Get-CommonsensecueUseStats.ps1' 28
#Region '.\Public\Helper-ProcessResponse.ps1' 0
function Invoke-LampMethod { [CmdletBinding()] Param( [Parameter(Mandatory = $true, HelpMessage="url 
")][String] $url, [Parameter(Mandatory = $true, HelpMessage="Language ID")][int] $languageId, [Parameter(Mandatory = $false, HelpMessage="familyRange ")][int] $familyRange, [Parameter(Mandatory = $false, HelpMessage="lexemeRange ")][int] $lexemeRange, [Parameter(Mandatory = $false, HelpMessage="Wiktionary category: ")][String] $wiktionaryRange, [Parameter(Mandatory = $false, Helpmessage="Type of request (GET or POST) default POST")][string]$methodType="POST", [Parameter(Mandatory=$false, HelpMessage="Array of fields to display")][String[]]$fieldsToShow, [Parameter(Mandatory=$false, HelpMessage="Strat at this point for wiktionary range")][String]$afterFrom, [Parameter(Mandatory = $false, HelpMessage="Regex to verify: ")][String] $validatingRegex, [Parameter(Mandatory=$false, HelpMessage="Maximum number of records to fetch (Default = 0)")][int]$max = 0 ) Login-Lamp Set-LampLanguage -languageId $languageId if (-not ($url -match '^https?://')) { $url = "$global:lampHost/$url" } if (-not ($familyRange -or $lexemeRange -or $wiktionaryRange)){ try{ $lampMethodResponse = Invoke-RestMethod -Uri $url -Method $methodType -ContentType 'application/json; charset=utf-8'-Headers $global:authorizationToken -UseBasicParsing }catch{ Write-Host "Invocation failed: $_" -ForegroundColor Red return 1 } if ($fieldsToShow.Length -eq 0){ $lampMethodResponse }else{ $filteredResponse = $lampMethodResponse | Select-Object -Property $fieldsToShow $output = "" foreach ($fieldToShow in $fieldsToShow) { $value = $filteredResponse.$fieldToShow $output += " $value" } Write-Host $output } # Write-Host "Response for $url" -ForegroundColor Green # $lampMethodResponse }if ($familyRange -or $lexemeRange -or $wiktionaryRange){ if (-not ($url -match "\{0\}")) { Write-Host "url [ $url ] does not contain {0}, recheck and run code again " -ForegroundColor RED Write-Host "Exiting" -ForegroundColor RED return 1 }else{ if ($familyRange){ try { # $familyResponse = Invoke-RestMethod -Uri 
"$global:lampHost/knowledgeGraph?arg=$familyRange&type=range&max=$max&basic=true" -Method GET -ContentType 'application/json; charset=utf-8'-Headers $global:authorizationToken -UseBasicParsing $familyResponse = Invoke-RestMethod -Uri "$global:lampHost/ids?table=Families&range=$familyRange&max=$max&basic=true" -Method GET -ContentType 'application/json; charset=utf-8'-Headers $global:authorizationToken -UseBasicParsing # $familyResponse }catch{ Write-Host "Invocation failed: $_" -ForegroundColor Red return 1 } $familyIds = $familyResponse #$familyResponse.id $totalFamilies = $familyIds.length Write-Host "Found $totalFamilies families in the given range" $i = 0 if ($fieldsToShow.Length -ne 0){ foreach ($familyId in $familyIds) { $pct = $i/$totalFamilies * 100 Write-Progress -Activity "Processing $familyId" -Status "$pct% $familyId" -PercentComplete $pct $i +=1 $finalUrl = $url -f $familyId try { $lampMethodResponse = Invoke-RestMethod -Uri $finalUrl -Method $methodType -ContentType 'application/json; charset=utf-8' -Headers $global:authorizationToken -UseBasicParsing $filteredResponse = $lampMethodResponse | Select-Object -Property $fieldsToShow $output = "" foreach ($fieldToShow in $fieldsToShow) { $value = $filteredResponse.$fieldToShow $output += " $value" } Write-Host "[Family $familyId] $output" }catch{ Write-Host "Invocation failed: $_" -ForegroundColor Red } } }else{ foreach ($familyId in $familyIds) { $pct = $i/$totalFamilies * 100 Write-Progress -Activity "Processing $familyId" -Status "$pct% $familyId" -PercentComplete $pct $i +=1 $finalUrl = $url -f $familyId try { $lampMethodResponse = Invoke-RestMethod -Uri $finalUrl -Method $methodType -ContentType 'application/json; charset=utf-8'-Headers $global:authorizationToken -UseBasicParsing $filteredResponse = $lampMethodResponse | Select-Object -Property * Write-Host "[Family $familyId]" $filteredResponse }catch { Write-Host "Invocation failed: $_" -ForegroundColor Red } } # $familyIds | ForEach-Object 
-Parallel { # $familyId = $_ # Write-Host $familyId # # $pct = $using:i / $using:totalFamilies * 100 # # $using:i += 1 # $finalUrl = $using:url -f $familyId # try { # $lampMethodResponse = Invoke-RestMethod -Uri $finalUrl -Method $using:methodType -ContentType 'application/json; charset=utf-8' -Headers $using:global:authorizationToken -UseBasicParsing # $filteredResponse = $lampMethodResponse | Select-Object -Property * # Write-Host "[Family $familyId]" $filteredResponse # } catch { # Write-Host "Invocation failed: $_" -ForegroundColor Red # } # } -ThrottleLimit 2 } }elseif ($lexemeRange) { try{ # $lexemeResponse = Invoke-RestMethod -Uri "$global:lampHost/lexicon?arg=$lexemeRange&type=range&max=$max" -Method GET -ContentType 'application/json; charset=utf-8'-Headers $global:authorizationToken -UseBasicParsing $lexemeResponse = Invoke-RestMethod -Uri "$global:lampHost/ids?table=Lexemes&range=$lexemeRange&max=$max" -Method GET -ContentType 'application/json; charset=utf-8'-Headers $global:authorizationToken -UseBasicParsing }catch{ Write-Host "Invocation failed: $_" -ForegroundColor Red return 1 } $lexemeIds = $lexemeResponse#$lexemeResponse.id $totalLexemes = $lexemeIds.length Write-Host "Found $totalLexemes lexemes in the given range" $i = 0 if ($fieldsToShow.Length -ne 0){ foreach ($lexemeId in $lexemeIds) { $pct = $i/$totalLexemes * 100 Write-Progress -Activity "Processing $lexemeId" -Status "$pct% $lexemeId" -PercentComplete $pct $i +=1 $finalUrl = $url -f $lexemeId try { $lampMethodResponse = Invoke-RestMethod -Uri $finalUrl -Method $methodType -ContentType 'application/json; charset=utf-8' -Headers $global:authorizationToken -UseBasicParsing $filteredResponse = $lampMethodResponse | Select-Object -Property $fieldsToShow $output = "" foreach ($fieldToShow in $fieldsToShow) { $value = $filteredResponse.$fieldToShow $output += " $value" } Write-Host "[Lexeme $lexemeId] $output" }catch { Write-Host "Invocation failed: $_" -ForegroundColor Red } } }else{ foreach 
($familyId in $familyIds) { $pct = $i/$totalLexemes * 100 Write-Progress -Activity "Processing $lexemeId" -Status "$pct% $lexemeId" -PercentComplete $pct $i +=1 $finalUrl = $url -f $lexemeId try { $lampMethodResponse = Invoke-RestMethod -Uri $finalUrl -Method $methodType -ContentType 'application/json; charset=utf-8'-Headers $global:authorizationToken -UseBasicParsing $filteredResponse = $lampMethodResponse | Select-Object -Property * Write-Host "[Lexeme $lexemeId]" $filteredResponse }catch { Write-Host "Invocation failed: $_" -ForegroundColor Red } } } }elseif ($wiktionaryRange){ $i=0 do { if (-not $bookmark -and $afterFrom) { # First request: Use gcmstartsortkeyprefix to start from a specific point $wikidataUrl = "https://en.wiktionary.org/w/api.php?action=query&format=json&gcmstartsortkeyprefix=$([System.Web.HttpUtility]::UrlEncode($afterFrom))&gcmlimit=500&gcmtitle=Category:$wiktionaryRange&generator=categorymembers&prop=pageprops" } else { # Subsequent requests: Use gcmcontinue from the previous response $wikidataUrl = "https://en.wiktionary.org/w/api.php?action=query&format=json&gcmlimit=500&gcmtitle=Category:$wiktionaryRange&generator=categorymembers&prop=pageprops&gcmcontinue=$bookmark" } # $wikidataUrl = "https://en.wiktionary.org/w/api.php?action=query&generator=categorymembers&format=json&gcmtitle=Category:$wiktionaryRange&prop=pageprops&gcmlimit=500&gcmcontinue=$bookmark" Write-Host "Loading entries from $wikidataUrl" $wikidataResponse = Invoke-WebRequest -Uri $wikidataUrl -Method GET -UseBasicParsing $wikidataResponse = $wikidataResponse.Content # -replace '(?<=pages["]:{["])[^"]+', 'results' $listOfInstances = ConvertFrom-Json -InputObject $wikidataResponse $totalPages = $($listOfInstances.query.pages.PSObject.Properties).Count Write-Host "Found [$totalPages] pages" -ForegroundColor Green if ($listOfInstances.continue) { $bookmark = $listOfInstances.continue.gcmcontinue } else { $bookmark = $null } #"Bookmark: $bookmark" $i=0 
$listOfInstances.query.pages.PSObject.Properties | foreach { $word = $_.Value.title #$word if ($word -and (-not ($validatingRegex) -or $word -match $validatingRegex)) { $pct = $i/$totalPages * 100 $normalizedWord = $word -replace ' ', '_' #"PageId=$pageId word=$normalizedWord" $finalUrl = $url -f $normalizedWord $i += 1 try { $importNonLemmaResponse = Invoke-RestMethod -Uri $finalUrl -Method $methodType -Headers $global:authorizationToken -UseBasicParsing Write-Host "Imported [$normalizedWord]" -ForegroundColor Green # ProcessPageTitle -title $word } catch { Write-Host "IMPORT FAILED FOR $normalizedWord $_" -ForegroundColor Red } } } Write-Host "[$normalizedWord] was the last word imported" -ForegroundColor Magenta } while ($bookmark) } } } } #EndRegion '.\Public\Helper-ProcessResponse.ps1' 216 #Region '.\Public\Import-CustomJson.ps1' 0 function Import-CustomJson { <# .SYNOPSIS Imports and processes a custom JSON file. .DESCRIPTION The Import-CustomJson function imports a custom JSON file, processes each line of the file, and sends the content to a web service for further processing. .PARAMETER Path Specifies the filename and path of the JSON file to import. The value must be provided as a string. This parameter is mandatory. .PARAMETER Corpora Specifies the Corpora ID to be used for the web service. The value must be provided as an integer. This parameter is mandatory. .EXAMPLE Import-CustomJson -Path "C:\Data\custom.json" -Corpora 1234 Imports the custom JSON file located at "C:\Data\custom.json" and uses the Corpora ID 1234 for the web service. 
#>
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Filename including path: ")][String] $path,
        [Parameter(Mandatory = $true, HelpMessage="Corpora ID: ")][int] $corpora
    )

    # Resolve the file once: .NET resolves relative paths against the process directory,
    # not the PowerShell location, so the reader is opened with the full name.
    $inputFile = Get-Item $path
    $fileSize = $inputFile.Length
    $PROGRESS_UPDATE_INTERVAL = 10
    $lastReported = 0
    $processed = 0

    $TISANE_PATH = 'C:\Tisane\TestConsole\'
    $config_path = $TISANE_PATH + "Tisane.TestConsole.exe.Config"
    [System.AppDomain]::CurrentDomain.SetData("APP_CONFIG_FILE", $config_path) # assign the configuration file
    [Reflection.Assembly]::LoadFrom($TISANE_PATH + "Tisane.Runtime.dll")        # load the type

    Login-Lamp
    # NOTE(review): $languageId is not a parameter of this function; it is resolved from the
    # caller's scope. Confirm the intended source of the language ID.
    Set-LampLanguage -languageId $languageId

    $r = [IO.File]::OpenText($inputFile.FullName)
    $startedAt = Get-Date
    "Everything is loaded. Starting at " + $startedAt.ToString('T')
    $totalPostCount = 0

    try {
        while ($r.Peek() -ge 0) {
            $line = $r.ReadLine()

            # Blank lines carry no JSON; count them for progress and move on.
            if ([string]::IsNullOrWhiteSpace($line)) {
                $processed += $line.Length
                continue
            }

            # A line that fails to parse must not leave the previous line's objects in
            # place, otherwise they would be posted a second time.
            try {
                $inJson = ConvertFrom-Json -InputObject $line
            } catch {
                Write-Warning "Skipping a line that is not valid JSON: $_"
                $processed += $line.Length
                continue
            }

            $posts = @($inJson)
            $objectCount = $posts.Count
            foreach ($post in $posts) {
                $totalPostCount += 1
                # Each object accounts for an equal share of its line's length.
                $processed += $line.Length / $objectCount
                if ($processed - $lastReported -gt $PROGRESS_UPDATE_INTERVAL) {
                    # Characters read vs. bytes on disk: clamp so Write-Progress never sees > 100.
                    $pct = [math]::Min(100, ($processed / $fileSize) * 100)
                    Write-Progress -Activity "Importing $path" -Status "$pct% complete" -PercentComplete $pct
                    $lastReported = $processed
                }
                $content = [System.Web.HttpUtility]::UrlEncode($post.body)
                $response = Invoke-WebRequest -Uri "$global:lampHost/testfragment?corpora=$corpora&fragment=$content&critical=false&test=true" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes('')) -UseBasicParsing
            }
        }
    } finally {
        # Release the file handle even when a request or the loop fails.
        $r.Dispose()
    }

    $finishedAt = Get-Date
    $timeTaken = ($finishedAt - $startedAt).TotalMinutes
    "Done at $finishedAt after $timeTaken minutes"
}
#EndRegion '.\Public\Import-CustomJson.ps1' 82
#Region '.\Public\Import-CX.ps1' 0
#
# =======================================================================================
# Imports lexemes from XML files
# =======================================================================================
# script parameters
function Import-CX {
<#
.SYNOPSIS
Imports entries from an XML file into a CX system.

.DESCRIPTION
The Import-CX function reads the content of an XML file and parses it.
It then processes each entry in the XML file and imports it into the CX system using the POST /import method.
Entries are read from the list/row elements; each row needs a "native" and an "english" value.

.PARAMETER cx
Specifies the filename of the XML file to import.

.PARAMETER languageId
Specifies the language ID for the entries in the XML file.

.PARAMETER feature
Specifies the feature value for the entries in the XML file.

.PARAMETER featureId
Specifies the feature ID for the entries in the XML file. If not provided, the default value is 1 (part of speech).

.PARAMETER source
Specifies the source for the entries in the XML file.

.PARAMETER proper
Specifies whether the entries in the XML file are proper nouns (True) or not (False). If not provided, the default value is False.

.EXAMPLE
Import-CX -cx "dictionary.xml" -languageId "en" -feature "noun" -featureId 1 -source "Web" -proper $false
Imports entries from the "dictionary.xml" file into the CX system with the specified parameters.

.NOTES
The function requires the Login-Lamp and Set-LampLanguage functions to be defined.
#>
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Filename")][String] $cx,
        [Parameter(Mandatory = $true, HelpMessage="Language ID")][String] $languageId,
        [Parameter(Mandatory = $true, HelpMessage="Feature value")][String] $feature,
        [Parameter(Mandatory = $false, HelpMessage="Feature ID")][String] $featureId,
        [Parameter(Mandatory = $false, HelpMessage="Source")][String] $source,
        [Parameter(Mandatory = $false, HelpMessage="Is proper noun")][Boolean] $proper
    )

    # Let the XML parser read the file itself so the encoding declared in the document is
    # honoured (Get-Content would decode BOM-less files with the session's default code page).
    $dictX = New-Object System.Xml.XmlDocument
    $dictX.Load((Resolve-Path -Path $cx).ProviderPath)

    if (-not($featureId)) {
        # if the feature ID was not supplied, use part of speech
        $featureId = 1
    }

    Login-Lamp
    Set-LampLanguage -languageId $languageId

    # Values that do not change per entry are escaped once.
    $encodedFeatureId = [uri]::EscapeDataString([string]$featureId)
    $encodedFeature = [uri]::EscapeDataString($feature)
    $encodedSource = [uri]::EscapeDataString($source)

    # process XML entries
    foreach ($entry in $dictX.list.row) {
        $word = $entry.native
        $referenceWord = $entry.english
        if ($word -and $referenceWord) {
            # we have the word and its translation to English. Call the POST /import method
            "Importing $word -> $referenceWord"
            # Escape the words so '&', '#', '+' etc. cannot corrupt the query string.
            $encodedWord = [uri]::EscapeDataString([string]$word)
            $encodedReference = [uri]::EscapeDataString([string]$referenceWord)
            $response = Invoke-WebRequest -Uri "$global:lampHost/import?lexeme=$encodedWord&reference=$encodedReference&proper=$proper&hypernym=0&featureList=$encodedFeatureId&featureValue=$encodedFeature&source=$encodedSource&orgId=$encodedReference" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes('')) -UseBasicParsing
        }
    }

    # clean up RAM
    [gc]::Collect()
    [gc]::WaitForPendingFinalizers()
}
#EndRegion '.\Public\Import-CX.ps1' 76
#Region '.\Public\Import-Emoji.ps1' 0
function Import-Emoji{
<#
.SYNOPSIS
Imports emoji data from a CSV file into the system.

.DESCRIPTION
The Import-Emoji function imports emoji data from a CSV file and adds it to the system using the Add-Emoji function.
It requires the Login-Lamp cmdlet to be executed first to establish a connection.
Each line in the CSV file should contain comma-separated values for emoji, long name, and short name.

.PARAMETER csv
Specifies the filename of the CSV file containing the emoji data. This parameter is mandatory.
.PARAMETER languageId
Specifies the language ID to associate with the imported emoji. This parameter is mandatory.

.PARAMETER featureId
Specifies the feature ID for the imported emoji. This parameter is optional.

.PARAMETER source
Specifies the source of the imported emoji. This parameter is optional.

.EXAMPLE
Import-Emoji -csv "emoji.csv" -languageId "7" -featureId "1"
Imports emoji data from the "emoji.csv" file, associates it with the "en-US" language, sets the feature ID to "1"
#>
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Filename")][String] $csv,
        [Parameter(Mandatory = $true, HelpMessage="Language ID")][String] $languageId,
        [Parameter(Mandatory = $false, HelpMessage="Feature ID")][String] $featureId,
        [Parameter(Mandatory = $false, HelpMessage="Source")][String] $source
    )
    Login-Lamp

    # Emoji are outside every legacy code page: read the file explicitly as UTF-8.
    $fileLines = Get-Content $csv -Encoding UTF8
    Set-LampLanguage -languageId $languageId

    foreach ($fileLine in $fileLines) {
        # Blank lines would otherwise be passed on as an empty (mandatory) emoji.
        if ([string]::IsNullOrWhiteSpace($fileLine)) {
            continue
        }
        $emoji, $longName, $shortName = $fileLine.Split(",")
        if ([string]::IsNullOrWhiteSpace($emoji)) {
            continue
        }
        # The helper is named Add-Emoji; "AddEmoji" does not exist in this module.
        # NOTE(review): Add-Emoji reads $source from the caller's scope, i.e. this
        # function's -source parameter.
        Add-Emoji -emoji $emoji -longName $longName -shortName $shortName
    }

    [gc]::Collect()
    [gc]::WaitForPendingFinalizers()
}
#EndRegion '.\Public\Import-Emoji.ps1' 45
#Region '.\Public\Import-ExtractionTestDataset.ps1' 0
function Import-ExtractionTestDataset {
<#
.SYNOPSIS
Imports a test dataset for extraction testing.

.DESCRIPTION
The Import-ExtractionTestDataset function imports a test dataset for extraction testing.
It takes various parameters to specify the input file, target corpora, extraction type, extraction value, and other optional settings.

.PARAMETER filename
Specifies the input file for the test dataset. The file should be in UTF-8 plain text format.

.PARAMETER corpora
Specifies the target corpora for the test dataset.

.PARAMETER extractionType
Specifies the extraction type for the test dataset. Valid options are "abuse," "entity," or "sentiment_expressions".

.PARAMETER extractionValue
Specifies the extraction value for the test dataset. For sentiment testing, use "positive" or "negative."
For entity or abuse testing, specify the entity or abuse type.

.PARAMETER auxLabel
Specifies an optional auxiliary label for the test dataset.

.PARAMETER tags
Specifies optional tags for the test dataset.

.PARAMETER testDaily
Specifies whether the test should be performed daily. This is an optional parameter.
If specified, the value should be $true. If not specified, the value is $false.

.PARAMETER templates
Specifies an array of templates for the test fragments.
Every template must contain the {0} placeholder, which is replaced by each input line.

.EXAMPLE
Import-ExtractionTestDataset -filename "test.txt" -corpora 1 -extractionType "entity" -extractionValue "person" -auxLabel "optional" -tags "tag1,tag2" -testDaily $false -templates "template1", "template2"
This example imports a test dataset from the "test.txt" file, targeting corpora 1. The extraction type is set to "entity" and the extraction value is set to "person." An optional auxiliary label and tags are provided. The test is not performed daily. Two templates are specified for the test fragments.
#>
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Input file (UTF-8 plain text)")][String] $filename,
        [Parameter(Mandatory = $true, HelpMessage="Target corpora")][int] $corpora,
        [Parameter(Mandatory = $true, HelpMessage="Extraction type (abuse / entity / sentiment_expressions)")][String] $extractionType,
        [Parameter(Mandatory = $true, HelpMessage="Extraction value (positive or negative for sentiment, entity or abuse type for entities and abuse)")][String] $extractionValue,
        [Parameter(HelpMessage="Auxiliary label (optional)")][String] $auxLabel,
        [Parameter(HelpMessage="Tags (optional)")][String] $tags,
        [Parameter(HelpMessage="Test daily (optional)")][bool] $testDaily,
        [Parameter(Mandatory = $true, HelpMessage="Templates for the test fragments")][String[]] $templates
    )

    $sMustTest = ''
    if ($testDaily) {
        $sMustTest = 'true'
    } else {
        $sMustTest = 'false'
    }

    # Check the templates before anything is sent.
    foreach ($template in $templates) {
        if (-not ($template -match "\{0\}")) {
            Write-Host "template [ $template ] does not contain {0}, recheck and run code again" -ForegroundColor RED
            Write-Host "Exiting" -ForegroundColor RED
            # NOTE(review): EXIT ends the whole PowerShell session, not just this function;
            # kept as is because scripts may rely on the exit code.
            EXIT 1
        }
    }

    Login-Lamp

    # Free-form values are encoded once so that spaces, '&', '#' etc. survive the query string.
    $encodedType = [System.Web.HttpUtility]::UrlEncode($extractionType)
    $encodedValue = [System.Web.HttpUtility]::UrlEncode($extractionValue)
    $encodedAux = [System.Web.HttpUtility]::UrlEncode($auxLabel)
    $encodedTags = [System.Web.HttpUtility]::UrlEncode($tags)

    $lines = Get-Content $filename -Encoding UTF8
    foreach ($line in $lines) {
        if ([string]::IsNullOrWhiteSpace($line)) {
            continue
        }
        foreach ($template in $templates) {
            $fragment = $template -f $line
            $originalLineLength = $line.length
            # Ordinal search: the offset must be an exact character position, independent
            # of culture-specific comparison rules.
            $positionInTemplate = $fragment.IndexOf($line, [System.StringComparison]::Ordinal)
            $encodedFragment = [System.Web.HttpUtility]::UrlEncode($fragment)

            Write-Host "Adding $fragment (pos: $positionInTemplate) to LaMP"
            # Reset first: after a failed request the previous response must not be reused,
            # or the gold record would be attached to the wrong fragment.
            $fragmentIdResponse = $null
            try {
                $fragmentIdResponse = Invoke-RestMethod -Uri "$global:lampHost/testfragment?corpora=$corpora&fragment=$encodedFragment&test=$sMustTest" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes('')) -UseBasicParsing
            } catch {
                Write-Host "Could not add [ $fragment ] to gold standard" -ForegroundColor RED
                Write-Host $_ -ForegroundColor RED
                continue
            }
            if (-not $fragmentIdResponse.success) {
                Write-Host "Could not add [ $fragment ] to gold standard" -ForegroundColor RED
                Write-Host $fragmentIdResponse.error -ForegroundColor RED
                continue
            }

            $fragmentId = $fragmentIdResponse.id
            Write-Host "Adding $fragment (pos: $positionInTemplate) to gold standard records"
            $response = Invoke-WebRequest -Uri "$global:lampHost/gold?corpora=$corpora&fragment=$fragmentId&provider=1&type=information%20extraction&subtype=$encodedType&attribute=$encodedType&offset=$positionInTemplate&length=$originalLineLength&num&entityId&label=$encodedValue&aux=$encodedAux&tags=$encodedTags" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes('')) -UseBasicParsing
        }
    }
}
#EndRegion '.\Public\Import-ExtractionTestDataset.ps1' 90
#Region '.\Public\Import-InflectedFormsFromWiktionary.ps1' 0
function Import-InflectedFormsFromWiktionary {
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="ISO language code")][String] $language,
        [Parameter(Mandatory = $true, HelpMessage="The word to look for in the Wiktionary")][String] $word,
        [Parameter(Mandatory = $true, HelpMessage="The lexeme ID to attach the inflected forms to")][int] $lexemeId,
[Parameter(Mandatory = $true, HelpMessage="An array of possible tags in the table")][String[]] $tags,
        [Parameter(Mandatory = $true, HelpMessage="An array with the same number of elements as tags containing Tisane features in the format index=value")][String[]] $features,
        [Parameter(Mandatory = $true, HelpMessage="Path to where Tisane.Runtime.dll sits (only the directory)")][String] $path
    )
    # Purpose: parse the English Wiktionary page of $word, translate the categories of each
    # inflected form into Tisane features (tags[i] -> features[i]) and PUT every form that
    # has at least one recognised feature to /inflection for $lexemeId.

    # tags[i] is translated with features[i]; a shorter features array would fail mid-import.
    if ($tags.Length -ne $features.Length) {
        Write-Error "tags ($($tags.Length)) and features ($($features.Length)) must have the same number of elements"
        return
    }

    # Load Tisane.Runtime.dll (Join-Path also accepts a directory without a trailing separator)
    [Reflection.Assembly]::LoadFrom((Join-Path $path "Tisane.Runtime.dll"))
    Login-Lamp

    # Find the first language with the requested ISO code.
    $languageJSON = Invoke-RestMethod -Uri "$global:lampHost/languages" -Method GET -UseBasicParsing
    $languageNamesToCodes = @{}
    $languageEntry = $languageJSON | Where-Object { $_.ISOCode -eq $language } | Select-Object -First 1
    if (-not $languageEntry) {
        Write-Error "Language with ISO code [$language] was not found"
        return
    }
    $languageId = $languageEntry.id
    $languageName = $languageEntry.englishName
    $commaPos = $languageName.IndexOf(',')
    if ($commaPos -gt 0) {
        # Keep only the part of the English name before the first comma.
        $languageName = $languageName.Substring(0, $commaPos)
    }
    $languageNamesToCodes.Add($languageName, $language) # Add English name and its corresponding language

    $OutputEncoding = [console]::InputEncoding = [console]::OutputEncoding = New-Object System.Text.UTF8Encoding
    Set-LampLanguage -languageId $languageId

    $word = $word -replace ' ','_'
    $wiktionaryParser = New-Object Tisane.Helper.EnglishWiktionaryParser -ArgumentList ($word, $languageNamesToCodes)
    $jparsedPage = $wiktionaryParser.ToJson()
    $parsedPage = $jparsedPage.ToString() | ConvertFrom-Json
    # NOTE(review): this picks the property named by *all* part-of-speech keys at once;
    # it presumably expects exactly one part of speech per language - confirm.
    $posObject = $parsedPage.$language.($parsedPage.$language.PSObject.Properties.Name)
    $declensionTable = $posObject.inflection

    # Hashtables compare by reference, so duplicates are tracked through a string key
    # built from the form text and its grammar.
    $seenForms = New-Object 'System.Collections.Generic.HashSet[string]'
    $a = 0
    foreach ($entry in $declensionTable) {
        # Create a new inflection form
        $inflectionForm = @{
            "text" = $entry.text
            "inflectionGrammar" = @()
            "isLemma" = $false
            "lexemeId" = $lexemeId
        }

        # Translate the tags into Tisane features
        for ($i = 0; $i -lt $tags.length; $i++) {
            if ($entry.categories -contains $tags[$i]) {
                $inflectionForm["inflectionGrammar"] += @{
                    "index" = $features[$i].Split('=')[0]
                    "value" = $features[$i].Split('=')[1]
                }
            }
        }
        if ([string]::IsNullOrEmpty($inflectionForm["text"])) {
            $inflectionForm["text"] = '_'
        }
        if ($inflectionForm.inflectionGrammar.Count -eq 0) {
            continue
        }

        # Check for duplicates
        $grammarKey = ($inflectionForm.inflectionGrammar | ForEach-Object { "$($_.index)=$($_.value)" }) -join ','
        if (-not $seenForms.Add("$($inflectionForm.text)|$grammarKey")) {
            continue
        }

        $pct = $a / $declensionTable.length * 100
        $a += 1
        $body = $inflectionForm | ConvertTo-Json
        $utf8Body = [System.Text.Encoding]::UTF8.GetBytes($body)
        # $(...) is required to interpolate a property; "$entry.text" prints the object plus ".text".
        Write-Progress -Activity "Writing Inflicted form" -Status "$pct% $($entry.text)" -PercentComplete $pct
        $r = Invoke-RestMethod -Uri "$global:lampHost/inflection" -Method PUT -Headers $global:authorizationToken -Body $utf8Body -ContentType "application/json" -UseBasicParsing
    }
}
# Import-InflectedFormsFromWiktionary -language ro -word da -lexemeId 4220343 -path C:\Tisane\ -tags 'present','preterite','future','gerund','past participle','imperfect','imperfect(ra)','indicative','conditional','subjunctive','singular','plural' -features '10=PRES','10=PAST','10=FUT','10=GER','10=PAP','27=IPI','27=IPI','20=IND','20=COND','20=SUB','2=1','2=MANY'
#EndRegion '.\Public\Import-InflectedFormsFromWiktionary.ps1' 92
#Region '.\Public\Import-Jargon.ps1' 0
function Import-Jargon {
<#
.SYNOPSIS
Imports jargon from a CSV file into the specified language.

.DESCRIPTION
The Import-Jargon function imports jargon from a CSV file into the specified language in the LAMP system.
Each line in the CSV file should contain a word, family ID, and optional extra features separated by commas.
The function sends requests to the LAMP API to import the jargon.
.PARAMETER csv
Specifies the path to the CSV file containing the jargon to import.
The CSV file should have the following format: "word,familyId,extraFeatures".
- word: The jargon word to import.
- familyId: The ID of the family to which the jargon belongs.
- extraFeatures (optional): Extra features or additional information about the jargon.
  Everything after the second comma is passed on as one comma-separated feature list.

.PARAMETER languageId
Specifies the ID of the language into which the jargon will be imported.
This ID should correspond to the target language in the LAMP system.

.PARAMETER source
Specifies the source of the jargon being imported (optional).
Use this parameter to specify the source of the jargon, such as the name of the file or system.
If not provided, the source will be empty.

.EXAMPLE
Import-Jargon -csv "C:\jargon.csv" -languageId "7"
Imports jargon from the "jargon.csv" file into the English language in the LAMP system.
#>
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Filename")][String] $csv,
        [Parameter(Mandatory = $true, HelpMessage="Language ID")][String] $languageId,
        [Parameter(Mandatory = $false, HelpMessage="Source")][String] $source
    )

    # BEGIN fix for Powershell bug: in some cases, the configuration files aren't read properly
    Add-Type -AssemblyName System.Configuration
    [Configuration.ConfigurationManager].GetField("s_initState", "NonPublic, Static").SetValue($null, 0)
    [Configuration.ConfigurationManager].GetField("s_configSystem", "NonPublic, Static").SetValue($null, $null)
    ([Configuration.ConfigurationManager].Assembly.GetTypes() | where {$_.FullName -eq "System.Configuration.ClientConfigPaths"}).GetField("s_current", "NonPublic, Static").SetValue($null, $null)
    [Configuration.ConfigurationManager]::ConnectionStrings[0].Name

    $OutputEncoding = [console]::InputEncoding = [console]::OutputEncoding = New-Object System.Text.UTF8Encoding

    # Jargon may be in any script: read the file explicitly as UTF-8.
    $fileLines = Get-Content $csv -Encoding UTF8
    Login-Lamp
    Set-LampLanguage -languageId $languageId

    $encodedSource = [System.Web.HttpUtility]::UrlEncode($source)
    foreach ($fileLine in $fileLines) {
        # Split into at most three parts so that several extra features stay one
        # comma-separated string instead of becoming an array (which would be joined with spaces).
        $word, $familyId, $extraFeatures = $fileLine.Split(",", 3)
        if ($word -and $familyId) {
            if ($extraFeatures) {
                $assign = [System.Web.HttpUtility]::UrlEncode("$extraFeatures,40=JAR")
            } else {
                $assign = [System.Web.HttpUtility]::UrlEncode("40=JAR")
            }
            "Importing $word -> Family $familyId"
            # The word is encoded so that '&', '#', '+' etc. cannot corrupt the query string.
            $encodedWord = [System.Web.HttpUtility]::UrlEncode($word)
            $response = Invoke-WebRequest -Uri "$global:lampHost/importFamilies?lexeme=$encodedWord&families=$familyId&behavior=complement&source=$encodedSource&orgId=$familyId&assign=$assign" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes('')) -UseBasicParsing
        }
    }

    [gc]::Collect()
    [gc]::WaitForPendingFinalizers()
}
#EndRegion '.\Public\Import-Jargon.ps1' 71
#Region '.\Public\Import-LabeledDataset.ps1' 0
function Import-LabeledDataset {
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="CSV filename to load")][String] $pathname,
        [Parameter(Mandatory = $true, HelpMessage="Corpora to load the dataset to")][Int] $corpora,
        [Parameter(Mandatory = $true, HelpMessage="Target language")][Int] $languageId,
        [Parameter(Mandatory = $true, HelpMessage="The column in the CSV where the text fragments are stored")][Int] $fragmentColumnIndex,
        [Parameter(Mandatory = $true, HelpMessage="The column in the CSV where the labels are stored")][Int] $labelColumnIndex,
        [Parameter(Mandatory = $false, HelpMessage="The column in the CSV where the values for the ref parameter in POST /testfragment are stored")][Int] $idColumnIndex,
        [Parameter(Mandatory = $false, HelpMessage="A column where the value for the auxLabel parameter in POST /gold is found")][Int] $auxLabelColumnIndex,
        [Parameter(Mandatory = $false, HelpMessage="A column where the value for the tags parameter in POST /gold is found")][Int] $tagsColumnIndex,
        [Parameter(Mandatory = $false, HelpMessage="Whether the fragments are to be tested daily")][Bool] $test,
        [Parameter(Mandatory = $false, HelpMessage="A value of the provider ID for POST /gold")][Int] $providerId,
        [Parameter(Mandatory = $false, HelpMessage="A value for the resultType parameter in POST /gold")][String] $resultType,
[Parameter(Mandatory = $true, HelpMessage="A value for the resultSubtype and attribute parameters in POST /gold")][String] $attributeName, [Parameter(Mandatory = $true, HelpMessage="Set of label values in the dataset being loaded")][AllowEmptyString()][string[]] $datasetLabelValues, [Parameter(Mandatory = $true, HelpMessage="Set of label values to map the original labels to")][string[]] $lampLabelValues ) # $data = Import-Csv -Path $pathname | ForEach-Object { ,@( $_.PSObject.Properties.Value ) } $data = Import-Csv -Path $pathname $columnNames = $data[0].PSObject.Properties.Name $column_mappings = New-Object System.Collections.Specialized.OrderedDictionary for ($i=0; $i -lt $columnNames.Count; $i++) { $column_mappings.Add($i,$columnNames[$i]) } Write-Host "Column Mappings" $column_mappings.GetEnumerator() | Format-Table @{Name='Index'; Expression={$_.Name}}, @{Name='Column'; Expression={$_.Value}} -AutoSize $labelMapping = @{} for ($i = 0; $i -lt $datasetLabelValues.Length; $i++) { $labelMapping[$datasetLabelValues[$i]] = $i -lt $lampLabelValues.Length ? $lampLabelValues[$i] : $null } Write-Host "Label Mappings" $labelMapping.GetEnumerator() | Format-Table @{Name='datasetLabelValues'; Expression={$_.Name}}, @{Name='lampLabelValues'; Expression={$_.Value}} -AutoSize $i = 0 try { # Check if max index is less than number of columns if ((@( $fragmentColumnIndex, $labelColumnIndex, $idColumnIndex ) | Measure-Object -Maximum).Maximum -ge $columnNames.Count) { throw "Index is greater than the number of columns in the data." 
} # Check if all values in label column are in the list $labelColumnValues = $data | ForEach-Object { $_.$($column_mappings[$labelColumnIndex]) } $invalidValues = $labelColumnValues | Where-Object { $_ -notin $datasetLabelValues } | Sort-Object -Unique if ($invalidValues) { Write-Warning "The following labels do not have mapping: $($invalidValues -join ', ')" } } catch { Write-Error $_.Exception.Message return } Login-Lamp $data = $data | ForEach-Object { ,@( $_.PSObject.Properties.Value ) } foreach ($row in $data){ $fragment_original = $row[$fragmentColumnIndex] $fragment = [System.Web.HttpUtility]::UrlEncode($fragment_original) # write-host $fragment $ref = [System.Web.HttpUtility]::UrlEncode($row[$idColumnIndex]) $response = Invoke-RestMethod -Uri "$global:lampHost/testfragment?corpora=$corpora&fragment=$fragment&test=$test&ref=$ref" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes('')) -UseBasicParsing # Write-Host $response if (-not $response.success){ Write-Warning "Could not save $fragment_original" continue $i += 1 } $pct = $i / $data.length * 100 $i += 1 Write-Progress -Activity "Saving test fragment" -Status "$pct% $fragment_original" -PercentComplete $pct ## dealing with gold $originalLabel = $row[$labelColumnIndex] if ($labelMapping.ContainsKey($originalLabel) -and $null -ne $labelMapping[$originalLabel]) { $translatedLabel = [System.Web.HttpUtility]::UrlEncode($labelMapping[$originalLabel]) $fragmentLength = $fragment.Length $fragmentId = $response.id $tagsColumnIndex = [System.Web.HttpUtility]::UrlEncode($tagsColumnIndex) $auxLabelColumnIndex = [System.Web.HttpUtility]::UrlEncode($auxLabelColumnIndex) $temp = Invoke-RestMethod -Uri "$global:lampHost/gold?corpora=$corpora&fragment=$fragmentId&provider=$providerId&type=$resultType&subtype=$attributeName&attribute=$attributeName&offset=0&length=$fragmentLength&label=$translatedLabel&aux=$auxLabelColumnIndex&tags=$tagsColumnIndex&" -Method POST -Headers 
$global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes('')) -UseBasicParsing
            Write-Progress -Activity "Saving gold standard" -Status "$pct% $fragment_original" -PercentComplete $pct
        }
    }
}
# Import-LabeledDataset -pathname ro-fb-offense.csv -corpora 840 -languageId 40 -fragmentColumnIndex 2 -labelColumnIndex 3 -idColumnIndex 0 -test:1 -providerId 1 -resultType 'information extraction' -attributeName abuse -datasetLabelValues 'ABUSE','PROFANITY','INSULT','OTHER' -lampLabelValues 'personal_attack','profanity'
#EndRegion '.\Public\Import-LabeledDataset.ps1' 86
#Region '.\Public\Import-LexemesFromWiktionary.ps1' 0
## =============================================================================
##
## This script's purpose is to import new lexemes from Wiktionary
##
## =============================================================================

function Import-LexemesFromWiktionary{
    <#
    .SYNOPSIS
    Imports lexemes from an English Wiktionary category into LAMP.

    .DESCRIPTION
    Pages through the members of the given Wiktionary category (500 titles per request),
    asks the LAMP /wiktionary endpoint for the parsed article of every title, and posts one
    /import request per interpretation that has English reference words.
    Progress lines, the list of distinct article tags, and the article count are written to the output.

    .PARAMETER language
    ISO code of the language to import; it is looked up in the LAMP /languages list.

    .PARAMETER pos
    Part of speech as named in the parsed article (e.g. "Noun"). It is also mapped to the
    feature value sent as featureValue (feature list 1); phrase-like values send no feature.

    .PARAMETER pruneListId
    ID of the prune feature list. (Optional)

    .PARAMETER pruneValue
    Value of the prune feature. (Optional)

    .PARAMETER listId
    ID of the feature list used for -values entries that are not already in "list=value" form. (Optional)

    .PARAMETER labels
    Array of Wiktionary tags; the position of an article's tag selects the entry of -values. (Optional)

    .PARAMETER values
    Array of feature values parallel to -labels. (Optional)

    .PARAMETER category
    Wiktionary category to read lexemes from.

    .PARAMETER complement
    Sent to /import as complement=1 when true, complement=0 otherwise. (Optional)

    .PARAMETER validatingRegex
    Titles that do not match this regular expression are skipped. (Optional)

    .EXAMPLE
    Import-LexemesFromWiktionary -language "en" -pos "Noun" -category "English_nouns" -complement $true
    #>
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Language code: ")][String] $language,
        # [Parameter(Mandatory = $true, HelpMessage="Path: ")][String] $path,
        [Parameter(Mandatory = $true, HelpMessage="Part of speech: ")][String] $pos,
        [Parameter(Mandatory = $false, HelpMessage="Prune Feature List ID: ")][int] $pruneListId,
        [Parameter(Mandatory = $false, HelpMessage="Prune Feature Value: ")][String] $pruneValue,
        [Parameter(Mandatory = $false, HelpMessage="Feature List ID: ")][int] $listId,
        [Parameter(Mandatory = $false, HelpMessage="Wiktionary labels: ")][String[]] $labels,
        [Parameter(Mandatory = $false, HelpMessage="Feature values: ")][String[]] $values, # an array of Tisane feature values
        [Parameter(Mandatory = $true, HelpMessage="Wiktionary category: ")][String] $category,
        [Parameter(Mandatory = $false, HelpMessage="Complement existing lexemes: ")][bool] $complement,
        [Parameter(Mandatory = $false, HelpMessage="Regex to verify: ")][String] $validatingRegex
    )

    # NOTE(review): this class is not referenced anywhere in the function; kept as-is.
    class Feature {
        [string]$index
        [string]$value
        [string]$type
        Feature([string]$index, [string]$value, [string]$type) {
            $this.index = $index
            $this.value = $value
            $this.type = $type
        }
    }

    # [Reflection.Assembly]::LoadFrom($path + "Tisane.Runtime.dll")
    Login-Lamp
    $languageJSON = Invoke-RestMethod -Uri "$global:lampHost/languages" -Method GET -UseBasicParsing -Headers $global:authorizationToken
    $languageNamesToCodes = @{}
    $languageID = 0
    foreach ($languageEntry in $languageJSON) {
        if ($languageEntry.ISOCode -eq $language) {
            $languageID = $languageEntry.id
            $languageName = [string]$languageEntry.englishName
            $commaPos = $languageName.IndexOf(',')
            if ($commaPos -gt 0) {
                $languageName = $languageName.Substring(0, $commaPos)
            }
            # Indexer instead of Add(): a repeated name must not throw.
            $languageNamesToCodes[$languageName] = $languageEntry.ISOCode
        }
    }
    if (-not $languageID) {
        # Without a match the import would run against language 0.
        Write-Error "Language '$language' was not found in the LAMP language list."
        return
    }
    # $languageNamesToCodes.Add('Norwegian Bokmål', 'no')
    # $languageNamesToCodes.Add('Norwegian Nynorsk', 'no')
    $OutputEncoding = [console]::InputEncoding = [console]::OutputEncoding = New-Object System.Text.UTF8Encoding
    Set-LampLanguage -languageId $languageId

    $posFeatureList = '1'
    $tisanePOSValue = $pos.ToUpper()
    switch ($tisanePOSValue) {
        'ADJECTIVE' { $tisanePOSValue = 'ADJ' }
        'ADVERB' { $tisanePOSValue = 'ADV' }
        'PREPOSITION' { $tisanePOSValue = 'PREP' }
        'POSTPOSITION' { $tisanePOSValue = 'PREP' }
        'CONJUNCTION' { $tisanePOSValue = 'CJ' }
        'INTERJECTION' { $tisanePOSValue = 'INTJ' }
        'PHRASE' { $tisanePOSValue = '' }
        'PROVERB' { $tisanePOSValue = 'FORE' }
        'IDIOM' { $tisanePOSValue = '' }
        'NUMERAL' { $tisanePOSValue = 'NOUN' }
        'PREPOSITIONAL PHRASE' { $tisanePOSValue = '' }
        'PREPOSITIONAL_PHRASE' { $tisanePOSValue = '' }
        'POSTPOSITIONAL PHRASE' { $tisanePOSValue = '' }
        'POSTPOSITIONAL_PHRASE' { $tisanePOSValue = '' }
    }
    if ($tisanePOSValue.Length -lt 1) {
        $posFeatureList = '0'
    }

    $tagsFound = @()
    $articleCount = 0
    $sComplement = '0'
    if ($complement) {
        $sComplement = '1'
    }
    $encodedPruneValue = [System.Web.HttpUtility]::UrlEncode($pruneValue)

    # Interpretation flags that each contribute one fixed "list=value" assignment, in emission order.
    $flagAssignments = [ordered]@{
        colloquial  = '40=TALK'
        formal      = '40=OFCL'
        dialect     = '43=DIAL'
        slang       = '40=JAR'
        obscure     = '51=OBSC'
        nonstandard = '49=ERR'
        humorous    = '51=HUM'
        offensive   = '47=DIRT'
        derogatory  = '47=DER'
        euphemism   = '47=EUPH'
        familiar    = '47=FAM'
        obsolete    = '41=OBSL'
        polite      = '47=POS'
        childish    = '40=CHLD'
        military    = '42=MILI'
    }

    # Initialised explicitly so a $bookmark variable in a calling scope cannot leak into the first request.
    $bookmark = $null
    do {
        $wikidataUrl = "https://en.wiktionary.org/w/api.php?action=query&generator=categorymembers&format=json&gcmtitle=Category:$category&prop=pageprops&gcmlimit=500&gcmcontinue=$bookmark"
        "Loading entries from $wikidataUrl"
        $wikidataResponse = Invoke-WebRequest -Uri $wikidataUrl -Method GET -UseBasicParsing
        $wikidataResponse = $wikidataResponse.Content # -replace '(?<=pages["]:{["])[^"]+', 'results'
        $listOfInstances = ConvertFrom-Json -InputObject $wikidataResponse
        $pages = @($listOfInstances.query.pages.PSObject.Properties | Where-Object { $null -ne $_ })
        $totalPages = $pages.Count
        Write-Host "Found [$totalPages] pages" -ForegroundColor Green
        if ($listOfInstances.continue) {
            $bookmark = $listOfInstances.continue.gcmcontinue
        } else {
            $bookmark = $null
        }
        #"Bookmark: $bookmark"
        $i = 0
        foreach ($page in $pages) {
            $word = $page.Value.title
            $word
            if (-not $word -or ($validatingRegex -and $word -notmatch $validatingRegex)) {
                continue
            }
            $pct = $i/$totalPages * 100
            Write-Progress -Activity "Processing $word" -Status "$pct% $word" -PercentComplete $pct
            $i += 1
            $articleCount += 1
            $pageId = $page.Value.pageid
            $normalizedWord = $word -replace ' ', '_'
            #"PageId=$pageId word=$normalizedWord"
            $apiUrl = "$global:lamphost/wiktionary?lemma=$([System.Web.HttpUtility]::UrlEncode($normalizedWord))&language=$language"
            try {
                $response = Invoke-RestMethod -Uri $apiUrl -Method GET -Headers $global:authorizationToken -UseBasicParsing
                $article = $response.$language.$pos
                if (-not $article) {
                    continue
                }

                if ($language -like 'zh-*') {
                    # The original read these fields from $fullDoc, which was never assigned, so every
                    # zh-* import threw. NOTE(review): assumes the /wiktionary response exposes
                    # 'simplified' / 'traditional' at its top level - confirm against the API.
                    $orgWord = $word
                    switch ($language) {
                        'zh-CN' { $word = ([string]$response.simplified) -replace '"', '' }
                        'zh-TW' { $word = ([string]$response.traditional) -replace '"', '' }
                    }
                    if (-not $word) {
                        Write-Warning "No script-specific form returned for $orgWord; skipping"
                    }
                    if ($word.Length -gt $orgWord.Length) {
                        Write-Host "Chinese script adjustment inconsistent: $word / $orgWord" -ForegroundColor Red
                        $word = ''
                    }
                }

                $tag = $article.tag
                foreach ($seenTag in @($tag)) {
                    if ($seenTag -and $tagsFound -notcontains $seenTag) {
                        $tagsFound += $seenTag
                    }
                }

                # Position of the article tag in -labels selects the matching entry of -values.
                $indexInLabelArray = -1
                if ($labels) {
                    $indexInLabelArray = [array]::IndexOf($labels, $tag)
                }

                $example = $article.example
                if ($example) {
                    $exampleTranslation = $article.example_translation
                    $example = "E.g. '$example'"
                    if ($exampleTranslation) {
                        $example = "$example ('$exampleTranslation')"
                    }
                }

                foreach ($interpretation in $article.interpretations) {
                    $english = $interpretation.english
                    if (-not $english) {
                        continue
                    }
                    $referenceWords = $english -join ","

                    # Every entry is URL-encoded on its own; entries are joined with a literal comma.
                    $assignValues = @()
                    if ($indexInLabelArray -gt -1 -and $values -and $indexInLabelArray -lt $values.Length) {
                        $labelValue = $values[$indexInLabelArray]
                        if ($labelValue -like '*=*') {
                            $assignValues += [System.Web.HttpUtility]::UrlEncode($labelValue)
                        } else {
                            $assignValues += [System.Web.HttpUtility]::UrlEncode("$listId=" + $labelValue)
                        }
                    }
                    if ($pos -eq 'Noun' -and ($tag -eq 'f' -or $tag -eq 'm')) {
                        # $tag itself is left untouched so the case-sensitive label lookup keeps working.
                        $assignValues += [System.Web.HttpUtility]::UrlEncode("5=" + $tag.ToUpper())
                    }
                    if ($interpretation.figurative -or $interpretation.idiomatic) {
                        $assignValues += [System.Web.HttpUtility]::UrlEncode("51=FS")
                    }
                    if ($interpretation.literary) {
                        $assignValues += [System.Web.HttpUtility]::UrlEncode("40=BOOK")
                    }
                    if ($interpretation.aspect -or $tag -eq 'impf' -or $tag -eq 'pf') {
                        $aspect = $interpretation.aspect
                        if (-not($aspect)) {
                            $aspect = $tag
                        }
                        switch ($aspect) {
                            'impf' { $assignValues += [System.Web.HttpUtility]::UrlEncode("27=IPI") }
                            'pf' { $assignValues += [System.Web.HttpUtility]::UrlEncode("27=PI") }
                        }
                    }
                    foreach ($flag in $flagAssignments.Keys) {
                        if ($interpretation.$flag) {
                            $assignValues += [System.Web.HttpUtility]::UrlEncode($flagAssignments[$flag])
                        }
                    }

                    # "assign=" is added exactly once (the original could emit "assign=assign=...").
                    $queryParts = @()
                    if ($assignValues.Count -gt 0) {
                        $queryParts += "assign=" + ($assignValues -join ',')
                    }
                    if ($pruneListId -gt 0) {
                        $queryParts += "featureList2=$pruneListId"
                        # NOTE(review): "featurValue2" is kept byte-for-byte from the original; it looks like a
                        # typo for "featureValue2" - confirm which name the /import endpoint reads.
                        $queryParts += "featurValue2=$encodedPruneValue"
                    }
                    $extraQuery = $queryParts -join '&'
                    $assignDescription = ""
                    $extraQuerySuffix = ""
                    if ($extraQuery) {
                        $assignDescription = "[ASSIGNING: $extraQuery]"
                        $extraQuerySuffix = "&$extraQuery"
                    }

                    if ($referenceWords -and $word) {
                        Write-Host "Importing $word -> $referenceWords $tag $assignDescription $example"
                        $encodedWord = [System.Web.HttpUtility]::UrlEncode($word)
                        $encodedReference = [System.Web.HttpUtility]::UrlEncode($referenceWords)
                        $encodedNote = [System.Web.HttpUtility]::UrlEncode($example)
                        $importUri = "$global:lampHost/import?lexeme=$encodedWord&reference=$encodedReference&proper=0&hypernym=0&featureList=$posFeatureList&featureValue=$tisanePOSValue&source=wiktionary&orgId=$pageId$extraQuerySuffix&complement=$sComplement&note=$encodedNote"
                        $importResponse = Invoke-WebRequest -Uri $importUri -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes('')) -UseBasicParsing
                    }
                }
            }
            catch {
                Write-Host "IMPORT FAILED FOR $word $_" -ForegroundColor Red
            }
        }
    } while ($bookmark)

    "Tags encountered:"
    $tagsFound
    "Number of articles: $articleCount"
    # cleanup:
    #"DELETE l FROM Lexemes l WHERE l.SourceType = 'wiktionary' AND l.Note LIKE '%Generated by matching: proper=0%' AND l.LastUpdatedBy = 'bulkimport'
    # "DELETE f FROM Lexemes l INNER JOIN Features f ON f.ConnectionType = 1 AND f.EntityId = l.Id AND f.FeatureListId IN (1, 3, 4, 7, 8, 9, 13, 22, 23, 24, 26, 32, 33, 34) WHERE l.SourceType = 'wiktionary'"
    # "DELETE f FROM Lexemes l INNER JOIN Features f ON f.ConnectionType = 1 AND f.EntityId = l.Id AND f.FeatureListId = 2 AND f.FeatureValue = '1' WHERE l.SourceType = 'wiktionary'"
    # "DELETE f FROM Lexemes l INNER JOIN Features f ON f.ConnectionType = 1 AND f.EntityId = l.Id AND f.FeatureListId = 5 INNER JOIN LexemeFamilies lf ON lf.LexemeId = l.Id INNER JOIN Features pos ON pos.ConnectionType = 2 AND pos.EntityID = lf.FamilyID AND pos.FeatureListId = 1 AND pos.FeatureValue <> 'NOUN' WHERE l.SourceType = 'wiktionary'"
}
# Import-LexemesFromWiktionary -pos Numeral -category Telugu_numerals -language te
#EndRegion '.\Public\Import-LexemesFromWiktionary.ps1' 326
#Region '.\Public\Import-LexemesWithFamily.ps1' 0
function Import-LexemesWithFamily {
    <#
    .SYNOPSIS
    Imports lexemes with family information from a CSV file into a specified language.

    .DESCRIPTION
    The Import-LexemesWithFamily function imports lexemes with family information from a CSV file into a specified language in the LAMP system. Each line in the CSV file should contain the word, family ID, and optional extra features separated by commas. The function logs in to LAMP, sets the language, and then sends a request to import each lexeme with its associated family and extra features.

    .PARAMETER csv
    Specifies the path to the CSV file containing lexeme and family information.

    .PARAMETER languageId
    Specifies the ID of the language in which the lexemes should be imported.

    .PARAMETER source
    Specifies the source of the lexemes (optional).

    .EXAMPLE
    Import-LexemesWithFamily -csv "C:\lexemes.csv" -languageId "en-US" -source "Dictionary"
    Imports lexemes from the "lexemes.csv" file into the English (United States) language with the source "Dictionary".
#>
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Filename")][String] $csv,
        [Parameter(Mandatory = $true, HelpMessage="Language ID")][String] $languageId,
        [Parameter(Mandatory = $false, HelpMessage="Source")][String] $source
    )
    $OutputEncoding = [console]::InputEncoding = [console]::OutputEncoding = New-Object System.Text.UTF8Encoding
    # @() keeps an empty or one-line file an array, so the loop never runs on $null.
    $fileLines = @(Get-Content $csv)
    Login-Lamp
    Set-LampLanguage -languageId $languageId
    $encodedSource = [System.Web.HttpUtility]::UrlEncode($source)
    foreach ($line in $fileLines) {
        # Split into at most three parts: everything after the second comma is the extra-feature
        # list and must keep its own commas (a plain Split turned it into an array).
        $word, $familyId, $extraFeatures = $line -split ',', 3
        if ($word) { $word = $word.Trim() }
        if ($familyId) { $familyId = $familyId.Trim() }
        if ($word -and $familyId) {
            $assign = $extraFeatures
            if ($extraFeatures) {
                $assign = [System.Web.HttpUtility]::UrlEncode($extraFeatures)
            }
            "Importing $word -> Family $familyId $extraFeatures"
            $encodedWord = [System.Web.HttpUtility]::UrlEncode($word)
            $encodedFamily = [System.Web.HttpUtility]::UrlEncode($familyId)
            $response = Invoke-WebRequest -Uri "$global:lampHost/importFamilies?lexeme=$encodedWord&families=$encodedFamily&behavior=complement&source=$encodedSource&orgId=$encodedFamily&assign=$assign" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes('')) -UseBasicParsing
        }
    }
    [gc]::Collect()
    [gc]::WaitForPendingFinalizers()
}
#EndRegion '.\Public\Import-LexemesWithFamily.ps1' 53
#Region '.\Public\Import-LinkedLexemes.ps1' 0
function Import-LinkedLexemes {
    <#
    .SYNOPSIS
    Imports linked lexemes from a translated CSV file into the Lamp system.

    .DESCRIPTION
    Reads a CSV file of machine-translated lexemes and posts one /import request per row,
    linking the translated word to the families of the English lexeme given in the Id column
    (sameFamiliesAs) and passing the row's styleFeatures as assignments.
    For every target language except "de", a translation that equals the English lemma apart
    from letter case is replaced by the English lemma's own spelling.

    .PARAMETER language
    Specifies the target language code. The language code should match the ISOCode used in the Lamp system.

    .PARAMETER path
    Specifies the path to the translated CSV file. The CSV file should contain the necessary columns: Id, MainLemma, styleFeatures, and translated.

    .EXAMPLE
    Import-LinkedLexemes -language "fr" -path "C:\Translations.csv"
    Imports the linked lexemes from the "Translations.csv" file into the Lamp system for the French language.
    #>
    [CmdletBinding()]
    Param (
        [Parameter(Mandatory = $true, HelpMessage="Target language code")][String] $language,
        [Parameter(Mandatory = $true, HelpMessage="Path: to translated CSV file")][string] $path
    )
    Login-Lamp
    # Same authenticated call the other import functions use for /languages.
    $languageJSON = Invoke-RestMethod -Uri "$global:lampHost/languages" -Method GET -UseBasicParsing -Headers $global:authorizationToken
    $languageId = 0
    foreach ($languageEntry in $languageJSON) {
        if ($languageEntry.ISOCode -eq $language) {
            $languageId = $languageEntry.id
        }
    }
    if (-not $languageId) {
        # Without a match the import would run against language 0.
        Write-Host "Error : language '$language' not found" -ForegroundColor DarkRed
        return
    }
    Set-LampLanguage -languageId $languageId

    # @() keeps a one-row file an array, so indexing and .Length behave the same for any row count.
    $csv = @(Import-Csv $path)
    $columnNames = $csv[0].psobject.properties.name
    if (("Id" -in $columnNames) -And ("MainLemma" -in $columnNames) -And ("styleFeatures" -in $columnNames) -And ("translated" -in $columnNames))
    {
        $i = 0
        foreach ($item in $csv)
        {
            $word = $($item.translated)
            $englishLexemeId = $($item.Id)
            $styleFeatures = $($item.styleFeatures)
            $englishLexeme = $($item.MainLemma)
            # -eq compares case-insensitively (-ceq would be case-sensitive): a translation that only
            # differs in case takes the English lemma's casing, except for German.
            if (($language -ne "de") -And ($word -eq $englishLexeme)) {
                $word = $englishLexeme
            }
            $note = "Created by machine-translating '$englishLexeme'"
            $encodedWord = [System.Web.HttpUtility]::UrlEncode($word)
            $encodedId = [System.Web.HttpUtility]::UrlEncode($englishLexemeId)
            $encodedFeatures = [System.Web.HttpUtility]::UrlEncode($styleFeatures)
            $encodedNote = [System.Web.HttpUtility]::UrlEncode($note)
            $response = Invoke-WebRequest -Uri "$global:lampHost/import?lexeme=$encodedWord&sameFamiliesAs=$encodedId&assign=$encodedFeatures&source=mt&orgId=$encodedId&note=$encodedNote" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes('')) -UseBasicParsing
            $pct = $i / $csv.length * 100
            Write-Progress -Activity "Writing lexeme" -Status "$pct% $word" -PercentComplete $pct
            $i += 1
        }
    }
    Else {
        Write-Host "Error : required columns not found" -ForegroundColor DarkRed
        Write-Host "Ensure : you have following columns in your csv " -ForegroundColor DarkRed
        Write-Host "Id MainLemma styleFeatures translated" -ForegroundColor DarkRed
    }
}
###################################################################################################################################################
#                                                                  TEST SCRIPT                                                                    #
##################################################################################################################################################
# $csv = Import-Csv "C:\D\ML_tisane\1109\google sheet tranlatinos\en-esTranlated.csv"
# $columnNames=$csv[0].psobject.properties.name #for getting headers of the CSV fileee
# "Id" -in $columnNames
# if (("Id" -in $columnNames) -And ("MainLemma" -in $columnNames) -And ("styleFeatures" -in $columnNames) -And ("translated" -in $columnNames))
# {
#     $i=0
#     foreach($item in $csv)
#     {
#         $pct = $i / $csv.length * 100
#         Write-Progress -Activity "Writing lexeme" -Status "$pct% $item" -PercentComplete $pct
#         $word=$($item.translated)
#         $englishLexemeId=$($item.Id)
#         $styleFeatures=$($item.styleFeatures)
#         $englishLexeme=$($item.MainLemma)
#         $note = "Created by machine-translating '$englishLexeme'"
#         # Write-Host "$word and $englishLexemeId and $styleFeatures"
#     }
# }
# Else {
#     Write-Host "Error : required columns not found" -BackgroundColor DarkRed
#     Write-Host "Ensure : you have following columns in your csv " -BackgroundColor DarkRed
#     Write-Host "Id MainLemma styleFeatures translated" -BackgroundColor DarkRed
#     exit
# }
#EndRegion '.\Public\Import-LinkedLexemes.ps1' 94
#Region '.\Public\Import-MonolingualLog.ps1' 0
function Import-MonolingualLog{
    <#
    .SYNOPSIS
    Imports a monolingual log file.

    .DESCRIPTION
    Posts every line of the file to the LAMP /testfragment endpoint of the given corpora
    (test=true, ref=cht) and writes the number of lines that were accepted.
    A line whose request fails is reported as a warning and does not stop the import.

    .PARAMETER pathname
    Specifies the path to the monolingual log file.

    .PARAMETER languageId
    Specifies the language ID for the log file.

    .PARAMETER corpora
    Specifies the corpora ID where the log file will be imported.

    .EXAMPLE
    Import-MonolingualLog -pathname "C:\Logs\log.txt" -languageId "7" -corpora "123"
    #>
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Filename")][String] $pathname,
        [Parameter(Mandatory = $true, HelpMessage="Language ID")][String] $languageId,
        [Parameter(Mandatory = $true, HelpMessage="Corpora ID")][String] $corpora
    )
    # @() keeps the result an array: an empty file yields zero iterations (instead of one on $null)
    # and a one-line file reports a line count, not the string's character count.
    $fileLines = @(Get-Content $pathname -ErrorAction Stop)
    Login-Lamp
    Set-LampLanguage -languageId $languageId
    $i = 0
    $imported = 0
    foreach ($originalLine in $fileLines) {
        $ln = [System.Web.HttpUtility]::UrlEncode($originalLine)
        $pct = $i / $fileLines.length * 100
        $i += 1
        Write-Progress -Activity "Importing" -Status "$pct% $originalLine" -PercentComplete $pct
        try {
            $response = Invoke-WebRequest -Uri "$global:lampHost/testfragment?corpora=$corpora&fragment=$ln&test=true&ref=cht" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes('')) -UseBasicParsing
            $imported += 1
        }
        catch {
            Write-Warning "Could not import line $i : $_"
        }
    }
    "$imported lines imported"
    [gc]::Collect()
    [gc]::WaitForPendingFinalizers()
}
#EndRegion '.\Public\Import-MonolingualLog.ps1' 49
#Region '.\Public\Import-WordNet.ps1' 0
function Import-WordNet{
    [CmdletBinding()]
    <#
    .SYNOPSIS
    Imports WordNet data into the system.

    .DESCRIPTION
    The Import-WordNet function imports WordNet data into the system. It processes an XML file containing WordNet synsets and imports them into the target system.

    .PARAMETER inputXml
    The path to the XML file containing WordNet synsets.

    .PARAMETER languageId
    The language ID of the WordNet data to import.

    .PARAMETER source
    The source of the WordNet data.
(Optional)

    .EXAMPLE
    Import-WordNet -inputXml "C:\WordNetData.xml" -languageId "7" -source "WordNet"
    #>
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Filename")][String] $inputXml,
        [Parameter(Mandatory = $true, HelpMessage="Language ID")][String] $languageId,
        [Parameter(Mandatory = $false, HelpMessage="Source")][String] $source
    )
    [xml]$wnXml = Get-Content $inputXml -Raw
    Login-Lamp
    Set-LampLanguage -languageId $languageId
    $encodedSource = [System.Web.HttpUtility]::UrlEncode($source)
    # One /importFamilies request per <word> of every <synset>; the synset's "family" and "id"
    # values are sent as the target family and the original ID.
    foreach ($synset in $wnXml.list.synset) {
        $familyId = $synset.family
        $sourceId = $synset.id
        #$localization = Invoke-RestMethod -Uri "$productionHost/familyLocalization?id=$familyId" -Method GET -Headers $authorizationToken
        #$localization.definition = $_.definition
        #$localization.description = $_.word[0]
        #$localizationJson = ConvertTo-Json -InputObject $localization
        #"$familyId : $localizationJson"
        #Invoke-WebRequest -Uri "$productionHost/familyLocalization" -Method PUT -Headers $authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes($localizationJson))
        foreach ($word in $synset.word) {
            # A synset without words must not post an empty lexeme.
            if (-not $word) {
                continue
            }
            "Importing $word -> $familyId"
            $encodedWord = [System.Web.HttpUtility]::UrlEncode([string]$word)
            $response = Invoke-WebRequest -Uri "$global:lampHost/importFamilies?lexeme=$encodedWord&families=$familyId&behavior=complement&source=$encodedSource&orgId=$sourceId" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes('')) -UseBasicParsing
        }
    }
    [gc]::Collect()
    [gc]::WaitForPendingFinalizers()
}
#EndRegion '.\Public\Import-WordNet.ps1' 55
#Region '.\Public\Invoke-LampMethod.ps1' 0
function Invoke-LampMethod {
    <#
    .SYNOPSIS
    Calls a LAMP endpoint once, or once per family ID, lexeme ID, or Wiktionary page title.

    .DESCRIPTION
    Without a range parameter the URL is invoked once and the response (or only the fields
    named in -fieldsToShow) is shown.
    With -familyRange or -lexemeRange the IDs are fetched from the LAMP /ids endpoint and the
    URL, which must contain the placeholder {0}, is invoked once per ID.
    With -wiktionaryRange the members of that English Wiktionary category are paged through
    and the URL is invoked once per page title (spaces replaced by underscores).
    Failures that prevent the run from starting write a message and emit 1.

    .PARAMETER url
    Absolute URL, or a path relative to $global:lampHost. Must contain {0} when a range is used.

    .PARAMETER languageId
    Language ID passed to Set-LampLanguage before any call.

    .PARAMETER familyRange
    Range value sent to /ids?table=Families. (Optional)

    .PARAMETER lexemeRange
    Range value sent to /ids?table=Lexemes. (Optional)

    .PARAMETER wiktionaryRange
    Wiktionary category whose member titles replace {0}. (Optional)

    .PARAMETER methodType
    HTTP method for the LAMP call. Defaults to POST.

    .PARAMETER fieldsToShow
    Response properties to print; when omitted the whole response is shown. (Optional)

    .PARAMETER afterFrom
    Sort-key prefix at which the first Wiktionary request starts. (Optional)

    .PARAMETER validatingRegex
    Wiktionary titles that do not match are skipped. (Optional)

    .PARAMETER max
    Maximum number of IDs to fetch; sent as max= to /ids. Defaults to 0.

    .EXAMPLE
    Invoke-LampMethod -url "lexicon?type=id&arg={0}" -languageId 36 -lexemeRange 1643 -methodType "GET" -fieldsToShow "id"
    #>
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="url ")][String] $url,
        [Parameter(Mandatory = $true, HelpMessage="Language ID")][int] $languageId,
        [Parameter(Mandatory = $false, HelpMessage="familyRange ")][int] $familyRange,
        [Parameter(Mandatory = $false, HelpMessage="lexemeRange ")][int] $lexemeRange,
        [Parameter(Mandatory = $false, HelpMessage="Wiktionary category: ")][String] $wiktionaryRange,
        [Parameter(Mandatory = $false, Helpmessage="Type of request (GET or POST) default POST")][string]$methodType="POST",
        [Parameter(Mandatory=$false, HelpMessage="Array of fields to display")][String[]]$fieldsToShow,
        [Parameter(Mandatory=$false, HelpMessage="Strat at this point for wiktionary range")][String]$afterFrom,
        [Parameter(Mandatory = $false, HelpMessage="Regex to verify: ")][String] $validatingRegex,
        [Parameter(Mandatory=$false, HelpMessage="Maximum number of records to fetch (Default = 0)")][int]$max = 0
    )

    # Renders the selected fields as " v1 v2 ..." (the leading space is part of the existing output format).
    function Format-SelectedFields {
        param($response, [String[]]$fields)
        $filtered = $response | Select-Object -Property $fields
        $text = ""
        foreach ($field in $fields) {
            $value = $filtered.$field
            $text += " $value"
        }
        return $text
    }

    # Invokes the URL template once per ID and prints "[<label> <id>] ..." for each response.
    # Reads $url, $methodType and $fieldsToShow from the enclosing function's scope.
    function Invoke-MethodForEachId {
        param([object[]]$ids, [string]$label)
        $total = $ids.Length
        $index = 0
        foreach ($id in $ids) {
            $pct = $index/$total * 100
            Write-Progress -Activity "Processing $id" -Status "$pct% $id" -PercentComplete $pct
            $index += 1
            $finalUrl = $url -f $id
            try {
                $lampMethodResponse = Invoke-RestMethod -Uri $finalUrl -Method $methodType -ContentType 'application/json; charset=utf-8' -Headers $global:authorizationToken -UseBasicParsing
                if ($fieldsToShow.Length -ne 0) {
                    $output = Format-SelectedFields $lampMethodResponse $fieldsToShow
                    Write-Host "[$label $id] $output"
                } else {
                    $filteredResponse = $lampMethodResponse | Select-Object -Property *
                    Write-Host "[$label $id]" $filteredResponse
                }
            } catch {
                Write-Host "Invocation failed: $_" -ForegroundColor Red
            }
        }
    }

    Login-Lamp
    Set-LampLanguage -languageId $languageId
    if (-not ($url -match '^https?://')) {
        $url = "$global:lampHost/$url"
    }

    # --- Single call: no range given ---
    if (-not ($familyRange -or $lexemeRange -or $wiktionaryRange)) {
        try {
            $lampMethodResponse = Invoke-RestMethod -Uri $url -Method $methodType -ContentType 'application/json; charset=utf-8' -Headers $global:authorizationToken -UseBasicParsing
        } catch {
            Write-Host "Invocation failed: $_" -ForegroundColor Red
            return 1
        }
        if ($fieldsToShow.Length -eq 0) {
            $lampMethodResponse
        } else {
            $output = Format-SelectedFields $lampMethodResponse $fieldsToShow
            Write-Host $output
        }
        return
    }

    # --- Ranged calls: the URL is a format template ---
    if (-not ($url -match "\{0\}")) {
        Write-Host "url [ $url ] does not contain {0}, recheck and run code again " -ForegroundColor RED
        Write-Host "Exiting" -ForegroundColor RED
        return 1
    }

    if ($familyRange) {
        try {
            # $familyResponse = Invoke-RestMethod -Uri "$global:lampHost/knowledgeGraph?arg=$familyRange&type=range&max=$max&basic=true" -Method GET -ContentType 'application/json; charset=utf-8'-Headers $global:authorizationToken -UseBasicParsing
            $familyResponse = Invoke-RestMethod -Uri "$global:lampHost/ids?table=Families&range=$familyRange&max=$max&basic=true" -Method GET -ContentType 'application/json; charset=utf-8' -Headers $global:authorizationToken -UseBasicParsing
        } catch {
            Write-Host "Invocation failed: $_" -ForegroundColor Red
            return 1
        }
        $familyIds = @($familyResponse | Where-Object { $null -ne $_ })
        $totalFamilies = $familyIds.Length
        Write-Host "Found $totalFamilies families in the given range"
        Invoke-MethodForEachId -ids $familyIds -label 'Family'
    }
    elseif ($lexemeRange) {
        try {
            # $lexemeResponse = Invoke-RestMethod -Uri "$global:lampHost/lexicon?arg=$lexemeRange&type=range&max=$max" -Method GET -ContentType 'application/json; charset=utf-8'-Headers $global:authorizationToken -UseBasicParsing
            $lexemeResponse = Invoke-RestMethod -Uri "$global:lampHost/ids?table=Lexemes&range=$lexemeRange&max=$max" -Method GET -ContentType 'application/json; charset=utf-8' -Headers $global:authorizationToken -UseBasicParsing
        } catch {
            Write-Host "Invocation failed: $_" -ForegroundColor Red
            return 1
        }
        $lexemeIds = @($lexemeResponse | Where-Object { $null -ne $_ })
        $totalLexemes = $lexemeIds.Length
        Write-Host "Found $totalLexemes lexmes in the given range"
        Invoke-MethodForEachId -ids $lexemeIds -label 'Lexeme'
    }
    elseif ($wiktionaryRange) {
        # Initialised explicitly so a $bookmark variable in a calling scope cannot leak into the first request.
        $bookmark = $null
        $normalizedWord = $null
        do {
            if (-not $bookmark -and $afterFrom) {
                # First request: Use gcmstartsortkeyprefix to start from a specific point
                $wikidataUrl = "https://en.wiktionary.org/w/api.php?action=query&format=json&gcmstartsortkeyprefix=$([System.Web.HttpUtility]::UrlEncode($afterFrom))&gcmlimit=500&gcmtitle=Category:$wiktionaryRange&generator=categorymembers&prop=pageprops"
            } else {
                # Subsequent requests: Use gcmcontinue from the previous response
                $wikidataUrl = "https://en.wiktionary.org/w/api.php?action=query&format=json&gcmlimit=500&gcmtitle=Category:$wiktionaryRange&generator=categorymembers&prop=pageprops&gcmcontinue=$bookmark"
            }
            Write-Host "Loading entries from $wikidataUrl"
            $wikidataResponse = Invoke-WebRequest -Uri $wikidataUrl -Method GET -UseBasicParsing
            $wikidataResponse = $wikidataResponse.Content # -replace '(?<=pages["]:{["])[^"]+', 'results'
            $listOfInstances = ConvertFrom-Json -InputObject $wikidataResponse
            $pages = @($listOfInstances.query.pages.PSObject.Properties | Where-Object { $null -ne $_ })
            $totalPages = $pages.Count
            Write-Host "Found [$totalPages] pages" -ForegroundColor Green
            if ($listOfInstances.continue) {
                $bookmark = $listOfInstances.continue.gcmcontinue
            } else {
                $bookmark = $null
            }
            #"Bookmark: $bookmark"
            $i = 0
            foreach ($page in $pages) {
                $word = $page.Value.title
                if (-not $word -or ($validatingRegex -and $word -notmatch $validatingRegex)) {
                    continue
                }
                $pct = $i/$totalPages * 100
                Write-Progress -Activity "Processing $word" -Status "$pct% $word" -PercentComplete $pct
                $normalizedWord = $word -replace ' ', '_'
                # Escaped before substitution so titles containing &, #, ? or + cannot alter the URL.
                $finalUrl = $url -f [uri]::EscapeDataString($normalizedWord)
                $i += 1
                try {
                    $importNonLemmaResponse = Invoke-RestMethod -Uri $finalUrl -Method $methodType -Headers $global:authorizationToken -UseBasicParsing
                    Write-Host "Imported [$normalizedWord]" -ForegroundColor Green
                    # ProcessPageTitle -title $word
                } catch {
                    Write-Host "IMPORT FAILED FOR $normalizedWord $_" -ForegroundColor Red
                }
            }
            Write-Host "[$normalizedWord] was the last word imported" -ForegroundColor Magenta
        } while ($bookmark)
    }
}
# Invoke-LampMethod -url "knowledgeGraph?type=id&arg={0}&basic=true" -languageId 36 -familyRange 1352 -methodType "GET" -fieldsToShow "lexemeList"
# Invoke-LampMethod -url "lexicon?type=id&arg={0}" -languageId 36 -lexemeRange 1643 -methodType "GET" -fieldsToShow "id"
# Invoke-LampMethod -url "validateLexemeLinks?lexeme={0}&existing=true" -languageId 12 -methodType "POST" -fieldsToShow "lemma","sql","error" -lexemeRange 1452
# Invoke-LampMethod -url "importNonLemma?nonlemma={0}" -languageId 7 -methodType "POST" -wiktionaryRange "English_non-lemma_forms" -afterFrom "zw"
# https://en.wiktionary.org/w/api.php?action=query&list=categorymembers&cmtitle=Category:Hindi_non-lemma_forms&cmstartsortkeyprefix=%E0%A4%86&cmlimit=max&format=json
# https://en.wiktionary.org/w/api.php?action=query&format=json&gcmstartsortkeyprefix=इ&gcmlimit=500&gcmtitle=Category:Hindi_non-lemma_forms&generator=categorymembers&prop=pageprops
# Measure-Command {
#     Invoke-LampMethod -url "lexicon?type=id&arg={0}" -languageId 36 -lexemeRange 1643 -methodType "GET"
# }
# NOTE(review): the call below is live top-level code, so it runs (login plus a batchUpload request
# with a hard-coded key) every time this file is loaded. It looks like a leftover debug call -
# confirm and remove or comment it out.
Invoke-LampMethod -url "batchUpload?id=807&key=-1111667852" -languageId 7 -methodType "GET" -fieldsToShow "entities"
#EndRegion '.\Public\Invoke-LampMethod.ps1' 234
#Region '.\Public\Invoke-OpenAiApiCall.ps1' 0
function Invoke-OpenAiApiCall{
    [CmdletBinding()]
    <#
    .SYNOPSIS
    Sends a prompt to the OpenAI API and retrieves the response.

    .DESCRIPTION
    The Invoke-OpenAiApiCall function sends a prompt to the OpenAI API and retrieves the response.
It requires an API key to authenticate the request. By default, it uses the 'text-davinci-003' model, a temperature of 0.5, a maximum token limit of 128, and other optional parameters. .PARAMETER prompt The prompt to send to the API. .PARAMETER apiKey The API key to authenticate the request. If not provided, it uses the global variable $OpenAiApiKey. .PARAMETER model The model to use for the API call. Defaults to 'text-davinci-003'. .PARAMETER temperature The temperature to use for the API call. Defaults to 0.5. .PARAMETER maxTokens The maximum number of tokens to generate. Defaults to 128. .PARAMETER topP The value of the top_p parameter. Defaults to 1. .PARAMETER frequencyPenalty The value of the frequency_penalty parameter. Defaults to 0. .PARAMETER presencePenalty The value of the presence_penalty parameter. Defaults to 0. .EXAMPLE PS C:\> Invoke-OpenAiApiCall -prompt "Hello, world!" -apiKey "YOUR_API_KEY" Sends the prompt "Hello, world!" to the OpenAI API using the specified API key. .EXAMPLE PS C:\> $response = Invoke-OpenAiApiCall -prompt "Tell me a joke." Sends the prompt "Tell me a joke." to the OpenAI API using the global API key and assigns the response to the $response variable. #> Param( [Parameter(Mandatory=$true, HelpMessage="The prompt to send to the API.")][string]$prompt, [Parameter(Mandatory=$false, HelpMessage="API key")][string]$apiKey, [Parameter(Mandatory=$false, HelpMessage="The model to use for the API call. default text-davinci-003")][string]$model = "text-davinci-003", [Parameter(Mandatory=$false, HelpMessage="The temperature to use for the API call. Defaults to 0.5.")][decimal]$temperature = 0.5, [Parameter(Mandatory=$false, HelpMessage="The maximum number of tokens to generate. Defaults to 128.")][int]$maxTokens = 128, [Parameter(Mandatory=$false, HelpMessage="The value of the top_p parameter. Defaults to 1.")][decimal]$topP = 1, [Parameter(Mandatory=$false, HelpMessage="The value of the frequency_penalty parameter. 
Defaults to 0.")][decimal]$frequencyPenalty = 0, [Parameter(Mandatory=$false, HelpMessage="The value of the presence_penalty parameter. Defaults to 0.")][decimal]$presencePenalty = 0 ) $url = "https://api.openai.com/v1/completions" if (-not $apiKey){ $apiKey = $global:OpenAiApiKey } $body = @{ model = $model prompt = $prompt temperature = $temperature max_tokens = $maxTokens top_p = $topP frequency_penalty = $frequencyPenalty presence_penalty = $presencePenalty } | ConvertTo-Json # Write-Host $body $response = Invoke-WebRequest -Method Post -Uri $url -Headers @{ "Content-Type" = "application/json;charset=utf-8"; "Authorization" = "Bearer $apiKey" } -ContentType 'application/json; charset=utf-8' -Body $body -UseBasicParsing return $response } # $c= Get-Content -Encoding UTF8 "TisaneLampClient\br.txt" # $response = Invoke-OpenAIAPICall -prompt $c -apiKey # $response_1 = $response.Content | ConvertFrom-Json # Write-Host $response_1.choices[0].text # $response_2 = [System.Text.Encoding]::UTF8.GetString($response_1.choices[0].text[0..1000]) # Write-Host $response_2 #EndRegion '.\Public\Invoke-OpenAiApiCall.ps1' 84 #Region '.\Public\Invoke-ParallelLampMethod.ps1' 0 function Get-Ids{ param($range,$tableName,$max) try { $Ids = Invoke-RestMethod -Uri "$global:lampHost/ids?table=$tableName&range=$range&max=$max&basic=true" -Method GET -ContentType 'application/json; charset=utf-8'-Headers $global:authorizationToken -UseBasicParsing return $Ids }catch{ Write-Host "Invocation failed: $_" -ForegroundColor Red return 1 } } function oldCall-MethodForIds{ param($Ids,$tableName,$url,$methodType,$fieldsToShow) $totalIds = $Ids.Length $i = 0 Write-Host "Found $totalIds $tableName in the given range" foreach ($Id in $Ids) { $pct = $i / $totalIds * 100 Write-Progress -Activity "Processing $Id" -Status "$pct% $Id" -PercentComplete $pct $i += 1 $finalUrl = $url -f $Id try { $lampMethodResponse = Invoke-RestMethod -Uri $finalUrl -Method $methodType -ContentType 'application/json; 
function Call-MethodForIds {
    <#
    .SYNOPSIS
    Calls a LAMP endpoint once per ID, batch by batch, running the IDs of each batch in parallel.

    .PARAMETER Ids
    The IDs to process.

    .PARAMETER tableName
    Name used only in the progress messages.

    .PARAMETER url
    Format string for the endpoint; {0} is replaced with each ID.

    .PARAMETER methodType
    HTTP method passed to Invoke-RestMethod.

    .PARAMETER fieldsToShow
    Response properties to print per ID; when empty the whole response is printed.

    .PARAMETER maxThreads
    ThrottleLimit for ForEach-Object -Parallel. Values below 1 fall back to 10.

    .PARAMETER batchSize
    Number of IDs per batch. Values below 1 fall back to 10.
    #>
    param($Ids, $tableName, $url, $methodType, $fieldsToShow, $maxThreads, $batchSize)

    # A missing or non-positive batch size would make the batching loop below spin forever,
    # and a non-positive throttle limit is rejected by ForEach-Object.
    if (-not $batchSize -or $batchSize -lt 1) { $batchSize = 10 }
    if (-not $maxThreads -or $maxThreads -lt 1) { $maxThreads = 10 }

    $totalIds = $Ids.Length
    Write-Host "Found $totalIds $tableName in the given range"

    # Split the IDs into consecutive slices of at most $batchSize elements.
    $batches = [System.Collections.Generic.List[object]]::new()
    for ($i = 0; $i -lt $totalIds; $i += $batchSize) {
        $lastIndex = [Math]::Min($i + $batchSize - 1, $totalIds - 1)
        $batches.Add($Ids[$i..$lastIndex])
    }

    # Parallel script blocks run in separate runspaces; hand the headers over through a
    # local variable so they can be referenced with a plain $using: modifier.
    $authHeaders = $global:authorizationToken

    $batchCount = 0
    $totalBatches = $batches.Count
    foreach ($batch in $batches) {
        $currentBatchIds = $batch -join ", "
        $batchCount++
        Write-Host "Processing batch $batchCount of $($batches.Count): IDs $currentBatchIds" -ForegroundColor Green
        $pct = ($batchCount / $totalBatches) * 100
        Write-Progress -Activity "Processing Batch [$batchCount of $totalBatches]" -Status "$pct" -PercentComplete $pct

        $batch | ForEach-Object -Parallel {
            $Id = $_
            $fields = $using:fieldsToShow
            $finalUrl = $using:url -f $Id
            try {
                $lampMethodResponse = Invoke-RestMethod -Uri $finalUrl -Method $using:methodType -ContentType 'application/json; charset=utf-8' -Headers $using:authHeaders -UseBasicParsing
                if ($fields.Length -ne 0) {
                    $filteredResponse = $lampMethodResponse | Select-Object -Property $fields
                    $output = ""
                    foreach ($fieldToShow in $fields) {
                        $value = $filteredResponse.$fieldToShow
                        $output += " $value"
                    }
                    Write-Host "[$Id] $output"
                } else {
                    Write-Host "[$Id]" $lampMethodResponse
                }
            } catch {
                Write-Host "Invocation failed for $Id : $_" -ForegroundColor Red
            }
        } -ThrottleLimit $maxThreads
    }
}

function Validate-UrlForLampMethod {
    <#
    .SYNOPSIS
    Checks that a URL template contains the {0} placeholder.

    .OUTPUTS
    $false when the placeholder is present (the URL is usable);
    $true, after printing a message, when it is missing. Note the inverted sense:
    callers treat a $true result as "invalid, stop".
    #>
    param($url)

    if ($url -match "\{0\}") {
        return $false
    }
    Write-Host "url [ $url ] does not contain {0}, recheck and run code again " -ForegroundColor RED
    return $true
}

function Invoke-ParallelLampMethod {
    <#
    .SYNOPSIS
    Invokes a LAMP endpoint for a range of families or lexemes (in parallel), for the members
    of a Wiktionary category (sequentially), or once as-is.

    .DESCRIPTION
    Logs in, selects the language, and then, depending on which range parameter is given:
    - familyRange / lexemeRange: fetches the IDs with Get-Ids and calls the URL for each of
      them through Call-MethodForIds. The URL must contain {0}.
    - wiktionaryRange: pages through the category members on en.wiktionary.org and calls the
      URL once per page title (spaces replaced with underscores).
    - none: calls the URL once and prints either the whole response or the selected fields.
    A URL that does not start with http:// or https:// is taken relative to $global:lampHost.

    .PARAMETER from
    Sort-key prefix at which the Wiktionary category listing starts.

    .PARAMETER validatingRegex
    When given, only Wiktionary titles matching this regex are processed.

    .OUTPUTS
    1 when the URL template is invalid or the single invocation fails; otherwise the single
    invocation's response (when no fields are selected) or nothing.
    #>
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="url ")][String] $url,
        [Parameter(Mandatory = $true, HelpMessage="Language ID")][int] $languageId,
        [Parameter(Mandatory = $false, HelpMessage="familyRange ")][int] $familyRange,
        [Parameter(Mandatory = $false, HelpMessage="lexemeRange ")][int] $lexemeRange,
        [Parameter(Mandatory = $false, HelpMessage="Wiktionary category: ")][String] $wiktionaryRange,
        [Parameter(Mandatory = $false, Helpmessage="Type of request (GET or POST) default POST")][string]$methodType="POST",
        [Parameter(Mandatory=$false, HelpMessage="Array of fields to display")][String[]]$fieldsToShow,
        [Parameter(Mandatory=$false, HelpMessage="Strat at this point for wiktionary range")][String]$from,
        [Parameter(Mandatory = $false, HelpMessage="Regex to verify: ")][String] $validatingRegex,
        [Parameter(Mandatory=$false, HelpMessage="Maximum number of records to fetch (Default = 0)")][int]$max = 0,
        [Parameter(Mandatory=$false, HelpMessage="Maximum number of parallel threads (Default = 10)")][int]$maxThreads = 10,
        [Parameter(Mandatory=$false, HelpMessage="Number of ids to process per batch (Default = 10)")][int]$batchSize = 10
    )

    Login-Lamp
    Set-LampLanguage -languageId $languageId

    if (-not ($url -match '^https?://')) {
        $url = "$global:lampHost/$url"
    }

    if ($familyRange) {
        if (Validate-UrlForLampMethod $url) {
            Write-Host "Exiting" -ForegroundColor RED
            return 1
        }
        $familyIds = Get-Ids -range $familyRange -tableName "Families" -max $max
        Call-MethodForIds -Ids $familyIds -tableName "Families" -url $url -methodType $methodType -fieldsToShow $fieldsToShow -maxThreads $maxThreads -batchSize $batchSize
    }
    elseif ($lexemeRange) {
        if (Validate-UrlForLampMethod $url) {
            Write-Host "Exiting" -ForegroundColor RED
            return 1
        }
        $lexemeIds = Get-Ids -range $lexemeRange -tableName "Lexemes" -max $max
        Call-MethodForIds -Ids $lexemeIds -tableName "Lexemes" -url $url -methodType $methodType -fieldsToShow $fieldsToShow -maxThreads $maxThreads -batchSize $batchSize
    }
    elseif ($wiktionaryRange) {
        # Start clean so a $bookmark left in a calling scope cannot leak into the first request.
        $bookmark = $null
        do {
            $wikidataUrl = "https://en.wiktionary.org/w/api.php?action=query&format=json&gcmlimit=500&gcmtitle=Category:$wiktionaryRange&generator=categorymembers&prop=pageprops"
            if ($bookmark) {
                # Subsequent requests: continue from the token of the previous response.
                $wikidataUrl += "&gcmcontinue=$([System.Web.HttpUtility]::UrlEncode($bookmark))"
            } elseif ($from) {
                # First request: start from a specific sort-key prefix.
                $wikidataUrl += "&gcmstartsortkeyprefix=$([System.Web.HttpUtility]::UrlEncode($from))"
            }
            # With neither a token nor a prefix, no continuation parameter is sent at all.

            Write-Host "Loading entries from $wikidataUrl"
            $wikidataResponse = Invoke-WebRequest -Uri $wikidataUrl -Method GET -UseBasicParsing
            $listOfInstances = ConvertFrom-Json -InputObject $wikidataResponse.Content

            # query.pages is an object keyed by page id; enumerate its properties.
            $pageEntries = @()
            if ($listOfInstances.query -and $listOfInstances.query.pages) {
                $pageEntries = @($listOfInstances.query.pages.PSObject.Properties)
            }
            $totalPages = $pageEntries.Count
            Write-Host "Found [$totalPages] pages" -ForegroundColor Green

            if ($listOfInstances.continue) {
                $bookmark = $listOfInstances.continue.gcmcontinue
            } else {
                $bookmark = $null
            }

            $i = 0
            foreach ($pageEntry in $pageEntries) {
                $word = $pageEntry.Value.title
                if ($word -and (-not ($validatingRegex) -or $word -match $validatingRegex)) {
                    $pct = $i / $totalPages * 100
                    Write-Progress -Activity "Processing $word" -Status "$pct% $word" -PercentComplete $pct
                    $normalizedWord = $word -replace ' ', '_'
                    # Escape the title so characters such as & or # cannot break the query string.
                    $finalUrl = $url -f [uri]::EscapeDataString($normalizedWord)
                    $i += 1
                    try {
                        $importNonLemmaResponse = Invoke-RestMethod -Uri $finalUrl -Method $methodType -Headers $global:authorizationToken -UseBasicParsing
                        Write-Host "Imported [$normalizedWord]" -ForegroundColor Green
                    } catch {
                        Write-Host "IMPORT FAILED FOR $normalizedWord $_" -ForegroundColor Red
                    }
                }
            }
            Write-Host "[$normalizedWord] was the last word imported" -ForegroundColor Magenta
        } while ($bookmark)
    }
    else {
        try {
            # A space is required between the -ContentType value and -Headers; without it the
            # two are parsed as a single argument and the call fails to bind.
            $lampMethodResponse = Invoke-RestMethod -Uri $url -Method $methodType -ContentType 'application/json; charset=utf-8' -Headers $global:authorizationToken -UseBasicParsing
        } catch {
            Write-Host "Invocation failed: $_" -ForegroundColor Red
            return 1
        }

        if ($fieldsToShow.Length -eq 0) {
            $lampMethodResponse
        } else {
            $filteredResponse = $lampMethodResponse | Select-Object -Property $fieldsToShow
            $output = ""
            foreach ($fieldToShow in $fieldsToShow) {
                $value = $filteredResponse.$fieldToShow
                $output += " $value"
            }
            Write-Host $output
        }
    }
}

# Invoke-LampMethod -url "knowledgeGraph?type=id&arg={0}&basic=true" -languageId 36 -familyRange 1352 -methodType "GET" -fieldsToShow "lexemeList"
# Invoke-ParallelLampMethod -url "lexicon?type=id&arg={0}" -languageId 36 -lexemeRange 1643 -methodType "GET" -fieldsToShow "id"
# Invoke-LampMethod -url "validateLexemeLinks?lexeme={0}&existing=true" -languageId 12 -methodType "POST" -fieldsToShow "lemma","sql","error" -lexemeRange 1452
# Invoke-LampMethod -url "importNonLemma?nonlemma={0}" -languageId 7 -methodType "POST" -wiktionaryRange "English_non-lemma_forms" -afterFrom "zw"
function Lamp-DeleteIncorrectFamilyLinks2F {
    <#
    .SYNOPSIS
    Unlinks lexeme/family links that are flagged as unverified and are not confirmed by Wiktionary.

    .DESCRIPTION
    1. Loads the lexicon entries selected by -type / -arg for the given language and keeps the
       family links whose LFUnverified is 'failedAutoCheck' or 'humanFactor'.
    2. For each such link, asks the LAMP /wiktionary endpoint for the lemma and compares the
       English interpretations listed under each mapped part of speech with the English
       lexemes of the family (from /knowledgeGraph).
    3. A link is queued for deletion when the lookup reports an error (unless -keepMissing is
       $true) or when, for any mapped part of speech, none of the interpretations is among
       the family's English lexemes.
    4. Sends one DELETE /lexemeFamilies request per lexeme with the queued family IDs.

    .PARAMETER languageID
    LAMP language ID whose lexicon is inspected.

    .PARAMETER arg
    Argument of the lexicon query (ID / text / range ID).

    .PARAMETER type
    Type of the lexicon query argument.

    .PARAMETER keepMissing
    $true to keep links of lemmas whose Wiktionary lookup reports an error; $false to delete them.

    .PARAMETER tag2pos
    Hashtable mapping the space-separated dictionary tags of a lexeme to the part-of-speech
    keys used in the Wiktionary response. Unmapped tags are ignored.
    #>
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Language ID: ")][int] $languageID,
        [Parameter(Mandatory = $true, HelpMessage="Argument (ID / text / range ID): ")][String] $arg,
        [Parameter(Mandatory = $true, HelpMessage="Type of the argument: ")][String] $type,
        [Parameter(Mandatory = $true, HelpMessage="Set to true to keep words not found in Wiktionary, false otherwise.")][bool] $keepMissing,
        [Parameter(Mandatory = $true, HelpMessage="Hashtable for mapping dictionary tags to : ")][hashtable] $tag2pos
    )

    Login-Lamp

    $response = Invoke-RestMethod -Uri "$global:LampHost/setLanguage?language=$languageID" -Method POST -Headers $global:authorizationToken -UseBasicParsing
    $languageISOCode = $response.ISOCode

    $lexiconRawData = Invoke-RestMethod -Uri "$global:LampHost/lexicon?type=$type&arg=$arg" -Method GET -Headers $global:authorizationToken -UseBasicParsing

    # Normalize to an array so a single-object response still has a usable Count
    # (otherwise the progress percentage divides by $null).
    if ($null -eq $lexiconRawData) {
        $lexiconItems = @()
    } else {
        $lexiconItems = @($lexiconRawData)
    }

    # --- Step 1: collect the unverified lexeme/family links ---------------------------------
    $dataset = [System.Collections.Generic.List[object]]::new()
    $deleteList = @{}   # lexeme ID -> array of family IDs to unlink

    $i = 0
    $totalItems = $lexiconItems.Count
    foreach ($item in $lexiconItems) {
        $i++
        Write-Progress -Activity "Processing lexeme [$($item.id)]" -Status "$i out of $totalItems items processed" -PercentComplete (($i / $totalItems) * 100)

        # Clean the lemma: drop anything within square brackets and trim.
        $cleanLemma = ($item.lemma -replace '\[.*?\]', '').Trim()

        # Translate the dictionary tags into part-of-speech keys; unknown tags are dropped.
        $mappedTags = ''
        foreach ($tag in ($item.dictionaryTag -split ' ')) {
            if ($tag2pos.ContainsKey($tag)) {
                $mappedTags += $tag2pos[$tag] + ' '
            }
        }
        $mappedTags = $mappedTags.TrimEnd()

        foreach ($family in $item.families) {
            if ($family.LFUnverified -eq 'failedAutoCheck' -or $family.LFUnverified -eq 'humanFactor') {
                $dataset.Add([PSCustomObject]@{
                    Lemma         = $cleanLemma
                    LexemeId      = $item.id
                    DictionaryTag = $mappedTags
                    FamilyId      = $family.id
                    LFUnverified  = $family.LFUnverified
                })
            }
        }
    }

    Write-Host "Lexeme Data fetched" -ForegroundColor Green

    # --- Step 2: check every collected link against Wiktionary ------------------------------
    # Cache of lookups per lemma. The response itself is cached (not just a flag) so that a
    # later row with an already-seen lemma is compared against ITS response rather than
    # whatever lemma happened to be fetched last.
    $wiktionaryCache = @{}

    $j = 0
    $totalRows = $dataset.Count
    # English lexemes of the families are read below, so switch the session to English.
    $response = Invoke-RestMethod -Uri "$global:LampHost/setLanguageByISO?language=en" -Method POST -Headers $global:authorizationToken -UseBasicParsing

    foreach ($row in $dataset) {
        $j++
        Write-Progress -Activity "Checking Wiktionary for [$($row.LexemeId)]" -Status "$j out of $totalRows lexemes processed" -PercentComplete (($j / $totalRows) * 100)

        if (-not $wiktionaryCache.ContainsKey($row.Lemma)) {
            # Escape the lemma so spaces, '&' and similar characters survive in the query string.
            $encodedLemma = [uri]::EscapeDataString($row.Lemma)
            $lookup = Invoke-RestMethod -Uri "$global:LampHost/wiktionary?lemma=$encodedLemma&language=$languageISOCode" -Method GET -Headers $global:authorizationToken -UseBasicParsing
            if ($lookup.error) {
                Write-Host "Error occured when processing $($row.Lemma)"
                $wiktionaryCache[$row.Lemma] = @{ Found = $false; Response = $null }
            } else {
                $wiktionaryCache[$row.Lemma] = @{ Found = $true; Response = $lookup }
            }
        }
        $cached = $wiktionaryCache[$row.Lemma]

        $unlink = $false
        if (-not $cached.Found) {
            if ($keepMissing) {
                continue
            }
            $unlink = $true
        } else {
            $wiktionaryResponse = $cached.Response

            # English lexemes linked to this family.
            $familyID = $row.FamilyId
            $englishLexemesRaw = Invoke-RestMethod -Uri "$global:LampHost/knowledgeGraph?type=id&arg=$familyID&basic=true" -Method GET -Headers $global:authorizationToken -UseBasicParsing
            $englishLexemes = $englishLexemesRaw.lexemeList -Split ','

            # The link is rejected as soon as one part of speech has no interpretation that
            # is also an English lexeme of the family.
            foreach ($currentDictionaryTag in ($row.DictionaryTag -split ' ')) {
                $englishInterpretations = $wiktionaryResponse.$languageISOCode.$currentDictionaryTag.interpretations.english
                $isInEnglishLexemes = $false
                foreach ($interpretation in $englishInterpretations) {
                    if ($englishLexemes -contains $interpretation) {
                        $isInEnglishLexemes = $true
                        break
                    }
                }
                if (-not $isInEnglishLexemes) {
                    $unlink = $true
                    break
                }
            }
        }

        if ($unlink) {
            if (-not $deleteList.ContainsKey($row.LexemeId)) {
                $deleteList[$row.LexemeId] = @()
            }
            # Record each family once per lexeme so the DELETE request carries no duplicates.
            if ($deleteList[$row.LexemeId] -notcontains $row.FamilyId) {
                $deleteList[$row.LexemeId] += $row.FamilyId
            }
        }
    }

    # --- Step 3: report and delete ----------------------------------------------------------
    Write-Host "Delete List: "
    foreach ($lexemeId in $deleteList.Keys) {
        Write-Host "  [$lexemeId] $($deleteList[$lexemeId] -join ',')"
    }

    if ($deleteList.Count -gt 0) {
        Write-Host "Deleting the lexeme/family links."
        # Switch back from English to the language being cleaned up.
        $response = Invoke-RestMethod -Uri "$global:LampHost/setLanguage?language=$languageID" -Method POST -Headers $global:authorizationToken -UseBasicParsing
        foreach ($lexemeId in $deleteList.Keys) {
            $familyIdsString = $deleteList[$lexemeId] -join ","
            $deleteResponse = Invoke-RestMethod -Uri "$global:LampHost/lexemeFamilies?lexeme=$lexemeId&families=$familyIdsString" -Method DELETE -Headers $global:authorizationToken -UseBasicParsing
            if (-not $deleteResponse.success) {
                Write-Host "Could not unlink families for lexemeId: [$lexemeId]" -ForegroundColor RED
            }
        }
    }
}

# Lamp-DeleteIncorrectFamilyLinks2F -languageID 36 -arg "1152" -type "range" -tag2pos @{ "adj." = "Adjective"; "n." = "Noun" }
# Lamp-DeleteIncorrectFamilyLinks2F -languageID 36 -arg "3847264" -type "id" -tag2pos @{ "adj." = "Adjective"; "n." = "Noun" }
# Lamp-DeleteIncorrectFamilyLinks2F -languageID 36 -arg "3952369" -type "id" -tag2pos @{ "adj." = "Adjective"; "n." = "Noun" } -keepMissing $true
# Lamp-DeleteIncorrectFamilyLinks2F -languageID 9 -arg "1643" -type "range" -tag2pos @{ "adj." = "Adjective"; "n." = "Noun" ; "adv." = "Adverb" ; "v." = "Verb" } -keepMissing $true
#EndRegion '.\Public\Lamp-DeleteIncorrectFamilyLinks2F.ps1' 135
Specify an integer representing the language code. .PARAMETER definition Definition of the adjective family. Specify a string. .PARAMETER wikidata Wikidata ID of the adjective family. Specify a string. .PARAMETER english English lemmas of the adjective family. Specify an array of strings, where each string represents a lemma. .PARAMETER native Native lemmas of the adjective family. Specify an array of strings, where each string represents a lemma. .PARAMETER family Family ID of the adjective family. Specify an integer representing the family ID. This parameter is mandatory. .PARAMETER hypernym Hypernym ID of the adjective family. Specify an integer representing the hypernym ID. This parameter is mandatory. .EXAMPLE New-AdjFamily -lang 7 -definition "This is a test" -wikidata "Q123" -english "beautiful, lovely" -native "bello, encantador" -family 123 -hypernym 456 Creates a new adjective family with the specified parameters. #> Param( [Parameter(HelpMessage="Language code: ")][Int32] $lang, [Parameter(HelpMessage="Definition: ")][String] $definition, [Parameter(HelpMessage="Wikidata ID: ")][String] $wikidata, [Parameter(Mandatory = $true, HelpMessage="English lemmas, delimited by commas")][String[]] $english, [Parameter(HelpMessage="Native lemmas, delimited by commas ")][String[]] $native, [Parameter(Mandatory = $true, HelpMessage="Family ID ")][Int32] $family, [Parameter(Mandatory = $true, HelpMessage="Hypernym ID ")][Int32] $hypernym ) class Feature { [string]$index [string]$value [string]$type Feature([string]$index, [string]$value, [string]$type) { $this.index = $index $this.value = $value $this.type = $type } } $OutputEncoding = [console]::InputEncoding = [console]::OutputEncoding = New-Object System.Text.UTF8Encoding Login-Lamp $description = $english -join ', ' $grammar=@() $grammar += @([Feature]::new("1", "ADJ", "Grammar")) $grammar += @([Feature]::new("26", "ALL", "Grammar")) if (-not $definition) { $definition = '???' 
} $newFamily = @{ id=$family description=$description definition=$definition phraseType='not a phrase' grammar=$grammar wikidataId=$wikidata } Write-Host "Creating family...`r`n" -ForegroundColor Green $newFamily $familyJson = ConvertTo-Json -InputObject $newFamily $response = Invoke-WebRequest -Uri "$global:lampHost/family" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes($familyJson)) -UseBasicParsing $parsedResponse = ConvertFrom-Json -InputObject $response if (-not $parsedResponse.success -or -not $parsedResponse.id) { Write-Host "Can't continue, no valid acknowledgement: $response" -ForegroundColor Red break # end the script } if ($family -ne $parsedResponse.id) { $actualFamilyId = $parsedResponse.id Write-Host "$family was occupied, inserted at $actualFamilyId" -ForegroundColor Yellow } $family = $parsedResponse.id Write-Host "Linking hypernym $hypernym...`r`n" -ForegroundColor Green # link the hypernym $whatever = Invoke-WebRequest -Uri "$global:lampHost/familyLinks?family=$family&type=hypernyms&links=$hypernym" -Method POST -Headers $global:authorizationToken -Body ' ' -UseBasicParsing Write-Host "Lexeme(s) in English...`r`n" -ForegroundColor Green # set the language to English $whatever = Invoke-WebRequest -Uri "$global:lampHost/setLanguage?language=7" -Method POST -Headers $global:authorizationToken -Body ' ' -UseBasicParsing $english | Foreach-Object { Add-Lexeme $family $_ } if ($native -and $native.length -gt 0 -and $lang -gt 0) { Write-Host "Lexeme(s) in language $lang...`r`n" -ForegroundColor Green # set the native language $whatever = Invoke-WebRequest -Uri "$global:lampHost/setLanguage?language=$lang" -Method POST -Headers $global:authorizationToken -Body ' ' -UseBasicParsing $native | Foreach-Object { Add-Lexeme $family $_ } } } #EndRegion '.\Public\New-AdjFamily.ps1' 133 #Region '.\Public\New-FormulaicFamily.ps1' 0 ## ============================================================================= ## ## 
This script's purpose is to add a new Formulaic family ## ============================================================================= function New-FormulaicFamily{ <# .SYNOPSIS Creates a new formulaic family in the LAMP system. .DESCRIPTION The New-FormulaicFamily function creates a new formulaic family in the LAMP (Lexical Acquisition through Machine Parsing) system. It allows you to specify various parameters such as language code, definition, English lemmas, native lemmas, family ID, and hypernym ID. .PARAMETER lang The language code for the formulaic family. Specify it as an integer. .PARAMETER definition The definition of the formulaic family. .PARAMETER english An array of English lemmas for the formulaic family, delimited by commas. This parameter is mandatory. .PARAMETER native An array of native lemmas for the formulaic family, delimited by commas. .PARAMETER family The ID of the formulaic family. This parameter is mandatory. .PARAMETER hypernym The ID of the hypernym for the formulaic family. .EXAMPLE New-FormulaicFamily -lang 26 -definition "Some definition" -english "lemma1,lemma2" -native "lemma3,lemma4" -family 123 -hypernym 456 Creates a new formulaic family with the specified parameters. 
#> [CmdletBinding()] Param( [Parameter(HelpMessage="Language code: ")][Int32] $lang, [Parameter(HelpMessage="Definition: ")][String] $definition, [Parameter(Mandatory = $true, HelpMessage="English lemmas, delimited by commas")][String[]] $english, [Parameter(HelpMessage="Native lemmas, delimited by commas ")][String[]] $native, [Parameter(Mandatory = $true, HelpMessage="Family ID ")][Int32] $family, [Parameter(Mandatory = $false, HelpMessage="Hypernym ID ")][Int32] $hypernym ) #$lang = 26 # code found in the language details class Feature { [string]$index [string]$value [string]$type Feature([string]$index, [string]$value, [string]$type) { $this.index = $index $this.value = $value $this.type = $type } } Login-Lamp $description = $english -join ', ' $grammar=@() $grammar += @([Feature]::new("1", "FORE", "Grammar")) $grammar += @([Feature]::new("26", "ALL", "Grammar")) if (-not $definition) { $definition = '???' } $newFamily = @{ id=$family description=$description definition=$definition phraseType='mainClause' phraseTag='S' grammar=$grammar } Write-Host "Creating family...`r`n" -ForegroundColor Green $newFamily $familyJson = ConvertTo-Json -InputObject $newFamily $response = Invoke-WebRequest -Uri "$global:lampHost/family" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes($familyJson)) -UseBasicParsing $parsedResponse = ConvertFrom-Json -InputObject $response if (-not $parsedResponse.success -or -not $parsedResponse.id) { Write-Host "Can't continue, no valid acknowledgement: $response" -ForegroundColor Red break # end the script } if ($family -ne $parsedResponse.id) { $actualFamilyId = $parsedResponse.id Write-Host "$family was occupied, inserted at $actualFamilyId" -ForegroundColor Yellow } $family = $parsedResponse.id If ($hypernym -gt 0) { Write-Host "Linking hypernym $hypernym...`r`n" -ForegroundColor Green # link the hypernym $whatever = Invoke-WebRequest -Uri 
"$global:lampHost/familyLinks?family=$family&type=hypernyms&links=$hypernym" -Method POST -Headers $global:authorizationToken -Body ' ' -UseBasicParsing } Write-Host "Lexeme(s) in English...`r`n" -ForegroundColor Green # set the language to English $whatever = Invoke-WebRequest -Uri "$global:lampHost/setLanguage?language=7" -Method POST -Headers $global:authorizationToken -Body ' ' -UseBasicParsing $english | Foreach-Object { Add-Lexeme $family $_ } if ($native -and $native.length -gt 0 -and $lang -gt 0) { Write-Host "Lexeme(s) in language $lang...`r`n" -ForegroundColor Green # set the native language $whatever = Invoke-WebRequest -Uri "$global:lampHost/setLanguage?language=$lang" -Method POST -Headers $global:authorizationToken -Body ' ' -UseBasicParsing $native | Foreach-Object { Add-Lexeme $family $_ } } } #EndRegion '.\Public\New-FormulaicFamily.ps1' 125 #Region '.\Public\New-NounFamily.ps1' 0 ## ============================================================================= ## ## This script's purpose is to add a new noun family and new lexemes in English and another language, link them, and tag them ## ## ============================================================================= function New-NounFamily{ <# .SYNOPSIS Creates a new noun family with the specified parameters. .DESCRIPTION The New-NounFamily function creates a new noun family with the specified parameters such as language code, definition, Wikidata ID, lemmas, family ID, hypernym ID, domain ID, and other optional features. .PARAMETER lang Language code for the noun family. .PARAMETER definition Definition of the noun family. .PARAMETER wikidata Wikidata ID of the noun family. .PARAMETER english Array of English lemmas, delimited by commas. .PARAMETER native Array of native lemmas, delimited by commas. .PARAMETER family Family ID for the noun family. .PARAMETER hypernym Hypernym ID for the noun family. .PARAMETER domain Domain ID for the noun family. 
.PARAMETER proper Indicates if the noun family is a proper noun. Default value is 0. .PARAMETER person Indicates if the noun family represents a person. Default value is 0. .EXAMPLE New-NounFamily -lang 7 -definition "A group of animals" -wikidata Q35407 -english "dog, cat, horse" -native "Hund, Katze, Pferd" -family 123 -hypernym 456 Creates a new noun family with the specified parameters. #> [CmdletBinding()] Param( [Parameter(HelpMessage="Language code: ")][Int32] $lang, [Parameter(HelpMessage="Definition: ")][String] $definition, [Parameter(HelpMessage="Wikidata ID: ")][String] $wikidata, [Parameter(Mandatory = $true, ValueFromPipelineByPropertyName=$true, HelpMessage="English lemmas, delimited by commas")][String[]] $english, [Parameter(HelpMessage="Native lemmas, delimited by commas ")][String[]] $native, [Parameter(Mandatory = $true, HelpMessage="Family ID ")][Int32] $family, [Parameter(Mandatory = $true, HelpMessage="Hypernym ID ")][Int32] $hypernym, [Parameter(Mandatory = $false, HelpMessage="Hypernym ID ")][Int32] $domain, [Parameter()][Int32] $proper, [Parameter()][Int32] $person ) $global:nativeLanguageIETF = '' class Feature { [string]$index [string]$value [string]$type Feature([string]$index, [string]$value, [string]$type) { $this.index = $index $this.value = $value $this.type = $type } } Login-Lamp $description = $english -join ', ' if ($description.Length -gt 95) { $description = $description.Substring(0, 95) } $grammar=@() $grammar += @([Feature]::new("1", "NOUN", "Grammar")) if ($proper -gt 0) { $grammar += @([Feature]::new("3", "NO", "Grammar")) $grammar += @([Feature]::new("14", "NA", "Grammar")) } else { $grammar += @([Feature]::new("3", "YES", "Grammar")) $grammar += @([Feature]::new("14", "NPA", "Grammar")) } $grammar += @([Feature]::new("4", "REG", "Grammar")) if ($person -gt 0) { $grammar += @([Feature]::new("22", "PERS", "Grammar")) $grammar += @([Feature]::new("23", "BODY", "Grammar")) $grammar += @([Feature]::new("24", "BODY", 
"Grammar")) } else { $grammar += @([Feature]::new("22", "NO", "Grammar")) $grammar += @([Feature]::new("23", "THNG", "Grammar")) $grammar += @([Feature]::new("24", "THNG", "Grammar")) } $grammar += @([Feature]::new("26", "ALL", "Grammar")) if (-not $definition) { $definition = '???' } $newFamily = @{ id=$family properNoun=$proper description=$description definition=$definition phraseType='not a phrase' grammar=$grammar wikidataId=$wikidata } Write-Host "Creating family...`r`n" -ForegroundColor Green $newFamily $familyJson = ConvertTo-Json -InputObject $newFamily $response = Invoke-WebRequest -Uri "$global:lampHost/family" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes($familyJson)) -UseBasicParsing $parsedResponse = ConvertFrom-Json -InputObject $response if (-not $parsedResponse.success -or -not $parsedResponse.id) { Write-Host "Can't continue, no valid acknowledgement: $response" -ForegroundColor Red break # end the script } if ($family -ne $parsedResponse.id) { $actualFamilyId = $parsedResponse.id Write-Host "$family was occupied, inserted at $actualFamilyId" -ForegroundColor Yellow } $family = $parsedResponse.id Write-Host "Linking hypernym $hypernym...`r`n" -ForegroundColor Green # link the hypernym $whatever = Invoke-WebRequest -Uri "$global:lampHost/familyLinks?family=$family&type=hypernyms&links=$hypernym" -Method POST -Headers $global:authorizationToken -Body ' ' -UseBasicParsing if ($domain -gt 0) { Write-Host "Linking domain $domain...`r`n" -ForegroundColor Green # link the hypernym $whatever = Invoke-WebRequest -Uri "$global:lampHost/familyLinks?family=$family&type=domains&links=$domain" -Method POST -Headers $global:authorizationToken -Body ' ' -UseBasicParsing } Write-Host "Lexeme(s) in English...`r`n" -ForegroundColor Green # set the language to English $activeLanguage = Invoke-RestMethod -Uri "$global:lampHost/setLanguage?language=7" -Method POST -Headers $global:authorizationToken -Body ' ' 
-UseBasicParsing $english | Foreach-Object { Add-Lexeme $family $_ } if ($native -and $native.length -gt 0 -and $lang -gt 0) { Write-Host "Lexeme(s) in language $lang...`r`n" -ForegroundColor Green # set the native language $activeLanguage = Invoke-RestMethod -Uri "$global:lampHost/setLanguage?language=$lang" -Method POST -Headers $global:authorizationToken -Body ' ' -UseBasicParsing $global:nativeLanguageIETF = $activeLanguage.ISOCode $native | Foreach-Object { Add-Lexeme $family $_ } } } #EndRegion '.\Public\New-NounFamily.ps1' 166
#Region '.\Public\New-VerbFamily.ps1' 0
## =============================================================================
##
## This script's purpose is to add a new verb family and new lexemes in English and another language, link them, and tag them
##
## =============================================================================
function New-VerbFamily{
<#
.SYNOPSIS
    Creates a new verb family and performs various operations on it.

.DESCRIPTION
    The New-VerbFamily function creates a new verb family and performs the following operations:
    - Creates a family with the specified parameters.
    - Links a hypernym to the family.
    - Adds lexemes in English to the family.
    - Adds lexemes in a native language (if provided) to the family.

    If the server does not acknowledge the family creation, a terminating error is thrown and
    nothing else is attempted.

.PARAMETER lang
    The language code for the native language. Enter an integer representing the language code.

.PARAMETER definition
    The definition of the verb family. Enter a string representing the definition.
    Defaults to '???' when omitted.

.PARAMETER english
    An array of English lemmas. This parameter is mandatory.

.PARAMETER native
    An array of native language lemmas.

.PARAMETER family
    The family ID. This parameter is mandatory. The server may assign a different ID;
    the ID returned by the server is the one used for all subsequent calls.

.PARAMETER hypernym
    The hypernym ID. This parameter is mandatory.

.PARAMETER person
    An optional parameter indicating the person. Enter an integer representing the person.
    A value greater than 0 selects the BODY value for feature 23, otherwise THNG is used.

.EXAMPLE
    New-VerbFamily -lang 1 -definition "Sample definition" -english "lemma1", "lemma2" -native "native1", "native2" -family 123 -hypernym 456 -person 1

    Creates a new verb family with the specified parameters. Adds lexemes in English and the native language.

.EXAMPLE
    New-VerbFamily -english "lemma1", "lemma2" -family 123 -hypernym 456

    Creates a new verb family with the specified parameters. Adds lexemes in English only.
#>
    [CmdletBinding()]
    Param(
        [Parameter(HelpMessage="Language code: ")][Int32] $lang,
        [Parameter(HelpMessage="Definition: ")][String] $definition,
        [Parameter(Mandatory = $true, HelpMessage="English lemmas, delimited by commas")][String[]] $english,
        [Parameter(HelpMessage="Native lemmas, delimited by commas ")][String[]] $native,
        [Parameter(Mandatory = $true, HelpMessage="Family ID ")][Int32] $family,
        [Parameter(Mandatory = $true, HelpMessage="Hypernym ID ")][Int32] $hypernym,
        [Parameter()][Int32] $person
    )

    # Shape of one grammar feature entry in the family payload.
    class Feature {
        [string]$index
        [string]$value
        [string]$type

        Feature([string]$index, [string]$value, [string]$type) {
            $this.index = $index
            $this.value = $value
            $this.type = $type
        }
    }

    Login-Lamp

    # NOTE(review): New-NounFamily truncates the description to 95 characters; this function
    # sends it untruncated - confirm whether the same limit applies here.
    $description = $english -join ', '

    $grammar = @()
    $grammar += @([Feature]::new("1", "VERB", "Grammar"))
    $grammar += @([Feature]::new("14", "TRAN", "Grammar"))
    $grammar += @([Feature]::new("8", "REG", "Grammar"))
    if ($person -gt 0) {
        $grammar += @([Feature]::new("23", "BODY", "Grammar"))
    }
    else {
        $grammar += @([Feature]::new("23", "THNG", "Grammar"))
    }
    $grammar += @([Feature]::new("24", "ALL", "Grammar"))
    $grammar += @([Feature]::new("26", "ALL", "Grammar"))

    if (-not $definition) {
        $definition = '???'
    }

    $newFamily = @{
        id=$family
        description=$description
        definition=$definition
        phraseType='not a phrase'
        grammar=$grammar
    }

    # . ".\normalizationLib.ps1"
    Write-Host "Creating family...`r`n" -ForegroundColor Green
    $newFamily   # echoes the payload to the output stream, as before

    $familyJson = ConvertTo-Json -InputObject $newFamily
    $response = Invoke-WebRequest -Uri "$global:lampHost/family" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes($familyJson)) -UseBasicParsing
    $parsedResponse = ConvertFrom-Json -InputObject $response

    if (-not $parsedResponse.success -or -not $parsedResponse.id) {
        # 'break' outside of a loop unwinds into the caller (and can end its loops or the whole
        # runspace); raise a proper terminating error instead.
        throw "Can't continue, no valid acknowledgement: $response"
    }

    if ($family -ne $parsedResponse.id) {
        $actualFamilyId = $parsedResponse.id
        Write-Host "$family was occupied, inserted at $actualFamilyId" -ForegroundColor Yellow
    }
    $family = $parsedResponse.id

    Write-Host "Linking hypernym $hypernym...`r`n" -ForegroundColor Green
    # link the hypernym
    $null = Invoke-WebRequest -Uri "$global:lampHost/familyLinks?family=$family&type=hypernyms&links=$hypernym" -Method POST -Headers $global:authorizationToken -Body ' ' -UseBasicParsing

    Write-Host "Lexeme(s) in English...`r`n" -ForegroundColor Green
    # set the language to English
    $null = Invoke-WebRequest -Uri "$global:lampHost/setLanguage?language=7" -Method POST -Headers $global:authorizationToken -Body ' ' -UseBasicParsing
    $english | Foreach-Object { Add-Lexeme $family $_ }

    if ($native -and $native.length -gt 0 -and $lang -gt 0) {
        Write-Host "Lexeme(s) in language $lang...`r`n" -ForegroundColor Green
        # set the native language
        $null = Invoke-WebRequest -Uri "$global:lampHost/setLanguage?language=$lang" -Method POST -Headers $global:authorizationToken -Body ' ' -UseBasicParsing
        $native | Foreach-Object { Add-Lexeme $family $_ }
    }
}
#EndRegion '.\Public\New-VerbFamily.ps1' 141
#Region '.\Public\Normalization-Lib.ps1' 0
# Normalizes a single-word lemma by rewriting its ending according to a per-language,
# ordered suffix table (first matching rule wins).
#   $language - language code; only 'sq' and 'ru' have rules, anything else returns $entry as is.
#   $entry    - the lemma; entries containing a space are returned unchanged.
# Returns exactly one string: the rewritten lemma, or $entry when no rule matches.
Function GetNormalizedWikidataLemma([String] $language,[String] $entry) {
    if ($entry -like '* *') {
        return $entry
    }

    # Ordered on purpose: rules are tried top to bottom.
    $suffixes = [ordered]@{}
    switch ($language) {
        'sq' {
            $suffixes.Add('i', '')
            $suffixes.Add('u', '')
            $suffixes.Add('(?<=[^aeëiouy])ulla', 'ull')
            $suffixes.Add('(?<=[^aeëiouy])ura', 'ur')
            $suffixes.Add('(?<=[^aeëiouy])eja', 'e')
            $suffixes.Add('(?<=[^aeëiouy])ëza', 'ëz')
            $suffixes.Add('(?<=[^aeëiouy])ia', 'i')
            $suffixes.Add('(?<=[^aeëiouy])ra', 'ër')
            $suffixes.Add('(?<=[^aeëiouy])rra', 'ërr')
            $suffixes.Add('(?<=[^aeëiouy])a', 'ë')
            $suffixes.Add('ca', 'cë')
            $suffixes.Add('da', 'dë')
            $suffixes.Add('dea', 'de')
            $suffixes.Add('fa', 'fë')
            $suffixes.Add('ga', 'gë')
            $suffixes.Add('dha', 'dhë')
            $suffixes.Add('sha', 'shë')
            $suffixes.Add('cia', 'ci')
            $suffixes.Add('media', 'medie')
            $suffixes.Add('pedia', 'pedi')
            $suffixes.Add('logia', 'logji')
            $suffixes.Add('ogjia', 'ogji')
            $suffixes.Add('ia', 'i')
            $suffixes.Add('aja', 'a')
            $suffixes.Add('dja', 'dje')
            $suffixes.Add('anija', 'anije')
            $suffixes.Add('fëmija', 'fëmijë')
            $suffixes.Add('bizelja', 'bizelja')
            $suffixes.Add('lulja', 'lule')
            $suffixes.Add('lja', 'lje')
            $suffixes.Add('mja', 'mje')
            $suffixes.Add('nja', 'një')
            $suffixes.Add('goja', 'gojë')
            $suffixes.Add('soja', 'sojë')
            $suffixes.Add('qja', 'qe')
            $suffixes.Add('tja', 'te')
            $suffixes.Add('ka', 'kë')
            $suffixes.Add('(?<=[^l])la', 'lë')
            $suffixes.Add('kumbulla', 'kumbull')
            $suffixes.Add('ma', 'më')
            $suffixes.Add('na', 'në')
            $suffixes.Add('ra', 'rë')
            $suffixes.Add('sa', 'së')
            $suffixes.Add('ta', 'të')
            $suffixes.Add('va', 'vë')
            $suffixes.Add('ëza', 'ëz')
            $suffixes.Add('za', 'zë')
        }
        'ru' {
            $suffixes.Add('ые', 'ое')
            $suffixes.Add('ы', '')
            $suffixes.Add('и', '')
            # '(\w+[оиые]е[ ])+\w+[иыяа]'
        }
        default {
            return $entry
        }
    }

    # A plain loop is used so that 'return' really leaves the function; inside a
    # ForEach-Object script block it would only end the current iteration and the
    # trailing 'return $entry' would emit a second value.
    foreach ($suffix in $suffixes.Keys) {
        $fullRegex = $suffix + '$'
        if ($entry -match $fullRegex) {
            return ($entry -replace $fullRegex, $suffixes[$suffix])
        }
    }
    return $entry
}

# For a multi-word verb phrase, looks up the lexeme of the first word (must carry the
# family feature 1 = 'VERB') in the given language and derives a stem for the whole phrase.
#   $languageId - numeric language ID used in the Lexemes lookup.
#   $entry      - the phrase, words separated by single spaces.
# Returns a two-element array: lexeme ID (0 when not found / single word) and the stem
# (the original entry when no head verb was found).
Function GetVerbSameInflectionAs([int] $languageId,[String] $entry) {
    $entries = $entry.Split(" ")
    $lexemeId = 0
    $stem = $entry

    if ($entries -and $entries.Length -gt 1) {
        $firstWord = $entries[0]
        # Double single quotes so that words containing an apostrophe cannot break out
        # of the N'...' literal.
        $escapedFirstWord = $firstWord.Replace("'", "''")
        $sql = "USE tisane; SELECT TOP 1 l.Id, l.Stem FROM dbo.Lexemes l WHERE l.LanguageId = $languageId AND l.MainLemma = N'$escapedFirstWord' AND dbo.HasFamilyFeature(l.Id, 1, 'VERB') = 1"
        $ds = Invoke-Sqlcmd -Query $sql
        if ($ds) {
            $lexemeId = $ds.Id
            $headStem = $ds.Stem
            # Replace the head word by its stem, keep the remainder of the phrase.
            $stem = "$headStem " + $entry.Substring($firstWord.Length + 1)
        }
    }
    # Example of a multi-word entry (Swedish): känna som sin egen ficka
    return $lexemeId, $stem
}

# Picks the SQL assignment list (InflectingSegment / AgreeingSegment1st / AgreeingSegmentLast)
# for a multi-word entry, based on per-language regular expressions.
#   $language - language code, matched as a regex by 'switch -regex'.
#   $entry    - the phrase; the whole phrase must match a pattern (patterns are anchored with ^ and $).
# Returns the assignment string of the first matching pattern, or nothing when none matches.
Function GetSegmentSQL([String] $language,[String] $entry) {
    $segmentCount = $entry.Split(" ").Length
    $penultimateIndex = $segmentCount - 1
    $regexes = [ordered]@{} # it's important to keep it ordered / unsorted, because the order matters

    # NOTE(review): -match is case-insensitive by default; confirm the \p{Lu}/\p{Ll}
    # distinction in the German patterns behaves as intended.
    switch -regex ($language) {
        'he' {
            $regexes.Add("\w+[ ]\w+[י]", "InflectingSegment = 1, AgreeingSegment1st = 2, AgreeingSegmentLast = $segmentCount")
            $regexes.Add("\w+ה([ ]\w+[תה])+", "InflectingSegment = 1, AgreeingSegment1st = 2, AgreeingSegmentLast = $segmentCount")
            $regexes.Add("\w+[ה][ ]\w+", "InflectingSegment = 1, AgreeingSegment1st = 2, AgreeingSegmentLast = $segmentCount")
            $regexes.Add("\w+ות([ ]\w+[תה])+", "InflectingSegment = 1, AgreeingSegment1st = 2, AgreeingSegmentLast = $segmentCount")
            $regexes.Add("\w+[ת][ ]\w+[^תה]", "InflectingSegment = NULL, AgreeingSegment1st = NULL, AgreeingSegmentLast = NULL")
        }
        'de' {
            $regexes.Add("[\p{Ll} ]+[ ](\p{Lu}\p{Ll}+[ ])+\p{Ll}+en", "InflectingSegment = $segmentCount, AgreeingSegment1st = NULL, AgreeingSegmentLast = NULL") # verb phrase
            $regexes.Add("\p{Ll}+en[ ][\p{Ll}+ ]*([ ]\p{Lu}\p{Ll}+)+", "InflectingSegment = 1, AgreeingSegment1st = NULL, AgreeingSegmentLast = NULL") # verb phrase
        }
        'sq' {
        }
        'ru' {
            $regexes.Add("(\w+ая[ ])+\w+[аья]", "InflectingSegment = $segmentCount, AgreeingSegment1st = 1, AgreeingSegmentLast = $penultimateIndex")
            $regexes.Add("(\w+[ое][е][ ])+\w+[ое]", "InflectingSegment = $segmentCount, AgreeingSegment1st = 1, AgreeingSegmentLast = $penultimateIndex")
            $regexes.Add("\w+([ ][\w\-]+(ых|го|а|ы|[ое]в))+", "InflectingSegment = 1, AgreeingSegment1st = NULL, AgreeingSegmentLast = NULL")
            $regexes.Add("\w+[ ]\w{1,3}([ ][\w\-]+)+", "InflectingSegment = 1, AgreeingSegment1st = NULL, AgreeingSegmentLast = NULL")
            $regexes.Add("(\w+[оиы][й][ ])+\w+", "InflectingSegment = $segmentCount, AgreeingSegment1st = 1, AgreeingSegmentLast = $penultimateIndex")
            $regexes.Add("\w+т[ьи][ ][\w\-]+","InflectingSegment = 1, AgreeingSegment1st = NULL, AgreeingSegmentLast = NULL")
            $regexes.Add("(\w+[оиые]е[ ])+\w+[иыяа]", "InflectingSegment = $segmentCount, AgreeingSegment1st = 1, AgreeingSegmentLast = $penultimateIndex")
            $regexes.Add("\w+([ ]+\w+[ой])?[ ]\w+[иаы]([ ][\w\-]+)*", "InflectingSegment = 1, AgreeingSegment1st = NULL, AgreeingSegmentLast = NULL")
        }
        'es|fr|it|pt' {
            $regexes.Add("\w+[ ]d[']\w+", 'InflectingSegment = 1, AgreeingSegment1st = NULL, AgreeingSegmentLast = NULL')
            $regexes.Add("\w+[ ]\w{2,4}[ ]\w+", 'InflectingSegment = 1, AgreeingSegment1st = NULL, AgreeingSegmentLast = NULL')
            $regexes.Add("\w+([ ]\w+)+", "InflectingSegment = 1, AgreeingSegment1st = 2, AgreeingSegmentLast = $segmentCount")
        }
    }

    # First pattern that matches the whole entry wins.
    foreach ($pattern in $regexes.Keys) {
        $fullRegex = '^' + $pattern + '$'
        if ($entry -match $fullRegex) {
            return $regexes[$pattern]
        }
    }
}

# Returns only the values (numbers or 'NULL') from the assignment list produced by
# GetSegmentSQL, in the order InflectingSegment, AgreeingSegment1st, AgreeingSegmentLast.
#   $lang - language code passed to GetSegmentSQL.
#   $ent  - the phrase passed to GetSegmentSQL.
# When GetSegmentSQL yields nothing, that empty result is returned unchanged.
Function GetSegmentValues([String] $lang,[String] $ent) {
    $sql = GetSegmentSQL -language $lang -entry $ent
    if (-not $sql) {
        return $sql
    }

    # Parse "name = value" pairs explicitly. Splitting on the ', ' string and inspecting the
    # left-hand side only worked where String.Split treats its argument as a character set,
    # and '^[0-9]$' dropped values of 10 and above.
    foreach ($assignment in $sql.Split(',')) {
        $sides = $assignment.Split('=')
        if ($sides.Length -lt 2) {
            continue
        }
        $value = $sides[1].Trim()
        if ($value -match '^[0-9]+$' -or $value -eq 'NULL') {
            $value
        }
    }
}
#EndRegion '.\Public\Normalization-Lib.ps1' 163
#Region '.\Public\Process-GenAITestCSV.ps1' 0
# Runs every row of a CSV through the GenAI endpoint, stores the raw and cleaned response in
# the row, optionally scores the response against a gold standard (F1 over comma-separated
# items) and writes everything back to the same CSV file. Each processed row is also emitted
# to the output stream.
#   $csvPath             - CSV file to read and overwrite.
#   $provider            - provider name passed to the invokeGenAI endpoint.
#   $promptTemplate      - prompt text; {Column_Name} placeholders are replaced by row values.
#   $outputColumn        - column receiving the raw response (created when missing).
#   $cleanedOutputColumn - column receiving the cleaned, lower-cased response (created when missing).
#   $cleanupRegex        - optional regex; the first match of the response is kept.
#   $goldStandardColumn  - optional column with the expected comma-separated answer.
#   $scoreColumn         - optional column receiving the F1 score; requires $goldStandardColumn.
Function Process-GenAITestCSV {
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Path to the CSV file.")][String] $csvPath,
        [Parameter(Mandatory = $true, HelpMessage="Specify a supported provider. Options: OpenAI, AzureOpenAI, Anthropic, GooglePaLM.")][String] $provider,
        [Parameter(Mandatory = $true, HelpMessage="Template including columns to be part of the prompt.")][String] $promptTemplate,
        [Parameter(Mandatory = $true, HelpMessage="Name of the column where the GenAI output is stored.")][String] $outputColumn,
        [Parameter(Mandatory = $true, HelpMessage="Name of the column where the cleaned GenAI output is stored.")][String] $cleanedOutputColumn,
        [Parameter(Mandatory = $false, HelpMessage="Regex to extract the part of the response we need.")][String] $cleanupRegex,
        [Parameter(Mandatory = $false, HelpMessage="Name of the column where the correct answer sits.")][String] $goldStandardColumn,
        [Parameter(Mandatory = $false, HelpMessage="Name of the column to place the score in.")][String] $scoreColumn
    )

    if ($scoreColumn -and !$goldStandardColumn) {
        throw "Gold standard column must be specified if score column is specified."
    }

    # Login-Lamp
    $csvData = @(Import-Csv -Path $csvPath -Encoding UTF8)
    if ($csvData.Count -eq 0) {
        # Nothing to process; bail out before the file gets overwritten with an empty export.
        throw "No rows found in CSV file: $csvPath"
    }
    $csvColumns = $csvData[0].PSObject.Properties.Name

    # The gold standard is read from every processed row, so it has to exist up front.
    # (The output columns are created on demand below and therefore are not checked.)
    if ($goldStandardColumn -and $csvColumns -notcontains $goldStandardColumn) {
        throw "Gold standard column '$goldStandardColumn' does not exist in the CSV."
    }

    # Turn {Column_Name} placeholders into positional {0}, {1}, ... for the -f operator.
    for ($i=0; $i -lt $csvColumns.Count; $i++) {
        $promptTemplate = $promptTemplate.Replace("{$($csvColumns[$i])}", "{$i}")
    }

    $scoringEnabled = [bool]($goldStandardColumn -and $scoreColumn)
    $csvObjects = [System.Collections.Generic.List[object]]::new()

    foreach ($row in $csvData) {
        # Give every row the same set of columns; Export-Csv takes its header from the first row.
        $columnsToEnsure = @($outputColumn, $cleanedOutputColumn)
        if ($scoringEnabled) {
            $columnsToEnsure += $scoreColumn
        }
        foreach ($columnName in $columnsToEnsure) {
            if ($row.PSObject.Properties.Name -notcontains $columnName) {
                $row | Add-Member -MemberType NoteProperty -Name $columnName -Value $null
            }
        }

        # NOTE(review): the 'Lexemes' column name is hard-coded; rows of a CSV without such a
        # column are all skipped - confirm this is intended.
        if ([string]::IsNullOrEmpty($row.Lexemes)) {
            $csvObjects.Add($row)
            continue
        }

        $prompt = $promptTemplate -f $row.PSObject.Properties.Value
        $urlEncodedPrompt = [System.Web.HttpUtility]::UrlEncode($prompt)

        $response = Invoke-WebRequest -Method Post -Uri "$global:LampHost/invokeGenAI?provider=$provider&prompt=$urlEncodedPrompt" -Headers $global:authorizationToken -UseBasicParsing
        $row.$outputColumn = $response.Content

        if ($cleanupRegex) {
            $row.$cleanedOutputColumn = [regex]::Match($row.$outputColumn, $cleanupRegex).Value
        }
        else {
            $row.$cleanedOutputColumn = $row.$outputColumn
        }

        # Line breaks become item separators, quotes are dropped, result is lower-cased.
        $cleanedOutput = $row.$cleanedOutputColumn -replace "`n", "," -replace "`r", "" -replace "`"", "" | ForEach-Object { $_.Trim() }
        $row.$cleanedOutputColumn = $cleanedOutput.ToLower()

        if ($scoringEnabled) {
            $expected = $row.$goldStandardColumn.Split(',') | ForEach-Object { $_.Trim() }
            $generated = $row.$cleanedOutputColumn.Split(',') | ForEach-Object { $_.Trim() }

            $truePositives = ($expected | Where-Object { $generated -contains $_ }).Count
            $falsePositives = ($generated | Where-Object { $expected -notcontains $_ }).Count
            $falseNegatives = ($expected | Where-Object { $generated -notcontains $_ }).Count

            $precision = if ($truePositives + $falsePositives -eq 0) { 0 } else { $truePositives / ($truePositives + $falsePositives) }
            $recall = if ($truePositives + $falseNegatives -eq 0) { 0 } else { $truePositives / ($truePositives + $falseNegatives) }

            # F1 = harmonic mean of precision and recall.
            $row.$scoreColumn = if ($precision + $recall -eq 0) { 0 } else { 2 * $precision * $recall / ($precision + $recall) }
        }

        $csvObjects.Add($row)
        $row
    }

    # The average only exists when scores were computed; rows without a score are left out.
    $averageRow = $null
    if ($scoringEnabled) {
        $scoredRows = $csvObjects | Where-Object { -not [string]::IsNullOrEmpty([string]$_.$scoreColumn) }
        $averageScore = ($scoredRows | Measure-Object -Property $scoreColumn -Average).Average
        $averageRow = "$($provider)_Average,$averageScore"
    }

    $csvObjects | Export-Csv -Path $csvPath -NoTypeInformation -Encoding UTF8
    if ($averageRow) {
        $averageRow | Add-Content -Path $csvPath
    }
}

# Process-GenAITestCSV -provider "GooglePaLM" -promptTemplate "select Chinese words/phrases from the following list that do not mean :{Family_Definition} and do not translate as : '{Family_ID}'\nList of words:\n{Noisy_List_of_Lexemes}\nOutput only the matches delimited by comma (no explanations). If there are no matches, output n/a" -outputColumn "GenAI_Response" -cleanedOutputColumn "Cleaned_GenAI_Response" -csvPath "TisaneFamilyCleanupTest_zh_CN.csv" -goldStandardColumn "Gold_Standard_for_Noisy_lexemes" -scoreColumn "Score_for_Noisy_Input"
# Process-GenAITestCSV -provider "OpenAI" -promptTemplate "Phrase to translate: {Lexemes}\nTranslate from: {From_Language}\nTranslate to: {To_Language}\nDefinition: {Family_Definition}\nSample Usage: {Sample_Usage}\nOutput: Translated phrase only." -outputColumn "GenAI_Response" -cleanedOutputColumn "Cleaned_GenAI_Response" -csvPath "output.csv"
#####
#hi
# ar
# fr
# he
# id
# ru
#EndRegion '.\Public\Process-GenAITestCSV.ps1' 108
#Region '.\Public\Retag.ps1' 0
function Retag{
<#
.SYNOPSIS
    Retags using the specified range ID and language ID, with the option to reset list IDs.

.DESCRIPTION
    The Retag function is used to perform retagging operations by invoking REST API endpoints.
    It sets the language and initiates the retagging process using the specified range ID.
    Optionally, you can reset list IDs during the retagging process.

.PARAMETER languageId
    The numeric ID of the language to be used for retagging.

.PARAMETER rangeId
    The ID of the range to be used for retagging.

.PARAMETER reset
    Specifies whether to reset list IDs during the retagging process. If provided, it should
    contain a comma-separated list of list IDs to reset. The value is URL-escaped before it
    is sent.

.EXAMPLE
    Retag -languageId 123 -rangeId 456 -reset "list1, list2"

    This example retags using the range ID 456 and language ID 123. It also resets "list1"
    and "list2" during the retagging process.
#>
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Language numeric ID: ")][int] $languageId,
        [Parameter(Mandatory = $true, HelpMessage="Range ID: ")][int] $rangeId,
        [Parameter(Mandatory = $false, HelpMessage="Reset list IDs: ")][String] $reset
    )

    Login-Lamp

    $languageStructure = Invoke-RestMethod -Uri "$global:lampHost/setLanguage?language=$languageId" -Method POST -Headers $global:authorizationToken -Body ' ' -UseBasicParsing
    $languageEnglishName = $languageStructure.EnglishName

    Write-Host "Retagging using range: $rangeId ($languageEnglishName), resetting listIDs: $reset" -ForegroundColor Green

    # The list may contain commas and spaces, which are not valid raw in a query string.
    $escapedReset = [uri]::EscapeDataString($reset)
    Invoke-WebRequest -Uri "$global:lampHost/retag?arg=$rangeId&type=range&reset=$escapedReset" -Method PUT -Headers $global:authorizationToken -Body ' ' -UseBasicParsing
}
#EndRegion '.\Public\Retag.ps1' 40
#Region '.\Public\Run-CommandsMultipleTimes.ps1' 0
# Invokes the named command once per parameter set found in a JSON file.
#   $jsonFilePath - JSON file holding an object (or an array of objects) whose properties
#                   are the parameter names and values to pass.
#   $commandName  - name of the command to run; must resolve via Get-Command.
function Run-CommandMultipleTimes {
    param (
        [Parameter(Mandatory=$true)]
        [string]$jsonFilePath,

        [Parameter(Mandatory=$true)]
        [string]$commandName
    )

    # Stop right here when the command cannot be resolved instead of invoking $null later.
    $command = Get-Command -Name $commandName -ErrorAction Stop
    $providersParams = Get-Content -Path $jsonFilePath -Raw | ConvertFrom-Json

    foreach ($params in $providersParams) {
        Write-Host "RUNNING $params"

        # Splatting needs a hashtable, ConvertFrom-Json yields PSCustomObjects.
        $hashtableParams = @{}
        foreach ($property in $params.PSObject.Properties) {
            $hashtableParams[$property.Name] = $property.Value
        }
        & $command.Name @hashtableParams
    }
}

# Run-CommandMultipleTimes -jsonFilePath "params.json" -commandName "Process-GenAITestCSV"
#EndRegion '.\Public\Run-CommandsMultipleTimes.ps1' 21
#Region '.\Public\temp.ps1' 0
# Fetches the IDs of a table within a range from the /ids endpoint.
# Returns the response of the endpoint, or 1 when the call fails.
function Get-Ids{
    param($range,$tableName,$max)
    try {
        $Ids = Invoke-RestMethod -Uri "$global:lampHost/ids?table=$tableName&range=$range&max=$max&basic=true" -Method GET -ContentType 'application/json; charset=utf-8' -Headers $global:authorizationToken -UseBasicParsing
        return $Ids
    }
    catch {
        Write-Host "Invocation failed: $_" -ForegroundColor Red
        return 1
    }
}

# Calls an endpoint once per ID, sequentially, and prints the selected fields of each response.
#   $Ids          - IDs to process.
#   $tableName    - table name, used in the status message only.
#   $url          - URL template; {0} is replaced by the current ID.
#   $methodType   - HTTP method.
#   $fieldsToShow - response properties to print; all properties when empty.
# Stops at the first failing call and returns 1.
function Call-MethodForIds{
    param($Ids,$tableName,$url,$methodType,$fieldsToShow)
    $totalIds = $Ids.Length
    $i = 0
    Write-Host "Found $totalIds $tableName in the given range"

    foreach ($Id in $Ids) {
        $pct = [int]($i / $totalIds * 100)
        Write-Progress -Activity "Processing $Id" -Status "$pct% $Id" -PercentComplete $pct
        $i += 1
        $finalUrl = $url -f $Id
        try {
            $lampMethodResponse = Invoke-RestMethod -Uri $finalUrl -Method $methodType -ContentType 'application/json; charset=utf-8' -Headers $global:authorizationToken -UseBasicParsing
            if ($fieldsToShow.Length -ne 0) {
                $filteredResponse = $lampMethodResponse | Select-Object -Property $fieldsToShow
                $output = ""
                foreach ($fieldToShow in $fieldsToShow) {
                    $value = $filteredResponse.$fieldToShow
                    $output += " $value"
                }
                # Label the line with the ID being processed.
                Write-Host "[$Id] $output"
            }
            else {
                $filteredResponse = $lampMethodResponse | Select-Object -Property *
                Write-Host "[$Id]" $filteredResponse
            }
        }
        catch {
            Write-Host "Invocation failed: $_" -ForegroundColor Red
            Write-Progress -Activity "Processing $Id" -Completed
            return 1
        }
    }
    Write-Progress -Activity "Processing IDs" -Completed
}

# Calls an endpoint for one ID and writes the selected fields of the response to the output stream.
# Parameters as for Call-MethodForIds, with a single $Id. Writes an error and returns 1 on failure.
function Call-MethodForSingleId {
    param($Id, $tableName, $url, $methodType, $fieldsToShow)
    $finalUrl = $url -f $Id
    try {
        $lampMethodResponse = Invoke-RestMethod -Uri $finalUrl -Method $methodType -ContentType 'application/json; charset=utf-8' -Headers $global:authorizationToken -UseBasicParsing
        if ($fieldsToShow.Length -ne 0) {
            $filteredResponse = $lampMethodResponse | Select-Object -Property $fieldsToShow
            $output = ""
            foreach ($fieldToShow in $fieldsToShow) {
                $value = $filteredResponse.$fieldToShow
                $output += " $value"
            }
            Write-Output "[$Id] $output"
        }
        else {
            $filteredResponse = $lampMethodResponse | Select-Object -Property *
            Write-Output "[$Id] $filteredResponse"
        }
    }
    catch {
        Write-Error "Invocation failed: $_"
        return 1
    }
}

# Same as Call-MethodForIds, but runs one background job per ID and returns the collected
# output of all jobs.
function Call-MethodForIdsParallel {
    param($Ids, $tableName, $url, $methodType, $fieldsToShow)
    $totalIds = $Ids.Length

    # Jobs run in a separate process that knows neither this module's functions nor its
    # global variables, so the worker's source text and the auth headers travel as arguments.
    $workerSource = ${function:Call-MethodForSingleId}.ToString()
    $authHeaders = $global:authorizationToken

    $jobs = @()
    foreach ($Id in $Ids) {
        $job = Start-Job -ScriptBlock {
            param($workerSource, $authHeaders, $Id, $tableName, $url, $methodType, $fieldsToShow)
            $global:authorizationToken = $authHeaders
            & ([scriptblock]::Create($workerSource)) $Id $tableName $url $methodType $fieldsToShow
        } -ArgumentList $workerSource, $authHeaders, $Id, $tableName, $url, $methodType, $fieldsToShow
        $jobs += $job
    }

    while ($jobs.State -contains "Running") {
        Start-Sleep -Seconds 5
        $completed = @($jobs | Where-Object { $_.State -eq "Completed" }).Count
        $pct = [int](($completed / $totalIds) * 100)
        Write-Progress -Activity "Processing IDs" -Status "$pct% Complete" -PercentComplete $pct
    }
    Write-Progress -Activity "Processing IDs" -Completed

    $results = $jobs | ForEach-Object {
        Receive-Job -Job $_
        Remove-Job -Job $_
    }
    $results
}
#EndRegion '.\Public\temp.ps1' 93 |