#Region '.\Private\Add-Emoji.ps1' 0
<#
.SYNOPSIS
    Imports one emoji into LaMP with two POST /import calls.
.DESCRIPTION
    Reduces the emoji's long name to a reference word by stripping qualifiers
    (gender prefix, text after ':' or 'with', "symbol", "sign", "face", ...),
    then imports the emoji character and, in a second call, its short name
    (tagged 51=EXPL) against that reference.
    $source is not a parameter of this function; it resolves through
    PowerShell's dynamic scoping to the caller's variable (Import-Emoji
    declares a -source parameter).
.PARAMETER emoji
    The emoji character(s) to import as the lexeme.
.PARAMETER longName
    Descriptive name; used to derive the reference word.
.PARAMETER shortName
    Short code (e.g. ':flag-xx'); used as orgId and as the second lexeme.
#>
Function Add-Emoji($emoji, $longName, $shortName) {
    $properNoun = ''
    $hypernym = 0
    # Start from a known value so a stale $assign in a caller scope cannot leak in.
    $assign = ''

    if ($shortName -like ':flag-*') {
        $longName = [cultureinfo]::GetCultureInfo("en-US").TextInfo.ToTitleCase($longName)
        $properNoun = '1'
        $hypernym = 54389
    }

    # Gender prefixes become a feature assignment and are removed from the name.
    if ($longName -like 'woman*') {
        $assign = "5=F"
        $longName = $longName -replace "woman"
    }
    if ($longName -like 'man*') {
        $assign = "5=M"
        $longName = $longName -replace "man"
    }

    # The order of the following rules matters: each one sees the output of the previous one.
    if ($longName -like '*:*') {
        $longName = $longName.Substring(0, $longName.IndexOf(':'))
    }
    if ($longName -like '*symbol') {
        $longName = $longName -replace "symbol"
    }
    if ($longName -like '*sign') {
        $longName = $longName -replace "sign"
    }
    if ($longName -like '*with*') {
        $longName = $longName.Substring(0, $longName.IndexOf('with'))
    }
    if ($longName -like 'squared*') {
        $longName = $longName -replace "squared"
    }
    if ($longName -like 'open*') {
        $longName = $longName -replace "open"
    }
    if ($longName -like 'closed*') {
        $longName = $longName -replace "closed"
    }
    if ($longName -like 'oncoming*') {
        $longName = $longName -replace "oncoming"
    }
    if ($longName -like 'large*') {
        $longName = $longName -replace "large"
    }
    if ($longName -like 'small*') {
        $longName = $longName -replace "small"
    }

    # For colour names, keep the text from the colour word onwards.
    if ($longName -like '*orange*' ) {
        $longName = $longName.Substring($longName.IndexOf('orange'))
    }
    if ($longName -like '*yellow*') {
        $longName = $longName.Substring($longName.IndexOf('yellow'))
    }
    if ($longName -like '*green*') {
        $longName = $longName.Substring($longName.IndexOf('green'))
    }
    if ($longName -like '*purple*') {
        $longName = $longName.Substring($longName.IndexOf('purple'))
    }
    if ($longName -like '*brown*') {
        $longName = $longName.Substring($longName.IndexOf('brown'))
    }

    if ($longName -like '*face') {
        $longName = $longName -replace "face"
    }
    if ($longName -like '*hand') {
        $longName = $longName -replace "hand"
    }
    if ($longName -like '*ing') {
        $longName = $longName -replace "ing"
        # NOTE(review): $feature is assigned but not sent in either request below.
        $feature = 'VERB'
    }

    $longName = [System.Web.HttpUtility]::UrlEncode($longName.Trim())
    $shortName = [System.Web.HttpUtility]::UrlEncode($shortName.Trim())
    $emoji = $emoji.Trim()

    # First import: the emoji character itself.
    # Fix: send the URL-encoded assignment (it contains '='), as the second call already does.
    $encodedAssign = [System.Web.HttpUtility]::UrlEncode($assign)
    $uri = "$global:lampHost/import?lexeme=$emoji&reference=$longName&proper=$properNoun&hypernym=$hypernym&source=$source&orgId=$shortName&assign=$encodedAssign"
    #$uri
    $response = Invoke-WebRequest -Uri $uri -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes('')) -UseBasicParsing
    #$uri

    # Second import: the short name, additionally tagged 51=EXPL.
    if ($assign) {
        $assign = "51=EXPL,$assign"
    }
    else {
        $assign = "51=EXPL"
    }
    $encodedAssign = [System.Web.HttpUtility]::UrlEncode($assign)
    $uri = "$global:lampHost/import?lexeme=$shortName&reference=$longName&proper=$properNoun&hypernym=$hypernym&source=$source&orgId=$shortName&assign=$encodedAssign"
    # The URI is written to the output stream (left as in the original).
    $uri
    $response = Invoke-WebRequest -Uri $uri -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes('')) -UseBasicParsing
    #$response
}
#EndRegion '.\Private\Add-Emoji.ps1' 58
#Region '.\Private\Add-Lexeme.ps1' 0
<#
.SYNOPSIS
    Creates a lexeme, links it to a family, tags it and stores the tag results.
.PARAMETER familyId
    Family the new lexeme is linked to.
.PARAMETER word
    Text used as both lemma and initial stem.
#>
Function Add-Lexeme($familyId, $word) {
    Write-Host "Creating lexeme $word and linking it to the newly created family $familyId`r`n" -ForegroundColor Green

    # 1. Insert a new lexeme
    $newLexeme = @{
        id=0
        lemma=$word
        stem=$word
    }
    $lexemeJson = ConvertTo-Json -InputObject $newLexeme
    $response = Invoke-WebRequest -Uri "$global:lampHost/lexeme" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes($lexemeJson)) -UseBasicParsing
    #'Lexeme insert response: ' + $response
    $parsedResponse = ConvertFrom-Json -InputObject $response

    # Nothing else happens when the insert did not report success and an id.
    if ($parsedResponse.success -and $parsedResponse.id) {
        $lexemeId = $parsedResponse.id

        # 2. Link to the family
        $whatever = Invoke-WebRequest -Uri "$global:lampHost/lexemeFamilies?lexeme=$lexemeId&families=$familyId" -Method POST -Headers $global:authorizationToken -UseBasicParsing

        # 3. Tag
        $newLexeme.id = $lexemeId
        $lexemeJson = ConvertTo-Json -InputObject $newLexeme
        #"Tagging: " + $lexemeJson
        $taggedLexeme = Invoke-RestMethod -Uri "$global:lampHost/tagLemma" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes($lexemeJson)) -UseBasicParsing
        #'Tagged lexeme: ' + $taggedLexeme
        #$parsedtaggedLexeme = ConvertFrom-Json -InputObject $taggedLexeme

        # 4. Update from the tag results
        $newLexeme.grammar = $taggedLexeme.grammar
        $newLexeme.stem = $taggedLexeme.stem
        #"Updating: " + $lexemeJson
        # The stored lexeme is re-read to obtain its requestId, which is sent with the PUT.
        $response = Invoke-RestMethod -Uri "$global:lampHost/lexeme?id=$lexemeId" -Method GET -Headers $global:authorizationToken -UseBasicParsing
        #'Updating lexeme: ' + $response
        $newLexeme.requestId = $response.requestId
        $lexemeJson = ConvertTo-Json -InputObject $newLexeme
        $response = Invoke-RestMethod -Uri "$global:lampHost/lexeme" -Method PUT -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes($lexemeJson)) -UseBasicParsing
        #$response
    }
}
#EndRegion '.\Private\Add-Lexeme.ps1' 38
#Region '.\Private\Convert-UposToPenn.ps1' 0
<#
.SYNOPSIS
    Maps a comma-separated list of universal POS tags to Penn-style tags.
.DESCRIPTION
    Each tag is trimmed and looked up in a fixed table; tags without an entry
    are passed through unchanged. Trailing commas are removed from the result.
.PARAMETER tags
    Comma-separated tag string, e.g. "NOUN,NOUN,VERB".
.OUTPUTS
    A single comma-separated string.
#>
function Convert-UposToPenn{
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Comma sepereated string of tags")][System.Object[]]$tags
    )
    $UniversalToPenn = @{
        "ADJ" = "JJ"
        "ADP" = "IN"
        "ADV" = "RB"
        "CONJ" = "CC"
        "DET" = "DT"
        "NOUN" = "NN"
        "NUM" = "CD"
        "PRT" = "RP"
        "PRON" = "PRP"
        "VERB" = "VB"
        "." = "."
        "X" = "NN"
        "PART" = "RP"
        "SCONJ" = "IN"
        "V" = "VB"
        "VM" = "VB"
        "INTJ" = "UH"
    }
    $output = ""
    $tags = $tags.Split(",")
    foreach ($tag in $tags) {
        $tag=$tag.ToString().Trim()
        if ($UniversalToPenn.ContainsKey($tag)) {
            $output += $UniversalToPenn[$tag] + ","
        }
        else {
            # Unknown tags are kept as they are.
            $output += $tag + ","
        }
    }
    # Write-Host $output
    return $output.TrimEnd(",")
}
# $tags = "NOUN,NOUN,VERB,PUNCT"
# Convert-UposToPenn -tags $tags
#EndRegion '.\Private\Convert-UposToPenn.ps1' 40
#Region '.\Private\Make-OneRowOfCsv.ps1' 0
<#
.SYNOPSIS
    Builds one output row (family description, definition, id, suggestion).
.DESCRIPTION
    Looks the family up via GET /knowledgeGraph. When the response body is
    "[]" both description and definition are set to "<family not found>".
.PARAMETER familyId
    Family to look up.
.PARAMETER suggestedExpression
    Expression stored in the suggested_expression column.
#>
function Make-OneRowOfCsv{
    Param(
        [Parameter(Mandatory=$true, HelpMessage="Family Id ")][string]$familyId,
        [Parameter(Mandatory=$true, HelpMessage="Suggested Expression ")][string]$suggestedExpression
    )
    $response=Invoke-WebRequest -Uri "$global:lampHost/knowledgeGraph?type=id&arg=$familyId&basic=true" -Method GET -Headers $global:authorizationToken -UseBasicParsing
    if (-not ($response.Content -eq "[]")){
        $response = ConvertFrom-Json $response.Content
        $description = $response.representativeLemma
        $definition = $response.definition
    }
    else {
        $description = "<family not found>"
        $definition = "<family not found>"
    }
    $csvObject = [PSCustomObject]@{
        "description" = $description
        "definition" = $definition
        "familyId" = $familyId
        "suggested_expression" = $suggestedExpression
    }
    return $csvObject
}

<#
.SYNOPSIS
    Cleans a generated phrase: drops everything before the first letter,
    removes ? | ! . and the ellipsis character, trims, and lower-cases the first character.
.PARAMETER rawString
    Text to clean.
.OUTPUTS
    The cleaned string; an empty string when nothing is left after cleaning.
#>
function Remove-Punctuations {
    param(
        [string]$rawString
    )
    $result =""
    # Skip leading characters until the first upper-case, lower-case or "other" letter.
    for ($i = 0; $i -lt $rawString.Length; $i++) {
        $char = $rawString[$i]
        if (([System.Globalization.CharUnicodeInfo]::GetUnicodeCategory($char) -eq "UppercaseLetter") -or ([System.Globalization.CharUnicodeInfo]::GetUnicodeCategory($char) -eq "LowercaseLetter") -or ([System.Globalization.CharUnicodeInfo]::GetUnicodeCategory($char) -eq "OtherLetter")) {
            $result = $rawString.Substring($i)
            break
        }
    }
    # $result = $rawString -Replace '^[^a-zA-Z0-9]+', ''
    # $result = $result + $rawString.Substring($result.Length)
    # Inside a character class '|' is literal, so pipe characters are removed as well.
    $result = $result -Replace '[?|!|.|…]',''
    # $result = $result -replace "^[^a-zA-Z]*", ""
    $result = $result.Trim()
    # Fix: Substring(0,1) throws on an empty string (input without letters, or only punctuation).
    if ($result.Length -eq 0) {
        return $result
    }
    $result = $result.Substring(0,1).ToLower() + $result.Substring(1)
    return $result
}

# Prototype with hard-coded sample data; the $row argument is not read.
# NOTE(review): the sentences and tag lists below are single strings and are never split,
# so .IndexOf is String.IndexOf and indexing yields single characters rather than words
# or tags. Confirm the intended behaviour before relying on the result.
function ExtractRomanianTags($row) {
    # Split the English and Romanian sentences into arrays of words
    $enWords = "i sent you a letter last week"
    $roWords = "V-am trimis o scrisoare săptămână trecută È™i vă trimit încă una azi."
    # Split the English and Romanian tags into arrays
    $enTags = "PRP,VBD,PRP,DT,NN"
    $roTags = "VBP,VBN,DT,NN,NN,VBN,CC,PRP,VBP,RB,DT,NN.,."
    # Find the index of the first English tag in the list
    $startIndex = $enTags.IndexOf("PRP")
    # Loop through each English tag and find the similar Romanian tag
    $result = @()
    for ($i = $startIndex; $i -lt $enTags.Count; $i++) {
        $enTag = $enTags[$i]
        $enWord = $enWords[$i]
        # Find the corresponding Romanian word and tag by searching for the same word in the Romanian sentence
        $roWordIndex = [array]::IndexOf($roWords, $enWord)
        $roTag = $roTags[$roWordIndex]
        # If the Romanian tag is not found, try to find a similar tag based on the English tag
        if (!$roTag) {
            switch ($enTag) {
                "PRP" { $roTag = "PRP" }
                "VBD" { $roTag = "VBN" }
                "DT" { $roTag = "DT" }
                "NN" { $roTag = "NN" }
                "JJ" { $roTag = "JJ" }
                "CC" { $roTag = "CC" }
                "MD" { $roTag = "MD" }
                "VB" { $roTag = "VB" }
                "CD" { $roTag = "CD" }
                "JJR" { $roTag = "JJR" }
            }
        }
        # Add the corresponding Romanian tag and word to the result array
        $result += "$roTag"
    }
    # Return the result array
    return $result
}

# NOTE(review): these statements run whenever the file is loaded and write to the output stream.
$row = ""
$romanianTags = ExtractRomanianTags $row
Write-Output $romanianTags
# $englishText = "i sent you a letter last week"
# $romanianText = "V-am trimis o scrisoare săptămână trecută È™i vă trimit încă una azi."
# $englishTags = "PRP,VBD,PRP,DT,NN"
# $romanianTags = "VBP,VBN,DT,NN,NN,VBN,CC,PRP,VBP,RB,DT,NN.,."
# FindTagPattern $englishTags $romanianText $romanianTags
#EndRegion '.\Private\tagsToPattern.ps1' 128
#Region '.\Public\Check-EnglishTranslation.ps1' 0
<#
.SYNOPSIS
    Lists lexemes whose English translation does not parse to the expected family.
.DESCRIPTION
    For every CSV row with a non-empty englishTranslation, the sentence is sent
    to the Tisane /parse endpoint. When the family ids returned for its words do
    not include the row's familyId, the row (with the family definition from
    LaMP) is written to the output CSV.
.PARAMETER inputCsv
    CSV with lexemeID, familyId, word and englishTranslation columns.
.PARAMETER outputCsv
    Destination CSV for the rows that failed the check.
.PARAMETER tisaneWebService
    Base URL of the Tisane web service.
.PARAMETER language
    Language code sent to /parse (default "en").
#>
function Check-EnglishTranslation {
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Path to csv with transated column : ")][String] $inputCsv,
        [Parameter(Mandatory = $true, HelpMessage="Path to output csv file : ")][String]$outputCsv,
        [Parameter(Mandatory = $true, HelpMessage="ip for Tisane web service : ")][String]$tisaneWebService,
        [Parameter(Mandatory = $false, HelpMessage="iso code of traget language from where to verify families, default is en")][String]$language="en"
    )
    Login-Lamp
    Set-LampLanguage -languageId 7
    $df = Import-Csv $inputCsv -Encoding UTF8
    $csvObjects = @()
    $i=0
    foreach ($row in $df){
        # Fix: read the row before reporting progress, so the activity text names the
        # lexeme being processed rather than the previous one.
        $lexemeId = $row.lexemeID
        $familyId = $row.familyId
        $lexeme = $row.word
        $pct = $i/ $df.Length *100
        Write-Progress -Activity "Checking Lexeme [$lexemeId]" -Status "$pct%" -PercentComplete $pct
        $i ++
        # The cast keeps a missing column from raising a null-method error.
        $translatedSentence = ([string]$row.englishTranslation).Trim()
        if ([string]::IsNullOrEmpty($translatedSentence)){
            continue
        }
        $definitionResponse=Invoke-RestMethod -Uri "$global:lampHost/knowledgeGraph?type=id&arg=$familyId&basic=true" -Method GET -Headers $global:authorizationToken -UseBasicParsing
        $definitionResponse = $definitionResponse.definition
        # NOTE(review): "deteministic" looks like a misspelling of a "deterministic" setting;
        # left unchanged because correcting it may change the parse output - confirm against the Tisane API.
        $body = @{
            "language" = $language
            "content" = $translatedSentence
            "settings" = @{
                "words" = $true
                "deteministic" = $false
            }
        } | ConvertTo-Json
        $response = Invoke-RestMethod -Uri "$tisaneWebService/parse" -Method POST -ContentType 'application/json' -Body $body -UseBasicParsing
        $familyList = $response.sentence_list.words.family
        Write-Host $familyList
        # Fix: -notcontains also works when the list is $null or a single value, and compares
        # numbers by value regardless of their integer width (.Contains did neither).
        if ($familyList -notcontains [int]$familyId) {
            Write-Host "bad lexeme : $lexemeId" -ForegroundColor Green
            $csvObject = [PSCustomObject]@{
                "word" = $lexeme
                "lexemeID" = $lexemeId
                "familyId" = $familyId
                "definition" = $definitionResponse
            }
            # Write-Host $csvObject
            $csvObjects += $csvObject
        }
    }
    # Write-Host $csvObjects
    $csvObjects | Export-Csv $outputCsv -NoTypeInformation -Encoding UTF8
}
# Check-EnglishTranslation -inputCsv "tsample.csv" -outputCsv "otsample.csv" -tisaneWebService
#EndRegion '.\Public\Check-EnglishTranslation.ps1' 56
#Region '.\Public\Copy-InflectionPatterns.ps1' 0
## =============================================================================
##
## Copies a set of inflection patterns, replacing advanced criteria and / or strings
##
## =============================================================================
<#
.SYNOPSIS
    Copies the inflection patterns of a range, applying replacements to the copies.
.DESCRIPTION
    Every inflection rule listed by GET /affixList for the range is fetched.
    A rule is posted as a new rule (id 0) when at least one replacement changed
    it: a mapped advanced criterion, a regex replacement in an affix, or a
    trigger switch from oldTrigger to each of newTriggers.
.PARAMETER languageId
    LaMP language to work in.
.PARAMETER range
    Range id passed to /affixList.
.PARAMETER oldCriteria
    Advanced criteria ids to replace; paired by position with newCriteria.
.PARAMETER newCriteria
    Replacement advanced criteria ids.
.PARAMETER replaceRegexes
    Regex patterns applied to each affix; paired by position with replaceWith.
.PARAMETER replaceWith
    Replacement strings.
.PARAMETER oldTrigger
    Trigger id to replace.
.PARAMETER newTriggers
    Trigger ids to create copies for.
#>
function Copy-InflectionPatterns {
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Language ID: ")][int] $languageId,
        [Parameter(Mandatory = $true, HelpMessage="Range ID: ")][int] $range,
        [Parameter(Mandatory = $false, HelpMessage="Advanced criteria to replace: ")][int[]] $oldCriteria,
        [Parameter(Mandatory = $false, HelpMessage="Advanced criteria to replace with: ")][int[]] $newCriteria,
        [Parameter(Mandatory = $false, HelpMessage="Replace strings: ")][String[]] $replaceRegexes, # an array of Tisane feature values
        [Parameter(Mandatory = $false, HelpMessage="Replace with: ")][String[]] $replaceWith,
        [Parameter(Mandatory = $false, HelpMessage="Old trigger: ")][int] $oldTrigger,
        [Parameter(Mandatory = $false, HelpMessage="New trigger: ")][int[]] $newTriggers
    )
    # Fix: the validation exits below use 'return'. 'Break' outside a loop does not just
    # leave the function; it unwinds into the caller and can stop the calling script.
    $replaceTrigger = ($oldTrigger -gt 0 -and $newTriggers)
    if (-not($replaceRegexes -or $oldCriteria -or $replaceTrigger)) {
        Write-Host "Nothing to replace. Either supply an array of regexes/strings to replace (under replaceRegexes parameter, array of strings) or an array of criteria (under oldCriteria parameter, array of integers), or new triggers" -ForegroundColor Red
        return
    }
    if ($replaceRegexes -and (-not($replaceWith) -or $replaceRegexes.Length -ne $replaceWith.Length)) {
        Write-Host "replaceRegexes and replaceWith must have the same number of elements" -ForegroundColor Red
        return
    }
    if ($oldCriteria -and (-not($newCriteria) -or $oldCriteria.Length -ne $newCriteria.Length)) {
        Write-Host "oldCriteria and newCriteria must have the same number of elements" -ForegroundColor Red
        return
    }
    Login-Lamp
    # Fix: the parameter is $languageId; $language is not defined in this function.
    Set-LampLanguage -languageId $languageId
    $affixList = Invoke-RestMethod -Uri "$global:lampHost/affixList?tagging=false&arg=$range&type=range" -Headers $global:authorizationToken -UseBasicParsing
    ## create an array of inflection IDs to retrieve and copy
    $affixList | ForEach-Object {
        $_.inflectionRules | ForEach-Object {
            $currentInflectionPatternId = $_.inflectionRuleId
            #"Passsing $currentInflectionPatternId"
            $inflectionPattern = Invoke-RestMethod -Uri "$global:lampHost/inflectionRule?id=$currentInflectionPatternId" -Headers $global:authorizationToken -UseBasicParsing
            $changed = $false
            $targetTriggers = @()
            $targetTriggers += $inflectionPattern.triggerId
            # Fix: only switch triggers when a trigger replacement was requested. Without the
            # guard an omitted -oldTrigger (0) matched every pattern whose triggerId is 0 and
            # copied it with an empty trigger list.
            if ($replaceTrigger -and $oldTrigger -eq $inflectionPattern.triggerId) {
                $inflectionPattern.triggerId = 0 # $newTrigger
                $targetTriggers = $newTriggers
                $changed = $true
            }
            $originalPatternId = $inflectionPattern.id
            $inflectionPattern.affixes | ForEach-Object {
                if ($oldCriteria) {
                    $indexInLabelArray = [array]::IndexOf($oldCriteria, $_.advancedCriteriaId)
                    #"Index: $indexInLabelArray"
                    #"Advanced criteria: " + $_.advancedCriteriaId
                    if ($indexInLabelArray -gt -1) {
                        $_.advancedCriteriaId = $newCriteria[$indexInLabelArray]
                        $changedAdvancedCriteriaId = $_.advancedCriteriaId
                        $changed = $true
                        Write-Host "New advanced criteria in inflection pattern $originalPatternId : $changedAdvancedCriteriaId" -ForegroundColor Green
                    }
                }
                if ($replaceRegexes) {
                    # Replacements are applied in order; each one sees the result of the previous.
                    For ($i = 0; $i -lt $replaceRegexes.length; $i++) {
                        $currentRegex = $replaceRegexes[$i]
                        $currentReplacement = $replaceWith[$i]
                        $affixText = $_.affix
                        #"Replacing $currentRegex with $currentReplacement in $affixText"
                        $_.affix = $affixText -replace $currentRegex, $currentReplacement
                        if ($affixText -ne $_.affix) {
                            $affixText = $_.affix
                            $changed = $true
                            Write-Host "New affix text in inflection pattern $originalPatternId : $affixText" -ForegroundColor Green
                        }
                    }
                }
            }
            if ($changed) {
                "New pattern will be created based on pattern $originalPatternId"
                $inflectionPattern.id = 0 ## when adding new inflection patterns, it has to be set to 0
                $targetTriggers | ForEach-Object {
                    $inflectionPattern.triggerId = $_
                    if ($newTriggers) {
                        $currentTrigger = $_
                        Write-Host "Copying to trigger $currentTrigger" -ForegroundColor Green
                        $inflectionPattern
                    }
                    ##$inflectionPattern | Get-Member
                    $outJson = $inflectionPattern | ConvertTo-Json -Depth 30
                    #$outJson
                    ##Read-Host -Prompt "Press Enter to continue"
                    ## Non-ASCII strings have issues. Workaround from here: https://stackoverflow.com/questions/15290185/invoke-webrequest-issue-with-special-characters-in-json
                    $response = Invoke-WebRequest -Uri "$global:lampHost/inflectionRule" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes($outJson)) -UseBasicParsing
                }
            }
        }
    }
}
#EndRegion '.\Public\Copy-InflectionPatterns.ps1' 100
#Region '.\Public\Edits-2CSV.ps1' 0
<#
.SYNOPSIS
    Exports a user's edit list to a comma-separated text file.
.DESCRIPTION
    Fetches GET /editList (range 1176) for the target user and writes one line
    per edit: transaction, action, table, date/time and, when the request body
    contains one, the lemma.
.PARAMETER csv
    Output file path.
.PARAMETER targetUser
    User whose edits are listed.
#>
function Edits-2CSV {
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Filename")][String] $csv,
        [Parameter(Mandatory = $true, HelpMessage="Target user")][String] $targetUser
    )
    Login-Lamp
    # NOTE(review): $languageId is not a parameter of this function; it is only set if a
    # calling scope defines it. Confirm which language is intended.
    Set-LampLanguage -languageId $languageId
    $edits = Invoke-RestMethod -Uri "$global:lampHost/editList?arg=1176&type=range&user=$targetUser" -Method GET -Headers $global:authorizationToken -UseBasicParsing
    $outContent = 'Transaction ID, Action, Data Table, Date/time, Lemma'
    $edits | ForEach-Object {
        $lemma = ''
        if ($_.requestBody) {
            $parsedRequestBody = ConvertFrom-Json -InputObject $_.requestBody
            if ($parsedRequestBody.lemma) {
                $lemma = $parsedRequestBody.lemma
            }
        }
        $line = "" + $_.transaction + "," + $_.action + "," + $_.table + "," + $_.datetime + ", $lemma"
        $outContent = $outContent + [Environment]::NewLine + $line
    }
    $outContent | Set-Content $csv -encoding UTF8
    [gc]::Collect()
    [gc]::WaitForPendingFinalizers()
}
#EndRegion '.\Public\Edits-2CSV.ps1' 31
#Region '.\Public\Find-RedundantInflections.ps1' 0
<#
.SYNOPSIS
    Reports inflection patterns that GET /unusedForms lists for the given lexemes.
.DESCRIPTION
    Builds a key (id + propagatedId + spawnedFrom) for every generatedBy entry
    of the first lexeme, then narrows that key set with each further lexeme's
    response and prints the ids that matched.
.PARAMETER languageId
    LaMP language to work in.
.PARAMETER lexeme
    Array of lexeme ids; the first one seeds the comparison.
#>
function Find-RedundantInflections{
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Lamp Language Id")][String] $languageId,
        [Parameter(Mandatory = $true, HelpMessage="Array of lexeme Id's (Integer)")][Array] $lexeme
    )
    Write-Host -ForegroundColor Yellow "WARNING: This process will take long time to complete"
    Login-Lamp
    Set-LampLanguage -languageId $languageId
    $first,$lexemes=$lexeme
    $firstResponse = Invoke-WebRequest -Uri "$global:lampHost/unusedForms?lexeme=$first" -Method GET -Headers $global:authorizationToken -UseBasicParsing | ConvertFrom-Json
    # $firstResponse = Get-Content "$first.json" | ConvertFrom-Json
    [System.Collections.ArrayList]$firstIds = @()
    # $firstResponse
    $propId=""
    $spawnId=""
    foreach ($generatedByObj in $firstResponse.generatedBy){
        if ($generatedByObj) {
            if ($generatedByObj -match "propagatedId"){
                $propId = $generatedByObj.propagatedId.ToString()
            }
            else {
                $propId = ""
            }
            if ($generatedByObj -match "spawnedFrom") {
                $spawnId=$generatedByObj.spawnedFrom.ToString()
            }
            else {
                $spawnId = ""
            }
            $stringId = $generatedByObj.id.ToString() + $propId + $spawnId
            #"String ID: $stringId"
            [void]$firstIds.Add($stringId)
        }
    }
    #"So far: $firstIds"
    $propId=""
    $spawnId=""
    [System.Collections.ArrayList]$unMatchedIds = @()
    [System.Collections.ArrayList]$unMatchedStringIds = @()
    # Fix: the counter is initialised once, before the loop. It used to be reset to 0 at
    # the top of every iteration, so the progress bar never moved.
    $i = 0
    foreach ($lexemeId in $lexemes) {
        $pct = $i / $lexemes.length * 100
        Write-Progress -Activity "Processing lexeme Id" -Status "$pct% $lexemeId" -PercentComplete $pct
        # $Response = Get-Content "$lexemeId.json" | ConvertFrom-Json
        $Response = Invoke-WebRequest -Uri "$global:lampHost/unusedForms?lexeme=$lexemeId" -Method GET -Headers $global:authorizationToken -UseBasicParsing | ConvertFrom-Json
        # $Response
        $responeIds=@()
        if ($Response.generatedBy -match "id"){
            # $responeIds += $Response.generatedBy.id
            foreach ($generatedByObj in $Response.generatedBy){
                if ($generatedByObj) {
                    if ($generatedByObj -match "propagatedId"){
                        $propId=$generatedByObj.propagatedId.ToString()
                    }
                    else {
                        $propId = ""
                    }
                    if ($generatedByObj -match "spawnedFrom"){
                        $spawnId=$generatedByObj.spawnedFrom.ToString()
                    }
                    else {
                        $spawnId = ""
                    }
                    $stringId = $generatedByObj.id.ToString() + $propId + $spawnId
                    if ($stringId -in $firstIds){
                        [void]$unMatchedIds.Add($generatedByObj.id)
                        [void]$unMatchedStringIds.Add($stringId)
                    }
                }
            }
            # The surviving keys become the reference set for the next lexeme.
            # NOTE(review): $unMatchedIds is never narrowed, so it also keeps ids that matched
            # an earlier lexeme but not a later one - confirm this is the intended report.
            $firstIds = @()
            $firstIds = $firstIds+$unMatchedStringIds
            $unMatchedStringIds.Clear()
            $i += 1
        }
        else {
            Write-Host "No inflection patterns (unused in all the provided lexemes) found"
            # Fix: leave the function instead of calling 'exit', which ends the whole host session.
            return
        }
    }
    if ($unMatchedIds -and $unMatchedIds.Count -gt 0) {
        Write-Host "Following inflection patterns were Unmatched in all the lexemes given"
        # Write-Host($firstIds -join ", ") -ForegroundColor Yellow
        $unMatchedIds_ = $unMatchedIds | select -Unique
        Write-Host($unMatchedIds_ -join ", ") -ForegroundColor Yellow
    }
    else {
        Write-Host "No inflection patterns (unused in all the provided lexemes) found"
    }
}
###############################################################################################################################################################
#
# END OF SCRIPT
#
###############################################################################################################################################################
# try {
#     foreach ($unMatchedId in $firstIds) {
#         # Write-Host $unMatchedId -ForegroundColor Yellow
#         ($firstResponse.generatedBy.Where({$_.id -eq $unMatchedId}))
#     }
# }
# catch{
#     $firstResponse.generatedBy
# }
# ($rep.generatedBy | Get-Member id).Definition -eq ("int id=$id")
# ($rep.generatedBy.Where({$_.id -eq 32692}))
# @(32497, 32692, 32499, 31749, 31778, 31769, 31772)
#hindi lexeme
# 3414989
# 3266941
# 3415040
#EndRegion '.\Public\Find-RedundantInflections.ps1' 115
#Region '.\Public\Generate-FormulaicExpressionSynonyms.ps1' 0
<#
.SYNOPSIS
    Asks the OpenAI API for phrases similar to each family's expressions and saves them as CSV.
.DESCRIPTION
    Rows of the input CSV are grouped by FamilyId. For each group a prompt is
    built from up to inputSample MainLemma values, the completion is split into
    lines, each line is cleaned with Remove-Punctuations, and suggestions not
    already present in the group are turned into rows with Make-OneRowOfCsv.
    When apiKey is supplied it is stored with Save-LampSetting; otherwise it is
    read with Get-LampSetting.
#>
function Generate-FormulaicExpressionSynonyms{
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory=$true, HelpMessage="English language name : ")][string]$language,
        [Parameter(Mandatory=$true, HelpMessage="Path to CSV file to process")][string]$inputFile,
        [Parameter(Mandatory=$false, HelpMessage="Open ai api key")][string]$apiKey,
        [Parameter(Mandatory=$true, HelpMessage="Path to CSV file to save the result (example 'C:\\tisane\output.csv')")][string]$outputFile,
        [Parameter(Mandatory=$false, HelpMessage="end of the prompt for query. Default : 'in <language>'")][string]$endPrompt,
        [Parameter(Mandatory=$false, HelpMessage="start of the prompt for query. Default : 'similar phrases to'")][string]$startPrompt = "similar phrases to",
        [Parameter(Mandatory=$false, HelpMessage="Number to input phrase sample to use when generating similar phrases. Default : 10")][decimal]$inputSample = 10,
        [Parameter(Mandatory=$false, HelpMessage="The model to use for the API call. default text-davinci-003")][string]$model = "text-davinci-003",
        [Parameter(Mandatory=$false, HelpMessage="The value of the top_p parameter. Defaults to 1.")][decimal]$topP = 1,
        [Parameter(Mandatory=$false, HelpMessage="The value of the temperature to use for the API call. Defaults to 0.5.")][decimal]$temperature = 0.5,
        [Parameter(Mandatory=$false, HelpMessage="The maximum number of tokens to generate. Defaults to 128.")][int]$maxTokens = 128,
        [Parameter(Mandatory=$false, HelpMessage="The value of the frequency_penalty parameter. Defaults to 0.")][decimal]$frequencyPenalty = 0,
        [Parameter(Mandatory=$false, HelpMessage="The value of the presence_penalty parameter. Defaults to 0.")][decimal]$presencePenalty = 0
    )
    Login-Lamp
    if (-not [string]::IsNullOrEmpty($apiKey)){
        Save-LampSetting -settingName 'OpenAiApiKey' -settingValue $apiKey
    }else{
        $apiKey = Get-LampSetting -settingName 'OpenAiApiKey' -defaultValue $apiKey
    }
    $englishLanguageName = $language
    if (-not $endPrompt){
        $endPrompt = "in $englishLanguageName,1 per line"
    }
    $csvObjects = @()
    $df = Import-Csv $inputFile -Encoding UTF8
    $groupedDf = $df | Group-Object -Property FamilyId
    $groupedDf | ForEach-Object -Begin {$index = 0} -Process {
        $mainLemma = $groupedDf[$index].Group.MainLemma
        if ($mainLemma.GetType().Name -eq "String"){
            # A group with a single row yields a plain string rather than an array.
            $samplePhrases = $mainLemma
        }else{
            # Fix: take exactly $inputSample phrases. The former 0..$inputSample range is
            # inclusive at both ends and selected one phrase too many.
            $samplePhrases = @($mainLemma | Select-Object -First ([int]$inputSample))
        }
        $familyId = $groupedDf[$index].Name
        $samplePhrases = $samplePhrases -join "','"
        $prompt = "{0} '{1}' {2}" -f $startPrompt,$samplePhrases,$endPrompt
        Write-Host "Generating similar phrases to [$familyId] $samplePhrases" -ForegroundColor Green
        $response = Invoke-OpenAiApiCall -prompt $prompt -model $model -apiKey $apiKey -temperature $temperature -maxTokens $maxTokens -topP $topP -frequencyPenalty $frequencyPenalty -presencePenalty $presencePenalty
        $response_1 = $response.Content | ConvertFrom-Json
        # NOTE(review): the characters of the completion are passed to UTF8.GetString, i.e.
        # re-interpreted as UTF-8 bytes; presumably a workaround for a mis-decoded response - confirm.
        $response = [System.Text.Encoding]::UTF8.GetString($response_1.choices[0].text[0..10000])
        $FESynonyms = $response -split "\n"
        foreach ($FESynonym in $FESynonyms){
            # Skip empty and single-character lines.
            if (-not ($FESynonym.Length -gt 1)){
                continue
            }
            Write-Host $FESynonym
            $cleanSynonym = Remove-Punctuations -rawString $FESynonym
            # Skip suggestions that the family already contains.
            if ($groupedDf[$index].Group.MainLemma.Contains($cleanSynonym)){
                continue
            }
            $csvObject = Make-OneRowOfCsv -familyId $familyId -suggestedExpression $cleanSynonym
            $csvObjects += $csvObject
        }
        $pct = $index / $groupedDf.Length *100
        Write-Progress -Activity "Generating similar phrases to [$familyId] $samplePhrases " -Status "$pct%" -PercentComplete $pct
        $index ++
    }
    $csvObjects | Export-Csv $outputFile -NoTypeInformation -Encoding UTF8
}
# NOTE(review): the two statements below run whenever this file is loaded; the second one
# starts a full generation run against hard-coded paths. Confirm they are meant to ship.
$OutputEncoding = [console]::InputEncoding = [console]::OutputEncoding = New-Object System.Text.UTF8Encoding
Generate-FormulaicExpressionSynonyms -language "russian" -inputFile "TisaneLampClient\RussianFormulaicExpressions.csv" -outputFile "TisaneLampClient\aug_RussianFormulaicExpressions.csv"
#EndRegion '.\Public\Generate-FormulaicExpressionSynonyms.ps1' 71
#Region '.\Public\Generate-PhrasesWithLexemes.ps1' 0
<#
.SYNOPSIS
    Collects corpus sentences that contain each lexeme of the input CSV.
.DESCRIPTION
    The corpus is a tab-separated file with original/translated sentence pairs.
    For every input row, sentences whose original text contains the word and
    has at most one terminal punctuation mark are joined with "," and written
    to the output CSV together with their translations.
.PARAMETER languageId
    Not used by the function body.
.PARAMETER inputCsv
    CSV with word, lexemeID and familyId columns.
.PARAMETER outputCsv
    Destination CSV.
.PARAMETER corpusPath
    Tab-separated corpus file.
.PARAMETER punctuations
    Characters counted as terminal punctuation (default ".!?").
#>
function Generate-PhrasesWithLexemes {
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Language Id: ")][int]$languageId,
        [Parameter(Mandatory = $true, HelpMessage="Path to csv file : ")][String]$inputCsv,
        [Parameter(Mandatory = $true, HelpMessage="Path to output csv file : ")][String]$outputCsv,
        [Parameter(Mandatory = $true, HelpMessage="Path to corpus of the same language : ")][String]$corpusPath,
        [Parameter(Mandatory = $false, HelpMessage="string of punctuations : ")][String]$punctuations = ".!?"
    )
    # Fix: keep the individual marks in a separate variable. Assigning the split result
    # back to the [String]-typed parameter re-joined it into a single string.
    $terminalMarks = $punctuations.ToCharArray()
    $df = Import-Csv $inputCsv -Encoding UTF8
    $corpusDf = Import-Csv -Path $corpusPath -Delimiter "`t" -Header "originalSentenceId","originalSentence","translatedSentenceId","translatedSentence"
    $csvObjects = @()
    $i = 0
    foreach ($row in $df) {
        $lexeme = $row.word
        $pct = $i/$df.length * 100
        Write-Progress -Activity "Generating sentences for" -Status "$pct% $lexeme" -PercentComplete $pct
        $i +=1
        $sentences = $corpusDf | Where-Object {$_.originalSentence -like "*$lexeme*"}
        #removing rows with more then one terminal punctuation
        # Fix: the previous filter read a non-existent 'sentence' column from the punctuation
        # item instead of the row, so it never removed anything. Terminal marks are now
        # counted in the original sentence and rows with more than one are dropped.
        $sentences = $sentences | Where-Object {
            $candidate = [string]$_.originalSentence
            $terminalCount = 0
            foreach ($mark in $terminalMarks) {
                $terminalCount += [regex]::Matches($candidate, [regex]::Escape([string]$mark)).Count
            }
            $terminalCount -le 1
        }
        $originalSentences = $sentences.originalSentence -join ","
        $translatedSentences = $sentences.translatedSentence -join ","
        $csvObject = [PSCustomObject]@{
            "word" = $lexeme
            "lexemeID" = $row.lexemeID
            "familyId" = $row.familyId
            "originalSentence" = $originalSentences
            "englishTranslation" = $translatedSentences
        }
        $csvObjects += $csvObject
    }
    $csvObjects | Export-Csv $outputCsv -NoTypeInformation -Encoding UTF8
}
# Generate-PhrasesWithLexemes -languageId 36 -inputCsv "sample.csv" -outputCsv "tsample.csv" -corpusPath "hindi-english.tsv"
#EndRegion '.\Public\Generate-PhrasesWithLexemes.ps1' 42
#Region '.\Public\Get-SmallPhrase.ps1' 0
# function Make-PhrasalPatterns{
#     Param(
#         [Parameter(Mandatory = $true, HelpMessage="Language Id: ")][int]$languageId,
#         [Parameter(Mandatory = $true, HelpMessage="Path to input csv file : ")][String]$InputCsv,
#         [Parameter(Mandatory = $true, HelpMessage="Path to output csv file : ")][String]$outputCsv,
#         [Parameter(Mandatory = $true, HelpMessage="ip for Tisane web service : ")][String]$tisaneWebService,
#         [Parameter(Mandatory = $true, HelpMessage="Path to csv file to store phrases which could not be processed : ")][String]$errorOutputCsv
#     )
#     $df = Import-Csv $InputCsv -Encoding UTF8
#     foreach ($row in $df){
#         $translatedSentence = $row.translatedSentence
#         $translatedTags = $row.trnaslatedTags
#         $originalSentence = $row.originalSentence
#         $originalTags = $row.originalTags
#         $body = @{
#             "language" = $language
#             "content" = $translatedSentence
#             "settings" = @{"parses" = $true
#                 "debug" = $true
#                 "feature_standard" = "penn"
#             }
#         } | ConvertTo-Json
#         $translatedSentenceParse = Invoke-RestMethod -Uri "$tisaneWebService/parse" -Method POST -ContentType 'application/json' -Body $body -UseBasicParsing
#     }
# }
<#
.SYNOPSIS
    Sends the translations of short corpus sentences (2-3 words) to Tisane /parse.
.DESCRIPTION
    Reads a tab-separated corpus, keeps rows whose original sentence splits
    into two or three whitespace-separated parts, and posts each translated
    sentence to the /parse endpoint with Penn feature tags requested.
    NOTE(review): the parse result is assigned but not used and nothing is
    written to outputCsv; the function appears to be unfinished.
.PARAMETER languageId
    Not used by the function body.
.PARAMETER outputCsv
    Not used by the function body.
.PARAMETER corpusPath
    Tab-separated corpus file.
.PARAMETER tisaneWebService
    Base URL of the Tisane web service.
#>
function Get-SmallPhrase {
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Language Id: ")][int]$languageId,
        [Parameter(Mandatory = $true, HelpMessage="Path to output csv file : ")][String]$outputCsv,
        [Parameter(Mandatory = $true, HelpMessage="Path to corpus of the same language : ")][String]$corpusPath,
        [Parameter(Mandatory = $true, HelpMessage="ip for Tisane web service : ")][String]$tisaneWebService
    )
    # Login-Lamp
    $language = "en"
    # if (-not [string]::IsNullOrEmpty($apiKey)){
    #     Save-LampSetting -settingName 'OpenAiApiKey' -settingValue $apiKey
    # }else{
    #     $apiKey = Get-LampSetting -settingName 'OpenAiApiKey' -defaultValue $apiKey
    # }
    # Fix: an unused $headers hashtable holding a hard-coded Authorization value was removed
    # from this spot. It was never passed to any request. The value has been committed to
    # source, so it should be treated as exposed and rotated.
    $dfAll = Import-Csv -Path $corpusPath -Delimiter "`t" -Header "originalSentenceId","originalSentence","translatedSentenceId","translatedSentence" -Encoding UTF8
    #sorting the df by length of sentences
    #$df = $df | Sort-Object {$_.originalSentence.Length}
    #selecting only those sentences which have less than 4 words in the original sentence
    $df = $dfAll | Where-Object {$_.originalSentence.Split().Length -in 2..3}
    $csvObjects = @()
    $erroredCsvObjects = @()
    $i = 0
    foreach ($row in $df) {
        $originalSentence = $row.originalSentence
        $translatedSentence = $row.translatedSentence
        #Removing termianl punctuations
        # $translatedSentence = Remove-Punctuations -rawString $translatedSentence
        $pct = $i/$df.length * 100
        Write-Progress -Activity "Getting Pos tags for : " -Status "$pct% $originalSentence" -PercentComplete $pct
        $i +=1
        $body = @{
            "language" = $language
            "content" = $translatedSentence
            "settings" = @{
                "parses" = $true
                "debug" = $true
                "feature_standard" = "penn"
            }
        } | ConvertTo-Json
        $translatedSentenceParse = Invoke-RestMethod -Uri "$tisaneWebService/parse" -Method POST -ContentType 'application/json' -Body $body -UseBasicParsing
    }
}
#EndRegion '.\Public\Get-SmallPhrase.ps1' 80
#Region '.\Public\Import-CustomJson.ps1' 0
<#
.SYNOPSIS
    Posts the "body" of every JSON object in a line-delimited file to /testfragment.
.DESCRIPTION
    Each line of the file is parsed as JSON (one object or an array). The body
    field of every object is URL-encoded and posted for the given corpora.
    Progress is estimated from the characters read versus the file size.
.PARAMETER path
    Input file, one JSON document per line.
.PARAMETER corpora
    Corpora id passed to /testfragment.
#>
function Import-CustomJson {
    [CmdletBinding()]
    Param(
        # [Parameter(Mandatory = $true, valueFromPipeline=$true, HelpMessage="LaMP user: ")][String] $user,
        # [Parameter(Mandatory = $true, HelpMessage="LaMP password: ")][String] $password,
        [Parameter(Mandatory = $true, HelpMessage="Filename including path: ")][String] $path,
        [Parameter(Mandatory = $true, HelpMessage="Corpora ID: ")][int] $corpora
    )
    $fileSize = (Get-Item $path).Length
    $PROGRESS_UPDATE_INTERVAL = 10
    $lastReported = 0
    $processed = 0
    $TISANE_PATH = 'C:\Tisane\TestConsole\'
    $config_path = $TISANE_PATH + "Tisane.TestConsole.exe.Config"
    [System.AppDomain]::CurrentDomain.SetData("APP_CONFIG_FILE", $config_path) # assign the configuration file
    [Reflection.Assembly]::LoadFrom($tisane_path + "Tisane.Runtime.dll") # load the type
    Login-Lamp
    # NOTE(review): $languageId is not a parameter of this function; it is only set if a
    # calling scope defines it.
    Set-LampLanguage -languageId $languageId
    $r = [IO.File]::OpenText($path)
    # Fix: the reader is released in 'finally', so the file handle is closed even when a
    # line fails to parse or a request throws.
    try {
        $startedAt = Get-Date
        "Everything is loaded. Starting at " + $startedAt.ToString('T')
        $outContent = ''
        $detected = 0
        $control = 'not yet'
        $totalPostCount = 0
        while ($r.Peek() -ge 0) {
            $line = $r.ReadLine()
            #$processed += $line.Length
            # Process $line here...
            $inJson = ConvertFrom-Json -InputObject $line
            $objectCount = $inJson.length
            $inJson | ForEach-Object {
                $totalPostCount += 1
                # Spread the line's length evenly over the objects it contains.
                $processed += $line.Length / $objectCount
                if ($processed - $lastReported -gt $PROGRESS_UPDATE_INTERVAL) {
                    $pct = ($processed / $fileSize) * 100
                    $timeTaken = ((Get-Date) - $startedAt).TotalMinutes
                    Write-Progress -Activity "Importing $path" -Status "$pct% complete" -PercentComplete $pct
                    $lastReported = $processed
                }
                $content = [System.Web.HttpUtility]::UrlEncode($_.body)
                $response = Invoke-WebRequest -Uri "$global:lampHost/testfragment?corpora=$corpora&fragment=$content&critical=false&test=true" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes('')) -UseBasicParsing
                #$response
            }
        }
        #"Control: $control"
    }
    finally {
        $r.Dispose()
    }
    # save content to a new file
    $finishedAt = Get-Date
    $timeTaken = ($finishedAt - $startedAt).TotalMinutes
    "Done at $finishedAt after $timeTaken minutes"
}
#EndRegion '.\Public\Import-CustomJson.ps1' 62
#Region '.\Public\Import-CX.ps1' 0
# =======================================================================================
# Imports lexemes from XML files
# =======================================================================================
# script parameters
<#
.SYNOPSIS
    Imports word / English-reference pairs from an XML file through POST /import.
.DESCRIPTION
    Reads list/row elements; every row that has both a native and an english
    value is imported with the given feature. When featureId is omitted, 1 is used.
.PARAMETER cx
    Path of the XML file.
.PARAMETER languageId
    LaMP language to import into.
.PARAMETER feature
    Feature value assigned to every imported lexeme.
.PARAMETER featureId
    Feature id (defaults to 1 when not supplied).
.PARAMETER source
    Source label passed to /import.
.PARAMETER proper
    Whether the lexemes are proper nouns.
#>
function Import-CX {
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Filename")][String] $cx,
        [Parameter(Mandatory = $true, HelpMessage="Language ID")][String] $languageId,
        [Parameter(Mandatory = $true, HelpMessage="Feature value")][String] $feature,
        [Parameter(Mandatory = $false, HelpMessage="Feature ID")][String] $featureId,
        [Parameter(Mandatory = $false, HelpMessage="Source")][String] $source,
        [Parameter(Mandatory = $false, HelpMessage="Is proper noun")][Boolean] $proper
    )
    # read the content of the XML and parse it
    [xml]$dictX = Get-Content $cx
    if (-not($featureId)) {
        # if the feature ID was not supplied, use part of speech
        $featureId = 1
    }
    Login-Lamp
    Set-LampLanguage -languageId $languageId
    # Fix: values placed in the query string are percent-encoded, so words containing
    # '&', '#', '+' or '=' no longer corrupt the request.
    $encodedFeatureId = [System.Uri]::EscapeDataString([string]$featureId)
    $encodedFeature = [System.Uri]::EscapeDataString($feature)
    $encodedSource = [System.Uri]::EscapeDataString($source)
    # process XML entries
    $dictX.list.row | ForEach-Object {
        $word = $_.native
        $referenceWord = $_.english
        if ($word -and $referenceWord) {
            # we have the word and its translation to English. Call the POST /import method
            "Importing $word -> $referenceWord"
            $encodedWord = [System.Uri]::EscapeDataString([string]$word)
            $encodedReference = [System.Uri]::EscapeDataString([string]$referenceWord)
            $response = Invoke-WebRequest -Uri "$global:lampHost/import?lexeme=$encodedWord&reference=$encodedReference&proper=$proper&hypernym=0&featureList=$encodedFeatureId&featureValue=$encodedFeature&source=$encodedSource&orgId=$encodedReference" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes('')) -UseBasicParsing
            #$response
        }
    }
    # clean up RAM
    [gc]::Collect()
    [gc]::WaitForPendingFinalizers()
}
#EndRegion '.\Public\Import-CX.ps1' 43
#Region '.\Public\Import-Emoji.ps1' 0
<#
.SYNOPSIS
    Imports emoji from a comma-separated file (emoji, long name, short name per line).
.DESCRIPTION
    Each non-blank line is split on "," and passed to Add-Emoji. The -source
    value is picked up by Add-Emoji through dynamic scoping.
.PARAMETER csv
    Input file.
.PARAMETER languageId
    LaMP language to import into.
.PARAMETER featureId
    Not used by the function body.
.PARAMETER source
    Source label read by Add-Emoji.
#>
function Import-Emoji{
    [CmdletBinding()]
    Param(
        # [Parameter(Mandatory = $true, valueFromPipeline=$true, HelpMessage="LaMP user: ")][String] $user,
        # [Parameter(Mandatory = $true, HelpMessage="LaMP password: ")][String] $password,
        [Parameter(Mandatory = $true, HelpMessage="Filename")][String] $csv,
        [Parameter(Mandatory = $true, HelpMessage="Language ID")][String] $languageId,
        [Parameter(Mandatory = $false, HelpMessage="Feature ID")][String] $featureId,
        [Parameter(Mandatory = $false, HelpMessage="Source")][String] $source
    )
    # BEGIN fix for Powershell bug: in some cases, the configuration files aren't read properly
    # Add-Type -AssemblyName System.Configuration
    # [Configuration.ConfigurationManager].GetField("s_initState", "NonPublic, Static").SetValue($null, 0)
    # [Configuration.ConfigurationManager].GetField("s_configSystem", "NonPublic, Static").SetValue($null, $null)
    # ([Configuration.ConfigurationManager].Assembly.GetTypes() | where {$_.FullName -eq "System.Configuration.ClientConfigPaths"}).GetField("s_current", "NonPublic, Static").SetValue($null, $null)
    # [Configuration.ConfigurationManager]::ConnectionStrings[0].Name
    # END fix for Powershell bug: in some cases, the configuration files aren't read properly
    ## overcome the HTTPS error: https://stackoverflow.com/questions/11696944/powershell-v3-invoke-webrequest-https-error
    # If (-not ("TrustAllCertsPolicy" -as [type])) {
    #     Add-Type @"
    #     using System.Net;
    #     using System.Web;
    #     using System.Security.Cryptography.X509Certificates;
    #     public class TrustAllCertsPolicy : ICertificatePolicy {
    #         public bool CheckValidationResult(
    #             ServicePoint srvPoint, X509Certificate certificate,
    #             WebRequest request, int certificateProblem) {
    #             return true;
    #         }
    #     }
    # "@
    # }
    # [System.Net.ServicePointManager]::CertificatePolicy = New-Object TrustAllCertsPolicy
    # Write-Progress -Activity "Authenticating" -Status "Normally takes a couple of seconds"
    # $productionHost = ''
    # #$productionHost = 'https://lampws.tisane.ai:443'
    # $authenticationBody = '["' + $user + '", "' + $password + '"]'
    # $productionAuthentication = Invoke-WebRequest -Uri "$productionHost/authenticate" -Method POST -Body $authenticationBody
    # $inJson = ConvertFrom-Json -InputObject $productionAuthentication.Content
    # $authorizationToken = @{}
    # $authorizationToken.Add('Authorization', $inJson.token)
    # $OutputEncoding = [console]::InputEncoding = [console]::OutputEncoding = New-Object System.Text.UTF8Encoding
    Login-Lamp
    # Fix: read the file as UTF-8 (as the other importers in this module do), so emoji in a
    # BOM-less file are not decoded with the system code page.
    $fileLines = Get-Content $csv -Encoding UTF8
    Set-LampLanguage -languageId $languageId
    # $whatever = Invoke-WebRequest -Uri "$productionHost/setLanguage?language=$languageId" -Method POST -Headers $authorizationToken -Body ' '
    $fileLines | ForEach-Object {
        # Blank lines carry no fields; Add-Emoji would fail trimming the missing values.
        if (-not [string]::IsNullOrWhiteSpace($_)) {
            $emoji, $longName, $shortName = $_.Split(",")
            # Fix: the function defined in this module is Add-Emoji; 'AddEmoji' does not exist.
            Add-Emoji -emoji $emoji -longName $longName -shortName $shortName
        }
    }
    [gc]::Collect()
    [gc]::WaitForPendingFinalizers()
}
#EndRegion '.\Public\Import-Emoji.ps1' 65
#Region '.\Public\Import-ExtractionTestDataset.ps1' 0
function Import-ExtractionTestDataset { Param( [Parameter(Mandatory = $true, HelpMessage="Input file (UTF-8 plain text)")][String] $filename, [Parameter(Mandatory = $true, HelpMessage="Target corpora")][int] $corpora, [Parameter(Mandatory = $true, HelpMessage="Extraction type (abuse / entity / 
sentiment_expressions)")][String] $extractionType,
        [Parameter(Mandatory = $true, HelpMessage="Extraction value (positive or negative for sentiment, entity or abuse type for entities and abuse)")][String] $extractionValue,
        [Parameter(HelpMessage="Auxiliary label (optional)")][String] $auxLabel,
        [Parameter(HelpMessage="Tags (optional)")][String] $tags,
        [Parameter(HelpMessage="Test daily (optional)")][bool] $testDaily,
        [Parameter(Mandatory = $true, HelpMessage="Templates for the test fragments")][String[]] $templates
    )
    $sMustTest = ''
    if ($testDaily) {
        $sMustTest = 'true'
    } else {
        $sMustTest = 'false'
    }
    # Check the templates: each one must contain the {0} placeholder that receives the line.
    foreach ($template in $templates) {
        if (-not ($template -match "\{0\}")) {
            Write-Host "template [ $template ] does not contain {0}, recheck and run code again" -ForegroundColor RED
            Write-Host "Exiting" -ForegroundColor RED
            EXIT 1
        }
    }
    Login-Lamp
    # Labels are constant for the whole run; encode them once so they cannot break the query string.
    $encodedType = [System.Web.HttpUtility]::UrlEncode("$extractionType")
    $encodedValue = [System.Web.HttpUtility]::UrlEncode("$extractionValue")
    $encodedAux = [System.Web.HttpUtility]::UrlEncode("$auxLabel")
    $encodedTags = [System.Web.HttpUtility]::UrlEncode("$tags")
    $lines = Get-Content $filename -Encoding UTF8
    foreach ($line in $lines) {
        if (-not [string]::IsNullOrWhiteSpace($line)) {
            foreach ($template in $templates) {
                $fragment = $template -f $line
                $originalLineLength = $line.length
                $positionInTemplate = $fragment.IndexOf($line)
                Write-Host "Adding [ $fragment ] to LaMP"
                $encodedFragment = [System.Web.HttpUtility]::UrlEncode($fragment)
                $fragmentIdResponse = Invoke-RestMethod -Uri "$global:lampHost/testfragment?corpora=$corpora&fragment=$encodedFragment&test=$sMustTest" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes('')) -UseBasicParsing
                if (-not $fragmentIdResponse.success) {
                    Write-Host "Could not add [ $fragment ] to gold standard" -ForegroundColor RED
                    Write-Host $fragmentIdResponse.error -ForegroundColor RED
                    continue
                }
                $fragmentId = $fragmentIdResponse.id
                Write-Host "Adding [ $fragment ] to gold standard records"
                $response = Invoke-WebRequest -Uri "$global:lampHost/gold?corpora=$corpora&fragment=$fragmentId&provider=1&type=information%20extraction&subtype=$encodedType&attribute=$encodedType&offset=$positionInTemplate&length=$originalLineLength&num&entityId&label=$encodedValue&aux=$encodedAux&tags=$encodedTags" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes('')) -UseBasicParsing
            }
        }
    }
}
#EndRegion '.\Public\Import-ExtractionTestDataset.ps1' 51
#Region '.\Public\Import-Jargon.ps1' 0
function Import-Jargon {
    # .SYNOPSIS
    #   Imports jargon lexemes from a CSV file and links them to existing families.
    # .DESCRIPTION
    #   Each line has the form word,familyId[,extraFeatures]. The lexeme is sent to
    #   POST /importFamilies with the extra features plus "40=JAR" as assignments.
    #   Lines lacking a word or a family ID are skipped.
    # .PARAMETER csv
    #   Path of the CSV file.
    # .PARAMETER languageId
    #   LaMP language ID to activate before importing.
    # .PARAMETER source
    #   Source label stored with the lexeme.
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Filename")][String] $csv,
        [Parameter(Mandatory = $true, HelpMessage="Language ID")][String] $languageId,
        [Parameter(Mandatory = $false, HelpMessage="Source")][String] $source
    )
    # BEGIN fix for Powershell bug: in some cases, the configuration files aren't read properly
    Add-Type -AssemblyName System.Configuration
    [Configuration.ConfigurationManager].GetField("s_initState", "NonPublic, Static").SetValue($null, 0)
    [Configuration.ConfigurationManager].GetField("s_configSystem", "NonPublic, Static").SetValue($null, $null)
    ([Configuration.ConfigurationManager].Assembly.GetTypes() | where {$_.FullName -eq "System.Configuration.ClientConfigPaths"}).GetField("s_current", "NonPublic, Static").SetValue($null, $null)
    [Configuration.ConfigurationManager]::ConnectionStrings[0].Name
    $OutputEncoding = [console]::InputEncoding = [console]::OutputEncoding = New-Object System.Text.UTF8Encoding
    $fileLines = Get-Content $csv
    Login-Lamp
    Set-LampLanguage -languageId $languageId
    $encodedSource = [System.Web.HttpUtility]::UrlEncode("$source")
    $fileLines | ForEach-Object {
        # Split into at most three parts so that comma-separated extra features stay one string
        # (a plain Split(",") produced an array that was interpolated with spaces).
        $word,$familyId,$extraFeatures = $_ -split ',', 3
        if ($word -and $familyId) {
            $assign = $extraFeatures
            if ($assign) {
                $assign = [System.Web.HttpUtility]::UrlEncode("$assign,40=JAR")
            } else {
                $assign = [System.Web.HttpUtility]::UrlEncode("40=JAR")
            }
            "Importing $word -> Family $familyId"
            $encodedWord = [System.Web.HttpUtility]::UrlEncode($word)
            $response = Invoke-WebRequest -Uri "$global:lampHost/importFamilies?lexeme=$encodedWord&families=$familyId&behavior=complement&source=$encodedSource&orgId=$familyId&assign=$assign" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes('')) -UseBasicParsing
        }
    }
    [gc]::Collect()
    [gc]::WaitForPendingFinalizers()
}
#EndRegion '.\Public\Import-Jargon.ps1' 43
#Region '.\Public\Import-LexemesFromWiktionary.ps1' 0
## =============================================================================
##
## This script's purpose is to import new lexemes from Wiktionary
##
## =============================================================================
function Import-LexemesFromWiktionary{
    # .SYNOPSIS
    #   Imports the members of an English Wiktionary category as lexemes.
    # .DESCRIPTION
    #   Pages of Category:$category are listed through the MediaWiki API (500 per request,
    #   following "gcmcontinue"). Each title is parsed with Tisane.Helper.EnglishWiktionaryParser;
    #   every interpretation with English glosses is POSTed to "$global:lampHost/import"
    #   together with the feature assignments derived from its tag and style flags.
    # .PARAMETER language
    #   ISO code of the language to import.
    # .PARAMETER path
    #   Folder (with trailing separator) that contains Tisane.Runtime.dll.
    # .PARAMETER pos
    #   Part of speech as named in the parsed article; mapped to a Tisane value below.
    # .PARAMETER pruneListId
    #   When greater than 0, sent as featureList2 together with pruneValue.
    # .PARAMETER listId
    #   Feature list ID used for label values that carry no "list=value" form.
    # .PARAMETER labels
    #   Wiktionary tags; $values holds the feature value for the tag at the same index.
    # .PARAMETER category
    #   Wiktionary category name.
    # .PARAMETER complement
    #   Sent as complement=1 when set, otherwise complement=0.
    # .PARAMETER validatingRegex
    #   When supplied, only titles matching this regex are imported.
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Language code: ")][String] $language,
        [Parameter(Mandatory = $true, HelpMessage="Path: ")][String] $path,
        [Parameter(Mandatory = $true, HelpMessage="Part of speech: ")][String] $pos,
        [Parameter(Mandatory = $false, HelpMessage="Prune Feature List ID: ")][int] $pruneListId,
        [Parameter(Mandatory = $false, HelpMessage="Prune Feature Value: ")][String] $pruneValue,
        [Parameter(Mandatory = $false, HelpMessage="Feature List ID: ")][int] $listId,
        [Parameter(Mandatory = $false, HelpMessage="Wiktionary labels: ")][String[]] $labels,
        [Parameter(Mandatory = $false, HelpMessage="Feature values: ")][String[]] $values, # an array of Tisane feature values
        [Parameter(Mandatory = $true, HelpMessage="Wiktionary category: ")][String] $category,
        [Parameter(Mandatory = $false, HelpMessage="Complement existing lexemes: ")][bool] $complement,
        [Parameter(Mandatory = $false, HelpMessage="Regex to verify: ")][String] $validatingRegex
    )
    class Feature {
        [string]$index
        [string]$value
        [string]$type
        Feature([string]$index, [string]$value, [string]$type) {
            $this.index = $index
            $this.value = $value
            $this.type = $type
        }
    }
    [Reflection.Assembly]::LoadFrom($path + "Tisane.Runtime.dll")
    Login-Lamp
    $languageJSON = Invoke-RestMethod -Uri "$global:lampHost/languages" -Method GET -UseBasicParsing
    $languageNamesToCodes = @{}
    $languageID = 0
    $languageJSON | foreach {
        if ($_.ISOCode -eq $language) {
            $languageID = $_.id
            $languageName = $_.englishName
            $commaPos = $languageName.IndexOf(',')
            if ($commaPos -gt 0) {
                $languageName = $languageName.Substring(0, $commaPos)
            }
            # Indexer assignment instead of Add(): a repeated name must not throw.
            $languageNamesToCodes[$languageName] = $_.ISOCode
        }
    }
    # U+00E5 is written as a code point so the name survives any file encoding
    # (the literal had degraded to mojibake).
    $languageNamesToCodes["Norwegian Bokm$([char]0x00E5)l"] = 'no'
    $languageNamesToCodes['Norwegian Nynorsk'] = 'no'
    $OutputEncoding = [console]::InputEncoding = [console]::OutputEncoding = New-Object System.Text.UTF8Encoding
    Set-LampLanguage -languageId $languageId
    $posFeatureList = '1'
    $tisanePOSValue = $pos.ToUpper()
    switch ($tisanePOSValue) {
        'ADJECTIVE' { $tisanePOSValue = 'ADJ' }
        'ADVERB' { $tisanePOSValue = 'ADV' }
        'PREPOSITION' { $tisanePOSValue = 'PREP' }
        'POSTPOSITION' { $tisanePOSValue = 'PREP' }
        'CONJUNCTION' { $tisanePOSValue = 'CJ' }
        'INTERJECTION' { $tisanePOSValue = 'INTJ' }
        'PHRASE' { $tisanePOSValue = '' }
        'PROVERB' { $tisanePOSValue = 'FORE' }
        'IDIOM' { $tisanePOSValue = '' }
        'NUMERAL' { $tisanePOSValue = 'NOUN' }
        'PREPOSITIONAL PHRASE' { $tisanePOSValue = '' }
        'PREPOSITIONAL_PHRASE' { $tisanePOSValue = '' }
        'POSTPOSITIONAL PHRASE' { $tisanePOSValue = '' }
        'POSTPOSITIONAL_PHRASE' { $tisanePOSValue = '' }
    }
    if ($tisanePOSValue.Length -lt 1) {
        # no part of speech to assign
        $posFeatureList = '0'
    }
    $tagsFound = @()
    $articleCount = 0
    $sComplement = '0'
    if ($complement) {
        $sComplement = '1'
    }
    # Style flags of an interpretation and the assignment each one implies (order is kept).
    $flagAssignments = [ordered]@{
        colloquial  = '40=TALK'
        formal      = '40=OFCL'
        dialect     = '43=DIAL'
        slang       = '40=JAR'
        obscure     = '51=OBSC'
        nonstandard = '49=ERR'
        humorous    = '51=HUM'
        offensive   = '47=DIRT'
        derogatory  = '47=DER'
        euphemism   = '47=EUPH'
        familiar    = '47=FAM'
        obsolete    = '41=OBSL'
        polite      = '47=POS'
        childish    = '40=CHLD'
        military    = '42=MILI'
    }
    # Start from the first page; do not inherit a bookmark from an enclosing scope.
    $bookmark = ''
    do {
        $wikidataUrl = "https://en.wiktionary.org/w/api.php?action=query&generator=categorymembers&format=json&gcmtitle=Category:$category&prop=pageprops&gcmlimit=500&gcmcontinue=$bookmark"
        "Loading entries from $wikidataUrl"
        $wikidataResponse = Invoke-WebRequest -Uri $wikidataUrl -Method GET -UseBasicParsing
        $wikidataResponse = $wikidataResponse.Content
        $listOfInstances = ConvertFrom-Json -InputObject $wikidataResponse
        if ($listOfInstances.continue) {
            $bookmark = $listOfInstances.continue.gcmcontinue
        } else {
            $bookmark = $null
        }
        $listOfInstances.query.pages.PSObject.Properties | foreach {
            $word = $_.Value.title
            if ($word -and (-not ($validatingRegex) -or $word -match $validatingRegex)) {
                $articleCount += 1
                $pageId = $_.Value.pageid
                $normalizedWord = $word -replace ' ', '_'
                try {
                    $wiktionaryParser = New-Object Tisane.Helper.EnglishWiktionaryParser -ArgumentList ($normalizedWord, $languageNamesToCodes)
                    $fullDoc = $wiktionaryParser.ToJson()
                    $articleJSON = (($fullDoc | Where Key -eq $language).Value | Where Key -eq $pos).Value
                    if ($articleJSON) {
                        if ($language -like 'zh-*') {
                            # Use the script variant reported by the parser instead of the page title.
                            $orgWord = $word
                            switch ($language) {
                                'zh-CN' { $word = $fullDoc['simplified'].ToString() -replace '"', '' }
                                'zh-TW' { $word = $fullDoc['traditional'].ToString() -replace '"', '' }
                            }
                            if ($word.Length -gt $orgWord.Length) {
                                Write-Host "Chinese script adjustment inconsistent: $word / $orgWord" -ForegroundColor Red
                                $word = ''
                            }
                        }
                        $sArticleJSON = $articleJSON.ToString()
                        if ($sArticleJSON) {
                            $article = ConvertFrom-Json -InputObject $sArticleJSON
                            $referenceWords = ''
                            $tag = $article.tag
                            if ($tag -and $tagsFound -notcontains $tag) {
                                # feeds the "Tags encountered" report, which used to stay empty
                                $tagsFound += $tag
                            }
                            # The usage example belongs to the article, not to a single interpretation.
                            $example = $article.example
                            if ($example) {
                                $exampleTranslation = $article.example_translation
                                $example = "E.g. '$example'"
                                if ($exampleTranslation) {
                                    $example = "$example ('$exampleTranslation')"
                                }
                            }
                            $article.interpretations | ForEach-Object {
                                $interpretation = $_
                                $english = $interpretation.english
                                if ($english) {
                                    $referenceWords = $english -join ","
                                    $indexInLabelArray = -1
                                    if ($labels) {
                                        $indexInLabelArray = [array]::IndexOf($labels, $tag)
                                    }
                                    # Collect raw "list=value" pairs; the "assign=" prefix is added exactly once below
                                    # (a matched label used to yield "assign=assign=...").
                                    $assignments = @()
                                    if ($indexInLabelArray -gt -1 -and $values) {
                                        $labelValue = $values[$indexInLabelArray]
                                        if ($labelValue -notlike '*=*') {
                                            $labelValue = "$listId=$labelValue"
                                        }
                                        $assignments += [System.Web.HttpUtility]::UrlEncode($labelValue)
                                    }
                                    if ($pos -eq 'Noun' -and ($tag -eq 'f' -or $tag -eq 'm')) {
                                        $tag = $tag.ToUpper()
                                        $assignments += [System.Web.HttpUtility]::UrlEncode("5=$tag")
                                    }
                                    if ($interpretation.figurative -or $interpretation.idiomatic) {
                                        $assignments += [System.Web.HttpUtility]::UrlEncode("51=FS")
                                    }
                                    if ($interpretation.literary) {
                                        $assignments += [System.Web.HttpUtility]::UrlEncode("40=BOOK")
                                    }
                                    if ($interpretation.aspect -or $tag -eq 'impf' -or $tag -eq 'pf') {
                                        $aspect = $interpretation.aspect
                                        if (-not($aspect)) {
                                            $aspect = $tag
                                        }
                                        switch ($aspect) {
                                            'impf' { $assignments += [System.Web.HttpUtility]::UrlEncode("27=IPI") }
                                            'pf' { $assignments += [System.Web.HttpUtility]::UrlEncode("27=PI") }
                                        }
                                    }
                                    foreach ($flagName in $flagAssignments.Keys) {
                                        if ($interpretation.$flagName) {
                                            $assignments += [System.Web.HttpUtility]::UrlEncode($flagAssignments[$flagName])
                                        }
                                    }
                                    $assign = $assignments -join ','
                                    if ($pruneListId -gt 0) {
                                        # NOTE(review): "featurValue2" is kept as found; it may be a typo for "featureValue2" - confirm against the API.
                                        $assign = "$assign&featureList2=$pruneListId&featurValue2=$pruneValue"
                                    }
                                    $assignDescription = ""
                                    if ($assign) {
                                        $assign = "assign=$assign"
                                        $assignDescription = "[ASSIGNING: $assign]"
                                    }
                                    if ($referenceWords -and $word) {
                                        "Importing $word -> $referenceWords $tag $assignDescription $example"
                                        $encodedWord = [System.Web.HttpUtility]::UrlEncode("$word")
                                        $encodedReference = [System.Web.HttpUtility]::UrlEncode("$referenceWords")
                                        $encodedExample = [System.Web.HttpUtility]::UrlEncode("$example")
                                        # "&note=" had been garbled into a single character fused to the complement value.
                                        $response = Invoke-WebRequest -Uri "$global:lampHost/import?lexeme=$encodedWord&reference=$encodedReference&proper=0&hypernym=0&featureList=$posFeatureList&featureValue=$tisanePOSValue&source=wiktionary&orgId=$pageId&$assign&complement=$sComplement&note=$encodedExample" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes('')) -UseBasicParsing
                                    }
                                }
                            }
                        }
                    }
                }
                catch {
                    Write-Host "IMPORT FAILED FOR $word $_" -ForegroundColor Red
                }
            }
        }
    } while ($bookmark)
    "Tags encountered:"
    $tagsFound
    "Number of articles: $articleCount"
    # cleanup:
    #"DELETE l FROM Lexemes l WHERE l.SourceType = 'wiktionary' AND l.Note LIKE '%Generated by matching: proper=0%' AND l.LastUpdatedBy = 'bulkimport'
    # "DELETE f FROM Lexemes l INNER JOIN Features f ON f.ConnectionType = 1 AND f.EntityId = l.Id AND f.FeatureListId IN (1, 3, 4, 7, 8, 9, 13, 22, 23, 24, 26, 32, 33, 34) WHERE l.SourceType = 'wiktionary'"
    # "DELETE f FROM Lexemes l INNER JOIN Features f ON f.ConnectionType = 1 AND f.EntityId = l.Id AND f.FeatureListId = 2 AND f.FeatureValue = '1' WHERE l.SourceType = 'wiktionary'"
    # "DELETE f FROM Lexemes l INNER JOIN Features f ON f.ConnectionType = 1 AND f.EntityId = l.Id AND f.FeatureListId = 5 INNER JOIN LexemeFamilies lf ON lf.LexemeId = l.Id INNER JOIN Features pos ON pos.ConnectionType = 2 AND pos.EntityID = lf.FamilyID AND pos.FeatureListId = 1 AND pos.FeatureValue <> 'NOUN' WHERE l.SourceType = 'wiktionary'"
}
#EndRegion '.\Public\Import-LexemesFromWiktionary.ps1' 271
#Region '.\Public\Import-LexemesWithFamily.ps1' 0
function Import-LexemesWithFamily {
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true,
HelpMessage="Filename")][String] $csv,
        [Parameter(Mandatory = $true, HelpMessage="Language ID")][String] $languageId,
        [Parameter(Mandatory = $false, HelpMessage="Source")][String] $source
    )
    # Imports lexemes from a CSV of word,familyId[,extraFeatures] lines and links each
    # one to its family through POST /importFamilies.
    $OutputEncoding = [console]::InputEncoding = [console]::OutputEncoding = New-Object System.Text.UTF8Encoding
    $fileLines = Get-Content $csv
    Login-Lamp
    Set-LampLanguage -languageId $languageId
    $encodedSource = [System.Web.HttpUtility]::UrlEncode("$source")
    $fileLines | ForEach-Object {
        # At most three parts: comma-separated extra features must stay a single string.
        $word,$familyId,$extraFeatures = $_ -split ',', 3
        if ($word -and $familyId) {
            $assign = $extraFeatures
            if ($extraFeatures) {
                $assign = [System.Web.HttpUtility]::UrlEncode($extraFeatures)
            }
            "Importing $word -> Family $familyId $extraFeatures"
            $encodedWord = [System.Web.HttpUtility]::UrlEncode($word)
            $response = Invoke-WebRequest -Uri "$global:lampHost/importFamilies?lexeme=$encodedWord&families=$familyId&behavior=complement&source=$encodedSource&orgId=$familyId&assign=$assign" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes('')) -UseBasicParsing
        }
    }
    [gc]::Collect()
    [gc]::WaitForPendingFinalizers()
}
#EndRegion '.\Public\Import-LexemesWithFamily.ps1' 31
#Region '.\Public\Import-LinkedLexemes.ps1' 0
function Import-LinkedLexemes {
    # .SYNOPSIS
    #   Imports machine-translated lexemes and links them to the families of their English originals.
    # .DESCRIPTION
    #   The CSV must have the columns Id, MainLemma, styleFeatures and translated. For every
    #   row the translated word is POSTed to "$global:lampHost/import" with sameFamiliesAs set
    #   to the English lexeme ID. When the columns are missing, an error is printed and
    #   nothing is imported.
    # .PARAMETER language
    #   ISO code of the target language; resolved to a LaMP language ID through GET /languages.
    # .PARAMETER path
    #   Path of the translated CSV file.
    [CmdletBinding()]
    Param (
        [Parameter(Mandatory = $true, HelpMessage="Target language code")][String] $language,
        [Parameter(Mandatory = $true, HelpMessage="Path: to translated CSV file")][string] $path
    )
    Login-Lamp
    $languageJSON = Invoke-RestMethod -Uri "$global:lampHost/languages" -Method GET -UseBasicParsing
    $languageId = 0
    $languageJSON | foreach {
        if ($_.ISOCode -eq $language) {
            $languageId = $_.id
        }
    }
    Set-LampLanguage -languageId $languageId
    # Force an array so that an empty or single-row file still has a usable Count and index 0.
    $csv = @(Import-Csv $path)
    $columnNames = @()
    if ($csv.Count -gt 0) {
        $columnNames = $csv[0].psobject.properties.name
    }
    if (("Id" -in $columnNames) -And ("MainLemma" -in $columnNames) -And ("styleFeatures" -in $columnNames) -And ("translated" -in $columnNames))
    {
        $i=0
        foreach($item in $csv)
        {
            $word=$($item.translated)
            $englishLexemeId=$($item.Id)
            $styleFeatures=$($item.styleFeatures)
            $englishLexeme=$($item.MainLemma)
            # -eq ignores case (-ceq would not): outside German, a translation that differs from
            # the English lemma only by letter case takes over the English spelling.
            if (($language -ne "de") -And ($word -eq $englishLexeme)){
                $word = $englishLexeme
            }
            $note = "Created by machine-translating '$englishLexeme'"
            $encodedWord = [System.Web.HttpUtility]::UrlEncode("$word")
            $encodedFeatures = [System.Web.HttpUtility]::UrlEncode("$styleFeatures")
            $encodedNote = [System.Web.HttpUtility]::UrlEncode($note)
            # "&note=" had been garbled into a single character fused to the orgId value.
            $response = Invoke-WebRequest -Uri "$global:lampHost/import?lexeme=$encodedWord&sameFamiliesAs=$englishLexemeId&assign=$encodedFeatures&source=mt&orgId=$englishLexemeId&note=$encodedNote" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes('')) -UseBasicParsing
            $pct = $i / $csv.Count * 100
            Write-Progress -Activity "Writing lexeme" -Status "$pct% $word" -PercentComplete $pct
            $i +=1
        }
    }
    Else {
        Write-Host "Error : required columns not found" -ForegroundColor DarkRed
        Write-Host "Ensure : you have following columns in your csv " -ForegroundColor DarkRed
        Write-Host "Id MainLemma styleFeatures translated" -ForegroundColor DarkRed
    }
}
###################################################################################################################################################
# TEST SCRIPT
# ##################################################################################################################################################
# $csv = Import-Csv "C:\D\ML_tisane\1109\google sheet tranlatinos\en-esTranlated.csv"
# $columnNames=$csv[0].psobject.properties.name #for getting headers of the CSV file
# "Id" -in $columnNames
# if (("Id" -in $columnNames) -And ("MainLemma" -in $columnNames) -And ("styleFeatures" -in $columnNames) -And ("translated" -in $columnNames))
# {
# $i=0
# foreach($item in $csv)
# {
# $pct = $i / $csv.length * 100
# Write-Progress -Activity "Writing lexeme" -Status "$pct% $item" -PercentComplete $pct
# $word=$($item.translated)
# $englishLexemeId=$($item.Id)
# $styleFeatures=$($item.styleFeatures)
# $englishLexeme=$($item.MainLemma)
# $note = "Created by machine-translating '$englishLexeme'"
# # Write-Host "$word and $englishLexemeId and $styleFeatures"
# }
# }
# Else {
# Write-Host "Error : required columns not found" -BackgroundColor DarkRed
#
# Write-Host "Ensure : you have following columns in your csv " -BackgroundColor DarkRed
# Write-Host "Id MainLemma styleFeatures translated" -BackgroundColor DarkRed
# exit
# }
#EndRegion '.\Public\Import-LinkedLexemes.ps1' 76
#Region '.\Public\Import-MonolingualLog.ps1' 0
function Import-MonolingualLog{
    # .SYNOPSIS
    #   Imports every line of a text file into a LaMP corpus as a test fragment.
    # .DESCRIPTION
    #   Each line is URL-encoded and POSTed to "$global:lampHost/testfragment" with
    #   test=true and ref=cht. The number of imported lines is written at the end.
    # .PARAMETER pathname
    #   Path of the text file.
    # .PARAMETER languageId
    #   LaMP language ID to activate before importing.
    # .PARAMETER corpora
    #   ID of the corpus that receives the fragments.
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Filename")][String] $pathname,
        [Parameter(Mandatory = $true, HelpMessage="Language ID")][String] $languageId,
        [Parameter(Mandatory = $true, HelpMessage="Corpora ID")][String] $corpora
    )
    # Force an array: for a one-line file Get-Content returns a string, whose Length is the
    # character count, and for an empty file it returns nothing (division by zero below).
    $fileLines = @(Get-Content $pathname)
    Login-Lamp
    Set-LampLanguage -languageId $languageId
    $i = 0
    $fileLines | ForEach-Object {
        $originalLine = $_
        $ln = [System.Web.HttpUtility]::UrlEncode($_)
        $pct = $i / $fileLines.Count * 100
        $i += 1
        Write-Progress -Activity "Importing" -Status "$pct% $originalLine" -PercentComplete $pct
        $response = Invoke-WebRequest -Uri "$global:lampHost/testfragment?corpora=$corpora&fragment=$ln&test=true&ref=cht" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes('')) -UseBasicParsing
    }
    "$i lines imported"
    [gc]::Collect()
    [gc]::WaitForPendingFinalizers()
}
#EndRegion '.\Public\Import-MonolingualLog.ps1' 30
#Region '.\Public\Import-WordNet.ps1' 0
function Import-WordNet{
    # .SYNOPSIS
    #   Imports the words of WordNet-style synsets and links them to existing families.
    # .DESCRIPTION
    #   For every list/synset element of the XML file, each <word> is POSTed to
    #   "$global:lampHost/importFamilies" with the synset's "family" as the target family
    #   and its "id" as orgId. Empty words are skipped.
    # .PARAMETER inputXml
    #   Path of the XML file.
    # .PARAMETER languageId
    #   LaMP language ID to activate before importing.
    # .PARAMETER source
    #   Source label stored with the lexeme.
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Filename")][String] $inputXml,
        [Parameter(Mandatory = $true, HelpMessage="Language ID")][String] $languageId,
        [Parameter(Mandatory = $false, HelpMessage="Source")][String] $source
    )
    [xml]$wnXml = Get-Content $inputXml
    Login-Lamp
    Set-LampLanguage -languageId $languageId
    $encodedSource = [System.Web.HttpUtility]::UrlEncode("$source")
    $wnXml.list.synset | ForEach-Object {
        $familyId = $_.family
        $encodedSourceId = [System.Web.HttpUtility]::UrlEncode("$($_.id)")
        $_.word | ForEach-Object {
            $word = $_
            # A synset without words pipes a single $null; do not import an empty lexeme.
            if ($word) {
                "Importing $word -> $familyId"
                $encodedWord = [System.Web.HttpUtility]::UrlEncode("$word")
                $response = Invoke-WebRequest -Uri "$global:lampHost/importFamilies?lexeme=$encodedWord&families=$familyId&behavior=complement&source=$encodedSource&orgId=$encodedSourceId" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes('')) -UseBasicParsing
            }
        }
    }
    [gc]::Collect()
    [gc]::WaitForPendingFinalizers()
}
#EndRegion '.\Public\Import-WordNet.ps1' 35
#Region '.\Public\Invoke-OpenAiApiCall.ps1' 0
function Invoke-OpenAiApiCall{
    # .SYNOPSIS
    #   Sends a prompt to the OpenAI completions endpoint and returns the raw web response.
    # .DESCRIPTION
    #   Builds the JSON request from the parameters and POSTs it to
    #   https://api.openai.com/v1/completions. When -apiKey is omitted,
    #   $global:OpenAiApiKey is used. The caller parses the response content.
    # .OUTPUTS
    #   The object returned by Invoke-WebRequest.
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory=$true, HelpMessage="The prompt to send to the API.")][string]$prompt,
        [Parameter(Mandatory=$false, HelpMessage="API key")][string]$apiKey,
        [Parameter(Mandatory=$false, HelpMessage="The model to use for the API call. default text-davinci-003")][string]$model = "text-davinci-003",
        [Parameter(Mandatory=$false, HelpMessage="The temperature to use for the API call. Defaults to 0.5.")][decimal]$temperature = 0.5,
        [Parameter(Mandatory=$false, HelpMessage="The maximum number of tokens to generate. Defaults to 128.")][int]$maxTokens = 128,
        [Parameter(Mandatory=$false, HelpMessage="The value of the top_p parameter. Defaults to 1.")][decimal]$topP = 1,
        [Parameter(Mandatory=$false, HelpMessage="The value of the frequency_penalty parameter. Defaults to 0.")][decimal]$frequencyPenalty = 0,
        [Parameter(Mandatory=$false, HelpMessage="The value of the presence_penalty parameter. Defaults to 0.")][decimal]$presencePenalty = 0
    )
    $url = "https://api.openai.com/v1/completions"
    if (-not $apiKey){
        $apiKey = $global:OpenAiApiKey
    }
    $body = @{
        model = $model
        prompt = $prompt
        temperature = $temperature
        max_tokens = $maxTokens
        top_p = $topP
        frequency_penalty = $frequencyPenalty
        presence_penalty = $presencePenalty
    } | ConvertTo-Json
    # Send explicit UTF-8 bytes, as the other requests in this file do, so that a non-ASCII
    # prompt matches the declared charset. The content type is set once, via -ContentType.
    $response = Invoke-WebRequest -Method Post -Uri $url -Headers @{ "Authorization" = "Bearer $apiKey" } -ContentType 'application/json; charset=utf-8' -Body ([System.Text.Encoding]::UTF8.GetBytes($body)) -UseBasicParsing
    return $response
}
# $c= Get-Content -Encoding UTF8 "TisaneLampClient\br.txt"
# $response = Invoke-OpenAIAPICall -prompt $c -apiKey
# $response_1 = $response.Content | ConvertFrom-Json
# Write-Host $response_1.choices[0].text
# $response_2 = [System.Text.Encoding]::UTF8.GetString($response_1.choices[0].text[0..1000])
# Write-Host $response_2
#EndRegion '.\Public\Invoke-OpenAiApiCall.ps1' 42
#Region '.\Public\New-AdjFamily.ps1' 0
## =============================================================================
##
## This script's purpose is to add a new noun family and new lexemes in English and another language, link them, and tag them
##
## =============================================================================
function New-AdjFamily{
    [CmdletBinding()]
    Param(
        [Parameter(HelpMessage="Language code: ")][Int32] $lang,
        [Parameter(HelpMessage="Definition: ")][String] $definition,
        [Parameter(HelpMessage="Wikidata ID: ")][String] $wikidata,
        [Parameter(Mandatory = $true, HelpMessage="English lemmas, delimited by commas")][String[]] $english,
        [Parameter(HelpMessage="Native lemmas, delimited by commas ")][String[]] $native,
        [Parameter(Mandatory = $true, HelpMessage="Family ID ")][Int32] $family,
        [Parameter(Mandatory = $true, HelpMessage="Hypernym ID ")][Int32] $hypernym
    )
    class Feature {
        [string]$index
        [string]$value
        [string]$type
        Feature([string]$index, [string]$value,
[string]$type) { $this.index = $index $this.value = $value $this.type = $type } } $OutputEncoding = [console]::InputEncoding = [console]::OutputEncoding = New-Object System.Text.UTF8Encoding Login-Lamp $description = $english -join ', ' $grammar=@() $grammar += @([Feature]::new("1", "ADJ", "Grammar")) $grammar += @([Feature]::new("26", "ALL", "Grammar")) if (-not $definition) { $definition = '???' } $newFamily = @{ id=$family description=$description definition=$definition phraseType='not a phrase' grammar=$grammar wikidataId=$wikidata } Write-Host "Creating family...`r`n" -ForegroundColor Green $newFamily $familyJson = ConvertTo-Json -InputObject $newFamily $response = Invoke-WebRequest -Uri "$global:lampHost/family" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes($familyJson)) -UseBasicParsing $parsedResponse = ConvertFrom-Json -InputObject $response if (-not $parsedResponse.success -or -not $parsedResponse.id) { Write-Host "Can't continue, no valid acknowledgement: $response" -ForegroundColor Red break # end the script } if ($family -ne $parsedResponse.id) { $actualFamilyId = $parsedResponse.id Write-Host "$family was occupied, inserted at $actualFamilyId" -ForegroundColor Yellow } $family = $parsedResponse.id Write-Host "Linking hypernym $hypernym...`r`n" -ForegroundColor Green # link the hypernym $whatever = Invoke-WebRequest -Uri "$global:lampHost/familyLinks?family=$family&type=hypernyms&links=$hypernym" -Method POST -Headers $global:authorizationToken -Body ' ' -UseBasicParsing Write-Host "Lexeme(s) in English...`r`n" -ForegroundColor Green # set the language to English $whatever = Invoke-WebRequest -Uri "$global:lampHost/setLanguage?language=7" -Method POST -Headers $global:authorizationToken -Body ' ' -UseBasicParsing $english | Foreach-Object { Add-Lexeme $family $_ } if ($native -and $native.length -gt 0 -and $lang -gt 0) { Write-Host "Lexeme(s) in language $lang...`r`n" -ForegroundColor Green # set the 
native language $whatever = Invoke-WebRequest -Uri "$global:lampHost/setLanguage?language=$lang" -Method POST -Headers $global:authorizationToken -Body ' ' -UseBasicParsing $native | Foreach-Object { Add-Lexeme $family $_ } } } #EndRegion '.\Public\New-AdjFamily.ps1' 100 #Region '.\Public\New-FormulaicFamily.ps1' 0 ## ============================================================================= ## ## This script's purpose is to add a new noun family and new lexemes in English and another language, link them, and tag them ## ## ============================================================================= function New-FormulaicFamily{ [CmdletBinding()] Param( [Parameter(HelpMessage="Language code: ")][Int32] $lang, [Parameter(HelpMessage="Definition: ")][String] $definition, [Parameter(Mandatory = $true, HelpMessage="English lemmas, delimited by commas")][String[]] $english, [Parameter(HelpMessage="Native lemmas, delimited by commas ")][String[]] $native, [Parameter(Mandatory = $true, HelpMessage="Family ID ")][Int32] $family, [Parameter(Mandatory = $false, HelpMessage="Hypernym ID ")][Int32] $hypernym ) #$lang = 26 # code found in the language details class Feature { [string]$index [string]$value [string]$type Feature([string]$index, [string]$value, [string]$type) { $this.index = $index $this.value = $value $this.type = $type } } Login-Lamp $description = $english -join ', ' $grammar=@() $grammar += @([Feature]::new("1", "FORE", "Grammar")) $grammar += @([Feature]::new("26", "ALL", "Grammar")) if (-not $definition) { $definition = '???' 
} $newFamily = @{ id=$family description=$description definition=$definition phraseType='mainClause' phraseTag='S' grammar=$grammar } Write-Host "Creating family...`r`n" -ForegroundColor Green $newFamily $familyJson = ConvertTo-Json -InputObject $newFamily $response = Invoke-WebRequest -Uri "$global:lampHost/family" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes($familyJson)) -UseBasicParsing $parsedResponse = ConvertFrom-Json -InputObject $response if (-not $parsedResponse.success -or -not $parsedResponse.id) { Write-Host "Can't continue, no valid acknowledgement: $response" -ForegroundColor Red break # end the script } if ($family -ne $parsedResponse.id) { $actualFamilyId = $parsedResponse.id Write-Host "$family was occupied, inserted at $actualFamilyId" -ForegroundColor Yellow } $family = $parsedResponse.id If ($hypernym -gt 0) { Write-Host "Linking hypernym $hypernym...`r`n" -ForegroundColor Green # link the hypernym $whatever = Invoke-WebRequest -Uri "$global:lampHost/familyLinks?family=$family&type=hypernyms&links=$hypernym" -Method POST -Headers $global:authorizationToken -Body ' ' -UseBasicParsing } Write-Host "Lexeme(s) in English...`r`n" -ForegroundColor Green # set the language to English $whatever = Invoke-WebRequest -Uri "$global:lampHost/setLanguage?language=7" -Method POST -Headers $global:authorizationToken -Body ' ' -UseBasicParsing $english | Foreach-Object { Add-Lexeme $family $_ } if ($native -and $native.length -gt 0 -and $lang -gt 0) { Write-Host "Lexeme(s) in language $lang...`r`n" -ForegroundColor Green # set the native language $whatever = Invoke-WebRequest -Uri "$global:lampHost/setLanguage?language=$lang" -Method POST -Headers $global:authorizationToken -Body ' ' -UseBasicParsing $native | Foreach-Object { Add-Lexeme $family $_ } } } #EndRegion '.\Public\New-FormulaicFamily.ps1' 96 #Region '.\Public\New-NounFamily.ps1' 0 ## 
## =============================================================================
##
## This script's purpose is to add a new noun family and new lexemes in English and another language, link them, and tag them
##
## =============================================================================
function New-NounFamily{
    <#
    .SYNOPSIS
    Creates a noun family, links it to its hypernym (and optionally a domain), and adds its lexemes.
    .DESCRIPTION
    Calls Login-Lamp, POSTs the family to "$global:lampHost/family", links the hypernym and
    (when -domain is greater than 0) the domain, then calls Add-Lexeme for every English lemma
    (language 7) and, when -native and -lang are supplied, for every native lemma.
    The family payload is also written to the output stream before it is sent.
    Throws when the server does not acknowledge the family insert.
    .PARAMETER lang
    Numeric code of the native language; native lexemes are only added when this is greater than 0.
    .PARAMETER definition
    Family definition; '???' is used when empty.
    .PARAMETER wikidata
    Wikidata ID stored with the family.
    .PARAMETER english
    English lemmas. Their comma-joined form, cut to 95 characters, becomes the description.
    .PARAMETER native
    Lemmas in the native language.
    .PARAMETER family
    Requested family ID; the ID returned by the server is used for everything that follows.
    .PARAMETER hypernym
    ID of the hypernym family to link.
    .PARAMETER domain
    ID of the domain family to link; skipped when not greater than 0.
    .PARAMETER proper
    Greater than 0 selects the proper-noun grammar features.
    .PARAMETER person
    Greater than 0 selects the person grammar features.
    #>
    [CmdletBinding()]
    Param(
        [Parameter(HelpMessage="Language code: ")][Int32] $lang,
        [Parameter(HelpMessage="Definition: ")][String] $definition,
        [Parameter(HelpMessage="Wikidata ID: ")][String] $wikidata,
        [Parameter(Mandatory = $true, ValueFromPipelineByPropertyName=$true, HelpMessage="English lemmas, delimited by commas")][String[]] $english,
        [Parameter(HelpMessage="Native lemmas, delimited by commas ")][String[]] $native,
        [Parameter(Mandatory = $true, HelpMessage="Family ID ")][Int32] $family,
        [Parameter(Mandatory = $true, HelpMessage="Hypernym ID ")][Int32] $hypernym,
        [Parameter(Mandatory = $false, HelpMessage="Domain ID ")][Int32] $domain,
        [Parameter()][Int32] $proper,
        [Parameter()][Int32] $person
    )

    $global:nativeLanguageIETF = ''

    class Feature {
        [string]$index
        [string]$value
        [string]$type

        Feature([string]$index, [string]$value, [string]$type) {
            $this.index = $index
            $this.value = $value
            $this.type = $type
        }
    }

    Login-Lamp

    $description = $english -join ', '
    if ($description.Length -gt 95) {
        $description = $description.Substring(0, 95)
    }

    $grammar = @()
    $grammar += @([Feature]::new("1", "NOUN", "Grammar"))
    if ($proper -gt 0) {
        $grammar += @([Feature]::new("3", "NO", "Grammar"))
        $grammar += @([Feature]::new("14", "NA", "Grammar"))
    }
    else {
        $grammar += @([Feature]::new("3", "YES", "Grammar"))
        $grammar += @([Feature]::new("14", "NPA", "Grammar"))
    }
    $grammar += @([Feature]::new("4", "REG", "Grammar"))
    if ($person -gt 0) {
        $grammar += @([Feature]::new("22", "PERS", "Grammar"))
        $grammar += @([Feature]::new("23", "BODY", "Grammar"))
        $grammar += @([Feature]::new("24", "BODY", "Grammar"))
    }
    else {
        $grammar += @([Feature]::new("22", "NO", "Grammar"))
        $grammar += @([Feature]::new("23", "THNG", "Grammar"))
        $grammar += @([Feature]::new("24", "THNG", "Grammar"))
    }
    $grammar += @([Feature]::new("26", "ALL", "Grammar"))

    if (-not $definition) {
        $definition = '???'
    }

    $newFamily = @{
        id = $family
        properNoun = $proper
        description = $description
        definition = $definition
        phraseType = 'not a phrase'
        grammar = $grammar
        wikidataId = $wikidata
    }

    Write-Host "Creating family...`r`n" -ForegroundColor Green
    # Emit the payload to the output stream (kept from the original behaviour).
    $newFamily
    $familyJson = ConvertTo-Json -InputObject $newFamily
    $response = Invoke-WebRequest -Uri "$global:lampHost/family" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes($familyJson)) -UseBasicParsing
    $parsedResponse = ConvertFrom-Json -InputObject $response
    if (-not $parsedResponse.success -or -not $parsedResponse.id) {
        Write-Host "Can't continue, no valid acknowledgement: $response" -ForegroundColor Red
        # 'break' outside a loop has surprising effects on callers; fail with a terminating error instead.
        throw "Can't continue, no valid acknowledgement: $response"
    }
    if ($family -ne $parsedResponse.id) {
        $actualFamilyId = $parsedResponse.id
        Write-Host "$family was occupied, inserted at $actualFamilyId" -ForegroundColor Yellow
    }
    # From here on always work with the ID the server actually assigned.
    $family = $parsedResponse.id

    Write-Host "Linking hypernym $hypernym...`r`n" -ForegroundColor Green
    # link the hypernym
    $null = Invoke-WebRequest -Uri "$global:lampHost/familyLinks?family=$family&type=hypernyms&links=$hypernym" -Method POST -Headers $global:authorizationToken -Body ' ' -UseBasicParsing

    if ($domain -gt 0) {
        Write-Host "Linking domain $domain...`r`n" -ForegroundColor Green
        # link the domain
        $null = Invoke-WebRequest -Uri "$global:lampHost/familyLinks?family=$family&type=domains&links=$domain" -Method POST -Headers $global:authorizationToken -Body ' ' -UseBasicParsing
    }

    Write-Host "Lexeme(s) in English...`r`n" -ForegroundColor Green
    # set the language to English
    $activeLanguage = Invoke-RestMethod -Uri "$global:lampHost/setLanguage?language=7" -Method POST -Headers $global:authorizationToken -Body ' ' -UseBasicParsing
    $english | Foreach-Object { Add-Lexeme $family $_ }

    if ($native -and $native.length -gt 0 -and $lang -gt 0) {
        Write-Host "Lexeme(s) in language $lang...`r`n" -ForegroundColor Green
        # set the native language
        $activeLanguage = Invoke-RestMethod -Uri "$global:lampHost/setLanguage?language=$lang" -Method POST -Headers $global:authorizationToken -Body ' ' -UseBasicParsing
        # Remember the ISO code reported by the server for the native language.
        $global:nativeLanguageIETF = $activeLanguage.ISOCode
        $native | Foreach-Object { Add-Lexeme $family $_ }
    }
}
#EndRegion '.\Public\New-NounFamily.ps1' 131
#Region '.\Public\New-VerbFamily.ps1' 0
## =============================================================================
##
## This script's purpose is to add a new verb family and new lexemes in English and another language, link them, and tag them
##
## =============================================================================
function New-VerbFamily{
    <#
    .SYNOPSIS
    Creates a verb family, links it to its hypernym, and adds its lexemes.
    .DESCRIPTION
    Calls Login-Lamp, POSTs the family to "$global:lampHost/family", links the hypernym, then
    calls Add-Lexeme for every English lemma (language 7) and, when -native and -lang are
    supplied, for every native lemma. The family payload is also written to the output stream
    before it is sent. Throws when the server does not acknowledge the family insert.
    .PARAMETER lang
    Numeric code of the native language; native lexemes are only added when this is greater than 0.
    .PARAMETER definition
    Family definition; '???' is used when empty.
    .PARAMETER english
    English lemmas; their comma-joined form becomes the description.
    .PARAMETER native
    Lemmas in the native language.
    .PARAMETER family
    Requested family ID; the ID returned by the server is used for everything that follows.
    .PARAMETER hypernym
    ID of the hypernym family to link.
    .PARAMETER person
    Greater than 0 selects "BODY" instead of "THNG" for feature 23.
    #>
    [CmdletBinding()]
    Param(
        [Parameter(HelpMessage="Language code: ")][Int32] $lang,
        [Parameter(HelpMessage="Definition: ")][String] $definition,
        [Parameter(Mandatory = $true, HelpMessage="English lemmas, delimited by commas")][String[]] $english,
        [Parameter(HelpMessage="Native lemmas, delimited by commas ")][String[]] $native,
        [Parameter(Mandatory = $true, HelpMessage="Family ID ")][Int32] $family,
        [Parameter(Mandatory = $true, HelpMessage="Hypernym ID ")][Int32] $hypernym,
        [Parameter()][Int32] $person
    )

    class Feature {
        [string]$index
        [string]$value
        [string]$type

        Feature([string]$index, [string]$value, [string]$type) {
            $this.index = $index
            $this.value = $value
            $this.type = $type
        }
    }

    # NOTE(review): $global:lampHost and $global:authorizationToken are used below but never set
    # here; presumably Login-Lamp provides them - confirm.
    Login-Lamp

    $description = $english -join ', '

    $grammar = @()
    $grammar += @([Feature]::new("1", "VERB", "Grammar"))
    $grammar += @([Feature]::new("14", "TRAN", "Grammar"))
    $grammar += @([Feature]::new("8", "REG", "Grammar"))
    if ($person -gt 0) {
        $grammar += @([Feature]::new("23", "BODY", "Grammar"))
    }
    else {
        $grammar += @([Feature]::new("23", "THNG", "Grammar"))
    }
    $grammar += @([Feature]::new("24", "ALL", "Grammar"))
    $grammar += @([Feature]::new("26", "ALL", "Grammar"))

    if (-not $definition) {
        $definition = '???'
    }

    $newFamily = @{
        id = $family
        description = $description
        definition = $definition
        phraseType = 'not a phrase'
        grammar = $grammar
    }

    Write-Host "Creating family...`r`n" -ForegroundColor Green
    # Emit the payload to the output stream (kept from the original behaviour).
    $newFamily
    $familyJson = ConvertTo-Json -InputObject $newFamily
    $response = Invoke-WebRequest -Uri "$global:lampHost/family" -Method POST -Headers $global:authorizationToken -Body ([System.Text.Encoding]::UTF8.GetBytes($familyJson)) -UseBasicParsing
    $parsedResponse = ConvertFrom-Json -InputObject $response
    if (-not $parsedResponse.success -or -not $parsedResponse.id) {
        Write-Host "Can't continue, no valid acknowledgement: $response" -ForegroundColor Red
        # 'break' outside a loop has surprising effects on callers; fail with a terminating error instead.
        throw "Can't continue, no valid acknowledgement: $response"
    }
    if ($family -ne $parsedResponse.id) {
        $actualFamilyId = $parsedResponse.id
        Write-Host "$family was occupied, inserted at $actualFamilyId" -ForegroundColor Yellow
    }
    # From here on always work with the ID the server actually assigned.
    $family = $parsedResponse.id

    Write-Host "Linking hypernym $hypernym...`r`n" -ForegroundColor Green
    # link the hypernym
    $null = Invoke-WebRequest -Uri "$global:lampHost/familyLinks?family=$family&type=hypernyms&links=$hypernym" -Method POST -Headers $global:authorizationToken -Body ' ' -UseBasicParsing

    Write-Host "Lexeme(s) in English...`r`n" -ForegroundColor Green
    # set the language to English
    $null = Invoke-WebRequest -Uri "$global:lampHost/setLanguage?language=7" -Method POST -Headers $global:authorizationToken -Body ' ' -UseBasicParsing
    $english | Foreach-Object { Add-Lexeme $family $_ }

    if ($native -and $native.length -gt 0 -and $lang -gt 0) {
        Write-Host "Lexeme(s) in language $lang...`r`n" -ForegroundColor Green
        # set the native language
        $null = Invoke-WebRequest -Uri "$global:lampHost/setLanguage?language=$lang" -Method POST -Headers $global:authorizationToken -Body ' ' -UseBasicParsing
        $native | Foreach-Object { Add-Lexeme $family $_ }
    }
}
#EndRegion '.\Public\New-VerbFamily.ps1' 138
#Region '.\Public\Normalization-Lib.ps1' 0
# Normalizes a single-word lemma by rewriting its ending with the first matching
# suffix rule for the given language ('sq' and 'ru' have rules).
#   $language - language code selecting the rule table.
#   $entry    - lemma to normalize.
# Returns exactly one string: the entry unchanged when it contains a space, when the
# language has no rules, or when no rule matches; otherwise the rewritten lemma.
Function GetNormalizedWikidataLemma([String] $language,[String] $entry) {
    # Multi-word entries are never normalized.
    if ($entry -like '* *') {
        return $entry
    }

    # Ordered on purpose: rules are tried top to bottom and the first match wins.
    $suffixes = [ordered]@{}
    switch ($language) {
        'sq' {
            $suffixes.Add('i', '')
            $suffixes.Add('u', '')
            $suffixes.Add('(?<=[^aeëiouy])ulla', 'ull')
            $suffixes.Add('(?<=[^aeëiouy])ura', 'ur')
            $suffixes.Add('(?<=[^aeëiouy])eja', 'e')
            $suffixes.Add('(?<=[^aeëiouy])ëza', 'ëz')
            $suffixes.Add('(?<=[^aeëiouy])ia', 'i')
            $suffixes.Add('(?<=[^aeëiouy])ra', 'ër')
            $suffixes.Add('(?<=[^aeëiouy])rra', 'ërr')
            $suffixes.Add('(?<=[^aeëiouy])a', 'ë')
            $suffixes.Add('ca', 'cë')
            $suffixes.Add('da', 'dë')
            $suffixes.Add('dea', 'de')
            $suffixes.Add('fa', 'fë')
            $suffixes.Add('ga', 'gë')
            $suffixes.Add('dha', 'dhë')
            $suffixes.Add('sha', 'shë')
            $suffixes.Add('cia', 'ci')
            $suffixes.Add('media', 'medie')
            $suffixes.Add('pedia', 'pedi')
            $suffixes.Add('logia', 'logji')
            $suffixes.Add('ogjia', 'ogji')
            $suffixes.Add('ia', 'i')
            $suffixes.Add('aja', 'a')
            $suffixes.Add('dja', 'dje')
            $suffixes.Add('anija', 'anije')
            $suffixes.Add('fëmija', 'fëmijë')
            $suffixes.Add('bizelja', 'bizelja')
            $suffixes.Add('lulja', 'lule')
            $suffixes.Add('lja', 'lje')
            $suffixes.Add('mja', 'mje')
            $suffixes.Add('nja', 'një')
            $suffixes.Add('goja', 'gojë')
            $suffixes.Add('soja', 'sojë')
            $suffixes.Add('qja', 'qe')
            $suffixes.Add('tja', 'te')
            $suffixes.Add('ka', 'kë')
            $suffixes.Add('(?<=[^l])la', 'lë')
            $suffixes.Add('kumbulla', 'kumbull')
            $suffixes.Add('ma', 'më')
            $suffixes.Add('na', 'në')
            $suffixes.Add('ra', 'rë')
            $suffixes.Add('sa', 'së')
            $suffixes.Add('ta', 'të')
            $suffixes.Add('va', 'vë')
            $suffixes.Add('ëza', 'ëz')
            $suffixes.Add('za', 'zë')
        }
        'ru' {
            $suffixes.Add('ые', 'ое')
            $suffixes.Add('ы', '')
            $suffixes.Add('и', '')
            # '(\w+[оиые]е[ ])+\w+[иыяа]'
        }
        default {
            return $entry
        }
    }

    # A plain foreach lets 'return' leave the function with a single value. The previous
    # ForEach-Object pipeline emitted the match and then fell through to also emit $entry.
    foreach ($rule in $suffixes.GetEnumerator()) {
        $fullRegex = $rule.Key + '$'
        if ($entry -match $fullRegex) {
            return ($entry -replace $fullRegex, $rule.Value)
        }
    }
    return $entry
}

# For a multi-word entry, looks up a verb lexeme whose MainLemma equals the first word
# and builds a stem from that lexeme's stem plus the remaining words.
#   $languageId - numeric language ID used in the lookup.
#   $entry      - the (possibly multi-word) entry.
# Returns two values: the lexeme ID (0 when not found or single-word) and the stem
# (the entry itself when not found or single-word).
Function GetVerbSameInflectionAs([int] $languageId,[String] $entry) {
    $entries = $entry.Split(" ")
    $lexemeId = 0
    $stem = $entry
    if ($entries -and $entries.Length -gt 1) {
        $firstWord = $entries[0]
        # Double any apostrophes so the word cannot terminate the SQL string literal.
        $escapedFirstWord = $firstWord.Replace("'", "''")
        $sql = "USE tisane; SELECT TOP 1 l.Id, l.Stem FROM dbo.Lexemes l WHERE l.LanguageId = $languageId AND l.MainLemma = N'$escapedFirstWord' AND dbo.HasFamilyFeature(l.Id, 1, 'VERB') = 1"
        $ds = Invoke-Sqlcmd -Query $sql
        if ($ds) {
            $lexemeId = $ds.Id
            $headStem = $ds.Stem
            # Swap the first word for the head verb's stem, keep the rest of the entry.
            $stem = "$headStem " + $entry.Substring($firstWord.Length + 1)
        }
    }
    # presumably a sample multi-word entry: "känna som sin egen ficka" (Swedish)
    return $lexemeId, $stem
}

# Picks the segment-assignment SQL fragment for a multi-word entry.
#   $language - language code; matched as a regex against the case labels (switch -regex).
#   $entry    - the entry whose word pattern is inspected.
# Returns the fragment ("InflectingSegment = ..., AgreeingSegment1st = ..., AgreeingSegmentLast = ...")
# of the first pattern that matches the whole entry, or nothing when none matches.
Function GetSegmentSQL([String] $language,[String] $entry) {
    $segmentCount = $entry.Split(" ").Length
    $penultimateIndex = $segmentCount - 1
    $regexes = [ordered]@{} # it's important to keep it ordered / unsorted, because the order matters
    switch -regex ($language) {
        'he' {
            $regexes.Add("\w+[ ]\w+[י]", "InflectingSegment = 1, AgreeingSegment1st = 2, AgreeingSegmentLast = $segmentCount")
            $regexes.Add("\w+ה([ ]\w+[תה])+", "InflectingSegment = 1, AgreeingSegment1st = 2, AgreeingSegmentLast = $segmentCount")
            $regexes.Add("\w+[ה][ ]\w+", "InflectingSegment = 1, AgreeingSegment1st = 2, AgreeingSegmentLast = $segmentCount")
            $regexes.Add("\w+ות([ ]\w+[תה])+", "InflectingSegment = 1, AgreeingSegment1st = 2, AgreeingSegmentLast = $segmentCount")
            $regexes.Add("\w+[ת][ ]\w+[^תה]", "InflectingSegment = NULL, AgreeingSegment1st = NULL, AgreeingSegmentLast = NULL")
        }
        'de' {
            $regexes.Add("[\p{Ll} ]+[ ](\p{Lu}\p{Ll}+[ ])+\p{Ll}+en", "InflectingSegment = $segmentCount, AgreeingSegment1st = NULL, AgreeingSegmentLast = NULL") # verb phrase
            $regexes.Add("\p{Ll}+en[ ][\p{Ll}+ ]*([ ]\p{Lu}\p{Ll}+)+", "InflectingSegment = 1, AgreeingSegment1st = NULL, AgreeingSegmentLast = NULL") # verb phrase
        }
        'sq' {
        }
        'ru' {
            $regexes.Add("(\w+ая[ ])+\w+[аья]", "InflectingSegment = $segmentCount, AgreeingSegment1st = 1, AgreeingSegmentLast = $penultimateIndex")
            $regexes.Add("(\w+[ое][е][ ])+\w+[ое]", "InflectingSegment = $segmentCount, AgreeingSegment1st = 1, AgreeingSegmentLast = $penultimateIndex")
            $regexes.Add("\w+([ ][\w\-]+(ых|го|а|ы|[ое]в))+", "InflectingSegment = 1, AgreeingSegment1st = NULL, AgreeingSegmentLast = NULL")
            $regexes.Add("\w+[ ]\w{1,3}([ ][\w\-]+)+", "InflectingSegment = 1, AgreeingSegment1st = NULL, AgreeingSegmentLast = NULL")
            $regexes.Add("(\w+[оиы][й][ ])+\w+", "InflectingSegment = $segmentCount, AgreeingSegment1st = 1, AgreeingSegmentLast = $penultimateIndex")
            $regexes.Add("\w+т[ьи][ ][\w\-]+","InflectingSegment = 1, AgreeingSegment1st = NULL, AgreeingSegmentLast = NULL")
            $regexes.Add("(\w+[оиые]е[ ])+\w+[иыяа]", "InflectingSegment = $segmentCount, AgreeingSegment1st = 1, AgreeingSegmentLast = $penultimateIndex")
            $regexes.Add("\w+([ ]+\w+[ой])?[ ]\w+[иаы]([ ][\w\-]+)*", "InflectingSegment = 1, AgreeingSegment1st = NULL, AgreeingSegmentLast = NULL")
        }
        'es|fr|it|pt' {
            $regexes.Add("\w+[ ]d[']\w+", 'InflectingSegment = 1, AgreeingSegment1st = NULL, AgreeingSegmentLast = NULL')
            $regexes.Add("\w+[ ]\w{2,4}[ ]\w+", 'InflectingSegment = 1, AgreeingSegment1st = NULL, AgreeingSegmentLast = NULL')
            $regexes.Add("\w+([ ]\w+)+", "InflectingSegment = 1, AgreeingSegment1st = 2, AgreeingSegmentLast = $segmentCount")
        }
    }

    # First pattern that matches the entire entry wins.
    foreach ($rule in $regexes.GetEnumerator()) {
        $fullRegex = '^' + $rule.Key + '$'
        if ($entry -match $fullRegex) {
            return $rule.Value
        }
    }
}

# Extracts the assigned values from the fragment produced by GetSegmentSQL.
#   $lang - language code passed through to GetSegmentSQL.
#   $ent  - entry passed through to GetSegmentSQL.
# Returns the right-hand sides (numeric strings or 'NULL') in the order they appear,
# or the empty GetSegmentSQL result when no pattern matched.
Function GetSegmentValues([String] $lang,[String] $ent) {
    $sql = GetSegmentSQL -language $lang -entry $ent
    if (-not $sql) {
        return $sql
    }

    # Split on ',' and then '=' explicitly: .Split(', ') is char-wise on Windows PowerShell
    # but whole-string on PowerShell 7, so relying on it gives different results per edition.
    foreach ($assignment in $sql.Split(',')) {
        $parts = $assignment.Split('=')
        if ($parts.Length -lt 2) {
            continue
        }
        $value = $parts[1].Trim()
        # Accept multi-digit indexes as well (entries with ten or more words).
        if ($value -match '^[0-9]+$' -or $value -eq 'NULL') {
            $value
        }
    }
}
#EndRegion '.\Public\Normalization-Lib.ps1' 163
#Region '.\Public\Retag.ps1' 0
function Retag{
    <#
    .SYNOPSIS
    Requests a retag of a range for the given language.
    .DESCRIPTION
    Calls Login-Lamp, selects the language via "$global:lampHost/setLanguage", then issues a
    PUT to "$global:lampHost/retag" with type=range. The web response of the retag call is
    written to the output stream.
    .PARAMETER languageId
    Numeric ID of the language to activate before retagging.
    .PARAMETER rangeId
    ID of the range, sent as the 'arg' query parameter.
    .PARAMETER reset
    Value sent as the 'reset' query parameter (reset list IDs).
    #>
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory = $true, HelpMessage="Language numeric ID: ")][int] $languageId,
        [Parameter(Mandatory = $true, HelpMessage="Range ID: ")][int] $rangeId,
        [Parameter(Mandatory = $false, HelpMessage="Reset list IDs: ")][String] $reset
    )

    Login-Lamp

    $languageStructure = Invoke-RestMethod -Uri "$global:lampHost/setLanguage?language=$languageId" -Method POST -Headers $global:authorizationToken -Body ' ' -UseBasicParsing
    $languageEnglishName = $languageStructure.EnglishName
    Write-Host "Retagging using range: $rangeId ($languageEnglishName), resetting listIDs: $reset" -ForegroundColor Green
    Invoke-WebRequest -Uri "$global:lampHost/retag?arg=$rangeId&type=range&reset=$reset" -Method PUT -Headers $global:authorizationToken -Body ' ' -UseBasicParsing
}
#EndRegion '.\Public\Retag.ps1' 16