Functions/GenXdev.Queries.Webbrowser/Get-GoogleSearchResultUrls.ps1
<##############################################################################
Part of PowerShell module : GenXdev.Queries.Webbrowser
Original cmdlet filename  : Get-GoogleSearchResultUrls.ps1
Original author           : René Vaessen / GenXdev
Version                   : 1.300.2025
################################################################################
Copyright (c)  René Vaessen / GenXdev

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
################################################################################>
###############################################################################
<#
.SYNOPSIS
Performs a google search and returns the links

.DESCRIPTION
Performs a google search in the controlled webbrowser tab and returns the
unique, non-Google http(s) links found on the result pages. Result pages are
followed through the 'Next' link until no further page can be opened or the
maximum number of results has been collected. Each query is processed
separately, so the maximum applies per query.

.PARAMETER Queries
The google query (or queries) to perform.
Aliases: q, Name, Text, Query

.PARAMETER Max
The maximum number of results to obtain per query, defaults to 200

.PARAMETER Language
The language of the returned search results. When omitted, no language
restriction is added to the search.

.EXAMPLE
PS C:\> $Urls = Get-GoogleSearchResultUrls "site:github.com PowerShell module";
        $Urls

.NOTES
Requires the Windows 10+ Operating System
##############################################################################
#>
###############################################################################
function Get-GoogleSearchResultUrls {

    [CmdletBinding()]
    [System.Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseSingularNouns', '')]
    [System.Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSAvoidGlobalVars', '')]
    [System.Diagnostics.CodeAnalysis.SuppressMessageAttribute('PSUseDeclaredVarsMoreThanAssignments', '')]
    [Alias('qlinksget')]

    param(
        [parameter(
            Mandatory,
            Position = 0,
            ValueFromRemainingArguments,
            ValueFromPipeline,
            ValueFromPipelineByPropertyName,
            HelpMessage = 'The query to perform'
        )]
        [Alias('q', 'Name', 'Text', 'Query')]
        [string[]] $Queries,
        #
        [parameter(
            Mandatory = $false,
            HelpMessage = 'The maximum number of results to obtain, defaults to 200'
        )]
        [int] $Max = 200,
        ###############################################################################
        [ValidateSet(
            'Afrikaans', 'Akan', 'Albanian', 'Amharic', 'Arabic', 'Armenian',
            'Azerbaijani', 'Basque', 'Belarusian', 'Bemba', 'Bengali', 'Bihari',
            'Bork, bork, bork!', 'Bosnian', 'Breton', 'Bulgarian', 'Cambodian',
            'Catalan', 'Cherokee', 'Chichewa', 'Chinese (Simplified)',
            'Chinese (Traditional)', 'Corsican', 'Croatian', 'Czech', 'Danish',
            'Dutch', 'Elmer Fudd', 'English', 'Esperanto', 'Estonian', 'Ewe',
            'Faroese', 'Filipino', 'Finnish', 'French', 'Frisian', 'Ga',
            'Galician', 'Georgian', 'German', 'Greek', 'Guarani', 'Gujarati',
            'Hacker', 'Haitian Creole', 'Hausa', 'Hawaiian', 'Hebrew', 'Hindi',
            'Hungarian', 'Icelandic', 'Igbo', 'Indonesian', 'Interlingua',
            'Irish', 'Italian', 'Japanese', 'Javanese', 'Kannada', 'Kazakh',
            'Kinyarwanda', 'Kirundi', 'Klingon', 'Kongo', 'Korean',
            'Krio (Sierra Leone)', 'Kurdish', 'Kurdish (Soranî)', 'Kyrgyz',
            'Laothian', 'Latin', 'Latvian', 'Lingala', 'Lithuanian', 'Lozi',
            'Luganda', 'Luo', 'Macedonian', 'Malagasy', 'Malay', 'Malayalam',
            'Maltese', 'Maori', 'Marathi', 'Mauritian Creole', 'Moldavian',
            'Mongolian', 'Montenegrin', 'Nepali', 'Nigerian Pidgin',
            'Northern Sotho', 'Norwegian', 'Norwegian (Nynorsk)', 'Occitan',
            'Oriya', 'Oromo', 'Pashto', 'Persian', 'Pirate', 'Polish',
            'Portuguese (Brazil)', 'Portuguese (Portugal)', 'Punjabi',
            'Quechua', 'Romanian', 'Romansh', 'Runyakitara', 'Russian',
            'Scots Gaelic', 'Serbian', 'Serbo-Croatian', 'Sesotho', 'Setswana',
            'Seychellois Creole', 'Shona', 'Sindhi', 'Sinhalese', 'Slovak',
            'Slovenian', 'Somali', 'Spanish', 'Spanish (Latin American)',
            'Sundanese', 'Swahili', 'Swedish', 'Tajik', 'Tamil', 'Tatar',
            'Telugu', 'Thai', 'Tigrinya', 'Tonga', 'Tshiluba', 'Tumbuka',
            'Turkish', 'Turkmen', 'Twi', 'Uighur', 'Ukrainian', 'Urdu', 'Uzbek',
            'Vietnamese', 'Welsh', 'Wolof', 'Xhosa', 'Yiddish', 'Yoruba', 'Zulu')]
        [parameter(
            Mandatory = $false,
            HelpMessage = 'The language of the returned search results'
        )]
        [string] $Language
    )

    begin {

        Microsoft.PowerShell.Utility\Write-Verbose "Starting Google search operation"

        # an unbound [string] parameter is an empty string, not $null, so test
        # for truthiness to get a readable label for the verbose output
        $languageLabel = if ($Language) { $Language } else { 'default' }

        # the language part of the search URL is the same for every query, so
        # resolve it once here instead of once per query
        $langKey = '&hl=en'

        if ($Language) {

            $languageCode = (GenXdev.Helpers\Get-WebLanguageDictionary)[$Language]

            if ($languageCode) {

                $langKey = "&hl=en&lr=lang_$([Uri]::EscapeDataString("$languageCode"))"
            }
            else {

                # without a code the 'lr' filter would be sent as 'lang_',
                # so leave the restriction out instead
                Microsoft.PowerShell.Utility\Write-Warning (
                    "No language code found for '$Language', " +
                    'searching without language restriction'
                )
            }
        }

        # how many consecutive failed attempts to open the next result page
        # are tolerated before paging stops
        $maxNextRetries = 3
    }

    process {

        foreach ($query in $Queries) {

            Microsoft.PowerShell.Utility\Write-Verbose (
                "Processing query: $query with language: $languageLabel"
            )

            # EscapeDataString also escapes reserved characters such as
            # '&', '#', '+' and '=', which would otherwise end or alter the
            # 'q' parameter of the search URL
            $encodedQuery = [Uri]::EscapeDataString($query)
            $url = "https://www.google.com/search?q=$encodedQuery$langKey"

            $results = [System.Collections.Generic.List[string]]::new()

            # navigate to search page
            Microsoft.PowerShell.Utility\Write-Verbose "Navigating to: $url"
            GenXdev.Webbrowser\Set-WebbrowserTabLocation $url

            $more = $true
            $failedNextAttempts = 0

            do {
                Microsoft.PowerShell.Utility\Write-Verbose 'Scanning page for URLs...'

                # collect the href of every anchor on the page, keeping only
                # absolute http/https links that do not point back to google
                GenXdev.Webbrowser\Get-WebbrowserTabDomNodes a "e.getAttribute('href')" |
                    Microsoft.PowerShell.Core\Where-Object { "$PSItem" -notlike '*google*' } |
                    Microsoft.PowerShell.Core\Where-Object { "$PSItem" -match '^https?://' } |
                    Microsoft.PowerShell.Core\ForEach-Object {

                        $urlString = "$PSItem"

                        # 'return' only skips the current pipeline item
                        if ($results.Count -ge $Max) { return }

                        if (-not $results.Contains($urlString)) {

                            $results.Add($urlString)
                        }
                    }

                # enough results collected, no need to open another page
                if ($results.Count -ge $Max) { break }

                try {
                    # open the next page of results and wait for it to load
                    $Global:chromeController.GetByText('Next')[0].ClickAsync().Wait()
                    $Global:chromeController.WaitForNavigationAsync().Wait()

                    $more = $true
                    $failedNextAttempts = 0
                }
                catch {
                    # the 'Next' link is missing or the navigation failed;
                    # retry a limited number of times before giving up
                    $failedNextAttempts++
                    $more = $failedNextAttempts -le $maxNextRetries

                    Microsoft.PowerShell.Utility\Write-Verbose (
                        "Could not open next result page (attempt " +
                        "$failedNextAttempts): $($PSItem.Exception.Message)"
                    )
                }

                # give the page some time to settle before scanning again
                Microsoft.PowerShell.Utility\Start-Sleep -Seconds 1

            } while ($more -and ($results.Count -lt $Max))

            Microsoft.PowerShell.Utility\Write-Verbose "Found $($results.Count) unique URLs"

            # emit every collected url as an individual pipeline object
            Microsoft.PowerShell.Utility\Write-Output $results
        }
    }
}