Public/Get-MartijnKatan.ps1
function Get-MartijnKatan
{
    <#
    .SYNOPSIS
    Retrieves the articles linked from the Martijn Katan section page on nrc.nl.

    .DESCRIPTION
    Downloads https://www.nrc.nl/rubriek/martijn-katan/, extracts the article
    links from the page with the external `pup` command, then downloads every
    linked article and emits one object per article. A one-second pause follows
    each article request.

    .OUTPUTS
    PSCustomObject with PSTypeName 'UncommonSense.Nrc.Article' and the
    properties Url, Date, Title and Body.

    .NOTES
    Requires an executable named `pup` to be resolvable as a command; it is
    invoked with CSS-style selectors to pick values out of the HTML.
    #>

    # Each pipeline line ends with '|' so the statement continues without
    # trailing backticks, which break silently when followed by whitespace.
    Invoke-WebRequest -Uri https://www.nrc.nl/rubriek/martijn-katan/ |
        Select-Object -ExpandProperty Content |
        pup '.compact-grid__item a attr{href}' --plain |
        ForEach-Object {
            # The extracted href values are appended to the site root.
            $Url = "https://www.nrc.nl$($_)"

            # Elements 4..6 of the '/'-split URL are joined into the text that is
            # parsed as 'yyyy-MM-dd' below (i.e. year, month and day).
            $DateText = (($Url -split '/')[4..6]) -join '-'

            $Content = Invoke-WebRequest -Uri $Url | Select-Object -ExpandProperty Content

            [PSCustomObject]@{
                PSTypeName = 'UncommonSense.Nrc.Article'
                Url        = $Url
                # Invariant culture: a null provider would use the current culture,
                # whose default calendar may not be Gregorian and would then
                # misinterpret the year taken from the URL.
                Date       = [DateTime]::ParseExact(
                    $DateText,
                    'yyyy-MM-dd',
                    [System.Globalization.CultureInfo]::InvariantCulture
                )
                Title      = ($Content | pup 'h1[data-flowtype="headline"] text{}' --plain)
                # pup output arrives as separate lines; they are merged into one string.
                Body       = ($Content | pup 'div.content p text{}' --plain) -join ' '
            }

            # Prevent nginx 429 error (too many requests)
            Start-Sleep -Seconds 1
        }
}