Public/Get-RoosSchlikker.ps1
|
function Get-RoosSchlikker {
    <#
    .SYNOPSIS
    Retrieves Roos Schlikker's columns from the Het Parool website.

    .DESCRIPTION
    Downloads the author page https://www.parool.nl/auteur/roos-schlikker with
    curl, collects the href of every anchor that matches
    'https://www.parool.nl/columns-opinie/?*', downloads each of those pages and
    emits one object per article.

    Articles whose page carries no 'article:published_time' meta tag are skipped
    with a warning instead of being emitted with an unreliable date.

    .OUTPUTS
    PSCustomObject with type name 'UncommonSense.Parool.Article' and the
    properties Url, Date, Title and Body.

    .NOTES
    Relies on the external commands curl, Join-String, ConvertTo-HtmlDocument,
    Select-HtmlNode and Get-HtmlNodeText being available in the session.
    #>
    [CmdletBinding()]
    param()

    $DutchCulture = New-Object -TypeName System.Globalization.CultureInfo -ArgumentList 'nl-NL'

    curl --silent --location 'https://www.parool.nl/auteur/roos-schlikker' |
        Join-String -Separator ' ' |
        ConvertTo-HtmlDocument |
        ForEach-Object { $_.DocumentNode.SelectNodes('//a') } |
        Where-Object { $_.HasAttributes } |
        ForEach-Object { $_.GetAttributeValue('href', '') } |
        Where-Object { $_ -like 'https://www.parool.nl/columns-opinie/?*' } |
        ForEach-Object {
            # Keep the article URL under its own name so it cannot be confused
            # with whatever $_ means further down.
            $Url = $_
            $Document = curl --silent --location $Url | Join-String -Separator ' ' | ConvertTo-HtmlDocument

            # This script block runs in the function's scope, so variables keep
            # their value from the previous article. Reset the node explicitly:
            # otherwise a failed lookup would silently reuse the previous
            # article's publication date.
            $PublishedNode = $null
            $PublishedNode = $Document | Select-HtmlNode -XPath '//meta[@property="article:published_time"]'
            if (-not $PublishedNode) {
                Write-Warning "Skipping '$Url' because no article:published_time metadata was found."
                return # inside ForEach-Object this moves on to the next URL
            }

            # The content attribute is split on 'T'; only the leading date part is parsed.
            $DateText = ($PublishedNode.GetAttributeValue('content', '') -split 'T')[0]

            # NOTE(review): 'p.z3lfzo5' looks like a site-generated class name for
            # body paragraphs - confirm it still matches the live markup.
            $Body = ($Document | Select-HtmlNode -CssSelector 'p.z3lfzo5' -All | Get-HtmlNodeText -SkipRemoveLineBreaks) -join ' '
            # Strip the reply-address line and the trailing author biography.
            $Body = $Body -replace 'Reageren\? r\.schlikker@parool\.nl\.?', ''
            $Body = $Body -replace 'Roos Schlikker \(1975\) is journalist en schrijfster van boeken en toneelstukken\. Elke zaterdag schrijft ze een column voor Het Parool\.\s*$', ''

            [PSCustomObject][Ordered]@{
                PSTypeName = 'UncommonSense.Parool.Article'
                Url        = $Url
                Date       = [DateTime]::ParseExact($DateText, 'yyyy\-MM\-dd', $DutchCulture)
                Title      = $Document | Select-HtmlNode -CssSelector h1 | Get-HtmlNodeText
                Body       = $Body
            }
        }
}