Public/Get-Spotprent.ps1
function Get-Spotprent {
    <#
    .SYNOPSIS
    Lists the cartoons ("spotprenten") shown on the Trouw cartoon overview page.

    .DESCRIPTION
    Downloads the overview page, selects the element matching the CSS selector
    '.artstyle__main--container', and reads two parallel sequences from it:
      * dates, parsed from the text of its direct 'h3' and 'p' child elements;
      * image URLs, taken from the 'data-original' attribute of each
        figure > picture > img element.
    The two sequences are paired by position: the n-th image gets the n-th date.

    ConvertTo-HtmlDocument, Select-HtmlNode and Get-HtmlNodeText are not defined
    in this file; presumably they come from an HTML parsing module that must
    already be loaded - verify against the module manifest.

    .OUTPUTS
    One object per image with PSTypeName 'UncommonSense.Trouw.Article' and the
    properties Url, Date, Title and Body. Nothing is emitted when the container
    or the images cannot be found.
    #>

    # Dates on the page are written in Dutch, with or without an explicit year.
    $DutchCulture = New-Object -TypeName System.Globalization.CultureInfo -ArgumentList 'nl-NL'
    [string[]]$PossibleDateFormats = @('d MMMM yyyy', 'd MMMM')

    $Content = Invoke-WebRequest -Uri https://www.trouw.nl/achterpagina/spotprenten~b529bf91/ |
        Select-Object -ExpandProperty Content

    $Container = $Content |
        ConvertTo-HtmlDocument |
        Select-HtmlNode -CssSelector '.artstyle__main--container'

    # Without the container there is nothing to parse; bail out instead of
    # calling methods on $null and emitting empty placeholder objects.
    if ($null -eq $Container) {
        Write-Warning "Element '.artstyle__main--container' was not found; no cartoons returned."
        return
    }

    # @(...) guarantees an array even when zero or one date is found.
    $ParsedDates = @(
        $Container.ChildNodes |
            Where-Object NodeType -EQ Element |
            Where-Object Name -In 'h3', 'p' |
            ForEach-Object {
                # The replace works around a typo ('augsutus') seen on the page.
                $_ |
                    Get-HtmlNodeText |
                    ForEach-Object { $_ -replace 'augsutus', 'augustus' } |
                    Where-Object { $_ -notlike '*Meer*spotprenten*' } |
                    ForEach-Object { [DateTime]::ParseExact($_, $PossibleDateFormats, $DutchCulture, 'AllowWhiteSpaces') }
            }
    )

    # A date parsed without a year gets the current year. When a date is later
    # than the one before it in page order, it is moved back one year; this
    # relies on the page listing cartoons newest-first (assumption - confirm).
    $PreviousDate = [DateTime]::MaxValue
    $Dates = @(
        foreach ($Date in $ParsedDates) {
            if ($Date -gt $PreviousDate) {
                $Date = $Date.AddYears(-1)
            }

            $PreviousDate = $Date
            $Date
        }
    )

    # @(...) is essential here: with exactly one image the pipeline yields a bare
    # string, whose Length/indexer would address characters instead of URLs.
    $Images = @(
        $Container.Elements('figure') |
            ForEach-Object { $_.Element('picture') } |
            ForEach-Object { $_.Element('img') } |
            ForEach-Object { $_.GetAttributes() } |
            Where-Object Name -EQ 'data-original' |
            Select-Object -ExpandProperty Value
    )

    # A counted loop emits nothing for an empty list, unlike 0..-1 which
    # enumerates 0 and -1.
    for ($Index = 0; $Index -lt $Images.Count; $Index++) {
        [PSCustomObject]@{
            PSTypeName = 'UncommonSense.Trouw.Article'
            Url        = $Images[$Index]
            # Fewer dates than images leaves the date empty rather than failing.
            Date       = if ($Index -lt $Dates.Count) { $Dates[$Index] } else { $null }
            Title      = 'Spotprent'
            Body       = $Images[$Index]
        }
    }
}