Commands/Get-JsonLD.ps1

function Get-JsonLD {
    <#
    .SYNOPSIS
        Gets JSON-LD data from a given URL.
    .DESCRIPTION
        Gets JSON Linked Data from a given URL.
        
        This is a format used by many websites to provide structured data about their content.

        If the URL is a file URI (or is not an absolute URI), it is treated as a local path:
        matching files are read and converted from JSON.
        The path is checked as given first, and then relative to the module's directory.

        Otherwise, the URL is requested with `Invoke-RestMethod`
        and each `<script type="application/ld+json">` tag in the response is output.
        Responses are kept in a script-scoped cache; use `-Force` to request the URL again.

        Objects with a `@type` have `application/ld+json` and their type name
        (prefixed with the `@context`, when the context is a string) added to their `.pstypenames`.
    .EXAMPLE
        # Want to get information about a movie? Linked Data to the rescue!
        Get-JsonLD -Url https://letterboxd.com/film/amelie/
    .EXAMPLE
        # Want information about an article? Lots of news sites use this format.
        Get-JsonLD https://www.thebulwark.com/p/mahmoud-khalil-immigration-detention-first-amendment-free-speech-rights
    .EXAMPLE
        # Want to get information about a schema?
        jsonld https://schema.org/Movie
        # Get-JSONLD will output the contents of a `@Graph` object if no `@type` is found.
    #>

    [Alias('jsonLD','json-ld')]
    param(
    # The URL that may contain JSON-LD data
    [Parameter(Mandatory, ValueFromPipeline, ValueFromPipelineByPropertyName)]
    [Alias('href')]
    [Uri]
    $Url,

    <#
    
    Determines how the output is returned:

    |as|is|
    |-|-|
    |html|the response as text|
    |json|the match as json|
    |jsonld, ld, linkedData|the match as linked data (the default)|
    |script|the script tag|
    |xml|the script tag, as xml|
    
    #>
    
    [ValidateSet('html', 'json', 'jsonld', 'ld', 'linkedData', 'script', 'xml')]
    [string]
    $as = 'jsonld',

    # If set, will output the response as it was received (the same as `-As html`).
    [switch]
    $RawHtml,

    # If set, will force the request to be made even if the URL has already been cached.
    [switch]
    $Force
    )

    begin {
        # Create a pattern to match the JSON-LD script tag.
        # Note that the closing `</script>` is only looked ahead for: it is not part of the match.
        $linkedDataRegex = [Regex]::new(@'
(?<HTML_LinkedData>
<script # Match <script tag
\s{1,} # Then whitespace
type= # Then the type= attribute (this regex will only match if it is first)
[\"\'] # Double or Single Quotes
application/ld\+json # The type that indicates linked data
[\"\'] # Double or Single Quotes
[^>]{0,} # Match anything until the end of the start tag
\> # Match the end of the start tag
(?<JsonContent>(?:.|\s){0,}?(?=\z|</script>)) # Anything until the end tag is JSONContent
)
'@
, 'IgnoreCase,IgnorePatternWhitespace','00:00:00.1')

        # Initialize the cache for JSON-LD requests
        if (-not $script:Cache) {
            $script:Cache = [Ordered]@{}
        }

        # Decorates a linked data object with type names, and then outputs it.
        # Objects in a `@graph`, and property values with a `@type`, are decorated recursively.
        filter output {
            $in = $_
            # Keep a reference to this filter, so it can call itself.
            $mySelf = $MyInvocation.MyCommand
            # A string `@context` is used to qualify the type name.
            # Nested calls do not set `$context` unless they have their own,
            # so they will see the value from the calling scope.
            if ($in.'@context' -is [string]) {
                $context  = $in.'@context'
            }
            if ($in.'@graph') {
                # `-notcontains` tests membership.
                # (`-ne` against a collection filters it, which is almost always truthy)
                if ($in.pstypenames -notcontains 'application/ld+json') {
                    $in.pstypenames.insert(0,'application/ld+json')
                }
                foreach ($graphObject in $in.'@graph') {
                    # Graph objects are decorated in place, so their output is not needed here.
                    $null = $graphObject |
                        & $mySelf
                }
            }
            elseif ($in.'@type') {

                $typeName = if ($context) {
                    $context, $in.'@type' -join '/'
                } else {
                    $in.'@type'
                }

                if ($in.pstypenames -notcontains 'application/ld+json') {
                    $in.pstypenames.insert(0,'application/ld+json')
                }
                # The most specific type name goes first.
                if ($in.pstypenames -notcontains $typeName) {
                    $in.pstypenames.insert(0,$typeName)
                }

                foreach ($property in $in.psobject.properties) {
                    if ($property.value.'@type') {
                        $null = $property.value |
                            & $mySelf
                    }
                }
            }
            $in
        }

        # Reads a file as JSON and outputs it as linked data.
        # Files that cannot be read or converted are skipped, with a verbose message.
        $foreachFile = {
            $inFile = $_.FullName
            try {
                Get-Content -LiteralPath $inFile -Raw | 
                    ConvertFrom-Json |
                        output
            } catch {
                # `$inFile` is already the full path (a string), so it is used directly.
                Write-Verbose "$inFile : $_"
            }
        }
    }

    process {
        # File URIs and relative URIs are treated as local paths.
        if ($url.IsFile -or 
            -not $url.AbsoluteUri
        ) {
            $localPath = $url.OriginalString
            if (Test-Path $localPath) {
                Get-ChildItem -Path $localPath -File |
                    Foreach-Object $foreachFile
                return
            }

            # If the path was not found as given, look for it beneath the module's directory.
            $myModule = $MyInvocation.MyCommand.Module
            if ($myModule) {
                $modulePath = Join-Path ($myModule | Split-Path) $localPath
                if (Test-Path $modulePath) {
                    Get-ChildItem -Path $modulePath -File |
                        Foreach-Object $foreachFile
                }
            }
            
            return
        }
            
        # Request the URL, unless there is already a cached response and `-Force` was not passed.
        $restResponse = 
            if ($Force -or -not $script:Cache[$url]) {
                $script:Cache[$url] = Invoke-RestMethod -Uri $Url
                $script:Cache[$url]
            } else {
                $script:Cache[$url]
            }

        # If we want the response itself, we are done.
        if ($RawHtml -or $as -eq 'html') {
            return $restResponse
        }                
        
        # Find all linked data tags within the response
        foreach ($match in $linkedDataRegex.Matches("$restResponse")) {
            # Decide the format for each match,
            # so that a fallback for one match does not change the `-As` parameter for the rest.
            $outputAs = $as

            # If we want the result as xml
            if ($outputAs -eq 'xml') {
                # try to cast it.
                # The match stops before the closing tag, so add it back to make the element complete.
                $matchXml = "$match</script>" -as [xml]
                if ($matchXml) {
                    # and output it if found.
                    $matchXml
                    continue
                } else {
                    # otherwise, fall back to the `<script>` tag
                    $outputAs = 'script'
                }
            }

            # If we want the tag, that should be the whole match
            if ($outputAs -eq 'script') {
                "$match"
                continue
            }
            
            # If we want it as json, we have a match group.
            if ($outputAs -eq 'json') {
                $match.Groups['JsonContent'].Value
                continue
            }

            # Otherwise, we want it as linked data, so convert from the json
            foreach ($jsonObject in 
                $match.Groups['JsonContent'].Value | 
                    ConvertFrom-Json
            ) {
                # If there was a `@type` or `@graph` property
                if (
                    $jsonObject.'@type' -or 
                    $jsonObject.'@graph'
                ) {
                    # output the object as jsonld
                    $jsonObject | output
                }                
                # If there is neither a `@type` or a `@graph`
                else {
                    # just output the object.
                    $jsonObject
                }                
            }
        }        
    }
}