# NoGit.psm1

#Region '.\Public\Get-NoGitHubRepoContents.ps1' -1

function Get-NoGitHubRepoContents {
    <#
    .SYNOPSIS
    Recursively downloads the contents of a GitHub repository to a local directory using the GitHub REST API.

    .DESCRIPTION
    Connects to the GitHub Contents API and performs a breadth-first traversal of the specified repository
    and branch. Every entry of type 'file' is downloaded below TargetDir, preserving the folder structure.
    A queue is used instead of recursion, so deeply nested folders do not grow the call stack.

    Entries whose type is neither 'dir' nor 'file' are skipped.

    A failed directory listing or file download is reported with Write-Error and counted; the traversal
    then continues with the next entry. A summary (success count, failure count, output directory and
    elapsed time) is written to the verbose stream.

    .PARAMETER Token
    GitHub personal access token for authentication.

    .PARAMETER Owner
    GitHub username or organization name. Also sent as the User-Agent header.

    .PARAMETER Repo
    Name of the GitHub repository.

    .PARAMETER Branch
    Optional. Branch to download from. Defaults to 'main'.

    .PARAMETER TargetDir
    Local directory path to save downloaded content. Created if it does not exist.

    .EXAMPLE
    Get-NoGitHubRepoContents -Token 'ghp_...' -Owner 'octocat' -Repo 'Hello-World' -TargetDir 'C:\Git\Hello' -Verbose
    #>

    [CmdletBinding()]
    param (
        [Parameter(Mandatory)]
        [string] $Token,

        [Parameter(Mandatory)]
        [string] $Owner,

        [Parameter(Mandatory)]
        [string] $Repo,

        [string] $Branch = 'main',

        [Parameter(Mandatory)]
        [string] $TargetDir
    )

    # Start a timer to measure execution time
    $stopwatch = [System.Diagnostics.Stopwatch]::StartNew()

    # Set up GitHub API request headers
    $headers = @{
        Authorization = "token $Token"
        'User-Agent'  = $Owner
    }

    # Branch names may contain characters that are significant inside a query string
    # ('#', '&', '+', ...), so the value is percent-encoded before it is placed after '?ref='.
    $encodedBranch = [uri]::EscapeDataString($Branch)

    # Ensure target directory exists
    if (-not (Test-Path -Path $TargetDir)) {
        try {
            # -ErrorAction Stop turns a non-terminating New-Item failure into one the catch block sees.
            New-Item -ItemType Directory -Path $TargetDir -Force -ErrorAction Stop | Out-Null
            Write-Verbose "Created directory: $TargetDir"
        }
        catch {
            Write-Error "Failed to create directory: $TargetDir - $($_.Exception.Message)"
            return
        }
    }

    # Initialize counters for success and failure
    $SuccessCount = 0
    $FailCount = 0

    # Queue of directories still to be listed. Each element carries the API URL of the directory
    # and its path relative to the repository root (empty for the root itself).
    $queue = [System.Collections.Generic.Queue[PSObject]]::new()
    $queue.Enqueue([PSCustomObject]@{
            Url     = "https://api.github.com/repos/$Owner/$Repo/contents?ref=$encodedBranch"
            RelPath = ''
        })

    # Process directories and files in a breadth-first manner
    while ($queue.Count -gt 0) {
        $current = $queue.Dequeue()

        try {
            $items = Invoke-RestMethod -Uri $current.Url -Headers $headers -ErrorAction Stop -Verbose:$false
            Write-Verbose "Fetched: $($current.Url) ($(@($items).Count) item(s))"
        }
        catch {
            Write-Error "Failed to fetch: $($current.Url) - $($_.Exception.Message)"
            $FailCount++
            continue
        }

        foreach ($item in $items) {
            $path = if ($current.RelPath) {
                Join-Path -Path $current.RelPath -ChildPath $item.name
            }
            else {
                $item.name
            }

            if ($item.type -eq 'dir') {
                # Reuse the URL supplied by the API; only add the ref when it is not already present.
                # The regex matches a literal '?' or '&' (in -like, '?' would be a wildcard).
                $nextUrl = $item.url
                if ($nextUrl -notmatch '[?&]ref=') {
                    $separator = if ($nextUrl -like '*[?]*') { '&' } else { '?' }
                    $nextUrl = '{0}{1}ref={2}' -f $nextUrl, $separator, $encodedBranch
                }

                $queue.Enqueue([PSCustomObject]@{
                        Url     = $nextUrl
                        RelPath = $path
                    })
                continue
            }

            # Anything that is not a regular file is skipped.
            if ($item.type -ne 'file') {
                continue
            }

            $outPath = Join-Path $TargetDir $path
            $outDir = Split-Path $outPath -Parent

            try {
                if (-not (Test-Path $outDir)) {
                    New-Item -ItemType Directory -Path $outDir -Force -ErrorAction Stop | Out-Null
                }

                Invoke-WebRequest -Uri $item.download_url -Headers $headers -OutFile $outPath -ErrorAction Stop -Verbose:$false
                Write-Verbose "Downloaded: $path"
                $SuccessCount++
            }
            catch {
                Write-Error "Failed to download: $path - $($_.Exception.Message)"
                $FailCount++
            }
        }
    }

    # Stop the timer and show summary
    $stopwatch.Stop()
    $elapsed = $stopwatch.Elapsed
    $formattedTime = '{0:D2}:{1:D2}:{2:D2}' -f $elapsed.Hours, $elapsed.Minutes, $elapsed.Seconds

    Write-Verbose "--- Summary for $Owner/$Repo ---"
    Write-Verbose ("Success : {0}" -f $SuccessCount)
    Write-Verbose ("Fail : {0}" -f $FailCount)
    Write-Verbose ("OutputDir : {0}" -f $TargetDir)
    Write-Verbose ("Elapsed : {0}" -f $formattedTime)
}
#EndRegion '.\Public\Get-NoGitHubRepoContents.ps1' 145
#Region '.\Public\Get-NoGitHubRepoTreeContents.ps1' -1

function Get-NoGitHubRepoTreeContents {
    <#
    .SYNOPSIS
        Downloads files from a GitHub repository using the Git Trees API.

    .DESCRIPTION
        Resolves the branch to a commit, reads the commit's tree SHA, requests the tree
        recursively, and downloads every blob (file) entry to a local directory.

        The -SourcePath parameter specifies where within the repository to begin copying.
        Only files under one of these paths are downloaded, and the SourcePath itself is removed
        from the output folder structure. Subfolders under SourcePath are preserved.

        Tree entries that are not blobs are skipped. A failed download is reported with
        Write-Error and counted; processing continues with the next entry. A summary is
        written to the verbose stream.

    .PARAMETER Token
        The GitHub Personal Access Token (PAT).

        Use a fine-grained personal access token with `repo contents:read` permission:
        https://github.com/settings/personal-access-tokens

    .PARAMETER Owner
        The repository owner (user or organization).

    .PARAMETER Repo
        The name of the repository.

    .PARAMETER Branch
        The branch to download (default: main).

    .PARAMETER TargetDir
        Directory to save the files to. Created if it does not exist.

    .PARAMETER SourcePath
        One or more folders (or file paths) within the repository to start copying from.

        - Acts as a starting point filter.
        - Downloads all files and subfolders under this path, recursively.
        - The SourcePath folder itself is not included in your local output - only its contents and subfolders are preserved.
        - Matching is case-insensitive and respects folder boundaries: 'Build/DTect' does not match 'Build/DTect2'.
        - Backslashes and leading/trailing slashes are normalized, so 'Build\DTect\' equals 'Build/DTect'.
        - When several values are given, a file is downloaded if it lies under any of them;
          the first matching value determines which prefix is stripped.
        - A value that names a single file downloads that file directly into TargetDir.

        For example:
            If SourcePath is 'Build/DTect' and TargetDir is 'C:\Temp\DTect',
            then a file at 'Build/DTect/0.0.637/file.psd1' in the repo will be saved as:
                'C:\Temp\DTect\0.0.637\file.psd1'

        This is useful when you want to extract specific folders or modules from a repository
        without keeping their entire parent folder structure.

    .EXAMPLE
        Get-NoGitHubRepoTreeContents -Token 'abc' -Owner 'octocat' -Repo 'Hello-World' -TargetDir './repo' -SourcePath 'Build/DTect'

    .NOTES
        Use Get-NoGitHubRepoTreeContents when working with repositories that:

        - Contain directories with large numbers of files (over 1000), where the standard Contents API may truncate results.
        - Require efficient retrieval and fine-grained filtering of specific subfolders and their contents.

        If the tree response itself is flagged as truncated, a warning is written and only the
        returned entries are processed.

        For more details and examples, see:
        https://github.com/kevinblumenfeld/NoGit

    #>

    [CmdletBinding()]
    param (
        [Parameter(Mandatory)]
        [string]
        $Token,

        [Parameter(Mandatory)]
        [string]
        $Owner,

        [Parameter(Mandatory)]
        [string]
        $Repo,

        [Parameter()]
        [string]
        $Branch = 'main',

        [Parameter(Mandatory)]
        [string]
        $TargetDir,

        [string[]]
        $SourcePath
    )

    $stopwatch = [System.Diagnostics.Stopwatch]::StartNew()

    $headers = @{
        Authorization = "token $Token"
        'User-Agent'  = 'NoGit'
        Accept        = 'application/vnd.github+json'
    }

    if (-not (Test-Path -Path $TargetDir)) {
        try {
            New-Item -ItemType Directory -Path $TargetDir -Force -ErrorAction Stop | Out-Null
        }
        catch {
            Write-Error "Failed to create directory: $TargetDir - $($_.Exception.Message)"
            return
        }
    }

    # Step 1: Get commit SHA from branch reference.
    # Each path segment of the branch name is percent-encoded separately so that '/' keeps
    # acting as a separator while characters such as '#' cannot cut the URL short.
    $encodedBranch = ($Branch -split '/' | ForEach-Object { [uri]::EscapeDataString($_) }) -join '/'
    $refUrl = "https://api.github.com/repos/$Owner/$Repo/git/refs/heads/$encodedBranch"
    try {
        $refResponse = Invoke-RestMethod -Uri $refUrl -Headers $headers -ErrorAction Stop
        $commitUrl = $refResponse.object.url

        # Guard against a response that does not carry exactly one commit URL.
        if (-not $commitUrl -or @($commitUrl).Count -ne 1) {
            throw 'The reference response did not contain exactly one commit URL.'
        }
    }
    catch {
        Write-Error "Failed to resolve branch '$Branch'. Verify that it exists. ($($_.Exception.Message))"
        return
    }

    # Step 2 and 3: Get the commit object to find the tree SHA, then get the tree recursively.
    # A failure here would otherwise leave $treeSha empty and continue with a malformed URL.
    try {
        $commitResponse = Invoke-RestMethod -Uri $commitUrl -Headers $headers -ErrorAction Stop
        $treeSha = $commitResponse.tree.sha
        if (-not $treeSha) {
            throw 'The commit response did not contain a tree SHA.'
        }

        $treeUrl = "https://api.github.com/repos/$Owner/$Repo/git/trees/${treeSha}?recursive=1"
        $treeResponse = Invoke-RestMethod -Uri $treeUrl -Headers $headers -ErrorAction Stop
    }
    catch {
        Write-Error "Failed to retrieve the file tree for $Owner/$Repo ($Branch) - $($_.Exception.Message)"
        return
    }

    if ($treeResponse.truncated -eq $true) {
        Write-Warning "Tree listing was truncated. Not all files may be downloaded."
    }

    # Normalize the SourcePath values once: forward slashes only, no leading/trailing slash,
    # empty values dropped. @() guarantees an array even for zero or one value.
    $sourcePrefixes = @(
        $SourcePath |
            Where-Object { $_ } |
            ForEach-Object { $_.Replace('\', '/').Trim('/') } |
            Where-Object { $_ }
    )

    # The blob requests ask for raw content instead of the JSON envelope.
    $blobHeaders = $headers.Clone()
    $blobHeaders['Accept'] = 'application/vnd.github.v3.raw'

    # Counters are kept in script scope, as before, so any module code reading them still works.
    $script:SuccessCount = 0
    $script:FailCount = 0

    $ignoreCase = [System.StringComparison]::OrdinalIgnoreCase

    foreach ($entry in $treeResponse.tree) {
        if ($entry.type -ne 'blob') { continue }

        # Decide whether the entry lies under one of the requested source paths and, if so,
        # strip that prefix. A single boolean is used so the test stays correct for any
        # number of SourcePath values.
        $relativePath = $entry.path
        $isSelected = ($sourcePrefixes.Count -eq 0)

        foreach ($prefix in $sourcePrefixes) {
            if ($entry.path.Equals($prefix, $ignoreCase)) {
                # SourcePath names this very file: keep just the file name.
                $relativePath = $entry.path.Substring($entry.path.LastIndexOf('/') + 1)
                $isSelected = $true
                break
            }

            if ($entry.path.StartsWith("$prefix/", $ignoreCase)) {
                # +1 also removes the '/' that separates the prefix from the remainder.
                $relativePath = $entry.path.Substring($prefix.Length + 1)
                $isSelected = $true
                break
            }
        }

        if (-not $isSelected) { continue }

        $outputPath = Join-Path -Path $TargetDir -ChildPath $relativePath
        $outputDir = Split-Path -Path $outputPath -Parent

        try {
            # Directory creation is part of the guarded section so a failure is counted too.
            if (-not (Test-Path -Path $outputDir)) {
                New-Item -ItemType Directory -Path $outputDir -Force -ErrorAction Stop | Out-Null
            }

            $blobUrl = "https://api.github.com/repos/$Owner/$Repo/git/blobs/$($entry.sha)"

            Invoke-WebRequest -Uri $blobUrl -Headers $blobHeaders -OutFile $outputPath -ErrorAction Stop -Verbose:$false
            Write-Verbose "Downloaded: $($entry.path) -> $relativePath"
            $script:SuccessCount++
        }
        catch {
            Write-Error "Failed to download: $($entry.path) - $_"
            $script:FailCount++
        }
    }

    $stopwatch.Stop()

    $elapsed = $stopwatch.Elapsed
    $formattedTime = '{0:D2}:{1:D2}:{2:D2}' -f $elapsed.Hours, $elapsed.Minutes, $elapsed.Seconds

    Write-Verbose "--- Summary for $Owner/$Repo ---"
    Write-Verbose ("Success : {0}" -f $script:SuccessCount)
    Write-Verbose ("Fail : {0}" -f $script:FailCount)
    Write-Verbose ("OutputDir : {0}" -f $TargetDir)
    Write-Verbose ("Elapsed : {0}" -f $formattedTime)
}
#EndRegion '.\Public\Get-NoGitHubRepoTreeContents.ps1' 182