# Public/ConvertTo-AIPrompt.ps1

function ConvertTo-AIPrompt {
    <#
    .SYNOPSIS
        Converts a GitHub repository into a single XML file optimized for AI tools.

    .DESCRIPTION
        This function downloads files from a GitHub repository and packages them into a single XML file
        that can be easily used with AI tools like ChatGPT, Claude, Gemini, etc.

        The repository content is organized into a structured format with each file's content
        encapsulated in separate document sections with paths and other metadata.

        If the repository or subfolder is not found with the casing provided, a case-insensitive
        lookup is attempted before giving up.

    .PARAMETER RepoSlug
        The GitHub repository slug in format 'owner/repo'. Optional subfolder can be specified using 'owner/repo/subfolder'.

    .PARAMETER OutputPath
        Path to save the generated XML file. If not provided, the output is returned as a string.

    .PARAMETER Exclude
        Array of file patterns to exclude (wildcards supported, e.g., *.jpg, *.xlsx).
        By default, common binary and non-text formats are excluded (see Notes for the list).

    .PARAMETER Include
        Array of file patterns to include (wildcards supported, e.g., *.ps1, *.md). If not specified, all files are included.

    .PARAMETER Token
        GitHub API token for private repositories. Optional for public repos but recommended to avoid rate limiting.
        If not provided, the function will attempt to use $env:GITHUB_TOKEN.

    .PARAMETER IncludeBinary
        Switch to override the default binary file exclusions. When specified, only the files explicitly
        mentioned in the Exclude parameter will be excluded.

    .OUTPUTS
        System.String. The generated XML when -OutputPath is omitted; otherwise the value of -OutputPath.

    .EXAMPLE
        ConvertTo-AIPrompt -RepoSlug "dfinke/ImportExcel" -OutputPath "D:\ImportExcel.xml" -Exclude "*.xlsx","*.jpg"

        Exports the entire dfinke/ImportExcel repository, excluding xlsx and jpg files and all default binary formats.

    .EXAMPLE
        ConvertTo-AIPrompt -RepoSlug "dfinke/ImportExcel/Examples" -Include "*.ps1","*.md" | Set-Content -Path "ExcelExamples.xml"

        Exports only PowerShell and Markdown files from the Examples folder of the ImportExcel repository.

    .EXAMPLE
        ConvertTo-AIPrompt -RepoSlug "owner/repo" -IncludeBinary

        Exports all files from the repository, including binary files that would normally be excluded.

    .NOTES
        Requires connectivity to api.github.com.
        Consider using a token to avoid GitHub API rate limits.
        You can set $env:GITHUB_TOKEN environment variable for authentication instead of passing the token parameter.

        Default excluded binary and non-text formats:
        - Images: *.jpg, *.jpeg, *.png, *.gif, *.bmp, *.ico, *.svg, *.webp
        - Documents: *.pdf, *.docx, *.xlsx, *.pptx, *.odt, *.ods, *.odp
        - Archives: *.zip, *.tar, *.gz, *.7z, *.rar
        - Executables: *.exe, *.dll, *.so, *.dylib, *.bin
        - Media: *.mp3, *.mp4, *.wav, *.avi, *.mov, *.flac, *.mkv
        - Others: *.dat, *.db, *.sqlite, *.pyc, *.class, *.jar, *.iso, *.pdb
    #>

    [CmdletBinding()]
    param(
        [Parameter(Mandatory = $true, Position = 0)]
        [string]$RepoSlug,

        [Parameter(Mandatory = $false)]
        [string]$OutputPath,

        [Parameter(Mandatory = $false)]
        [string[]]$Exclude,

        [Parameter(Mandatory = $false)]
        [string[]]$Include,

        [Parameter(Mandatory = $false)]
        [string]$Token,

        [Parameter(Mandatory = $false)]
        [switch]$IncludeBinary
    )

    # Define common binary file formats to exclude by default
    $defaultBinaryExclusions = @(
        # Images
        "*.jpg", "*.jpeg", "*.png", "*.gif", "*.bmp", "*.ico", "*.svg", "*.webp",
        # Documents
        "*.pdf", "*.docx", "*.xlsx", "*.pptx", "*.odt", "*.ods", "*.odp",
        # Archives
        "*.zip", "*.tar", "*.gz", "*.7z", "*.rar",
        # Executables
        "*.exe", "*.dll", "*.so", "*.dylib", "*.bin",
        # Media
        "*.mp3", "*.mp4", "*.wav", "*.avi", "*.mov", "*.flac", "*.mkv",
        # Others
        "*.dat", "*.db", "*.sqlite", "*.pyc", "*.class", "*.jar", "*.iso", "*.pdb"
    )

    # Merge default exclusions with user-provided ones unless IncludeBinary is specified
    if (-not $IncludeBinary) {
        if ($Exclude) {
            $Exclude = $Exclude + $defaultBinaryExclusions | Select-Object -Unique
        }
        else {
            $Exclude = $defaultBinaryExclusions
        }
        Write-Verbose "Excluding binary files by default. Use -IncludeBinary to override."
    }

    # Parse repository information
    $slugParts = $RepoSlug -split '/'
    if ($slugParts.Count -lt 2 -or [string]::IsNullOrEmpty($slugParts[0]) -or [string]::IsNullOrEmpty($slugParts[1])) {
        throw "Invalid repository slug format. Expected 'owner/repo' or 'owner/repo/subfolder'."
    }

    $owner = $slugParts[0]
    $repo = $slugParts[1]

    # Check if a specific subfolder was requested
    $subfolder = ""
    $originalSubfolder = ""
    if ($slugParts.Count -gt 2) {
        $originalSubfolder = $slugParts[2..($slugParts.Count - 1)] -join '/'
        $subfolder = $originalSubfolder
    }

    # An if-expression is used instead of the ternary operator so the function also parses on Windows PowerShell 5.1
    Write-Verbose "Processing repository: $owner/$repo, subfolder: $(if ($subfolder) { $subfolder } else { '(root)' })"

    if ($Exclude -and $Exclude.Count -gt 0) {
        Write-Verbose "Excluding file patterns: $($Exclude -join ', ')"
    }

    if ($Include -and $Include.Count -gt 0) {
        Write-Verbose "Including only file patterns: $($Include -join ', ')"
    }

    # Setup API headers
    $headers = @{
        'Accept' = 'application/vnd.github.v3+json'
    }

    # Add token if provided, otherwise check for environment variable
    if ($Token) {
        Write-Verbose "Using provided token for authentication"
        $headers['Authorization'] = "token $Token"
    }
    elseif ($env:GITHUB_TOKEN) {
        Write-Verbose "Using GITHUB_TOKEN environment variable for authentication"
        $headers['Authorization'] = "token $env:GITHUB_TOKEN"
    }
    else {
        Write-Verbose "No authentication token provided. Accessing public repositories only."
    }

    # First check if the repository exists and get the correct case for the repo name
    try {
        Write-Progress -Activity "Verifying Repository" -Status "Checking $owner/$repo" -PercentComplete 0
        $repoUrl = "https://api.github.com/repos/$owner/$repo"
        Write-Verbose "Verifying repository: $repoUrl"
        $repoMetadata = Invoke-RestMethod -Uri $repoUrl -Headers $headers -ErrorAction Stop

        # Use the correct case from the API response
        $owner = $repoMetadata.owner.login
        $repo = $repoMetadata.name

        Write-Verbose "Using repository with correct case: $owner/$repo"
        Write-Progress -Activity "Verifying Repository" -Completed
    }
    catch {
        Write-Progress -Activity "Verifying Repository" -Status "Repository not found, trying case-insensitive search" -PercentComplete 50

        if ($_ -notmatch "404") {
            Write-Progress -Activity "Verifying Repository" -Completed
            throw "Error accessing repository information: $_"
        }

        Write-Verbose "Repository not found with exact case. Attempting case-insensitive search..."

        # Only the search request itself sits in the try block, so the "not found" error raised
        # below is not caught and re-wrapped with a duplicate of its own message.
        $matchedRepo = $null
        try {
            # Use the search API to find repositories case-insensitively
            $searchUrl = "https://api.github.com/search/repositories?q=$repo+user:$owner"
            Write-Verbose "Searching for repository: $searchUrl"
            $searchResult = Invoke-RestMethod -Uri $searchUrl -Headers $headers -ErrorAction Stop

            if ($searchResult.total_count -gt 0) {
                # Find the repository that matches case-insensitively
                $matchedRepo = $searchResult.items |
                    Where-Object { $_.name -ieq $repo -and $_.owner.login -ieq $owner } |
                    Select-Object -First 1
            }
        }
        catch {
            Write-Progress -Activity "Verifying Repository" -Completed
            throw "Repository not found: $owner/$repo. Please check that the repository exists and is spelled correctly. Error: $_"
        }

        Write-Progress -Activity "Verifying Repository" -Completed

        if (-not $matchedRepo) {
            throw "Repository not found: $owner/$repo. Please check that the repository exists and is spelled correctly."
        }

        # Use the correct case from the search results
        $owner = $matchedRepo.owner.login
        $repo = $matchedRepo.name
        Write-Verbose "Found repository with correct case: $owner/$repo"
    }

    # Recursively lists the files below $Path via the GitHub contents API.
    # Applies the enclosing function's $Exclude / $Include patterns (read from the parent scope)
    # to each file name. On a 404 for a nested path it looks in the parent directory for a
    # case-insensitive match; a file found that way is returned without pattern filtering.
    # Emits the matching content items; rethrows the underlying error on failure.
    function Get-RepoContents {
        param (
            [string]$Path,
            [hashtable]$Headers,
            [string]$Owner,
            [string]$Repo
        )

        # Correctly format the URL for the GitHub API
        # If the path is empty, don't include it in the URL
        $apiPath = if ([string]::IsNullOrEmpty($Path)) { "" } else { "/$Path" }
        $url = "https://api.github.com/repos/$Owner/$Repo/contents$apiPath"

        Write-Verbose "Fetching: $url"
        Write-Progress -Activity "Discovering Files" -Status "Scanning $Owner/$Repo/$Path" -PercentComplete -1

        try {
            $response = Invoke-RestMethod -Uri $url -Headers $Headers -ErrorAction Stop

            # A list avoids re-allocating an array on every append and, together with the
            # explicit null check below, keeps null entries out of the result.
            $files = [System.Collections.Generic.List[object]]::new()

            # @() also covers the case where the response is a single item (not an array)
            foreach ($item in @($response)) {
                if ($item.type -eq "dir") {
                    # Show progress when navigating directories
                    Write-Progress -Activity "Discovering Files" -Status "Scanning directory: $($item.path)" -PercentComplete -1

                    # Recursively get files from subdirectory. A subdirectory whose files are all
                    # filtered out yields nothing; @() turns that into an empty collection.
                    foreach ($subFile in @(Get-RepoContents -Path $item.path -Headers $Headers -Owner $Owner -Repo $Repo)) {
                        if ($null -ne $subFile) {
                            $files.Add($subFile)
                        }
                    }
                }
                elseif ($item.type -eq "file") {
                    # Check if file should be excluded
                    $shouldExclude = $false
                    if ($Exclude) {
                        foreach ($pattern in $Exclude) {
                            if ($item.name -like $pattern) {
                                $shouldExclude = $true
                                Write-Verbose "Excluding file (matched pattern '$pattern'): $($item.path)"
                                break
                            }
                        }
                    }

                    # Check if file should be included
                    $shouldInclude = $true
                    if ($Include) {
                        $shouldInclude = $false
                        foreach ($pattern in $Include) {
                            if ($item.name -like $pattern) {
                                $shouldInclude = $true
                                break
                            }
                        }

                        if (-not $shouldInclude) {
                            Write-Verbose "Skipping file (no match in Include patterns): $($item.path)"
                        }
                    }

                    if (-not $shouldExclude -and $shouldInclude) {
                        Write-Verbose "Including file: $($item.path)"
                        $files.Add($item)
                    }
                }
            }

            return $files.ToArray()
        }
        catch {
            # Make error message more helpful
            if ($_ -match "404") {
                # If the subfolder isn't found, we'll try different case variations
                if (-not [string]::IsNullOrEmpty($Path)) {
                    Write-Verbose "Path not found, checking parent directory for case-insensitive match"

                    # Split on '/' directly: this is a URL path, and Split-Path may rewrite
                    # separators to the platform's file-system separator.
                    $separatorIndex = $Path.LastIndexOf('/')

                    # If we're already at the root, there's no parent to check
                    if ($separatorIndex -le 0) {
                        Write-Error "Path not found: $Path. Check that the path exists and is spelled correctly (GitHub is case-sensitive)."
                        throw
                    }

                    $parentPath = $Path.Substring(0, $separatorIndex)
                    $leafName = $Path.Substring($separatorIndex + 1)

                    try {
                        # Get the contents of the parent directory
                        $parentUrl = "https://api.github.com/repos/$Owner/$Repo/contents/$parentPath"
                        $parentContents = @(Invoke-RestMethod -Uri $parentUrl -Headers $Headers)

                        # Look for a case-insensitive match for the directory
                        foreach ($item in $parentContents) {
                            if ($item.type -eq "dir" -and $item.name -ieq $leafName) {
                                Write-Verbose "Found case-insensitive match: $($item.name) instead of $leafName"
                                # Use the correct case from the API response
                                return Get-RepoContents -Path $item.path -Headers $Headers -Owner $Owner -Repo $Repo
                            }
                        }

                        # If we're looking for a file (not a directory), check for case-insensitive file matches
                        # This is needed for direct file access like owner/repo/path/to/file.ps1
                        foreach ($item in $parentContents) {
                            if ($item.type -eq "file" -and $item.name -ieq $leafName) {
                                Write-Verbose "Found case-insensitive file match: $($item.name) instead of $leafName"
                                # Return just this file as an array with one item
                                return @($item)
                            }
                        }
                    }
                    catch {
                        # If we can't check parent, just show the original error
                        Write-Error "Path not found: $Path. Check that the path exists and is spelled correctly (GitHub is case-sensitive)."
                        throw
                    }
                }

                Write-Error "Repository or path not found: $url. Make sure the repository and subfolder exist and are accessible."
            }
            else {
                Write-Error "Failed to get repository contents: $_"
            }
            throw
        }
    }

    # Get all files from the repository
    $allFiles = @()
    try {
        Write-Progress -Activity "Discovering Files" -Status "Scanning repository structure" -PercentComplete 0

        # PowerShell variable names are case-insensitive, so the saved copy must use a name that
        # differs by more than case from $ErrorActionPreference; otherwise "restoring" is a no-op
        # and the preference stays at SilentlyContinue, silencing later Write-Error calls.
        $savedErrorActionPreference = $ErrorActionPreference
        try {
            # Temporarily suppress non-terminating errors during the first attempt
            $ErrorActionPreference = 'SilentlyContinue'
            $firstAttemptError = $null

            # Get-RepoContents is not an advanced function, so common parameters such as
            # -ErrorVariable have no effect on it; failures are captured by the catch instead.
            try {
                $allFiles = @(Get-RepoContents -Path $subfolder -Headers $headers -Owner $owner -Repo $repo)
            }
            catch {
                $firstAttemptError = $_
            }

            $isNotFound = $firstAttemptError -and $firstAttemptError.ToString() -match "404"

            # If there was an error and it's a 404, try case-insensitive approach
            if ($isNotFound -and -not [string]::IsNullOrEmpty($originalSubfolder)) {
                Write-Verbose "Subfolder not found with provided case. Attempting case-insensitive subfolder search..."

                # Walk the requested path one segment at a time, resolving each segment's real case
                $foundSubfolderPath = ""
                $directFileMatch = $false
                $pathParts = $originalSubfolder -split '/'

                for ($partIndex = 0; $partIndex -lt $pathParts.Count; $partIndex++) {
                    $part = $pathParts[$partIndex]
                    $isLastPart = ($partIndex -eq $pathParts.Count - 1)

                    try {
                        # Get the contents of the directory resolved so far
                        $parentUrl = if ([string]::IsNullOrEmpty($foundSubfolderPath)) {
                            "https://api.github.com/repos/$owner/$repo/contents"
                        }
                        else {
                            "https://api.github.com/repos/$owner/$repo/contents/$foundSubfolderPath"
                        }

                        Write-Verbose "Checking directory: $parentUrl"
                        # @() also covers the case where the response is a single item (not an array)
                        $parentContents = @(Invoke-RestMethod -Uri $parentUrl -Headers $headers -ErrorAction Stop)

                        # A directory may match at any level; a file may only match the final segment
                        $matchedItem = $null
                        foreach ($item in $parentContents) {
                            if ($item.name -ine $part) {
                                continue
                            }
                            if ($item.type -eq "dir" -or ($isLastPart -and $item.type -eq "file")) {
                                $matchedItem = $item
                                break
                            }
                        }

                        if (-not $matchedItem) {
                            # If we can't find a match for this part, the subfolder doesn't exist
                            throw "Subfolder part '$part' not found in path '$foundSubfolderPath'"
                        }

                        # Use the correct case from the response
                        $foundSubfolderPath = if ([string]::IsNullOrEmpty($foundSubfolderPath)) {
                            $matchedItem.name
                        }
                        else {
                            "$foundSubfolderPath/$($matchedItem.name)"
                        }

                        if ($matchedItem.type -eq "file") {
                            # This is a direct file reference, return just this file
                            Write-Verbose "Found direct file reference with case-insensitive match: $($matchedItem.name)"
                            $allFiles = @($matchedItem)
                            $directFileMatch = $true
                            break
                        }

                        Write-Verbose "Found case-insensitive match for '$part': $($matchedItem.name)"
                    }
                    catch {
                        Write-Verbose "Error finding path: $_"
                        throw "Path not found: $originalSubfolder. Check that it exists and is spelled correctly."
                    }
                }

                if ($directFileMatch) {
                    # The file was already captured above; a successful direct match must not
                    # fall through to the "Path not found" branch.
                    Write-Verbose "Using file with correct case: $foundSubfolderPath (original: $originalSubfolder)"
                }
                elseif ($foundSubfolderPath) {
                    Write-Verbose "Using path with correct case: $foundSubfolderPath (original: $originalSubfolder)"
                    $subfolder = $foundSubfolderPath
                    # Restore error action preference so errors from the final attempt are reported
                    $ErrorActionPreference = $savedErrorActionPreference
                    $allFiles = @(Get-RepoContents -Path $subfolder -Headers $headers -Owner $owner -Repo $repo)
                }
                else {
                    # Restore error action preference before throwing
                    $ErrorActionPreference = $savedErrorActionPreference
                    throw "Path not found: $originalSubfolder. Check that it exists and is spelled correctly."
                }
            }
            elseif ($firstAttemptError) {
                # Restore error action preference before re-throwing
                $ErrorActionPreference = $savedErrorActionPreference
                throw $firstAttemptError
            }
        }
        finally {
            # Ensure error action preference is restored
            $ErrorActionPreference = $savedErrorActionPreference
        }

        Write-Progress -Activity "Discovering Files" -Completed
    }
    catch {
        Write-Progress -Activity "Discovering Files" -Completed
        throw "Failed to retrieve repository contents: $_"
    }

    # If no files were found, inform the user
    if ($allFiles.Count -eq 0) {
        Write-Warning "No files found in repository $owner/$repo$(if ($subfolder) { "/$subfolder" })"
    }
    else {
        Write-Verbose "Found $($allFiles.Count) files to process"
    }

    # Generate the XML document
    $xmlOutput = [System.Text.StringBuilder]::new()
    [void]$xmlOutput.AppendLine('<?xml version="1.0" encoding="UTF-8"?>')
    [void]$xmlOutput.AppendLine('<documents>')

    # $position drives the progress display and advances for every file;
    # $documentIndex numbers only the documents actually written, keeping XML indexes contiguous.
    $position = 0
    $documentIndex = 1
    $totalFiles = $allFiles.Count

    # Process each file
    foreach ($file in $allFiles) {
        $position++
        try {
            $percentComplete = [Math]::Min(100, [Math]::Round(($position / $totalFiles) * 100))
            Write-Progress -Activity "Processing Files" -Status "Processing file $position of $totalFiles" -CurrentOperation "$($file.path)" -PercentComplete $percentComplete

            Write-Verbose "Processing file: $($file.path)"

            # Get file content via the download URL reported by the GitHub API
            $fileUrl = $file.download_url
            if (-not $fileUrl) {
                Write-Warning "No download URL for $($file.path), skipping"
                continue
            }

            # Invoke-WebRequest is used rather than Invoke-RestMethod because the latter may
            # deserialize JSON/XML bodies into objects, and the raw file text is what is needed.
            $download = Invoke-WebRequest -Uri $fileUrl -Headers $headers -UseBasicParsing -ErrorAction Stop
            $fileContent = $download.Content
            if ($fileContent -is [byte[]]) {
                # Non-text content types are returned as bytes; decode them as UTF-8
                $fileContent = [System.Text.Encoding]::UTF8.GetString($fileContent)
            }

            # The content is only XML-escaped. It is deliberately not HTML-decoded first, because
            # that would rewrite entities (e.g. &amp;) that are part of the file itself.
            $escapedPath = [System.Security.SecurityElement]::Escape([string]$file.path)
            $escapedContent = [System.Security.SecurityElement]::Escape([string]$fileContent)

            # Add document entry to XML
            [void]$xmlOutput.AppendLine(" <document index='$documentIndex'>")
            [void]$xmlOutput.AppendLine(" <source>$escapedPath</source>")
            [void]$xmlOutput.AppendLine(" <document_content>")
            [void]$xmlOutput.AppendLine(" $escapedContent")
            [void]$xmlOutput.AppendLine(" </document_content>")
            [void]$xmlOutput.AppendLine(" </document>")

            $documentIndex++
        }
        catch {
            # Best effort: report the failure and carry on with the remaining files
            Write-Error "Error processing file $($file.path): $_"
        }
    }

    # Complete the progress bar
    Write-Progress -Activity "Processing Files" -Completed

    [void]$xmlOutput.AppendLine('</documents>')

    $result = $xmlOutput.ToString()

    # Either save to file or return as string
    if ($OutputPath) {
        $result | Out-File -FilePath $OutputPath -Encoding UTF8
        Write-Verbose "Output saved to: $OutputPath"
        return $OutputPath
    }
    else {
        return $result
    }
}