# Public/ConvertTo-AIPrompt.ps1

function ConvertTo-AIPrompt {
    <#
    .SYNOPSIS
        Converts a GitHub repository into a single XML file optimized for AI tools.

    .DESCRIPTION
        This function downloads files from a GitHub repository and packages them into a single XML file
        that can be easily used with AI tools like ChatGPT, Claude, Gemini, etc.

        The repository content is organized into a structured format with each file's content
        encapsulated in separate document sections with paths and other metadata.

        File bodies are fetched as raw text, so JSON and XML files are embedded verbatim rather than
        being deserialized. Both the file path and the file content are XML-escaped in the output.

    .PARAMETER RepoSlug
        The GitHub repository slug in format 'owner/repo'. Optional subfolder can be specified using 'owner/repo/subfolder'.
        If the subfolder is not found, the function looks for a folder whose name differs only by case.

    .PARAMETER OutputPath
        Path to save the generated XML file. If not provided, the output is returned as a string.

    .PARAMETER Exclude
        Array of file patterns to exclude (wildcards supported, e.g., *.jpg, *.xlsx).
        By default, common binary and non-text formats are excluded (see Notes for the list).

    .PARAMETER Include
        Array of file patterns to include (wildcards supported, e.g., *.ps1, *.md). If not specified, all files are included.

    .PARAMETER Token
        GitHub API token for private repositories. Optional for public repos but recommended to avoid rate limiting.
        If not provided, the function will attempt to use $env:GITHUB_TOKEN.

    .PARAMETER IncludeBinary
        Switch to override the default binary file exclusions. When specified, only the files explicitly
        mentioned in the Exclude parameter will be excluded.

    .OUTPUTS
        System.String. The XML document when OutputPath is omitted; otherwise the OutputPath value.

    .EXAMPLE
        ConvertTo-AIPrompt -RepoSlug "dfinke/ImportExcel" -OutputPath "D:\ImportExcel.xml" -Exclude "*.xlsx","*.jpg"

        Exports the entire dfinke/ImportExcel repository, excluding xlsx and jpg files and all default binary formats.

    .EXAMPLE
        ConvertTo-AIPrompt -RepoSlug "dfinke/ImportExcel/Examples" -Include "*.ps1","*.md" | Set-Content -Path "ExcelExamples.xml"

        Exports only PowerShell and Markdown files from the Examples folder of the ImportExcel repository.

    .EXAMPLE
        ConvertTo-AIPrompt -RepoSlug "owner/repo" -IncludeBinary

        Exports all files from the repository, including binary files that would normally be excluded.

    .NOTES
        Requires connectivity to api.github.com.
        Consider using a token to avoid GitHub API rate limits.
        You can set $env:GITHUB_TOKEN environment variable for authentication instead of passing the token parameter.

        Default excluded binary and non-text formats:
        - Images: *.jpg, *.jpeg, *.png, *.gif, *.bmp, *.ico, *.svg, *.webp
        - Documents: *.pdf, *.docx, *.xlsx, *.pptx, *.odt, *.ods, *.odp
        - Archives: *.zip, *.tar, *.gz, *.7z, *.rar
        - Executables: *.exe, *.dll, *.so, *.dylib, *.bin
        - Media: *.mp3, *.mp4, *.wav, *.avi, *.mov, *.flac, *.mkv
        - Others: *.dat, *.db, *.sqlite, *.pyc, *.class, *.jar, *.iso, *.pdb
    #>

    [CmdletBinding()]
    param(
        [Parameter(Mandatory = $true, Position = 0)]
        [string]$RepoSlug,

        [Parameter(Mandatory = $false)]
        [string]$OutputPath,

        [Parameter(Mandatory = $false)]
        [string[]]$Exclude,

        [Parameter(Mandatory = $false)]
        [string[]]$Include,

        [Parameter(Mandatory = $false)]
        [string]$Token,

        [Parameter(Mandatory = $false)]
        [switch]$IncludeBinary
    )

    # Returns $true when the given error record represents an HTTP 404 response.
    # The status code on the exception's Response object is preferred; the message text is
    # only consulted when no response is attached, because URLs themselves may contain "404".
    function Test-HttpNotFound {
        param($ErrorRecord)

        $exception = $ErrorRecord.Exception
        if ($null -ne $exception) {
            $responseProperty = $exception.PSObject.Properties['Response']
            if ($null -ne $responseProperty -and $null -ne $responseProperty.Value) {
                return ([int]$responseProperty.Value.StatusCode -eq 404)
            }
        }

        return ("$ErrorRecord" -match '404')
    }

    # Converts a repository-relative path ('a/b c/d#e') into a URL suffix ('/a/b%20c/d%23e').
    # Returns an empty string for the repository root so it can be appended unconditionally.
    function ConvertTo-ApiPath {
        param([string]$Path)

        if ([string]::IsNullOrEmpty($Path)) {
            return ''
        }

        $encodedSegments = foreach ($segment in ($Path -split '/')) {
            [uri]::EscapeDataString($segment)
        }
        return '/' + ($encodedSegments -join '/')
    }

    # Recursively lists a repository path and emits every file entry that passes the
    # Include/Exclude filters. $Include and $Exclude are read from the enclosing function's scope.
    # Entries are streamed to the pipeline; callers should wrap the call in @() to get an array.
    function Get-RepoContents {
        param (
            [string]$Path,
            [hashtable]$Headers,
            [string]$Owner,
            [string]$Repo
        )

        $contentsRoot = "https://api.github.com/repos/$Owner/$Repo/contents"
        $url = $contentsRoot + (ConvertTo-ApiPath -Path $Path)

        Write-Verbose "Fetching: $url"
        Write-Progress -Activity "Discovering Files" -Status "Scanning $Owner/$Repo/$Path" -PercentComplete -1

        # Only the listing request itself is guarded: a failure inside a recursive call must
        # propagate as-is and must not trigger the case-insensitive fallback for *this* path.
        try {
            $response = Invoke-RestMethod -Uri $url -Headers $Headers -ErrorAction Stop
        }
        catch {
            if (-not (Test-HttpNotFound -ErrorRecord $_)) {
                Write-Error "Failed to get repository contents: $_"
                throw
            }

            if (-not [string]::IsNullOrEmpty($Path)) {
                Write-Verbose "Path not found, checking parent directory for case-insensitive match"

                # Repository paths always use '/', so split by hand; Split-Path would
                # rewrite separators to '\' on Windows and break the parent URL.
                $separatorIndex = $Path.LastIndexOf('/')
                if ($separatorIndex -ge 0) {
                    $parentPath = $Path.Substring(0, $separatorIndex)
                    $leafName = $Path.Substring($separatorIndex + 1)
                }
                else {
                    # Single-segment path: its parent is the repository root.
                    $parentPath = ''
                    $leafName = $Path
                }

                $correctedPath = $null
                try {
                    $parentUrl = $contentsRoot + (ConvertTo-ApiPath -Path $parentPath)
                    $parentContents = Invoke-RestMethod -Uri $parentUrl -Headers $Headers -ErrorAction Stop

                    foreach ($candidate in @($parentContents)) {
                        # Require an actual difference in spelling so a path that keeps
                        # returning 404 cannot make the fallback recurse forever.
                        if ($candidate.type -eq "dir" -and $candidate.name -ieq $leafName -and $candidate.path -cne $Path) {
                            Write-Verbose "Found case-insensitive match: $($candidate.name) instead of $leafName"
                            $correctedPath = $candidate.path
                            break
                        }
                    }
                }
                catch {
                    # If we can't check the parent, report the original path as missing
                    Write-Error "Path not found: $Path. Check that the path exists and is spelled correctly (GitHub is case-sensitive)."
                    throw
                }

                if ($correctedPath) {
                    # Use the correct case from the API response
                    return Get-RepoContents -Path $correctedPath -Headers $Headers -Owner $Owner -Repo $Repo
                }
            }

            Write-Error "Repository or path not found: $url. Make sure the repository and subfolder exist and are accessible."
            throw
        }

        # The API returns a single object (not an array) when the path is a file
        foreach ($entry in @($response)) {
            if ($entry.type -eq "dir") {
                Write-Progress -Activity "Discovering Files" -Status "Scanning directory: $($entry.path)" -PercentComplete -1

                # Stream the subdirectory's files straight through to our caller
                Get-RepoContents -Path $entry.path -Headers $Headers -Owner $Owner -Repo $Repo
            }
            elseif ($entry.type -eq "file") {
                # Check if file should be excluded
                $shouldExclude = $false
                foreach ($pattern in $Exclude) {
                    if ($entry.name -like $pattern) {
                        $shouldExclude = $true
                        Write-Verbose "Excluding file (matched pattern '$pattern'): $($entry.path)"
                        break
                    }
                }

                # Check if file should be included (no Include list means everything qualifies)
                $shouldInclude = $true
                if ($Include) {
                    $shouldInclude = $false
                    foreach ($pattern in $Include) {
                        if ($entry.name -like $pattern) {
                            $shouldInclude = $true
                            break
                        }
                    }

                    if (-not $shouldInclude) {
                        Write-Verbose "Skipping file (no match in Include patterns): $($entry.path)"
                    }
                }

                if (-not $shouldExclude -and $shouldInclude) {
                    Write-Verbose "Including file: $($entry.path)"
                    $entry
                }
            }
        }
    }

    # Define common binary file formats to exclude by default
    $defaultBinaryExclusions = @(
        # Images
        "*.jpg", "*.jpeg", "*.png", "*.gif", "*.bmp", "*.ico", "*.svg", "*.webp",
        # Documents
        "*.pdf", "*.docx", "*.xlsx", "*.pptx", "*.odt", "*.ods", "*.odp",
        # Archives
        "*.zip", "*.tar", "*.gz", "*.7z", "*.rar",
        # Executables
        "*.exe", "*.dll", "*.so", "*.dylib", "*.bin",
        # Media
        "*.mp3", "*.mp4", "*.wav", "*.avi", "*.mov", "*.flac", "*.mkv",
        # Others
        "*.dat", "*.db", "*.sqlite", "*.pyc", "*.class", "*.jar", "*.iso", "*.pdb"
    )

    # Merge default exclusions with user-provided ones unless IncludeBinary is specified
    if (-not $IncludeBinary) {
        if ($Exclude) {
            $Exclude = @($Exclude + $defaultBinaryExclusions | Select-Object -Unique)
        }
        else {
            $Exclude = $defaultBinaryExclusions
        }
        Write-Verbose "Excluding binary files by default. Use -IncludeBinary to override."
    }

    # Parse repository information; empty segments (leading/trailing/double slashes) are ignored
    $slugParts = @($RepoSlug -split '/' | Where-Object { $_ -ne '' })
    if ($slugParts.Count -lt 2) {
        throw "Invalid repository slug format. Expected 'owner/repo' or 'owner/repo/subfolder'."
    }

    $owner = $slugParts[0]
    $repo = $slugParts[1]

    # Everything after owner/repo is the requested subfolder (empty string means the root)
    $subfolder = @($slugParts | Select-Object -Skip 2) -join '/'

    $subfolderLabel = if ($subfolder) { $subfolder } else { '(root)' }
    Write-Verbose "Processing repository: $owner/$repo, subfolder: $subfolderLabel"

    if ($Exclude -and $Exclude.Count -gt 0) {
        Write-Verbose "Excluding file patterns: $($Exclude -join ', ')"
    }

    if ($Include -and $Include.Count -gt 0) {
        Write-Verbose "Including only file patterns: $($Include -join ', ')"
    }

    # Setup API headers
    $headers = @{
        'Accept' = 'application/vnd.github.v3+json'
    }

    # Add token if provided, otherwise check for environment variable
    if ($Token) {
        Write-Verbose "Using provided token for authentication"
        $headers['Authorization'] = "token $Token"
    }
    elseif ($env:GITHUB_TOKEN) {
        Write-Verbose "Using GITHUB_TOKEN environment variable for authentication"
        $headers['Authorization'] = "token $env:GITHUB_TOKEN"
    }
    else {
        Write-Verbose "No authentication token provided. Accessing public repositories only."
    }

    # First check if the repository exists and get the correct case for the repo name
    try {
        Write-Progress -Activity "Verifying Repository" -Status "Checking $owner/$repo" -PercentComplete 0
        $repoUrl = "https://api.github.com/repos/$owner/$repo"
        Write-Verbose "Verifying repository: $repoUrl"
        $repoMetadata = Invoke-RestMethod -Uri $repoUrl -Headers $headers -ErrorAction Stop

        # Use the correct case from the API response
        $owner = $repoMetadata.owner.login
        $repo = $repoMetadata.name

        Write-Verbose "Using repository with correct case: $owner/$repo"
    }
    catch {
        Write-Progress -Activity "Verifying Repository" -Completed
        if (Test-HttpNotFound -ErrorRecord $_) {
            throw "Repository not found: $owner/$repo. Please check that the repository exists and is spelled correctly."
        }
        else {
            throw "Error accessing repository information: $_"
        }
    }

    # Get all files from the repository
    $allFiles = @()
    try {
        Write-Progress -Activity "Discovering Files" -Status "Scanning repository structure" -PercentComplete 0
        # @() guarantees an array even when zero or one file is emitted
        $allFiles = @(Get-RepoContents -Path $subfolder -Headers $headers -Owner $owner -Repo $repo)
        Write-Progress -Activity "Discovering Files" -Completed
    }
    catch {
        Write-Progress -Activity "Discovering Files" -Completed
        throw "Failed to retrieve repository contents: $_"
    }

    # If no files were found, inform the user
    if ($allFiles.Count -eq 0) {
        Write-Warning "No files found in repository $owner/$repo$(if ($subfolder) { "/$subfolder" })"
    }
    else {
        Write-Verbose "Found $($allFiles.Count) files to process"
    }

    # Generate the XML document
    $xmlOutput = [System.Text.StringBuilder]::new()
    [void]$xmlOutput.AppendLine('<?xml version="1.0" encoding="UTF-8"?>')
    [void]$xmlOutput.AppendLine('<documents>')

    # $documentIndex numbers the documents actually written (contiguous, 1-based);
    # $position tracks how far through the file list we are, for progress reporting.
    $documentIndex = 1
    $position = 0
    $totalFiles = $allFiles.Count

    # Process each file
    foreach ($file in $allFiles) {
        $position++
        try {
            $percentComplete = [Math]::Min(100, [Math]::Round(($position / $totalFiles) * 100))
            Write-Progress -Activity "Processing Files" -Status "Processing file $position of $totalFiles" -CurrentOperation "$($file.path)" -PercentComplete $percentComplete

            Write-Verbose "Processing file: $($file.path)"

            $fileUrl = $file.download_url
            if (-not $fileUrl) {
                Write-Warning "No download URL for $($file.path), skipping"
                continue
            }

            # Invoke-WebRequest is used instead of Invoke-RestMethod because the latter
            # deserializes JSON/XML bodies into objects, destroying the original file text.
            $download = Invoke-WebRequest -Uri $fileUrl -Headers $headers -UseBasicParsing -ErrorAction Stop
            $fileContent = $download.Content
            if ($fileContent -is [byte[]]) {
                # Non-text content types come back as raw bytes; decode them as UTF-8
                $fileContent = [System.Text.Encoding]::UTF8.GetString($fileContent)
            }

            # Add document entry to XML (path and content are both escaped)
            [void]$xmlOutput.AppendLine(" <document index='$documentIndex'>")
            [void]$xmlOutput.AppendLine(" <source>$([System.Security.SecurityElement]::Escape($file.path))</source>")
            [void]$xmlOutput.AppendLine(" <document_content>")
            [void]$xmlOutput.AppendLine(" $([System.Security.SecurityElement]::Escape($fileContent))")
            [void]$xmlOutput.AppendLine(" </document_content>")
            [void]$xmlOutput.AppendLine(" </document>")

            $documentIndex++
        }
        catch {
            Write-Error "Error processing file $($file.path): $_"
        }
    }

    # Complete the progress bar
    Write-Progress -Activity "Processing Files" -Completed

    [void]$xmlOutput.AppendLine('</documents>')

    $result = $xmlOutput.ToString()

    # Either save to file or return as string
    if ($OutputPath) {
        $result | Out-File -FilePath $OutputPath -Encoding UTF8
        Write-Verbose "Output saved to: $OutputPath"
        return $OutputPath
    }
    else {
        return $result
    }
}