Private/duckdb/Invoke-DuckDBUpsert.ps1


function Invoke-DuckDBUpsert {
    <#
    .SYNOPSIS
        Performs an UPSERT via a temporary staging table + INSERT ON CONFLICT.
    .DESCRIPTION
        1. Write data into stg_<TableName> via appender (default) or CSV COPY FROM
        2. INSERT INTO <TableName> ... ON CONFLICT (PK) DO UPDATE SET ...
        3. Drop the staging table
    .PARAMETER PKColumns
        Primary key columns for the ON CONFLICT clause.
        If empty: plain INSERT (no UPSERT).
    .PARAMETER UseCsvImport
        Use Write-DuckDBCsv (temp CSV + COPY FROM) instead of the default row-by-row appender.
        Faster for large datasets; DuckDB parses the CSV internally using multi-threaded C++.
    .PARAMETER SimpleTypesOnly
        Passed through to Write-DuckDBCsv (only relevant when -UseCsvImport is set).
        Skips per-cell complex-type checks when all values are guaranteed to be primitives.
    #>

    [CmdletBinding()]
    param(
        [Parameter(Mandatory)] [DuckDB.NET.Data.DuckDBConnection]$Connection,
        [Parameter(Mandatory)] [string]$TableName,
        [Parameter(Mandatory)] $Data,
        [string[]]$PKColumns = @(),
        [Parameter(Mandatory=$false)]
        [switch]$UseCsvImport = $false,
        [Parameter(Mandatory=$false)]
        [switch]$SimpleTypesOnly = $false
    )

    $stagingTable = "stg_$TableName"

    # Create staging table
    Invoke-DuckDBQuery -Connection $Connection -Query @"
        CREATE TEMP TABLE IF NOT EXISTS $stagingTable
        AS SELECT * FROM $TableName WHERE 1 = 0
"@


    Write-Verbose "[$TableName] Staging table '$stagingTable' created. Starting UPSERT..."

    # Write data into staging table
    if ($UseCsvImport) {
        Write-DuckDBCsv -Connection $Connection -TableName $stagingTable -Data $Data -SimpleTypesOnly:$SimpleTypesOnly
    } else {
        Write-DuckDBAppender -Connection $Connection -TableName $stagingTable -Data $Data -SimpleTypesOnly:$SimpleTypesOnly
    }

    Write-Verbose "[$TableName] Data written to staging table. Starting merge..."

    if ($PKColumns.Count -gt 0) {
        # Determine all non-PK columns for the SET clause
        $allCols  = Get-DuckDBColumns -Connection $Connection -TableName $TableName
        $setCols  = $allCols | Where-Object { $_ -notin $PKColumns }
        $setClause = ($setCols | ForEach-Object { """$_"" = excluded.""$_""" }) -join ', '
        $pkList    = $PKColumns -join ', '

        Write-Verbose "[$TableName] Performing UPSERT with PK columns: $pkList"
        Invoke-DuckDBQuery -Connection $Connection -Query @"
            INSERT INTO $TableName
            SELECT * FROM $stagingTable
            ON CONFLICT ($pkList) DO UPDATE SET $setClause
"@

    } else {
        # No PK defined - plain INSERT
        Invoke-DuckDBQuery -Connection $Connection -Query @"
            INSERT INTO $TableName
            SELECT * FROM $stagingTable
"@

    }

    Write-Verbose "[$TableName] Merge completed."

    # Clean up staging table
    Invoke-DuckDBQuery -Connection $Connection -Query "DROP TABLE IF EXISTS $stagingTable"
    Write-Verbose "[$TableName] UPSERT completed."
}