# CsvSqlcmd.psm1
Function Invoke-CsvSqlcmd {
<#
.SYNOPSIS
Natively query CSV files using SQL syntax.

.DESCRIPTION
This function lets you query CSV files with SQL syntax through Microsoft's
OLE DB text providers (ACE, falling back to Jet).

If no suitable provider is registered in the current shell (typically a 64-bit
shell without the 64-bit driver), the function launches the 32-bit PowerShell
("$env:windir\syswow64\windowspowershell\v1.0\powershell.exe"), runs the query
there, and passes the results back through a temporary file using
Export-Clixml/Import-Clixml. You can avoid this step by running the function
from a 32-bit PowerShell shell in the first place.

The function returns datarows. See the examples for more details.

.PARAMETER CSV
The location of the CSV files to be queried. Multiple files are allowed, so
long as they all support the same SQL query and delimiter.

.PARAMETER FirstRowColumnNames
Specifies that the first row contains column names. If the first row does not
contain column names, the query engine automatically names the columns or
"fields" F1, F2, F3 and so on.

.PARAMETER Delimiter
Optional. If you do not pass a Delimiter, a comma is used. Valid delimiters
include: tab "`t", pipe "|", semicolon ";", space " " and maybe a couple of
other things. The delimiter is matched literally, not as a regular expression.

A schema.ini is written next to each CSV file for the duration of the query.
If one already exists, it is moved to TEMP and moved back once the function
has finished.

.PARAMETER SQL
The SQL statement to be executed. To make command line queries easier, the
word "csv" is replaced with the actual table name of each CSV file.

If the FirstRowColumnNames switch is not used, the query engine automatically
names the columns or "fields" F1, F2, F3 and so on.

Example: select F1, F2, F3, F4 from csv where F1 > 5

See EXAMPLES for more example syntax.

.PARAMETER shellswitch
Internal parameter. Set by the function itself when it re-invokes itself in
the 32-bit shell; in that mode -SQL is Base64 encoded and the result is
exported to a temporary file instead of being returned.

.NOTES
Author  : Chrissy LeMaire
Requires: PowerShell 3.0
Version : 1.0.7
DateUpdated: 2015-Sep-9

.LINK
https://gallery.technet.microsoft.com/scriptcenter/Query-CSV-with-SQL-c6c3c7e5

.EXAMPLE
Invoke-CsvSqlcmd -csv C:\temp\housingmarket.csv -sql "select address from csv where price < 250000" -FirstRowColumnNames

Returns all rows with a price less than 250000 to the screen. The first row of
the CSV file, C:\temp\housingmarket.csv, contains column names.

.EXAMPLE
Invoke-CsvSqlcmd -csv C:\temp\unstructured.csv -sql "select F1, F2, F3 from csv"

Returns the first three columns of all rows within the CSV file
C:\temp\unstructured.csv to the screen. Since the -FirstRowColumnNames switch
was not used, the query engine automatically names the columns or "fields"
F1, F2, F3 and so on.

.EXAMPLE
$datatable = Invoke-CsvSqlcmd -csv C:\temp\unstructured.csv -sql "select F1, F2, F3 from csv"
$datatable.rows.count

The function returns rows of a datatable; here the output is assigned to a
variable instead of being written to the screen.
#>
    #Requires -Version 3.0
    [CmdletBinding()]
    Param(
        [Parameter(Mandatory=$true)]
        [string[]]$Csv,
        [switch]$FirstRowColumnNames,
        [string]$Delimiter = ",",
        [Parameter(Mandatory=$true)]
        [string]$Sql,
        [switch]$shellswitch
    )

    BEGIN {
        # File used to hand the datatable from the x86 shell back to the calling shell.
        $xmlfile = "$env:TEMP\invoke-csvsqlcmd-dt.xml"

        # Maps "temporary location in TEMP" -> "original schema.ini path".
        $movedschemaini = @{}

        # Removes the schema.ini files this function generated and puts the user's
        # original ones back. Safe to call more than once: the map is emptied
        # after the originals have been restored.
        $cleanup = {
            foreach ($file in $csv) {
                $directory = Split-Path $file
                $null = Remove-Item "$directory\schema.ini" -Force -ErrorAction SilentlyContinue
            }
            # A hashtable must be enumerated explicitly; a plain foreach over it
            # yields the hashtable itself as one single item.
            foreach ($entry in @($movedschemaini.GetEnumerator())) {
                Move-Item $entry.Key $entry.Value -Force -ErrorAction SilentlyContinue
            }
            $movedschemaini.Clear()
        }

        # In order to ensure consistent results, a schema.ini file must be created.
        # If a schema.ini currently exists, it will be moved to TEMP temporarily.
        # This is skipped in the x86 child shell: the calling shell already did it.
        if (!$shellswitch) {
            $csv = @($csv | ForEach-Object { (Resolve-Path $_).Path })

            foreach ($file in $csv) {
                $directory = Split-Path $file
                if (Test-Path "$directory\schema.ini") {
                    $newschemaname = "$env:TEMP\$(Split-Path $file -leaf)-schema.ini"
                    $movedschemaini.Add($newschemaname, "$directory\schema.ini")
                    Move-Item "$directory\schema.ini" $newschemaname -Force
                }
            }
        }

        # Check for drivers: prefer ACE, fall back to Jet.
        $provider = (New-Object System.Data.OleDb.OleDbEnumerator).GetElements() |
            Where-Object { $_.SOURCES_NAME -like "Microsoft.ACE.OLEDB.*" }

        if ($null -eq $provider) {
            $provider = (New-Object System.Data.OleDb.OleDbEnumerator).GetElements() |
                Where-Object { $_.SOURCES_NAME -like "Microsoft.Jet.OLEDB.*" }
        }

        if ($null -eq $provider) {
            Write-Warning "Switching to x86 shell, then switching back."
            Write-Warning "This also requires a temporary file to be written, so patience may be necessary."
        } else {
            # Several matching providers may be registered; use the last one listed.
            if ($provider -is [system.array]) {
                $provider = $provider[-1].SOURCES_NAME
            } else {
                $provider = $provider.SOURCES_NAME
            }
        }
    }

    PROCESS {
        try {
            # No provider in this shell: re-run the query in the x86 shell.
            if ($null -eq $provider) {
                # Base64 encode the SQL string so that quotes and other special
                # characters survive the trip through the command line.
                $bytes = [System.Text.Encoding]::UTF8.GetBytes($sql)
                $encodedsql = [System.Convert]::ToBase64String($bytes)

                $frcnswitch = ""
                if ($FirstRowColumnNames) { $frcnswitch = "-FirstRowColumnNames" }

                # Quote every path so spaces survive; $csv itself is left untouched
                # because END still needs the individual paths for cleanup.
                $csvargument = ($csv | ForEach-Object { "'" + $_.Replace("'", "''") + "'" }) -join ","

                # Make sure a leftover file from an earlier failed run is not
                # mistaken for the result of this query.
                Remove-Item $xmlfile -Force -ErrorAction SilentlyContinue

                # NOTE(review): this persistently sets the CurrentUser execution policy
                # of the x86 shell to RemoteSigned; kept as in the original behavior.
                & "$env:windir\syswow64\windowspowershell\v1.0\powershell.exe" "Set-ExecutionPolicy RemoteSigned -Scope CurrentUser -confirm:0 -Force ; Invoke-CsvSqlcmd -csv $csvargument $frcnswitch -Delimiter '$Delimiter' -SQL '$encodedsql' -shellswitch"
                return
            }

            # If the shell has switched, decode the $sql string.
            if ($shellswitch) {
                $bytes = [System.Convert]::FromBase64String($sql)
                $sql = [System.Text.Encoding]::UTF8.GetString($bytes)
            }

            # Check for proper SQL syntax, which for the purposes of this function
            # must include the word "csv" (-notmatch is case-insensitive).
            if ($sql -notmatch "\bcsv\b") {
                throw "SQL statement must contain the word 'csv'. Please see this module's documentation for more details."
            }

            if ($FirstRowColumnNames) { $frcn = "Yes" } else { $frcn = "No" }

            # -match and -split take regular expressions; escape the delimiter so
            # that "|" and other metacharacters are treated literally.
            $delimiterpattern = [regex]::Escape($Delimiter)

            # Does first line contain the specified delimiter?
            foreach ($file in $csv) {
                $firstline = Get-Content $file -First 1
                if ($firstline -notmatch $delimiterpattern) {
                    throw "Delimiter $Delimiter not found in first row of $file."
                }
            }

            # If more than one csv specified, check to ensure number of columns match
            if ($csv -is [system.array]) {
                $numberofcolumns = ((Get-Content $csv[0] -First 1) -split $delimiterpattern).Count
                foreach ($file in $csv) {
                    $firstline = Get-Content $file -First 1
                    $newnumcolumns = ($firstline -split $delimiterpattern).Count
                    if ($newnumcolumns -ne $numberofcolumns) {
                        throw "Multiple csv file mismatch. Do both use the same delimiter and have the same number of columns?"
                    }
                }
            }

            # Create the resulting datatable
            $dt = New-Object System.Data.DataTable

            # Go through each file
            foreach ($file in $csv) {
                # Unfortunately, passing delimiter within the connection string is unreliable, so we'll use schema.ini instead.
                # The default delimiter in Windows changes depending on country, so we'll do this for every delimiter, even commas.
                $filename = Split-Path $file -leaf
                $directory = Split-Path $file
                Add-Content -Path "$directory\schema.ini" -Value "[$filename]"
                Add-Content -Path "$directory\schema.ini" -Value "Format=Delimited($Delimiter)"
                Add-Content -Path "$directory\schema.ini" -Value "ColNameHeader=$FirstRowColumnNames"

                # Setup the connection string. Data Source is the directory that contains the csv.
                # The file name is also the table name, but with a "#" instead of a "."
                $tablename = $filename.Replace(".", "#")
                $connstring = "Provider=$provider;Data Source=$directory;Extended Properties='text;HDR=$frcn;';"

                # To make command line queries easier, let the user just specify "csv" instead of the
                # OleDbconnection formatted name (file.csv -> file#csv).
                # The substitution always starts from the caller's original statement: the table
                # name itself contains the word "csv", so rewriting $sql in place would corrupt
                # the query for the second and later files. "$" is doubled because it is special
                # in a -replace substitution string.
                $replacement = " [$tablename]".Replace('$', '$$')
                $query = $sql -replace "\bcsv\b", $replacement

                # Setup the OleDbconnection
                $conn = New-Object System.Data.OleDb.OleDbconnection
                $conn.ConnectionString = $connstring
                $cmd = $null

                try {
                    try { $conn.Open() } catch { throw "Could not open OLEDB connection." }

                    # Setup the OleDBCommand
                    try {
                        $cmd = New-Object System.Data.OleDB.OleDBCommand
                        $cmd.Connection = $conn
                        $cmd.CommandText = $query
                    } catch { throw "Could not create OLEDB command." }

                    # Execute the query, then load it into a datatable
                    try {
                        $null = $dt.Load($cmd.ExecuteReader([System.Data.CommandBehavior]::CloseConnection))
                    } catch {
                        $errormessage = $_.Exception.Message.ToString()
                        if ($errormessage -like "*for one or more required parameters*") {
                            throw "Looks like your SQL syntax may be invalid. `nCheck the documentation for more information or start with a simple -sql 'select top 10 * from table'"
                        } else {
                            Write-Error "Execute failed: $errormessage"
                        }
                    }
                } finally {
                    # This should automatically close, but just in case...
                    # Runs on the error paths as well, so the connection is always released.
                    try {
                        if ($conn.State -eq "Open") { $conn.Close() }
                        if ($null -ne $cmd) { $cmd.Dispose() }
                        $conn.Dispose()
                    } catch {
                        Write-Warning "Could not close connection. This is just an informational message."
                    }
                }
            }

            # Use a file to facilitate the passing of a datatable from x86 to x64 if necessary
            if ($shellswitch) {
                try { $dt | Export-Clixml $xmlfile -Force } catch { throw "Could not export datatable to file." }
            }
        } catch {
            # A terminating error skips END, so restore the schema.ini files here
            # before passing the original error on to the caller.
            & $cleanup
            throw
        }
    }

    END {
        # Delete new schema files and move original schema.ini's back if they existed
        & $cleanup

        # If going between shell architectures, import a properly structured datatable.
        if ($null -eq $dt -and (Test-Path $xmlfile)) {
            $dt = Import-Clixml $xmlfile
            $null = Remove-Item $xmlfile -ErrorAction SilentlyContinue
        }

        # The x86 child shell hands its result over through the file, not the pipeline.
        if ($shellswitch -eq $false) { return $dt }
    }
}