Measure-Correlation.ps1
function Measure-Correlation { <# .Synopsis Measures the correlation coeffecients in a set of data .Description Determines the correlation coeffecients in a set of data #> param( # The input object [Parameter(Mandatory=$true,ValueFromPipeline=$true)] [PSObject[]] $InputObject, # If set, will not show progress [Switch] $HideProgress ) begin { $allObjects = @() $propertyTotals = @{} $propertyCounts = @{} $propertyUnits = @{} if (-not $HideProgress) { $progressId = Get-Random } } process { # We need to collect all objects, but we can do the first bit of math while we're at it: # To get the average of the objects, we need their totals and counts, so pick that up on the way $allObjects += $InputObject if (-not $HideProgress) { Write-Progress "Accumulating Input and Totaling Data" $allObjects.Count -Id $progressId } foreach ($in in $InputObject) { foreach ($property in $in.psobject.properties) { if ($property.Value -as [float] -ne $null) { if (-not $propertyCounts[$property.Name]) { $propertyTotals[$property.Name] = 0 $propertyCounts[$property.Name] = 0 } $propertyCounts[$property.Name]++ $propertyTotals[$property.Name]+=($property.Value -as [float]) } } } } end { # Now that we've got all the data, calculate the average without making a pass thru the data if (-not $HideProgress) { Write-Progress "Averaging Input" " " -Id $progressId } $propertyAverages = @{} foreach ($kv in $propertyCounts.GetEnumerator()) { $propertyAverages[$kv.Key] = $propertyTotals[$kv.Key] / $propertyCounts[$kv.Key] } # To get the standard deviation, we need to total the deviation from each property from the average # This requires us to make our second pass thru the data $deviationTotals = @{} $allObjectCount = $allObjects.Count $iterator = 0 foreach ($object in $allObjects) { if (-not $HideProgress) { $iterator++ $perc = ([float]$iterator/$allObjects.Count) * 100 } foreach ($key in $propertyAverages.Keys) { $value = $object.psobject.properties[$key] if (-not $value) {continue } if (-not $HideProgress) { Write-Progress "Calculating Standard Deviations" "$key" -Id $progressId -PercentComplete $perc } $deviation = $Value.Value - $propertyAverages[$key] if (-not $deviationTotals[$key]) { $deviationTotals[$key] = 0 } $deviationTotals[$key] += [Math]::Pow($deviation, 2) } } # Now take the deviation totals, divide by the number of each item, and calculate the standard deviation $StandardDeviations = @{} foreach ($deviation in $deviationTotals.GetEnumerator()) { $standardDeviation = [Math]::Sqrt($deviation.Value / $propertyCounts[$deviation.Key]) $StandardDeviations[$deviation.Key] = $standardDeviation } # To calculte the correlation of any two sets of data, we need to convert each observation into a standard unit # A standard unit is the data distance of each point, divided by the standard deviation for that data # While we're doing this, we get to do the "fun" and expansive part: # Taking each combination of factors and calculating the delta of the standard units, or the correlation $correlationTotals = @{} $correlationCounts = @{} $iterator = 0 foreach ($object in $allObjects) { if (-not $HideProgress) { $iterator++ $perc = ([float]$iterator/$allObjects.Count) * 100 } $properties = @($object.psobject.properties | Select-Object -ExpandProperty Name) $correlationsInObject = @{} $valuesInStandardUnits = @{} foreach ($prop in $properties) { if (-not $object.psobject.properties[$prop]) { continue } if ($standardDeviations[$prop]) { if (-not $HideProgress) { Write-Progress "Computing Standard Units" "$prop" -Id $progressId -PercentComplete $perc } $valuesInStandardUnits[$prop] = ($object.$prop - $propertyAverages[$prop]) / $StandardDeviations[$prop] } } for ($i = 0; $i -lt $properties.Count;$i++) { if (-not $propertyAverages[$properties[$i]]) { continue } for($j = 0; $j -lt $properties.Count;$j++) { if ($j -eq $i) { continue } # Skip comparing with itself if (-not $propertyAverages[$properties[$j]]) { continue } $correlationName = @($properties[$i], $properties[$j] | Sort-Object ) -join " x " if ($correlationsInObject[$correlationName]) { continue } # Already computed? if (-not $HideProgress) { Write-Progress "Correlating" "$correlationName" -Id $progressId -PercentComplete $perc } $correlationsInObject[$correlationName] = $valuesInStandardUnits[$properties[$i]] * $valuesInStandardUnits[$properties[$j]] if (-not $correlationCounts[$correlationName]) { $correlationCounts[$correlationName] = 0 $correlationTotals[$correlationName] = 0 } $correlationCounts[$correlationName]++ $correlationTotals[$correlationName] += $correlationsInObject[$correlationName] } } } $correlations = @{} $total = $correlationCounts.Count $iterator =0 # Finally, each correlation should be averaged to produce a result foreach ($cc in $correlationCounts.GetEnumerator()) { if (-not $HideProgress) { $iterator++ $perc = ([float]$iterator / $total) * 100 Write-Progress "Averaging Correlations" $cc.Key -Id $progressId -PercentComplete $perc } $correlations[$cc.Key] = $correlationTotals[$cc.Key] / $cc.Value } if (-not $HideProgress) { Write-Progress "Outputting Correlations" " " -Id $progressId -Completed } New-Object PSObject -Property $correlations } } |