Modules/Collectors/50-MonitoringCollector.ps1
|
function Invoke-RangerMonitoringCollector {
    <#
    .SYNOPSIS
    Collects monitoring, alerting, and Health Service evidence for the configured environment.

    .DESCRIPTION
    If Get-RangerCollectorFixtureData returns data for this collector, that data is
    converted with ConvertTo-RangerHashtable and returned immediately.

    Otherwise the collector:
      * lists Azure resources through Get-RangerAzureResources and buckets them by
        name / resource type (AMA, DCR, DCE, telemetry, alerts, update manager);
      * runs a per-node script through Invoke-RangerClusterCommand that captures the
        HealthService service, Get-HealthFault output, the most frequent event ids in
        the 'Microsoft-Windows-Health/Operational' log, and monitoring agent details;
      * runs several Az cmdlet probes through Invoke-RangerAzureQuery (DCRs, alert
        rules, maintenance configurations, resource health, Log Analytics workspaces,
        diagnostic settings, action groups). Each probe first checks that its cmdlet
        exists and that a resource group is configured, returning an empty array
        otherwise.

    Every probe is wrapped in Invoke-RangerSafeAction with '-DefaultValue @()'
    (presumably the value used when the action fails - confirm against that helper).

    .PARAMETER Config
    Collector configuration. This function reads targets.azure.subscriptionId and
    targets.azure.resourceGroup and forwards the whole object to the Ranger helpers.

    .PARAMETER CredentialMap
    Object exposing 'azure' and 'cluster' members that are forwarded to the Azure and
    cluster helpers respectively.

    .PARAMETER Definition
    Collector definition; only its Id is read here (for the fixture lookup).

    .PARAMETER PackageRoot
    Accepted for interface compatibility; not referenced in this function.

    .OUTPUTS
    Hashtable with the keys Status ('partial' when any finding was raised, otherwise
    'success'), Domains, Findings, Relationships, and RawEvidence.
    #>
    param(
        [Parameter(Mandatory = $true)]
        [System.Collections.IDictionary]$Config,

        [Parameter(Mandatory = $true)]
        $CredentialMap,

        [Parameter(Mandatory = $true)]
        [object]$Definition,

        [Parameter(Mandatory = $true)]
        [string]$PackageRoot
    )

    # Fixture data short-circuits all live collection.
    $fixture = Get-RangerCollectorFixtureData -Config $Config -CollectorId $Definition.Id
    if ($fixture) {
        return ConvertTo-RangerHashtable -InputObject $fixture
    }

    $azureResources = @(Get-RangerAzureResources -Config $Config -AzureCredentialSettings $CredentialMap.azure)

    # Per-node Health Service snapshot. The inner script block is self-contained: it only
    # uses cmdlets it has probed for with Get-Command and never references outer variables.
    $healthSnapshots = @(
        Invoke-RangerSafeAction -Label 'Monitoring health snapshot' -DefaultValue @() -ScriptBlock {
            Invoke-RangerClusterCommand -Config $Config -Credential $CredentialMap.cluster -ScriptBlock {
                $healthServiceObj = if (Get-Command -Name Get-Service -ErrorAction SilentlyContinue) {
                    Get-Service -Name HealthService -ErrorAction SilentlyContinue | Select-Object Name, Status, StartType
                } else {
                    $null
                }

                $healthFaults = if (Get-Command -Name Get-HealthFault -ErrorAction SilentlyContinue) {
                    @(Get-HealthFault -ErrorAction SilentlyContinue |
                            Select-Object FaultType, FaultingObjectDescription, PerceivedSeverity, Reason, FaultTime |
                            ForEach-Object {
                                [ordered]@{
                                    faultType      = $_.FaultType
                                    faultingObject = $_.FaultingObjectDescription
                                    severity       = [string]$_.PerceivedSeverity
                                    reason         = $_.Reason
                                    faultTime      = $_.FaultTime
                                }
                            })
                } else {
                    @()
                }

                # Top five event ids (by frequency) among the 100 most recent Health log events.
                $healthEvents = @(try {
                        Get-WinEvent -LogName 'Microsoft-Windows-Health/Operational' -MaxEvents 100 -ErrorAction Stop |
                            Group-Object -Property Id |
                            Sort-Object Count -Descending |
                            Select-Object -First 5 |
                            ForEach-Object {
                                $firstEvent = $_.Group[0]
                                # Coerce to string first: a null Message would otherwise throw on
                                # .Substring and the catch below would discard every event group.
                                $messageText = [string]$firstEvent.Message
                                [ordered]@{
                                    eventId = $_.Name
                                    count   = $_.Count
                                    level   = $firstEvent.LevelDisplayName
                                    sample  = $messageText.Substring(0, [Math]::Min(200, $messageText.Length))
                                }
                            }
                    } catch {
                        @()
                    })

                # Monitoring agent version from the MonitoringAgent setup registry key, plus related services.
                $amaAgentVersion = (Get-ItemProperty -Path 'HKLM:\SOFTWARE\Microsoft\MonitoringAgent\Setup' -Name 'CurrentVersion' -ErrorAction SilentlyContinue).CurrentVersion
                $amaService = if (Get-Command -Name Get-Service -ErrorAction SilentlyContinue) {
                    Get-Service -Name 'AzureMonitoringAgent','HealthAndSupportServices' -ErrorAction SilentlyContinue | Select-Object Name, Status, StartType
                } else {
                    @()
                }

                [ordered]@{
                    node               = $env:COMPUTERNAME
                    healthService      = $healthServiceObj
                    healthFaults       = @($healthFaults)
                    healthFaultCount   = @($healthFaults).Count
                    criticalFaultCount = @($healthFaults | Where-Object { $_.severity -match 'Critical|Fatal' }).Count
                    diagnostics        = if (Get-Command -Name Get-Service -ErrorAction SilentlyContinue) { @(Get-Service -Name 'AzureEdgeTelemetryAndDiagnostics*' -ErrorAction SilentlyContinue | Select-Object Name, Status, StartType) } else { @() }
                    amaService         = @($amaService)
                    amaAgentVersion    = $amaAgentVersion
                    healthEvents       = @($healthEvents)
                }
            }
        }
    )

    # DCR detail with data sources and destinations
    $dcrDetail = @(
        Invoke-RangerSafeAction -Label 'DCR data source detail' -DefaultValue @() -ScriptBlock {
            Invoke-RangerAzureQuery -AzureCredentialSettings $CredentialMap.azure -ArgumentList @($Config.targets.azure.subscriptionId, $Config.targets.azure.resourceGroup) -ScriptBlock {
                param($SubscriptionId, $ResourceGroup)

                if (-not (Get-Command -Name Get-AzDataCollectionRule -ErrorAction SilentlyContinue) -or [string]::IsNullOrWhiteSpace($ResourceGroup)) {
                    return @()
                }

                @(Get-AzDataCollectionRule -ResourceGroupName $ResourceGroup -ErrorAction SilentlyContinue | ForEach-Object {
                        $rule = $_
                        [ordered]@{
                            name             = $rule.Name
                            id               = $rule.Id
                            location         = $rule.Location
                            dataSourceTypes  = @($rule.DataSources.PSObject.Properties.Name)
                            destinationTypes = @($rule.Destinations.PSObject.Properties.Name | Where-Object { $_ -ne 'AzureMonitorMetrics' -or $rule.Destinations.$_.Name })
                            transformKql     = $rule.DataFlows | ForEach-Object { $_.TransformKql } | Where-Object { $_ }
                            description      = $rule.Description
                        }
                    })
            }
        }
    )

    # Alert rules (activity log, metric, scheduled query) with severity and last-modified where exposed
    $alertRuleDetail = @(
        Invoke-RangerSafeAction -Label 'Azure alert rule detail' -DefaultValue @() -ScriptBlock {
            Invoke-RangerAzureQuery -AzureCredentialSettings $CredentialMap.azure -ArgumentList @($Config.targets.azure.subscriptionId, $Config.targets.azure.resourceGroup) -ScriptBlock {
                param($SubscriptionId, $ResourceGroup)

                if ([string]::IsNullOrWhiteSpace($ResourceGroup)) {
                    return @()
                }

                $results = New-Object System.Collections.ArrayList

                if (Get-Command -Name Get-AzActivityLogAlert -ErrorAction SilentlyContinue) {
                    @(Get-AzActivityLogAlert -ResourceGroupName $ResourceGroup -ErrorAction SilentlyContinue) | ForEach-Object {
                        [void]$results.Add([ordered]@{ name = $_.Name; type = 'ActivityLog'; enabled = $_.Enabled; scopes = @($_.Scopes) })
                    }
                }

                if (Get-Command -Name Get-AzMetricAlertRuleV2 -ErrorAction SilentlyContinue) {
                    @(Get-AzMetricAlertRuleV2 -ResourceGroupName $ResourceGroup -ErrorAction SilentlyContinue) | ForEach-Object {
                        [void]$results.Add([ordered]@{
                                name                = $_.Name
                                type                = 'Metric'
                                severity            = $_.Severity
                                enabled             = $_.Enabled
                                evaluationFrequency = [string]$_.EvaluationFrequency
                                windowSize          = [string]$_.WindowSize
                                lastModified        = $_.LastUpdated
                                targetResourceType  = $_.TargetResourceType
                                # Skip entries without an id so Split-Path is never handed a null path.
                                actionGroups        = @($_.Action | Where-Object { $_.ActionGroupId } | ForEach-Object { Split-Path $_.ActionGroupId -Leaf })
                                criteriaTypes       = @(if ($_.Criteria) { @($_.Criteria.PSObject.Properties.Name) } else { @() })
                            })
                    }
                }

                if (Get-Command -Name Get-AzScheduledQueryRule -ErrorAction SilentlyContinue) {
                    @(Get-AzScheduledQueryRule -ResourceGroupName $ResourceGroup -ErrorAction SilentlyContinue) | ForEach-Object {
                        [void]$results.Add([ordered]@{
                                name         = $_.Name
                                type         = 'ScheduledQuery'
                                severity     = $_.Severity
                                enabled      = $_.Enabled
                                query        = $_.Query
                                # Same null-path guard as for metric alerts.
                                actionGroups = @($_.Action | Where-Object { $_.ActionGroupResourceId } | ForEach-Object { Split-Path $_.ActionGroupResourceId -Leaf })
                                windowSize   = [string]$_.WindowSize
                                frequency    = [string]$_.Frequency
                            })
                    }
                }

                @($results)
            }
        }
    )

    # Azure Update Manager: maintenance configurations
    $updateManagerDetail = @(
        Invoke-RangerSafeAction -Label 'Azure Update Manager configuration detail' -DefaultValue @() -ScriptBlock {
            Invoke-RangerAzureQuery -AzureCredentialSettings $CredentialMap.azure -ArgumentList @($Config.targets.azure.subscriptionId, $Config.targets.azure.resourceGroup) -ScriptBlock {
                param($SubscriptionId, $ResourceGroup)

                if (-not (Get-Command -Name Get-AzMaintenanceConfiguration -ErrorAction SilentlyContinue) -or [string]::IsNullOrWhiteSpace($ResourceGroup)) {
                    return @()
                }

                @(Get-AzMaintenanceConfiguration -ResourceGroupName $ResourceGroup -ErrorAction SilentlyContinue | Select-Object Name, MaintenanceScope, Frequency, StartDateTime, DurationInHours, Timezone, Location)
            }
        }
    )

    # HCI Insights / resource health
    $resourceHealth = @(
        Invoke-RangerSafeAction -Label 'Azure resource health for HCI cluster' -DefaultValue @() -ScriptBlock {
            Invoke-RangerAzureQuery -AzureCredentialSettings $CredentialMap.azure -ArgumentList @($Config.targets.azure.subscriptionId, $Config.targets.azure.resourceGroup) -ScriptBlock {
                param($SubscriptionId, $ResourceGroup)

                if (-not (Get-Command -Name Get-AzResourceHealth -ErrorAction SilentlyContinue) -or [string]::IsNullOrWhiteSpace($ResourceGroup)) {
                    return @()
                }

                @(Get-AzResourceHealth -ResourceGroupName $ResourceGroup -ErrorAction SilentlyContinue | Select-Object ResourceName, ResourceType, AvailabilityState, Summary, ReasonType)
            }
        }
    )

    # Issue #67: Log Analytics workspace detail and HCI Insights solutions
    $logAnalyticsWorkspaces = @(
        Invoke-RangerSafeAction -Label 'Log Analytics workspace detail' -DefaultValue @() -ScriptBlock {
            Invoke-RangerAzureQuery -AzureCredentialSettings $CredentialMap.azure -ArgumentList @($Config.targets.azure.subscriptionId, $Config.targets.azure.resourceGroup) -ScriptBlock {
                param($SubscriptionId, $ResourceGroup)

                if (-not (Get-Command -Name Get-AzOperationalInsightsWorkspace -ErrorAction SilentlyContinue) -or [string]::IsNullOrWhiteSpace($ResourceGroup)) {
                    return @()
                }

                @(Get-AzOperationalInsightsWorkspace -ResourceGroupName $ResourceGroup -ErrorAction SilentlyContinue | ForEach-Object {
                        $ws = $_
                        # A failing solution lookup degrades to "no solutions" for this workspace only.
                        $solutions = @(try {
                                Get-AzOperationalInsightsIntelligencePack -ResourceGroupName $ResourceGroup -WorkspaceName $ws.Name -ErrorAction Stop |
                                    Where-Object { $_.Enabled } |
                                    Select-Object Name, Enabled
                            } catch {
                                @()
                            })
                        $hciInsightsEnabled = @($solutions | Where-Object { $_.Name -match 'azurelocal|hciinsights|ContainerInsights|AzureActivity' }).Count -gt 0

                        [ordered]@{
                            name               = $ws.Name
                            workspaceId        = $ws.CustomerId
                            resourceGroup      = $ws.ResourceGroupName
                            location           = $ws.Location
                            sku                = [string]$ws.Sku
                            retentionDays      = $ws.RetentionInDays
                            enabledSolutions   = @($solutions | ForEach-Object { $_.Name })
                            hciInsightsEnabled = $hciInsightsEnabled
                        }
                    })
            }
        }
    )

    # Issue #67: Diagnostic settings on HCI cluster resources
    $diagnosticSettings = @(
        Invoke-RangerSafeAction -Label 'Diagnostic settings on HCI cluster resource' -DefaultValue @() -ScriptBlock {
            Invoke-RangerAzureQuery -AzureCredentialSettings $CredentialMap.azure -ArgumentList @($Config.targets.azure.subscriptionId, $Config.targets.azure.resourceGroup) -ScriptBlock {
                param($SubscriptionId, $ResourceGroup)

                if (-not (Get-Command -Name Get-AzDiagnosticSetting -ErrorAction SilentlyContinue) -or [string]::IsNullOrWhiteSpace($SubscriptionId) -or [string]::IsNullOrWhiteSpace($ResourceGroup)) {
                    return @()
                }

                $hciResources = @(Get-AzResource -ResourceGroupName $ResourceGroup -ResourceType 'Microsoft.AzureStackHCI/clusters' -ErrorAction SilentlyContinue)
                $diagResult = New-Object System.Collections.ArrayList

                foreach ($res in $hciResources) {
                    $settings = @(Get-AzDiagnosticSetting -ResourceId $res.ResourceId -ErrorAction SilentlyContinue)
                    foreach ($s in $settings) {
                        [void]$diagResult.Add([ordered]@{
                                resourceId       = $res.ResourceId
                                resourceName     = $res.Name
                                name             = $s.Name
                                enabledLogs      = @($s.Log | Where-Object { $_.Enabled } | ForEach-Object { $_.Category })
                                enabledMetrics   = @($s.Metrics | Where-Object { $_.Enabled } | ForEach-Object { $_.Category })
                                workspaceId      = $s.WorkspaceId
                                storageAccountId = $s.StorageAccountId
                            })
                    }
                }

                @($diagResult)
            }
        }
    )

    # Issue #67: Action groups
    $actionGroups = @(
        Invoke-RangerSafeAction -Label 'Azure Monitor action groups' -DefaultValue @() -ScriptBlock {
            Invoke-RangerAzureQuery -AzureCredentialSettings $CredentialMap.azure -ArgumentList @($Config.targets.azure.subscriptionId, $Config.targets.azure.resourceGroup) -ScriptBlock {
                param($SubscriptionId, $ResourceGroup)

                if (-not (Get-Command -Name Get-AzActionGroup -ErrorAction SilentlyContinue) -or [string]::IsNullOrWhiteSpace($ResourceGroup)) {
                    return @()
                }

                @(Get-AzActionGroup -ResourceGroupName $ResourceGroup -ErrorAction SilentlyContinue | ForEach-Object {
                        [ordered]@{
                            name             = $_.Name
                            groupShortName   = $_.GroupShortName
                            enabled          = $_.Enabled
                            emailReceivers   = @($_.EmailReceiver | ForEach-Object { [ordered]@{ name = $_.Name; address = $_.EmailAddress; useCommonAlert = $_.UseCommonAlertSchema } })
                            webhookReceivers = @($_.WebhookReceiver | ForEach-Object { [ordered]@{ name = $_.Name; serviceUri = $_.ServiceUri } })
                            armRoleReceivers = @($_.ArmRoleReceiver | ForEach-Object { $_.RoleId })
                        }
                    })
            }
        }
    )

    # Bucket the generic Azure resource inventory by name / resource type pattern.
    $ama = @($azureResources | Where-Object { $_.Name -match 'AzureMonitor|AMA' -or $_.ResourceType -match 'HybridCompute.*/extensions' })
    $dcr = @($azureResources | Where-Object { $_.ResourceType -match 'dataCollectionRules' })
    $dce = @($azureResources | Where-Object { $_.ResourceType -match 'dataCollectionEndpoints' })
    $telemetry = @($azureResources | Where-Object { $_.Name -match 'Telemetry|Diagnostics|HCIInsights' -or $_.ResourceType -match 'insights|operationalinsights' })
    $alerts = @($azureResources | Where-Object { $_.ResourceType -match 'actionGroups|scheduledQueryRules|alertrules' })
    $updateManager = @($azureResources | Where-Object { $_.ResourceType -match 'maintenance|update' })

    # Issue #67: Health fault category grouping (group by FaultType prefix)
    $allFaults = @($healthSnapshots | ForEach-Object { $_.healthFaults })
    $healthFaultsByCategory = @($allFaults |
            Group-Object -Property { ($_.faultType -split '\.')[0] } |
            ForEach-Object {
                [ordered]@{
                    category      = $_.Name
                    count         = $_.Count
                    criticalCount = @($_.Group | Where-Object { $_.severity -match 'Critical|Fatal' }).Count
                    faults        = @($_.Group | Select-Object -First 3)
                }
            })

    # Issue #67: Telemetry extension detail from arc machine extensions
    $telemetryExtensionDetail = @($azureResources |
            Where-Object { $_.Name -match 'AzureEdgeTelemetryAndDiagnostics|TelemetryAndDiagnostics' } |
            ForEach-Object { [ordered]@{ name = $_.Name; resourceType = $_.ResourceType; location = $_.Location; id = $_.ResourceId } })

    # Issue #67: HCI Insights enablement summary (workspace fields describe the first workspace only)
    $hasWorkspace = @($logAnalyticsWorkspaces).Count -gt 0
    $hciInsightsSummary = [ordered]@{
        enabled                 = @($logAnalyticsWorkspaces | Where-Object { $_.hciInsightsEnabled -eq $true }).Count -gt 0
        workspaceCount          = @($logAnalyticsWorkspaces).Count
        workspaceName           = if ($hasWorkspace) { $logAnalyticsWorkspaces[0].name } else { $null }
        workspaceId             = if ($hasWorkspace) { $logAnalyticsWorkspaces[0].workspaceId } else { $null }
        workspaceRegion         = if ($hasWorkspace) { $logAnalyticsWorkspaces[0].location } else { $null }
        diagnosticSettingsCount = @($diagnosticSettings).Count
        platformMetricsEnabled  = @($diagnosticSettings | Where-Object { @($_.enabledMetrics).Count -gt 0 }).Count -gt 0
    }

    # Measure-Object -Sum reports a null Sum when it receives no input; the [int] cast keeps
    # the fault totals as integer counts (0 when there are no snapshots) like every other field.
    $totalHealthFaults = [int]($healthSnapshots | ForEach-Object { $_.healthFaultCount } | Measure-Object -Sum).Sum
    $criticalHealthFaults = [int]($healthSnapshots | ForEach-Object { $_.criticalFaultCount } | Measure-Object -Sum).Sum

    $monitoringSummary = [ordered]@{
        telemetryCount             = @($telemetry).Count
        amaCount                   = @($ama).Count
        dcrCount                   = @($dcr).Count
        dcrDetailCount             = @($dcrDetail).Count
        dceCount                   = @($dce).Count
        alertCount                 = @($alerts).Count
        alertRuleDetailCount       = @($alertRuleDetail).Count
        updateManagerCount         = @($updateManager).Count
        maintenanceConfigCount     = @($updateManagerDetail).Count
        resourceHealthCount        = @($resourceHealth).Count
        unhealthyResourceCount     = @($resourceHealth | Where-Object { $_.AvailabilityState -ne 'Available' }).Count
        # NOTE(review): Status is compared as text in case the cluster helper hands back a
        # serialized (non-enum) value - confirm how Invoke-RangerClusterCommand marshals results.
        healthServiceRunningNodes  = @($healthSnapshots | Where-Object { [string]$_.healthService.Status -eq 'Running' }).Count
        totalHealthFaults          = $totalHealthFaults
        criticalHealthFaults       = $criticalHealthFaults
        nodesWithAmaAgent          = @($healthSnapshots | Where-Object { $null -ne $_.amaAgentVersion }).Count
        logAnalyticsWorkspaceCount = @($logAnalyticsWorkspaces).Count
        diagnosticSettingsCount    = @($diagnosticSettings).Count
        actionGroupCount           = @($actionGroups).Count
        hciInsightsEnabled         = $hciInsightsSummary.enabled
        healthFaultCategoryCount   = @($healthFaultsByCategory).Count
        telemetryExtensionCount    = @($telemetryExtensionDetail).Count
    }

    $findings = New-Object System.Collections.ArrayList

    if ($ama.Count -eq 0 -and $dcr.Count -eq 0) {
        $findingArgs = @{
            Severity       = 'warning'
            Title          = 'Minimal Azure monitoring evidence detected'
            Description    = 'The monitoring collector did not find Azure Monitor Agent or Data Collection Rule resources in the configured resource group.'
            CurrentState   = 'monitoring partially configured'
            Recommendation = 'Review Azure Monitor onboarding, DCR assignments, and resource-group scoping for the Azure Local environment.'
        }
        [void]$findings.Add((New-RangerFinding @findingArgs))
    }

    if ($alerts.Count -eq 0) {
        $findingArgs = @{
            Severity       = 'informational'
            Title          = 'No alerting artifacts were discovered in the scoped Azure resources'
            Description    = 'The monitoring collector found no Azure Monitor alert rule or action group resources for the configured resource group.'
            CurrentState   = 'alert inventory empty'
            Recommendation = 'Confirm whether alerting is intentionally managed elsewhere or whether resource-group scoping needs to be widened.'
        }
        [void]$findings.Add((New-RangerFinding @findingArgs))
    }

    if ($monitoringSummary.criticalHealthFaults -gt 0) {
        $findingArgs = @{
            Severity       = 'warning'
            Title          = 'Active critical Health Service faults detected on cluster nodes'
            Description    = "Health Service fault collection found $($monitoringSummary.criticalHealthFaults) critical or fatal fault(s) across cluster nodes."
            CurrentState   = "$($monitoringSummary.criticalHealthFaults) critical faults; $($monitoringSummary.totalHealthFaults) total faults"
            Recommendation = 'Review HealthService faults via Get-HealthFault and resolve before handoff. Critical faults may indicate storage, network, or hardware degradation.'
        }
        [void]$findings.Add((New-RangerFinding @findingArgs))
    }

    # unhealthyResourceCount is the count of resource-health rows whose state is not 'Available'.
    if ($monitoringSummary.unhealthyResourceCount -gt 0) {
        $findingArgs = @{
            Severity       = 'warning'
            Title          = 'Azure resource health indicates degraded HCI resources'
            Description    = 'Azure Resource Health returned non-Available states for one or more scoped resources.'
            CurrentState   = "$($monitoringSummary.unhealthyResourceCount) resources not in Available state"
            Recommendation = 'Review Azure Resource Health for the HCI cluster and any linked Arc Machine resources before handoff.'
        }
        [void]$findings.Add((New-RangerFinding @findingArgs))
    }

    return @{
        Status        = if ($findings.Count -gt 0) { 'partial' } else { 'success' }
        Domains       = @{
            monitoring = [ordered]@{
                telemetry              = ConvertTo-RangerHashtable -InputObject $telemetry
                ama                    = ConvertTo-RangerHashtable -InputObject $ama
                dcr                    = ConvertTo-RangerHashtable -InputObject $dcr
                dcrDetail              = ConvertTo-RangerHashtable -InputObject $dcrDetail
                dce                    = ConvertTo-RangerHashtable -InputObject $dce
                insights               = ConvertTo-RangerHashtable -InputObject @($azureResources | Where-Object { $_.ResourceType -match 'operationalinsights|insights' })
                logAnalyticsWorkspaces = ConvertTo-RangerHashtable -InputObject $logAnalyticsWorkspaces
                diagnosticSettings     = ConvertTo-RangerHashtable -InputObject $diagnosticSettings
                actionGroups           = ConvertTo-RangerHashtable -InputObject $actionGroups
                alerts                 = ConvertTo-RangerHashtable -InputObject $alerts
                alertRuleDetail        = ConvertTo-RangerHashtable -InputObject $alertRuleDetail
                health                 = ConvertTo-RangerHashtable -InputObject $healthSnapshots
                healthFaults           = ConvertTo-RangerHashtable -InputObject @($healthSnapshots | ForEach-Object { [ordered]@{ node = $_.node; faults = $_.healthFaults; count = $_.healthFaultCount } })
                healthFaultsByCategory = ConvertTo-RangerHashtable -InputObject $healthFaultsByCategory
                telemetryExtension     = ConvertTo-RangerHashtable -InputObject $telemetryExtensionDetail
                hciInsights            = $hciInsightsSummary
                updateManager          = ConvertTo-RangerHashtable -InputObject $updateManager
                updateManagerDetail    = ConvertTo-RangerHashtable -InputObject $updateManagerDetail
                resourceHealth         = ConvertTo-RangerHashtable -InputObject $resourceHealth
                summary                = $monitoringSummary
            }
        }
        Findings      = @($findings)
        Relationships = @()
        RawEvidence   = [ordered]@{
            azureResources         = ConvertTo-RangerHashtable -InputObject $azureResources
            health                 = ConvertTo-RangerHashtable -InputObject $healthSnapshots
            updateManager          = ConvertTo-RangerHashtable -InputObject $updateManager
            dcrDetail              = ConvertTo-RangerHashtable -InputObject $dcrDetail
            alertRuleDetail        = ConvertTo-RangerHashtable -InputObject $alertRuleDetail
            resourceHealth         = ConvertTo-RangerHashtable -InputObject $resourceHealth
            logAnalyticsWorkspaces = ConvertTo-RangerHashtable -InputObject $logAnalyticsWorkspaces
            diagnosticSettings     = ConvertTo-RangerHashtable -InputObject $diagnosticSettings
            actionGroups           = ConvertTo-RangerHashtable -InputObject $actionGroups
        }
    }
}