# modules/HomeLab.Monitoring/Public/Alerting.ps1

<#
.SYNOPSIS
    Sets up an alert rule.
.DESCRIPTION
    Creates an Azure Monitor metric alert rule (V2) in the given resource group.

    When -ResourceName is specified, one rule named "<Name>-<resource name>" is created
    for the first resource returned by Get-AzResource. Otherwise a single rule named
    "<Name>-<ResourceType>" (with '/' replaced by '-') is created and scoped to every
    resource of that type in the resource group; in that case all target resources must
    be in the same region.

    The created rule is recorded under the 'alertRules' key of the configuration object
    and Save-Configuration is called.
.PARAMETER Name
    The name of the alert rule.
.PARAMETER ResourceGroup
    The name of the resource group. If not specified, the resource group from the configuration will be used.
.PARAMETER ResourceType
    The type of resource to monitor.
.PARAMETER ResourceName
    The name of the resource to monitor. If not specified, all resources of the specified type will be monitored.
.PARAMETER Metric
    The metric to monitor.
.PARAMETER Operator
    The operator to use for comparison. Valid values are 'GreaterThan', 'GreaterThanOrEqual', 'LessThan', 'LessThanOrEqual', 'Equal'.
    'Equal' is passed to Azure as 'Equals'.
.PARAMETER Threshold
    The threshold value for the alert.
.PARAMETER WindowSize
    The time window for the alert. Default is 5 minutes.
.PARAMETER Frequency
    The frequency of evaluation. Default is 1 minute.
.PARAMETER Severity
    The severity of the alert. Valid values are 0, 1, 2, 3, 4. Default is 2.
.PARAMETER ActionGroupName
    The name of the action group to use for the alert. If not specified, or if the named
    action group cannot be found, the alert is created without actions.
.OUTPUTS
    The alert rule object(s) returned by Add-AzMetricAlertRuleV2.
.EXAMPLE
    Set-AlertRule -Name "HighCPU" -ResourceType "Microsoft.Compute/virtualMachines" -Metric "Percentage CPU" -Operator "GreaterThan" -Threshold 90
#>

function Set-AlertRule {
    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)]
        [string]$Name,
        
        [Parameter(Mandatory = $false)]
        [string]$ResourceGroup,
        
        [Parameter(Mandatory = $true)]
        [string]$ResourceType,
        
        [Parameter(Mandatory = $false)]
        [string]$ResourceName,
        
        [Parameter(Mandatory = $true)]
        [string]$Metric,
        
        [Parameter(Mandatory = $true)]
        [ValidateSet('GreaterThan', 'GreaterThanOrEqual', 'LessThan', 'LessThanOrEqual', 'Equal')]
        [string]$Operator,
        
        [Parameter(Mandatory = $true)]
        [double]$Threshold,
        
        [Parameter(Mandatory = $false)]
        [string]$WindowSize = "00:05:00",
        
        [Parameter(Mandatory = $false)]
        [string]$Frequency = "00:01:00",
        
        [Parameter(Mandatory = $false)]
        [ValidateRange(0, 4)]
        [int]$Severity = 2,
        
        [Parameter(Mandatory = $false)]
        [string]$ActionGroupName
    )
    
    begin {
        # Import required modules
        Import-Module HomeLab.Core
        Import-Module HomeLab.Azure
        
        # Get configuration
        $config = Get-Configuration
        
        # Log function start
        Write-Log -Message "Setting up alert rule '$Name'" -Level INFO
        
        # If no resource group is specified, derive the conventional name from config
        if (-not $ResourceGroup) {
            $ResourceGroup = "$($config.projectName)-$($config.env)-$($config.locationCode)-rg"
        }
    }
    
    process {
        try {
            # Check if Azure is connected
            if (-not (Test-AzureConnection)) {
                Connect-AzureAccount
            }
            
            # Check if the resource group exists
            $rgExists = Get-AzResourceGroup -Name $ResourceGroup -ErrorAction SilentlyContinue
            if (-not $rgExists) {
                throw "Resource group $ResourceGroup does not exist"
            }
            
            # Get resources based on the specified filters
            $resourceFilter = @{
                ResourceGroupName = $ResourceGroup
                ResourceType = $ResourceType
            }
            
            if ($ResourceName) {
                $resourceFilter.Name = $ResourceName
            }
            
            # @() guarantees an array so .Count and [0] behave the same for 0, 1 or many results.
            # -ErrorAction Stop turns lookup failures into terminating errors the catch block can see.
            $resources = @(Get-AzResource @resourceFilter -ErrorAction Stop)
            
            if ($resources.Count -eq 0) {
                throw "No resources found matching the specified criteria"
            }
            
            # The public parameter accepts 'Equal', but the metric alert API spells this operator 'Equals'.
            $apiOperator = if ($Operator -eq 'Equal') { 'Equals' } else { $Operator }
            
            # Create alert criteria (shared by both the single-resource and multi-resource paths)
            $criteria = New-AzMetricAlertRuleV2Criteria -MetricName $Metric -TimeAggregation Average -Operator $apiOperator -Threshold $Threshold -ErrorAction Stop
            
            # Resolve the optional action group; a missing group is a warning, not a failure
            $actionGroupId = $null
            if ($ActionGroupName) {
                $actionGroup = Get-AzActionGroup -ResourceGroupName $ResourceGroup -Name $ActionGroupName -ErrorAction SilentlyContinue
                if ($actionGroup) {
                    $actionGroupId = $actionGroup.Id
                }
                else {
                    Write-Log -Message "Action group $ActionGroupName not found. Alert will be created without actions." -Level WARNING
                }
            }
            
            # Parameters common to both targeting modes
            $alertRuleParams = @{
                ResourceGroupName = $ResourceGroup
                WindowSize = $WindowSize
                Frequency = $Frequency
                Condition = $criteria
                Severity = $Severity
                ErrorAction = 'Stop'
            }
            
            if ($ResourceName) {
                # Single-resource rule: target the first match by resource ID
                $resource = $resources[0]
                $alertName = "$Name-$($resource.Name)"
                
                $alertRuleParams.TargetResourceId = $resource.Id
                $alertRuleParams.Description = "Alert when $Metric $Operator $Threshold for $($resource.Name)"
                $successMessage = "Created alert rule '$alertName' for resource $($resource.Name)"
            }
            else {
                # Multi-resource rule: one rule scoped to every resource of the type
                $alertName = "$Name-$ResourceType".Replace('/', '-')
                
                # The scope-based parameter set also needs the resource type and a single region.
                $regions = @($resources | ForEach-Object { $_.Location } | Where-Object { $_ } | Sort-Object -Unique)
                if ($regions.Count -ne 1) {
                    throw "Cannot create a single alert rule for all $ResourceType resources: targets must share exactly one region (found $($regions.Count): $($regions -join ', ')). Use -ResourceName to create per-resource rules."
                }
                
                $alertRuleParams.TargetResourceScope = @($resources | ForEach-Object { $_.Id })
                $alertRuleParams.TargetResourceType = $ResourceType
                $alertRuleParams.TargetResourceRegion = $regions[0]
                $alertRuleParams.Description = "Alert when $Metric $Operator $Threshold for all $ResourceType resources"
                $successMessage = "Created alert rule '$alertName' for all $ResourceType resources"
            }
            
            $alertRuleParams.Name = $alertName
            
            if ($actionGroupId) {
                $alertRuleParams.ActionGroupId = $actionGroupId
            }
            
            # Create the alert rule
            $alertRule = Add-AzMetricAlertRuleV2 @alertRuleParams
            $createdRules = @($alertRule)
            Write-Log -Message $successMessage -Level INFO
            
            # Save alert rule information to config
            if (-not $config.alertRules) {
                $config.alertRules = @{}
            }
            
            foreach ($rule in $createdRules) {
                $config.alertRules[$rule.Name] = @{
                    Id = $rule.Id
                    ResourceGroup = $ResourceGroup
                    ResourceType = $ResourceType
                    Metric = $Metric
                    Operator = $Operator
                    Threshold = $Threshold
                    Severity = $Severity
                    CreatedDate = (Get-Date).ToString("yyyy-MM-dd HH:mm:ss")
                }
            }
            
            Save-Configuration
            
            return $createdRules
        }
        catch {
            Write-Log -Message "Failed to set up alert rule: $_" -Level ERROR
            throw $_
        }
    }
    
    end {
        # Log function end (not reached when process throws)
        Write-Log -Message "Alert rule setup completed" -Level INFO
    }
}

<#
.SYNOPSIS
    Gets the current alert rules.
.DESCRIPTION
    Lists the metric alert rules (V2) found in the resource group and returns one
    summary object per rule. Each summary carries the rule's basic settings, the
    resource type parsed from its first scope (when one can be parsed), and the
    matching entry from the configuration's 'alertRules' collection, if any.
.PARAMETER ResourceGroup
    The name of the resource group. If not specified, the resource group from the configuration will be used.
.EXAMPLE
    Get-AlertRules -ResourceGroup "HomeLab-RG"
#>

function Get-AlertRules {
    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $false)]
        [string]$ResourceGroup
    )
    
    begin {
        # Load the modules this function relies on
        Import-Module HomeLab.Core
        Import-Module HomeLab.Azure
        
        # Read the current configuration
        $config = Get-Configuration
        
        Write-Log -Message "Getting alert rules" -Level INFO
        
        # Fall back to the conventional resource group name built from config values
        if (-not $ResourceGroup) {
            $ResourceGroup = "$($config.projectName)-$($config.env)-$($config.locationCode)-rg"
        }
    }
    
    process {
        try {
            # Make sure there is an Azure session before querying
            if (-not (Test-AzureConnection)) {
                Connect-AzureAccount
            }
            
            # Bail out early when the resource group cannot be found
            $resourceGroupObject = Get-AzResourceGroup -Name $ResourceGroup -ErrorAction SilentlyContinue
            if (-not $resourceGroupObject) {
                throw "Resource group $ResourceGroup does not exist"
            }
            
            # Fetch the rules from Azure
            $azureRules = Get-AzMetricAlertRuleV2 -ResourceGroupName $ResourceGroup
            
            # Build one summary object per rule; the loop output is collected into an array
            $summaries = @(
                foreach ($azureRule in $azureRules) {
                    # Config entry recorded for this rule name, when one exists
                    $storedInfo = $null
                    if ($config.alertRules -and $config.alertRules[$azureRule.Name]) {
                        $storedInfo = $config.alertRules[$azureRule.Name]
                    }
                    
                    # Resource type is taken from the "/providers/<namespace>/<type>/" part of the first scope
                    $targetType = $null
                    if ($azureRule.Scopes -and $azureRule.Scopes.Count -gt 0) {
                        $firstScope = $azureRule.Scopes[0]
                        if ($firstScope -match '/providers/([^/]+/[^/]+)/') {
                            $targetType = $Matches[1]
                        }
                    }
                    
                    [PSCustomObject]@{
                        Name = $azureRule.Name
                        Id = $azureRule.Id
                        Description = $azureRule.Description
                        Severity = $azureRule.Severity
                        Enabled = $azureRule.Enabled
                        Frequency = $azureRule.EvaluationFrequency
                        WindowSize = $azureRule.WindowSize
                        TargetResourceType = $targetType
                        TargetResourceIds = $azureRule.Scopes
                        Criteria = $azureRule.Criteria
                        ActionGroups = $azureRule.Actions.ActionGroupId
                        ConfigInfo = $storedInfo
                    }
                }
            )
            
            return $summaries
        }
        catch {
            Write-Log -Message "Failed to get alert rules: $_" -Level ERROR
            throw $_
        }
    }
    
    end {
        Write-Log -Message "Alert rules retrieved successfully" -Level INFO
    }
}

<#
.SYNOPSIS
    Removes an alert rule.
.DESCRIPTION
    Removes the metric alert rule (V2) with the specified name from the resource group,
    then removes the matching entry from the configuration's 'alertRules' collection
    (if present) and saves the configuration. The configuration is only touched after
    the rule has actually been deleted in Azure.
.PARAMETER Name
    The name of the alert rule to remove.
.PARAMETER ResourceGroup
    The name of the resource group. If not specified, the resource group from the configuration will be used.
.OUTPUTS
    System.Boolean. $true when the rule was removed; failures are thrown.
.EXAMPLE
    Remove-AlertRule -Name "HighCPU-vm1"
#>

function Remove-AlertRule {
    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)]
        [string]$Name,
        
        [Parameter(Mandatory = $false)]
        [string]$ResourceGroup
    )
    
    begin {
        # Import required modules
        Import-Module HomeLab.Core
        Import-Module HomeLab.Azure
        
        # Get configuration
        $config = Get-Configuration
        
        # Log function start
        Write-Log -Message "Removing alert rule '$Name'" -Level INFO
        
        # If no resource group is specified, derive the conventional name from config
        if (-not $ResourceGroup) {
            $ResourceGroup = "$($config.projectName)-$($config.env)-$($config.locationCode)-rg"
        }
    }
    
    process {
        try {
            # Check if Azure is connected
            if (-not (Test-AzureConnection)) {
                Connect-AzureAccount
            }
            
            # Check if the resource group exists
            $rgExists = Get-AzResourceGroup -Name $ResourceGroup -ErrorAction SilentlyContinue
            if (-not $rgExists) {
                throw "Resource group $ResourceGroup does not exist"
            }
            
            # Check if the alert rule exists
            $alertRule = Get-AzMetricAlertRuleV2 -ResourceGroupName $ResourceGroup -Name $Name -ErrorAction SilentlyContinue
            
            if (-not $alertRule) {
                throw "Alert rule '$Name' not found in resource group '$ResourceGroup'"
            }
            
            # Remove the alert rule. -ErrorAction Stop makes a failed delete terminating, so the
            # catch block runs instead of falling through to the config cleanup and success log.
            # Output is discarded so the function emits only its Boolean result.
            Remove-AzMetricAlertRuleV2 -ResourceGroupName $ResourceGroup -Name $Name -ErrorAction Stop | Out-Null
            
            # Remove from config
            if ($config.alertRules -and $config.alertRules[$Name]) {
                # Some dictionary types return a Boolean from Remove(); keep it out of the pipeline.
                $null = $config.alertRules.Remove($Name)
                Save-Configuration
            }
            
            Write-Log -Message "Alert rule '$Name' removed successfully" -Level INFO
            return $true
        }
        catch {
            Write-Log -Message "Failed to remove alert rule: $_" -Level ERROR
            throw $_
        }
    }
    
    end {
        # Log function end (not reached when process throws)
        Write-Log -Message "Alert rule removal completed" -Level INFO
    }
}

<#
.SYNOPSIS
    Tests an alert rule.
.DESCRIPTION
    Inspects an existing metric alert rule (V2) and reports likely configuration problems.
    The alert is NOT fired or simulated; the function only checks that:
      - the rule is enabled,
      - at least one action is configured,
      - every scope resolves to an existing resource,
      - each metric named in the rule's criteria is defined for each existing scope.
    Findings are returned as strings in the Recommendations property of the result.
.PARAMETER Name
    The name of the alert rule to test.
.PARAMETER ResourceGroup
    The name of the resource group. If not specified, the resource group from the configuration will be used.
.OUTPUTS
    A PSCustomObject describing the rule, with TestResult and Recommendations properties,
    plus ConfigInfo when the configuration holds an entry for the rule.
.EXAMPLE
    Test-AlertRule -Name "HighCPU-vm1"
#>

function Test-AlertRule {
    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)]
        [string]$Name,
        
        [Parameter(Mandatory = $false)]
        [string]$ResourceGroup
    )
    
    begin {
        # Import required modules
        Import-Module HomeLab.Core
        Import-Module HomeLab.Azure
        
        # Get configuration
        $config = Get-Configuration
        
        # Log function start
        Write-Log -Message "Testing alert rule '$Name'" -Level INFO
        
        # If no resource group is specified, derive the conventional name from config
        if (-not $ResourceGroup) {
            $ResourceGroup = "$($config.projectName)-$($config.env)-$($config.locationCode)-rg"
        }
    }
    
    process {
        try {
            # Check if Azure is connected
            if (-not (Test-AzureConnection)) {
                Connect-AzureAccount
            }
            
            # Check if the resource group exists
            $rgExists = Get-AzResourceGroup -Name $ResourceGroup -ErrorAction SilentlyContinue
            if (-not $rgExists) {
                throw "Resource group $ResourceGroup does not exist"
            }
            
            # Check if the alert rule exists
            $alertRule = Get-AzMetricAlertRuleV2 -ResourceGroupName $ResourceGroup -Name $Name -ErrorAction SilentlyContinue
            
            if (-not $alertRule) {
                throw "Alert rule '$Name' not found in resource group '$ResourceGroup'"
            }
            
            # Get alert rule details
            $alertDetails = [PSCustomObject]@{
                Name = $alertRule.Name
                Description = $alertRule.Description
                Severity = $alertRule.Severity
                Enabled = $alertRule.Enabled
                Frequency = $alertRule.EvaluationFrequency
                WindowSize = $alertRule.WindowSize
                TargetResourceIds = $alertRule.Scopes
                Criteria = $alertRule.Criteria
                ActionGroups = $alertRule.Actions.ActionGroupId
                TestResult = "Cannot test alert rule directly. Please check the following information to verify the alert rule is configured correctly."
                Recommendations = @()
            }
            
            # Check if the alert rule is enabled
            if (-not $alertRule.Enabled) {
                $alertDetails.Recommendations += "Alert rule is disabled. Enable it to receive alerts."
            }
            
            # Check if the alert rule has action groups
            if (-not $alertRule.Actions -or $alertRule.Actions.Count -eq 0) {
                $alertDetails.Recommendations += "No action groups configured. Add an action group to receive notifications."
            }
            
            # Check if the target resources exist; remember the ones that do not so the
            # metric check below does not report a second, misleading finding for them
            $missingScopes = @()
            foreach ($scope in $alertRule.Scopes) {
                $resource = Get-AzResource -ResourceId $scope -ErrorAction SilentlyContinue
                if (-not $resource) {
                    $missingScopes += $scope
                    $alertDetails.Recommendations += "Target resource with ID '$scope' does not exist."
                }
            }
            
            # Collect the metric names referenced by the rule. Criteria may be null, a single
            # criterion or a collection, so it is treated as a list and each entry is probed for
            # a MetricName property instead of being matched on a concrete .NET type name.
            # NOTE(review): criteria shapes that nest their conditions under another property
            # are not expanded here - confirm against the Az.Monitor version in use.
            $metricNames = @(
                foreach ($criterion in @($alertRule.Criteria)) {
                    if ($null -ne $criterion) {
                        $metricNameProperty = $criterion.PSObject.Properties['MetricName']
                        if ($metricNameProperty -and $metricNameProperty.Value) {
                            $metricNameProperty.Value
                        }
                    }
                }
            ) | Select-Object -Unique
            
            # Check if each metric exists for the target resources
            if ($metricNames) {
                foreach ($scope in $alertRule.Scopes) {
                    if ($missingScopes -contains $scope) {
                        continue
                    }
                    
                    # One definition lookup per scope, reused for every metric name
                    $metricDefinitions = Get-AzMetricDefinition -ResourceId $scope -ErrorAction SilentlyContinue
                    
                    foreach ($metricName in $metricNames) {
                        $metricExists = $metricDefinitions | Where-Object { $_.Name.Value -eq $metricName }
                        
                        if (-not $metricExists) {
                            $alertDetails.Recommendations += "Metric '$metricName' does not exist for resource with ID '$scope'."
                        }
                    }
                }
            }
            
            # Add information from config if available
            if ($config.alertRules -and $config.alertRules[$Name]) {
                $alertDetails | Add-Member -MemberType NoteProperty -Name "ConfigInfo" -Value $config.alertRules[$Name]
            }
            
            return $alertDetails
        }
        catch {
            Write-Log -Message "Failed to test alert rule: $_" -Level ERROR
            throw $_
        }
    }
    
    end {
        # Log function end (not reached when process throws)
        Write-Log -Message "Alert rule test completed" -Level INFO
    }
}