modules/SdnDiag.Health/SdnDiag.Health.psm1
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

Using module .\..\SdnDiag.Common\SdnDiag.Common.Helper.psm1
Using module .\SdnDiag.Health.Helper.psm1

Import-Module $PSScriptRoot\SdnDiag.Health.Helper.psm1
Import-Module $PSScriptRoot\..\SdnDiag.Common\SdnDiag.Common.psm1
Import-Module $PSScriptRoot\..\SdnDiag.Utilities\SdnDiag.Utilities.psm1

# create local variable to store configuration data
New-Variable -Name 'SdnDiagnostics_Health' -Scope 'Script' -Force -Value @{
    Cache = @{}
}

##### FUNCTIONS AUTO-POPULATED BELOW THIS LINE DURING BUILD #####

function Test-EncapOverhead {
    <#
    .SYNOPSIS
        Retrieves the VMSwitch across servers in the dataplane to confirm that the network interfaces support EncapOverhead or JumboPackets
        and that the settings are configured as expected
    .PARAMETER SdnEnvironmentObject
        Fabric object whose ComputerName property lists the hosts to query.
    .PARAMETER Credential
        Credential used for the remote call. Defaults to an empty credential.
    .OUTPUTS
        An [SdnHealth] object. Result is set to 'FAIL' when no data is returned or when an interface has neither
        EncapOverhead nor JumboPacket configured to the expected values. Nothing is returned if an exception is caught.
    #>

    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)]
        [SdnFabricHealthObject]$SdnEnvironmentObject,

        [Parameter(Mandatory = $false)]
        [System.Management.Automation.PSCredential]
        [System.Management.Automation.Credential()]
        $Credential = [System.Management.Automation.PSCredential]::Empty
    )

    [int]$encapOverheadExpectedValue = 160
    [int]$jumboPacketExpectedValue = 1674 # this is default 1514 MTU + 160 encap overhead
    $sdnHealthObject = [SdnHealth]::new()
    $array = @()

    try {
        "Validating the network interfaces across the SDN dataplane support Encap Overhead or Jumbo Packets" | Trace-Output

        $encapOverheadResults = Invoke-PSRemoteCommand -ComputerName $SdnEnvironmentObject.ComputerName -Credential $Credential -Scriptblock {Get-SdnNetAdapterEncapOverheadConfig}
        if($null -eq $encapOverheadResults){
            $sdnHealthObject.Result = 'FAIL'
        }
        else {
            foreach($object in ($encapOverheadResults | Group-Object -Property PSComputerName)){
                foreach($interface in $object.Group){
                    "[{0}] {1}" -f $object.Name, ($interface | Out-String -Width 4096) | Trace-Output -Level:Verbose

                    # the flags must be evaluated per interface; without resetting them a single misconfigured
                    # adapter would cause every adapter processed afterwards to be reported as failed
                    $encapDisabled = $false
                    $jumboPacketDisabled = $false

                    if($interface.EncapOverheadEnabled -eq $false -or $interface.EncapOverheadValue -lt $encapOverheadExpectedValue){
                        "EncapOverhead settings for {0} on {1} are disabled or not configured correctly" -f $interface.NetworkInterface, $object.Name | Trace-Output -Level:Verbose
                        $encapDisabled = $true
                    }

                    if($interface.JumboPacketEnabled -eq $false -or $interface.JumboPacketValue -lt $jumboPacketExpectedValue){
                        "JumboPacket settings for {0} on {1} are disabled or not configured correctly" -f $interface.NetworkInterface, $object.Name | Trace-Output -Level:Verbose
                        $jumboPacketDisabled = $true
                    }

                    # if both encapoverhead and jumbo packets are not set, this is indication the physical network cannot support VXLAN encapsulation
                    # and as such, environment would experience intermittent packet loss
                    if ($encapDisabled -and $jumboPacketDisabled) {
                        $sdnHealthObject.Result = 'FAIL'
                        "EncapOverhead and JumboPacket for interface {0} on {1} are disabled or not configured correctly." -f $interface.NetworkInterface, $object.Name | Trace-Output -Level:Exception
                    }

                    $array += $interface
                }
            }

            $sdnHealthObject.Properties = $array
        }

        return $sdnHealthObject
    }
    catch {
        "{0}`n{1}" -f $_.Exception, $_.ScriptStackTrace | Trace-Output -Level:Error
    }
}

function Test-HostRootStoreNonRootCert {
    <#
    .SYNOPSIS
        Validate the Cert in Host's Root CA Store to detect if any Non Root Cert exist
    .PARAMETER SdnEnvironmentObject
        Fabric object whose ComputerName property lists the hosts to inspect.
    .PARAMETER Credential
        Credential used for the remote call. Defaults to an empty credential.
    .OUTPUTS
        An [SdnHealth] object. Result is set to 'FAIL' when any host has a certificate in the Root store whose
        Subject differs from its Issuer. Properties lists the affected hosts and certificates.
    #>

    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)]
        [SdnFabricHealthObject]$SdnEnvironmentObject,

        [Parameter(Mandatory = $false)]
        [System.Management.Automation.PSCredential]
        [System.Management.Automation.Credential()]
        $Credential = [System.Management.Automation.PSCredential]::Empty
    )

    $sdnHealthObject = [SdnHealth]::new()
    $array = @()

    try {
        "Validating Certificates under Root CA Store" | Trace-Output

        $scriptBlock = {
            $nonRootCerts = @()

            # use a rooted provider path so the lookup does not depend on the current location within the Cert: drive
            $rootCerts = Get-ChildItem Cert:\LocalMachine\Root
            foreach ($rootCert in $rootCerts) {
                # a genuine root certificate is self-signed, meaning Subject and Issuer are identical
                if ($rootCert.Subject -ne $rootCert.Issuer) {
                    $certInfo = [PSCustomObject]@{
                        Thumbprint = $rootCert.Thumbprint
                        Subject    = $rootCert.Subject
                        Issuer     = $rootCert.Issuer
                    }

                    $nonRootCerts += $certInfo
                }
            }

            return $nonRootCerts
        }

        foreach($node in $SdnEnvironmentObject.ComputerName){
            # wrap in @() so that a single returned object is still counted; in Windows PowerShell a lone
            # PSCustomObject does not expose Count, which would otherwise hide a host with exactly one non-root cert
            $nonRootCerts = @(Invoke-PSRemoteCommand -ComputerName $node -Credential $Credential -ScriptBlock $scriptBlock -PassThru)

            # If any node have Non Root Certs in Trusted Root Store. Issue detected.
            if($nonRootCerts.Count -gt 0){
                $sdnHealthObject.Result = 'FAIL'

                $object = [PSCustomObject]@{
                    ComputerName = $node
                    NonRootCerts = $nonRootCerts
                }

                $array += $object
            }
        }

        $sdnHealthObject.Properties = $array
        return $sdnHealthObject
    }
    catch {
        "{0}`n{1}" -f $_.Exception, $_.ScriptStackTrace | Trace-Output -Level:Error
    }
}

function Test-NetworkControllerCertCredential {
    <#
    .SYNOPSIS
        Query the NC Cert credential used to connect to SDN Servers, ensure cert exist.
    .PARAMETER NetworkController
        One or more Network Controller nodes on which the certificate must be present.
    .PARAMETER NcUri
        Base URI of the Network Controller REST API.
    .PARAMETER Credential
        Credential used for the remote calls to the Network Controller nodes.
    .PARAMETER NcRestCredential
        Credential used for the Network Controller REST API calls.
    .OUTPUTS
        An [SdnHealth] object. Result is set to 'FAIL' when a referenced certificate is not found under
        Cert:\LocalMachine\My on any of the Network Controller nodes.
    #>

    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)]
        [System.String[]]$NetworkController,

        [Parameter(Mandatory = $true)]
        [Uri]$NcUri,

        [Parameter(Mandatory = $false)]
        [System.Management.Automation.PSCredential]
        [System.Management.Automation.Credential()]
        $Credential = [System.Management.Automation.PSCredential]::Empty,

        [Parameter(Mandatory = $false)]
        [System.Management.Automation.PSCredential]
        [System.Management.Automation.Credential()]
        $NcRestCredential = [System.Management.Automation.PSCredential]::Empty
    )

    $sdnHealthObject = [SdnHealth]::new()
    $arrayList = [System.Collections.ArrayList]::new()

    # returns $true when the thumbprint passed in exists in the local machine personal store
    $scriptBlock = {
        param([Parameter(Position = 0)][String]$param1)

        if (-NOT (Test-Path -Path Cert:\LocalMachine\My\$param1)) {
            return $false
        }
        else {
            return $true
        }
    }

    try {
        "Validate Cert Credential resource of SDN Servers. Ensure Cert exist on each of the Network Controller " | Trace-Output

        # enumerate each server's conection->credential object into the array
        $servers = Get-SdnServer -NcUri $NcUri.AbsoluteUri -Credential $NcRestCredential
        $serverCredentialRefs = [System.Collections.Hashtable]::new()
        foreach ($server in $servers) {
            # find the first connection with credential type of X509Certificate
            $serverConnection = $server.properties.connections | Where-Object { $_.credentialType -eq "X509Certificate" } | Select-Object -First 1
            if ($null -ne $serverConnection) {
                $credRef = $serverConnection.credential[0].resourceRef
                "Adding credential {0} for server {1} for validation" -f $credRef, $serverConnection.managementAddresses[0] | Trace-Output -Level:Verbose

                if ($null -ne $credRef) {
                    # group the servers by credential reference so each credential is only validated once
                    if (-NOT $serverCredentialRefs.ContainsKey($credRef)) {
                        $serverList = [System.Collections.ArrayList]::new()
                        $serverCredentialRefs.Add($credRef, $serverList)
                    }

                    [void]$serverCredentialRefs[$credRef].Add($server)
                }
            }
        }

        # iterate the credential object to validate certificate on each NC
        foreach ($credRef in $serverCredentialRefs.Keys) {
            $credObj = Get-SdnResource -NcUri $NcUri.AbsoluteUri -Credential $NcRestCredential -ResourceRef $credRef
            if ($null -ne $credObj) {
                $thumbPrint = $credObj.properties.value

                # invoke command on each NC seperately so to record which NC missing certificate
                foreach ($nc in $NetworkController) {
                    "Validating certificate [{0}] on NC {1}" -f $thumbPrint, $nc | Trace-Output -Level:Verbose

                    $result = Invoke-PSRemoteCommand -ComputerName $nc -Credential $Credential -ScriptBlock $scriptBlock -ArgumentList $thumbPrint
                    if ($result -ne $true) {
                        # if any NC missing certificate, it indicate issue detected
                        $sdnHealthObject.Result = 'FAIL'

                        $object = [PSCustomObject]@{
                            NetworkController  = $nc
                            CertificateMissing = $thumbPrint
                            AffectedServers    = $serverCredentialRefs[$credRef]
                        }

                        [void]$arrayList.Add($object)
                    }
                }
            }
        }

        $sdnHealthObject.Properties = $arrayList
        return $sdnHealthObject
    }
    catch {
        "{0}`n{1}" -f $_.Exception, $_.ScriptStackTrace | Trace-Output -Level:Error
    }
}

function Test-NetworkInterfaceAPIDuplicateMacAddress {
    <#
    .SYNOPSIS
        Validate there are no adapters within the Network Controller Network Interfaces API that are duplicate.
    .PARAMETER SdnEnvironmentObject
        Fabric object whose NcUrl property identifies the Network Controller REST endpoint.
    .PARAMETER NcRestCredential
        Credential used for the Network Controller REST API calls.
    .OUTPUTS
        An [SdnHealth] object. Result is set to 'FAIL' when two or more network interfaces share the same
        privateMacAddress. Properties contains the network interfaces involved.
    #>

    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)]
        [SdnFabricHealthObject]$SdnEnvironmentObject,

        [Parameter(Mandatory = $false)]
        [System.Management.Automation.PSCredential]
        [System.Management.Automation.Credential()]
        $NcRestCredential = [System.Management.Automation.PSCredential]::Empty
    )

    $sdnHealthObject = [SdnHealth]::new()
    $array = @()

    try {
        "Validate no duplicate MAC addresses for network interfaces in Network Controller" | Trace-Output

        $networkInterfaces = Get-SdnResource -NcUri $SdnEnvironmentObject.NcUrl.AbsoluteUri -Resource:NetworkInterfaces -Credential $NcRestCredential
        if($null -eq $networkInterfaces){
            throw New-Object System.NullReferenceException("No network interfaces returned from Network Controller")
        }

        # interfaces without a MAC address assigned would all be grouped under the same empty key,
        # so exclude them to avoid reporting them as duplicates of each other
        $duplicateObjects = $networkInterfaces.properties `
            | Where-Object { -not [string]::IsNullOrEmpty($_.privateMacAddress) } `
            | Group-Object -Property privateMacAddress `
            | Where-Object {$_.Count -ge 2}

        if($duplicateObjects){
            $sdnHealthObject.Result = 'FAIL'

            # since there can be multiple grouped objects, we need to enumerate each duplicate group
            foreach($obj in $duplicateObjects){
                $duplicateInterfaces = $networkInterfaces | Where-Object {$_.properties.privateMacAddress -eq $obj.Name}
                $array += $duplicateInterfaces

                # build a tab-indented list of the resource references for the trace message
                $resourceRefList = $duplicateInterfaces | ForEach-Object { "`t$($_.resourceRef)" } | Out-String

                "Located {0} virtual machines associated with MAC address {1}:`r`n`n{2}`r`n" -f $obj.Count, $obj.Name, $resourceRefList | Trace-Output -Level:Warning
            }
        }

        $sdnHealthObject.Properties = $array
        return $sdnHealthObject
    }
    catch {
        "{0}`n{1}" -f $_.Exception, $_.ScriptStackTrace | Trace-Output -Level:Error
    }
}

function Test-SdnKINetworkInterfacePlacement {
    <#
    .SYNOPSIS
        Validates the placement of Network Controller Network Interface API placement compared to Hypervisor.
    .PARAMETER NcUri
        Base URI of the Network Controller REST API.
    .PARAMETER Credential
        Credential used when querying the hypervisor hosts.
    .PARAMETER NcRestCredential
        Credential used for the Network Controller REST API calls.
    .OUTPUTS
        A PSCustomObject with a boolean Result (true when a hard placement mismatch was detected) and
        Properties containing the mismatched interfaces. Nothing is returned if an exception is caught.
    #>

    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)]
        [Uri]$NcUri,

        [Parameter(Mandatory = $false)]
        [System.Management.Automation.PSCredential]
        [System.Management.Automation.Credential()]
        $Credential = [System.Management.Automation.PSCredential]::Empty,

        [Parameter(Mandatory = $false)]
        [System.Management.Automation.PSCredential]
        [System.Management.Automation.Credential()]
        $NcRestCredential = [System.Management.Automation.PSCredential]::Empty
    )

    # compares each VM network adapter against the matching NC network interface (by MAC address)
    # and returns an entry for every adapter whose host differs from the server referenced by NC
    function Test-NetworkInterfaceLocation {
        param (
            [Parameter(Mandatory = $true)]
            [System.Object]$NetworkControllerNetworkInterfaces,

            [Parameter(Mandatory = $true)]
            [System.Object]$VMNetworkAdapters
        )

        $networkInterfaces = [System.Collections.ArrayList]::new()
        foreach ($netAdapter in $VMNetworkAdapters) {
            $netInterface = $NetworkControllerNetworkInterfaces | Where-Object {$_.properties.privateMacAddress -eq $netAdapter.MacAddress}

            # if we do not find the MAC address within NC Network Interfaces, skip the placement validation
            if ($null -eq $netInterface) {
                continue
            }

            # if we detect duplicate MAC addresses within the NC Network Interfaces API, skip placement validation
            if ($netInterface.resourceRef.Count -ge 2){
                continue
            }

            # locate the server resource reference for the network interface
            # in some instances, this may be null/empty, so need to handle those instances to prevent script failures
            if($netInterface.properties.server.resourceRef){
                [string]$server = $netInterface.properties.server.resourceRef.Replace('/servers/','')
            }
            else {
                [string]$server = 'NullServerReference'
            }

            if($netAdapter.ComputerName -ne $server){
                $result = [PSCustomObject]@{
                    nc_host          = $server
                    hyperv_host      = $netAdapter.ComputerName
                    vmName           = $netAdapter.VmName
                    macAddress       = $netAdapter.MacAddress
                    resourceMetadata = $netInterface.resourceMetadata
                }

                [void]$networkInterfaces.Add($result)
            }
        }

        return $networkInterfaces
    }

    try {
        "Validate placement of network interfaces between Network Controller and Hypervisor" | Trace-Output

        $issueDetected = $false
        $arrayList = [System.Collections.ArrayList]::new()

        $servers = Get-SdnServer -NcUri $NcUri.AbsoluteUri -ManagementAddressOnly -Credential $NcRestCredential
        $networkInterfaces = Get-SdnResource -NcUri $NcUri.AbsoluteUri -Resource:NetworkInterfaces -Credential $NcRestCredential
        $networkAdapters = Get-SdnVMNetworkAdapter -ComputerName $servers -Credential $Credential -AsJob -Timeout 600 -PassThru
        $driftedNetworkInterfaces = Test-NetworkInterfaceLocation -NetworkControllerNetworkInterfaces $networkInterfaces -VMNetworkAdapters $networkAdapters

        # classify every drifted interface individually, so that a single entry without a server reference
        # does not cause genuine placement mismatches for other interfaces to be reported as transient
        foreach ($result in $driftedNetworkInterfaces) {
            if ($result.nc_host -ieq 'NullServerReference') {
                # we want to focus on instances where network controller api does not have a valid server reference to where the mac address resides
                # this may be false positve if the VM had live migrated recently and nchostagent has not updated network controller
                "{0}: Network Controller is not aware virtual machine {1} exists on {2}`n`tThis may be a transient exception that can be safely ignored if no issues reported with virtual machine." -f $result.macAddress, $result.vmName, $result.hyperv_host | Trace-Output -Level:Warning
            }
            else {
                # in this scenario, the serverref and hypervisor server values are mismatched indicating
                # we have a hard drift between network controller and dataplane, which would result in stale/outdated policies
                "{0}: Network Controller believes {1} exists on {2} while hypervisor is reporting it exists on {3}" -f $result.macAddress, $result.vmName, $result.nc_host, $result.hyperv_host | Trace-Output -Level:Warning

                [void]$arrayList.Add($result)
                $issueDetected = $true
            }
        }

        return [PSCustomObject]@{
            Result     = $issueDetected
            Properties = $arrayList
        }
    }
    catch {
        "{0}`n{1}" -f $_.Exception, $_.ScriptStackTrace | Trace-Output -Level:Error
    }
}

function Test-ProviderNetwork {
    <#
    .SYNOPSIS
        Performs ICMP tests across the computers defined to confirm that jumbo packets are able to successfully traverse between the provider addresses on each host
    .PARAMETER SdnEnvironmentObject
        Fabric object whose ComputerName property lists the hosts to test between.
    .PARAMETER Credential
        Credential used for the remote calls. Defaults to an empty credential.
    .OUTPUTS
        An [SdnHealth] object. Result is set to 'FAIL' when any connectivity result has a Status other than 'Success'.
        Properties contains the connectivity results returned by the hosts.
    #>

    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)]
        [SdnFabricHealthObject]$SdnEnvironmentObject,

        [Parameter(Mandatory = $false)]
        [System.Management.Automation.PSCredential]
        [System.Management.Automation.Credential()]
        $Credential = [System.Management.Automation.PSCredential]::Empty
    )

    $sdnHealthObject = [SdnHealth]::new()
    $array = @()

    try {
        "Validating Provider Address network has connectivity across the SDN dataplane" | Trace-Output

        $providerAddresses = (Get-SdnProviderAddress -ComputerName $SdnEnvironmentObject.ComputerName -Credential $Credential).ProviderAddress
        if ($null -eq $providerAddresses){
            "No provider addresses were found on the hosts specified. This may be expected if tenant workloads have not yet been deployed." | Trace-Output -Level:Warning
        }

        if ($providerAddresses) {
            $connectivityResults = Invoke-PSRemoteCommand -ComputerName $SdnEnvironmentObject.ComputerName -Credential $Credential -Scriptblock {
                param([Parameter(Position = 0)][String[]]$param1)
                Test-SdnProviderAddressConnectivity -ProviderAddress $param1
            } -ArgumentList $providerAddresses

            foreach($computer in $connectivityResults | Group-Object PSComputerName){
                foreach($destinationAddress in $computer.Group){
                    # split the results by payload size; 1472 is used here as the boundary between standard and jumbo packets
                    $jumboPacketResult = $destinationAddress | Where-Object {$_.BufferSize -gt 1472}
                    $standardPacketResult = $destinationAddress | Where-Object {$_.BufferSize -le 1472}

                    if($destinationAddress.Status -ine 'Success'){
                        $sdnHealthObject.Result = 'FAIL'

                        # if both jumbo and standard icmp tests fails, indicates a failure in the physical network
                        if($jumboPacketResult.Status -ieq 'Failure' -and $standardPacketResult.Status -ieq 'Failure'){
                            $sdnHealthObject.Remediation = "Ensure ICMP enabled. If issue persists, investigate network connectivity."
                            "Cannot ping {0} from {1} ({2})." -f $destinationAddress[0].DestinationAddress, $computer.Name, $destinationAddress[0].SourceAddress | Trace-Output -Level:Exception
                        }

                        # if standard MTU was success but jumbo MTU was failure, indication that jumbo packets or encap overhead has not been setup and configured
                        # either on the physical nic or within the physical switches between the provider addresses
                        if($jumboPacketResult.Status -ieq 'Failure' -and $standardPacketResult.Status -ieq 'Success'){
                            $sdnHealthObject.Remediation = "Ensure physical switches and network interfaces support 1660 byte payload using Jumbo Packets or EncapOverhead"
                            "Cannot send jumbo packets to {0} from {1} ({2})." -f $destinationAddress[0].DestinationAddress, $computer.Name, $destinationAddress[0].SourceAddress | Trace-Output -Level:Exception
                        }
                    }
                    else {
                        "Successfully sent jumbo packet to {0} from {1} ({2})" -f $destinationAddress[0].DestinationAddress, $computer.Name, $destinationAddress[0].SourceAddress | Trace-Output
                    }

                    $array += $destinationAddress
                }
            }
        }

        $sdnHealthObject.Properties = $array
        return $sdnHealthObject
    }
    catch {
        "{0}`n{1}" -f $_.Exception, $_.ScriptStackTrace | Trace-Output -Level:Error
    }
}

function Test-ResourceConfigurationState {
    <#
    .SYNOPSIS
        Validate that the configurationState and provisioningState is Success
    .PARAMETER SdnEnvironmentObject
        Fabric object providing the NcUrl and the Role.ResourceName of the resources to query.
    .PARAMETER NcRestCredential
        Credential used for the Network Controller REST API calls.
    .OUTPUTS
        An [SdnHealth] object. Result is set to 'FAIL' when a resource reports a provisioningState other than
        'Succeeded', or a configurationState status other than 'Success' or 'Uninitialized'.
    #>

    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)]
        [SdnFabricHealthObject]$SdnEnvironmentObject,

        [Parameter(Mandatory = $false)]
        [System.Management.Automation.PSCredential]
        [System.Management.Automation.Credential()]
        $NcRestCredential = [System.Management.Automation.PSCredential]::Empty
    )

    $sdnHealthObject = [SdnHealth]::new()
    $array = @()

    try {
        "Validating configuration and provisioning state of {0}" -f $SdnEnvironmentObject.Role.ResourceName | Trace-Output

        $sdnResources = Get-SdnResource -NcUri $SdnEnvironmentObject.NcUrl.AbsoluteUri -Resource $SdnEnvironmentObject.Role.ResourceName -Credential $NcRestCredential
        foreach($object in $sdnResources){
            if ($object.properties.provisioningState -ine 'Succeeded') {
                # examine the provisioning state of the resources and display errors to the screen
                $sdnHealthObject.Result = 'FAIL'
                $sdnHealthObject.Remediation = 'Examine the Network Controller logs to determine why resource provisioning failed and take corrective measures.'

                "{0} is reporting provisioning state: {1}" -f $object.resourceRef, $object.properties.provisioningState | Trace-Output -Level:Exception
            }
            elseif($object.properties.configurationState.status -ine 'Success'){
                # examine the configuration state of the resources and display errors to the screen

                # gateways leverage an Uninitialized for when a gateway is passive and not hosting any virtual gateways
                # in this scenario, we can skip this status event
                if($object.properties.configurationState.status -ieq 'Uninitialized'){
                    continue
                }

                $sdnHealthObject.Result = 'FAIL'
                $sdnHealthObject.Remediation = 'Examine the detailedInfo property and take corrective action.'

                "{0} is reporting configurationState status: {1}" -f $object.resourceRef, $object.properties.configurationState.Status | Trace-Output -Level:Exception
            }

            $details = [PSCustomObject]@{
                resourceRef        = $object.resourceRef
                provisioningState  = $object.properties.provisioningState
                configurationState = $object.properties.configurationState
            }

            $array += $details
        }

        $sdnHealthObject.Properties = $array
        return $sdnHealthObject
    }
    catch {
        "{0}`n{1}" -f $_.Exception, $_.ScriptStackTrace | Trace-Output -Level:Error
    }
}

function Test-ScheduledTaskEnabled {
    <#
    .SYNOPSIS
        Ensures the scheduled task responsible for etl compression is enabled and running
    .PARAMETER SdnEnvironmentObject
        Fabric object whose ComputerName property lists the hosts to query.
    .PARAMETER Credential
        Credential used for the remote calls. Defaults to an empty credential.
    .OUTPUTS
        An [SdnHealth] object. Result is set to 'FAIL' when the 'SDN Diagnostics Task' on any host is in a
        state other than 'Ready' or 'Running'.
    #>

    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)]
        [SdnFabricHealthObject]$SdnEnvironmentObject,

        [Parameter(Mandatory = $false)]
        [System.Management.Automation.PSCredential]
        [System.Management.Automation.Credential()]
        $Credential = [System.Management.Automation.PSCredential]::Empty
    )

    $sdnHealthObject = [SdnHealth]::new()
    $array = @()

    $scriptBlock = {
        try {
            $result = Get-ScheduledTask -TaskName 'SDN Diagnostics Task' -ErrorAction Stop
            return [PSCustomObject]@{
                TaskName = $result.TaskName
                State    = $result.State.ToString()
            }
        }
        catch {
            # the task could not be retrieved; report a placeholder state so the host is still evaluated
            return [PSCustomObject]@{
                TaskName = 'SDN Diagnostics Task'
                State    = 'Not Available'
            }
        }
    }

    try {
        $scheduledTaskReady = Invoke-PSRemoteCommand -ComputerName $SdnEnvironmentObject.ComputerName -Credential $Credential -ScriptBlock $scriptBlock -AsJob -PassThru
        foreach ($result in $scheduledTaskReady) {
            if ($result.State -ine 'Ready' -and $result.State -ine 'Running') {
                "SDN Diagnostics Task state is {0} on {1}, which may result in uncontrolled log growth" -f $result.State, $result.PSComputerName | Trace-Output -Level:Exception
                $sdnHealthObject.Result = 'FAIL'
            }

            $array += [PSCustomObject]@{
                State    = $result.State
                Computer = $result.PSComputerName
            }
        }

        $sdnHealthObject.Properties = $array
        return $sdnHealthObject
    }
    catch {
        "{0}`n{1}" -f $_.Exception, $_.ScriptStackTrace | Trace-Output -Level:Error
    }
}

function Test-ServerHostId {
    <#
    .SYNOPSIS
        Queries the NCHostAgent HostID registry key value across the hypervisor hosts to ensure the HostID matches known InstanceID results from NC Servers API.
    .PARAMETER SdnEnvironmentObject
        Fabric object providing the hosts to query (ComputerName), the NcUrl and the Role.ResourceName.
    .PARAMETER Credential
        Credential used for the remote calls to the hosts.
    .PARAMETER NcRestCredential
        Credential used for the Network Controller REST API calls.
    .OUTPUTS
        An [SdnHealth] object. Result is set to 'FAIL' when a returned HostID is not present in the instanceId
        values of the resources returned by Network Controller.
    #>

    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)]
        [SdnFabricHealthObject]$SdnEnvironmentObject,

        [Parameter(Mandatory = $false)]
        [System.Management.Automation.PSCredential]
        [System.Management.Automation.Credential()]
        $Credential = [System.Management.Automation.PSCredential]::Empty,

        [Parameter(Mandatory = $false)]
        [System.Management.Automation.PSCredential]
        [System.Management.Automation.Credential()]
        $NcRestCredential = [System.Management.Automation.PSCredential]::Empty
    )

    $sdnHealthObject = [SdnHealth]::new()
    $array = @()

    try {
        "Validating Server HostID registry matches known InstanceIDs from Network Controller Servers API." | Trace-Output

        $scriptBlock = {
            $result = Get-ItemProperty -Path 'HKLM:\SYSTEM\CurrentControlSet\Services\NcHostAgent\Parameters' -Name 'HostId' -ErrorAction SilentlyContinue
            return $result.HostID
        }

        $servers = Get-SdnResource -NcUri $SdnEnvironmentObject.NcUrl.AbsoluteUri -Resource $SdnEnvironmentObject.Role.ResourceName -Credential $NcRestCredential
        $hostId = Invoke-PSRemoteCommand -ComputerName $SdnEnvironmentObject.ComputerName -Credential $Credential -ScriptBlock $scriptBlock -AsJob -PassThru
        foreach($id in $hostId){
            if($id -inotin $servers.instanceId){
                "{0}'s HostID {1} does not match known instanceID results in Network Controller Server REST API" -f $id.PSComputerName, $id | Trace-Output -Level:Warning
                $sdnHealthObject.Result = 'FAIL'

                $object = [PSCustomObject]@{
                    HostID   = $id
                    Computer = $id.PSComputerName
                }

                $array += $object
            }
            else {
                "{0}'s HostID {1} matches known InstanceID in Network Controller Server REST API" -f $id.PSComputerName, $id | Trace-Output -Level:Verbose
            }
        }

        $sdnHealthObject.Properties = $array
        return $sdnHealthObject
    }
    catch {
        "{0}`n{1}" -f $_.Exception, $_.ScriptStackTrace | Trace-Output -Level:Error
    }
}

function Test-ServiceFabricPartitionDatabaseSize {
    <#
    .SYNOPSIS
        Validate the Service Fabric partition size for each of the services running on Network Controller.
    .PARAMETER SdnEnvironmentObject
        Fabric object whose ComputerName property identifies the Network Controller node(s) to query.
    .PARAMETER Credential
        Credential used for the Service Fabric and remote calls.
    .OUTPUTS
        An [SdnHealth] object. Result is set to 'FAIL' when an ImosStore file is reported as larger than 4 GB.
        Properties contains the node, service and size for each ImosStore file that was found.
    #>

    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)]
        [SdnFabricHealthObject]$SdnEnvironmentObject,

        [Parameter(Mandatory = $false)]
        [System.Management.Automation.PSCredential]
        [System.Management.Automation.Credential()]
        $Credential = [System.Management.Automation.PSCredential]::Empty
    )

    $sdnHealthObject = [SdnHealth]::new()
    $array = @()

    try {
        "Validate the size of the Service Fabric Partition Databases for Network Controller services" | Trace-Output

        $ncNodes = Get-SdnServiceFabricNode -NetworkController $SdnEnvironmentObject.ComputerName -Credential $Credential
        if($null -eq $ncNodes){
            throw New-Object System.NullReferenceException("Unable to retrieve service fabric nodes")
        }

        # Only stateful service have the database file
        # the query does not depend on the node being processed, so retrieve the list once rather than per node
        $ncServices = Get-SdnServiceFabricService -NetworkController $SdnEnvironmentObject.ComputerName -Credential $Credential | Where-Object {$_.ServiceKind -eq "Stateful"}

        foreach($node in $ncNodes){
            $ncApp = Invoke-SdnServiceFabricCommand -NetworkController $SdnEnvironmentObject.ComputerName -Credential $Credential -ScriptBlock {
                param([Parameter(Position = 0)][String]$param1)
                Get-ServiceFabricDeployedApplication -ApplicationName 'fabric:/NetworkController' -NodeName $param1
            } -ArgumentList @($node.NodeName.ToString())

            $ncAppWorkDir = $ncApp.WorkDirectory
            if($null -eq $ncAppWorkDir){
                throw New-Object System.NullReferenceException("Unable to retrieve working directory path")
            }

            foreach ($ncService in $ncServices){
                $replica = Get-SdnServiceFabricReplica -NetworkController $SdnEnvironmentObject.ComputerName -ServiceName $ncService.ServiceName -Credential $Credential | Where-Object {$_.NodeName -eq $node.NodeName}
                $imosStorePath = Join-Path -Path $ncAppWorkDir -ChildPath "P_$($replica.PartitionId)\R_$($replica.ReplicaId)\ImosStore"

                # the path is local to the node, so the file has to be examined remotely on that node
                $imosStoreFile = Invoke-PSRemoteCommand -ComputerName $node.NodeName -Credential $Credential -ScriptBlock {
                    param([Parameter(Position = 0)][String]$param1)
                    if (Test-Path -Path $param1) {
                        return (Get-Item -Path $param1)
                    }
                    else {
                        return $null
                    }
                } -ArgumentList @($imosStorePath)

                if($null -ne $imosStoreFile){
                    $formatedByteSize = Format-ByteSize -Bytes $imosStoreFile.Length

                    $imosInfo = [PSCustomObject]@{
                        Node     = $node.NodeName
                        Service  = $ncService.ServiceName
                        ImosSize = $formatedByteSize.GB
                    }

                    # if the imos database file exceeds 4GB, want to indicate failure as it should not grow to be larger than this size
                    if([float]$formatedByteSize.GB -gt 4){
                        "[{0}] Service {1} is reporting {2} GB in size" -f $node.NodeName, $ncService.ServiceName, $formatedByteSize.GB | Trace-Output -Level:Warning
                        $sdnHealthObject.Result = 'FAIL'
                    }
                    else {
                        "[{0}] Service {1} is reporting {2} GB in size" -f $node.NodeName, $ncService.ServiceName, $formatedByteSize.GB | Trace-Output -Level:Verbose
                    }

                    $array += $imosInfo
                }
                else {
                    "No ImosStore file for service {0} found on node {1} from {2}" -f $ncService.ServiceName, $node.NodeName, $imosStorePath | Trace-Output -Level:Warning
                }
            }
        }

        $sdnHealthObject.Properties = $array
        return $sdnHealthObject
    }
    catch {
        "{0}`n{1}" -f $_.Exception, $_.ScriptStackTrace | Trace-Output -Level:Error
    }
}

function Test-ServiceState {
    <#
    .SYNOPSIS
        Confirms that critical services for gateway are running
    .PARAMETER SdnEnvironmentObject
        Fabric object providing the hosts to query (ComputerName) and the service names to check
        (the keys of Role.Properties.Services).
    .PARAMETER Credential
        Credential used for the remote calls. Defaults to an empty credential.
    .OUTPUTS
        An [SdnHealth] object. Result is set to 'FAIL' when a returned service has a Status other than 'Running'.
        NOTE(review): a service that does not exist on a host produces no result (Get-Service is invoked with
        -ErrorAction SilentlyContinue) and therefore does not appear to be flagged - confirm this is intended.
    #>

    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)]
        [SdnFabricHealthObject]$SdnEnvironmentObject,

        [Parameter(Mandatory = $false)]
        [System.Management.Automation.PSCredential]
        [System.Management.Automation.Credential()]
        $Credential = [System.Management.Automation.PSCredential]::Empty
    )

    $sdnHealthObject = [SdnHealth]::new()
    $array = @()
    $serviceStateResults = @()

    try {
        [string[]]$services = $SdnEnvironmentObject.Role.Properties.Services.Keys
        "Validating {0} service state for {1}" -f ($services -join ', '), ($SdnEnvironmentObject.ComputerName -join ', ') | Trace-Output

        $scriptBlock = {
            param([Parameter(Position = 0)][String]$param1)

            $result = Get-Service -Name $param1 -ErrorAction SilentlyContinue
            return $result
        }

        foreach ($service in $services) {
            $serviceStateResults += Invoke-PSRemoteCommand -ComputerName $SdnEnvironmentObject.ComputerName -Credential $Credential -Scriptblock $scriptBlock -ArgumentList $service
        }

        foreach($result in $serviceStateResults){
            $array += $result

            if($result.Status -ine 'Running'){
                $sdnHealthObject.Result = 'FAIL'
                $sdnHealthObject.Remediation = 'Start the service(s).'

                "{0} is {1} on {2}" -f $result.Name, $result.Status, $result.PSComputerName | Trace-Output -Level:Warning
            }
            else {
                "{0} is {1} on {2}" -f $result.Name, $result.Status, $result.PSComputerName | Trace-Output -Level:Verbose
            }
        }

        $sdnHealthObject.Properties = $array
        return $sdnHealthObject
    }
    catch {
        "{0}`n{1}" -f $_.Exception, $_.ScriptStackTrace | Trace-Output -Level:Error
    }
}

function Test-VfpDuplicatePort {
    <#
    .SYNOPSIS
        Validate there are no ports within VFP layer that may have duplicate MAC addresses.
    .PARAMETER SdnEnvironmentObject
        Fabric object whose ComputerName property lists the hosts to query.
    .PARAMETER Credential
        Credential used for the remote calls. Defaults to an empty credential.
    .OUTPUTS
        An [SdnHealth] object. Result is set to 'FAIL' when two or more VFP ports share a MAC address.
        Properties contains the grouped duplicates.
    #>

    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)]
        [SdnFabricHealthObject]$SdnEnvironmentObject,

        [Parameter(Mandatory = $false)]
        [System.Management.Automation.PSCredential]
        [System.Management.Automation.Credential()]
        $Credential = [System.Management.Automation.PSCredential]::Empty
    )

    $sdnHealthObject = [SdnHealth]::new()
    $array = @()

    try {
        "Validate no duplicate MAC addresses for ports within Virtual Filtering Platform (VFP)" | Trace-Output

        $vfpPorts = Get-SdnVfpVmSwitchPort -ComputerName $SdnEnvironmentObject.ComputerName -Credential $Credential

        # ports with an all-zero or missing MAC address are excluded before grouping
        $duplicateObjects = $vfpPorts | Where-Object {$_.MACaddress -ne '00-00-00-00-00-00' -and $null -ne $_.MacAddress} | Group-Object -Property MacAddress | Where-Object {$_.Count -ge 2}
        if($duplicateObjects){
            $array += $duplicateObjects
            $sdnHealthObject.Result = 'FAIL'

            # since there can be multiple grouped objects, we need to enumerate each duplicate group
            foreach($obj in $duplicateObjects){
                "Located {0} VFP ports associated with {1}:`r`n`n{2}`r`n" -f $obj.Count, $obj.Name, `
                    ($obj.Group `
                        | Select-Object @{n="Portname";e={"`t$($_.Portname)"}} `
                        | Select-Object -ExpandProperty Portname `
                        | Out-String `
                    ) | Trace-Output -Level:Warning
            }
        }

        $sdnHealthObject.Properties = $array
        return $sdnHealthObject
    }
    catch {
        "{0}`n{1}" -f $_.Exception, $_.ScriptStackTrace | Trace-Output -Level:Error
    }
}

function Test-VMNetAdapterDuplicateMacAddress {
    <#
    .SYNOPSIS
        Validate there are no adapters within hyper-v dataplane that may have duplicate MAC addresses.
    .PARAMETER SdnEnvironmentObject
        Fabric object whose ComputerName property lists the hosts to query.
    .PARAMETER Credential
        Credential used for the remote calls. Defaults to an empty credential.
    .OUTPUTS
        An [SdnHealth] object. Result is set to 'FAIL' when two or more VM network adapters share a MAC address.
        Properties contains the grouped duplicates.
    #>

    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)]
        [SdnFabricHealthObject]$SdnEnvironmentObject,

        [Parameter(Mandatory = $false)]
        [System.Management.Automation.PSCredential]
        [System.Management.Automation.Credential()]
        $Credential = [System.Management.Automation.PSCredential]::Empty
    )

    $sdnHealthObject = [SdnHealth]::new()
    $array = @()

    try {
        "Validate no duplicate MAC addresses for network adapters within Hyper-V" | Trace-Output

        $vmNetAdapters = Get-SdnVMNetworkAdapter -ComputerName $SdnEnvironmentObject.ComputerName -AsJob -PassThru -Timeout 900 -Credential $Credential
        $duplicateObjects = $vmNetAdapters | Group-Object -Property MacAddress | Where-Object {$_.Count -ge 2}
        if($duplicateObjects){
            $array += $duplicateObjects
            $sdnHealthObject.Result = 'FAIL'

            # since there can be multiple grouped objects, we need to enumerate each duplicate group
            foreach($obj in $duplicateObjects){
                "Located {0} virtual machines associated with MAC address {1}:`r`n`n{2}`r`n" -f $obj.Count, $obj.Name, `
                    ($obj.Group `
                        | Select-Object @{n="VMName";e={"`t$($_.VMName)"}} `
                        | Select-Object -ExpandProperty VMName `
                        | Out-String `
                    ) | Trace-Output -Level:Warning
            }
        }

        $sdnHealthObject.Properties = $array
        return $sdnHealthObject
    }
    catch {
        "{0}`n{1}" -f $_.Exception, $_.ScriptStackTrace | Trace-Output -Level:Error
    }
}

function Debug-SdnFabricInfrastructure {
    <#
    .SYNOPSIS
        Executes a series of fabric validation tests to validate the state and health of the underlying components within the SDN fabric.
    .PARAMETER NetworkController
        Specifies the name or IP address of the network controller node on which this cmdlet operates. The parameter is optional if running on network controller node.
    .PARAMETER ComputerName
        Type the NetBIOS name, an IP address, or a fully qualified domain name of one or more remote computers.
    .PARAMETER Role
        The specific SDN role(s) to perform tests and validations for. If ommitted, defaults to all roles.
    .PARAMETER Credential
        Specifies a user account that has permission to perform this action. The default is the current user.
    .PARAMETER NcRestCredential
        Specifies a user account that has permission to access the northbound NC API interface. The default is the current user.
    .EXAMPLE
        PS> Debug-SdnFabricInfrastructure
    .EXAMPLE
        PS> Debug-SdnFabricInfrastructure -NetworkController 'NC01' -Credential (Get-Credential) -NcRestCredential (Get-Credential)
    #>

    [CmdletBinding(DefaultParameterSetName = 'Role')]
    param (
        [Parameter(Mandatory = $false, ParameterSetName = 'Role')]
        [Parameter(Mandatory = $false, ParameterSetName = 'ComputerName')]
        [System.String]$NetworkController = $(HostName),

        [Parameter(Mandatory = $false, ParameterSetName = 'Role')]
        [SdnDiag.Common.Helper.SdnRoles[]]$Role = ('Gateway','LoadBalancerMux','NetworkController','Server'),

        [Parameter(Mandatory = $true, ParameterSetName = 'ComputerName')]
        [System.String[]]$ComputerName,

        [Parameter(Mandatory = $false, ParameterSetName = 'Role')]
        [Parameter(Mandatory = $false, ParameterSetName = 'ComputerName')]
        [System.Management.Automation.PSCredential]
        [System.Management.Automation.Credential()]
        $Credential = [System.Management.Automation.PSCredential]::Empty,

        [Parameter(Mandatory = $false, ParameterSetName = 'Role')]
        [Parameter(Mandatory = $false, ParameterSetName = 'ComputerName')]
        [System.Management.Automation.PSCredential]
        [System.Management.Automation.Credential()]
        $NcRestCredential = [System.Management.Automation.PSCredential]::Empty
    )

    # clear any results from a previous run
    $script:SdnDiagnostics_Health.Cache = $null

    # results are accumulated into a hashtable keyed by role name; initialise it explicitly so that a
    # variable of the same name in a parent scope cannot leak into the results
    $objectArray = @{}

    try {
        if (-NOT ($PSBoundParameters.ContainsKey('NetworkController'))) {
            $config = Get-SdnModuleConfiguration -Role 'NetworkController'
            $confirmFeatures = Confirm-RequiredFeaturesInstalled -Name $config.windowsFeature
            if (-NOT ($confirmFeatures)) {
                "The current machine is not a NetworkController, run this on NetworkController or use -NetworkController parameter to specify one" | Trace-Output -Level:Warning
                return # don't throw exception, since this is a controlled scenario and we do not need stack exception tracing
            }
        }

        $environmentInfo = Get-SdnInfrastructureInfo -NetworkController $NetworkController -Credential $Credential -NcRestCredential $NcRestCredential
        if($null -eq $environmentInfo){
            throw New-Object System.NullReferenceException("Unable to retrieve environment details")
        }

        # if we opted to specify the ComputerName rather than Role, we need to determine which role
        # the computer names are associated with
        if ($PSCmdlet.ParameterSetName -ieq 'ComputerName') {
            $Role = @()
            $ComputerName | ForEach-Object {
                $computerRole = $_ | Get-SdnRole -EnvironmentInfo $environmentInfo
                if ($computerRole) {
                    $Role += $computerRole
                }
            }
        }

        $Role = $Role | Sort-Object -Unique
        foreach ($object in $Role) {
            "Processing tests for {0} role" -f $object | Trace-Output

            $sdnFabricDetails = [SdnFabricHealthObject]::new()
            $sdnFabricDetails.NcUrl = $environmentInfo.NcUrl

            $config = Get-SdnModuleConfiguration -Role $object.ToString()
            $sdnFabricDetails.Role = $config

            # NOTE(review): when -ComputerName is used, every role processed receives the full list of
            # computer names rather than only the computers that belong to that role - confirm this is intended
            if ($ComputerName) {
                $sdnFabricDetails.ComputerName = $ComputerName
            }
            else {
                $sdnFabricDetails.ComputerName = $environmentInfo[$object.ToString()]
            }

            # parameter sets splatted to the individual tests, depending on which credentials each test accepts
            $restApiParams = @{
                SdnEnvironmentObject = $sdnFabricDetails
                NcRestCredential     = $NcRestCredential
            }

            $computerCredParams = @{
                SdnEnvironmentObject = $sdnFabricDetails
                Credential           = $Credential
            }

            $computerCredAndRestApiParams = @{
                SdnEnvironmentObject = $sdnFabricDetails
                NcRestCredential     = $NcRestCredential
                Credential           = $Credential
            }

            # perform the health validations for the appropriate roles that were specified directly
            # or determined via which ComputerNames were defined
            switch ($object) {
                'Gateway' {
                    $objectArray += @{
                        Gateway = @(
                            Test-ResourceConfigurationState @restApiParams
                            Test-ServiceState @computerCredParams
                            Test-ScheduledTaskEnabled @computerCredParams
                        )
                    }
                }

                'LoadBalancerMux' {
                    $objectArray += @{
                        LoadBalancerMux = @(
                            Test-ResourceConfigurationState @restApiParams
                            Test-ServiceState @computerCredParams
                            Test-ScheduledTaskEnabled @computerCredParams
                        )
                    }
                }

                'NetworkController' {
                    $objectArray += @{
                        NetworkController = @(
                            Test-ServiceState @computerCredParams
                            Test-ServiceFabricPartitionDatabaseSize @computerCredParams
                            Test-NetworkInterfaceAPIDuplicateMacAddress @restApiParams
                            Test-ScheduledTaskEnabled @computerCredParams
                        )
                    }
                }

                'Server' {
                    $objectArray += @{
                        Server = @(
                            Test-EncapOverhead @computerCredParams
                            Test-ProviderNetwork @computerCredParams
                            Test-ResourceConfigurationState @restApiParams
                            Test-ServiceState @computerCredParams
                            Test-ServerHostId @computerCredAndRestApiParams
                            Test-VfpDuplicatePort @computerCredParams
                            Test-VMNetAdapterDuplicateMacAddress @computerCredParams
                            Test-HostRootStoreNonRootCert @computerCredParams
                            Test-ScheduledTaskEnabled @computerCredParams
                        )
                    }
                }
            }
        }

        # an empty hashtable still evaluates to $true, so check the number of entries explicitly
        if ($objectArray.Count -gt 0) {
            $script:SdnDiagnostics_Health.Cache = $objectArray

            "Results for fabric health have been saved to cache for further analysis. Use 'Get-SdnFabricInfrastructureResult' to examine the results." | Trace-Output
            return $script:SdnDiagnostics_Health.Cache
        }
    }
    catch {
        "{0}`n{1}" -f $_.Exception, $_.ScriptStackTrace | Trace-Output -Level:Error
    }
}

function Get-SdnFabricInfrastructureResult {
    <#
    .SYNOPSIS
        Returns the results that have been saved to cache as part of running Debug-SdnFabricInfrastructure.
    .PARAMETER Role
        The name of the SDN role that you want to return test results from within the cache.
    .PARAMETER Name
        The name of the test results you want to examine.
.EXAMPLE PS> Get-SdnFabricInfrastructureResult .EXAMPLE PS> Get-SdnFabricInfrastructureResult -Role Server .EXAMPLE PS> Get-SdnFabricInfrastructureResult -Role Server -Name 'Test-ServiceState' #> [CmdletBinding()] param ( [Parameter(Mandatory = $false)] [SdnDiag.Common.Helper.SdnRoles]$Role, [Parameter(Mandatory = $false)] [System.String]$Name ) $cacheResults = $script:SdnDiagnostics_Health.Cache if ($PSBoundParameters.ContainsKey('Role')) { if ($cacheResults) { $cacheResults = $cacheResults.$($Role.ToString()) } } if ($PSBoundParameters.ContainsKey('Name')) { if ($cacheResults) { $cacheResults = $cacheResults | Where-Object {$_.Name -eq $Name} } } return $cacheResults } |