PowerShell code:
#
# LUNAR DUST - home grown server/service monitor - sends out email when services defined in related .CSV config file are down
# Author: Neal Walters - Nov 2013
# (Lunar Dust is a play on words of the monitor name "Solar Wind")
#
# Usage:  ServerMonitor.ps1 [Y|N] [csvFilename]      (arguments may be given in either order)
#   Y|N          - "Y" sends the status email even when no critical errors were found.
#   csvFilename  - headerless CSV with the columns
#                  IsActive,Environment,Category,ServerName,ServiceName,Criticality.
#                  Any argument longer than 4 characters is taken to be this filename.
#

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
[string[]] $users = "john@abc.com","fred@abc.com"   # List of users to email your report to (separate by comma)
$fromemail        = "FromEmail@abc.com"
$SMTPserver       = "YourEmailRelayServer.com"      #enter your own SMTP server DNS name / IP address here
$YNTraceSuccess   = "N"   #setting to "Y" will create more trace/debug to the ServiceTestTrace.txt file, setting to "N" only shows servers/services that were down
$TraceFilename    = "D:\scripts\ServiceMonitorTrace.txt"
$LocalServerName  = "MyServerName"   #used to determine if we should check remote server or not on GetService

# NOTE(review): "trustedhosts" is a relative path, so this only resolves when the
# current location is WSMan:\localhost\Client - confirm how the script is launched.
set-item trustedhosts $LocalServerName

# ---------------------------------------------------------------------------
# Arguments
# ---------------------------------------------------------------------------
#Get arguments flexibly, in either order.
#one parm is a "Y" or "N" to indicate to send an email, even when no servers are in error status
#another optional parm is the filename of the CSV to read, if omitted, a default filename is used.
# Each of the first two arguments is classified by its OWN value (the earlier
# version read the wrong index, e.g. $args[1] while testing $args[0]).
foreach ($arg in ($args | Select-Object -First 2)) {
    if ($arg -eq "Y" -or $arg -eq "N") {
        $IsEmailOn = $arg
    }
    if ($arg.Length -gt 4) {
        $csvFilename = $arg
    }
}

if ([string]::IsNullOrEmpty($csvFilename)) {
    $csvFilename = "D:\Scripts\ServerMonitorConfig.csv"
    Write-Host "Setting csfFileName=$csvFileName"
}
Write-Host "csvFilename=$csvFilename"

# The CSV file has no header row; column names are supplied here.
$csv = Import-Csv $csvFilename -Header @("IsActive","Environment","Category","ServerName","ServiceName","Criticality")

# ---------------------------------------------------------------------------
# Check every active row and build the text / HTML reports
# ---------------------------------------------------------------------------
$TextMessage = ""
$HTMLMessage = "<h2>Server/Service Status</h2><table border='1'><tr><th>Environment</th><th>Category</th><th>ServerName</th><th>ServiceName</th><th>Status</th><th>Process Started Date/Time</th><th>UserName</th></tr>"
$CriticalErrorCount = 0
$ErrorCount = 0
$ServerCount = 0

foreach ($line in $csv) {
    if ($line.IsActive -eq "Active") {
        $reportStatus = ""
        $ServerCount = $ServerCount + 1

        #$Service = (get-service -Name $line.ServiceName -ComputerName $line.ServerName)
        #get-wmiobject is slower than get-service, but it gives us the processId which we can use to find out what time the service/process started
        write-host "Verifying: " $line.ServerName $line.ServiceName
        $myDate = Get-Date

        # -eq, not "=": a single "=" assigns "Y" and is always true, which forced tracing on.
        if ($YNTraceSuccess -eq "Y") {
            Add-Content $TraceFilename "$myDate TRC01 $($line.ServerName) $($line.ServiceName)"
        }

        $error.clear()   #clear any prior errors, otherwise same error may repeat over-and-over in trace

        if ($LocalServerName -eq $line.ServerName) {
            # see if not using -ComputerName on local computer avoids the "service not found" error
            Add-Content $TraceFilename "$myDate TRCW1 using local computer "
            $Service = (get-wmiobject win32_service -filter "name = '$($line.ServiceName)'")
        }
        else {
            Add-Content $TraceFilename "$myDate TRCW2 using remote computer $($line.ServerName) not eq $LocalServerName"
            $Service = (get-wmiobject win32_service -ComputerName $line.ServerName -filter "name = '$($line.ServiceName)'")
        }

        # $error is a collection; test its count rather than comparing it with $null.
        if ($error.Count -gt 0) {
            Write-Host "----> $($error[0].Exception) "
            Add-Content $TraceFilename "$myDate TRCE1 $($error[0].Exception)"
        }

        if ($Service -eq $null) {
            # No service object returned: always counted as a critical error.
            $reportStatus = "Service Not Found: name = '$($line.ServiceName)'"
            $trColor = "Yellow"
            $ErrorCount = $ErrorCount + 1
            $CriticalErrorCount = $CriticalErrorCount + 1
            $CreationDate = "NA"
            Write-Host "----> $reportStatus "
            Add-Content $TraceFilename "$myDate TRC02 $reportStatus"
        }
        else {
            #if ($status -eq "Running")   #this was the check when using get-service instead of get-wmiobject win32_service
            $reportStatus = $Service.State

            if ($Service.Started -eq "True") {
                $trColor = "White"
                # when service is running, then we can lookup the ProcessId to get the Userid and CreationDate (Time the service was started)
                $ServicePID = $Service.ProcessID
                # -ea 0 keeps a failed process lookup silent; $CreationDate is then empty.
                $ProcessInfo = Get-WmiObject -Class Win32_Process -ComputerName $line.ServerName -Filter "ProcessID='$ServicePID'" -ea 0
                $CreationDate = $ProcessInfo | ForEach-Object { $_.ConvertToDateTime( $_.CreationDate ) }
            }
            else {
                Write-Host "Down Service.Started=$($Service.Started) "
                Write-Host "Status=$($Service.Status) State=$($Service.State)"
                $trColor = "Orange"
                $ErrorCount = $ErrorCount + 1
                if ($line.Criticality -eq "Error") {
                    #switch from orange to yellow background
                    $trColor = "Yellow"
                    $CriticalErrorCount = $CriticalErrorCount + 1
                }
                $CreationDate = "NA"
            }
        }

        $TextMessage += "$($line.Environment) $($line.Category) $($line.ServerName) $($line.ServiceName) $reportStatus $CreationDate $($Service.StartName)`r`n"

        #build the TR and TD Cells of the HTML Table
        $HTMLMessage += "<tr bgcolor='$trColor'>"
        $HTMLMessage += "<td>$($line.Environment)</td>"
        $HTMLMessage += "<td>$($line.Category)</td>"
        $HTMLMessage += "<td>$($line.ServerName)</td>"
        $HTMLMessage += "<td>$($line.ServiceName)</td>"
        $HTMLMessage += "<td>$reportStatus</td>"
        $HTMLMessage += "<td>$CreationDate</td>"
        $HTMLMessage += "<td>$($Service.StartName)</td>"
        $HTMLMessage += "</tr>`r`n"
    }
    else {
        Write-Host "Skipping InActive " $line.ServerName $line.ServiceName
    }
}

# ---------------------------------------------------------------------------
# Summarise and send the alert email when needed
# ---------------------------------------------------------------------------
Write-Host '------'
$HTMLMessage = "<h3>Server-Count=$ServerCount Critical-Count=$CriticalErrorCount Total-Error-Count=$ErrorCount</h3>$HTMLMessage</table><h3>Yellow is critical, Orange is not critical.</h3>"
$mydate = Get-Date
Write-Host "Date=$myDate"
$HTMLMessage = "<h3>$mydate</h3>$HTMLMessage"
Write-Host $TextMessage
Write-Host "`r`n Server-Count=$ServerCount Critical-Count=$CriticalErrorCount Total-Error-Count=$ErrorCount"
Write-Host "Date=$myDate"

$emailSubject = "QT Service Email was Requested"
if ($CriticalErrorCount -gt 0) {
    $emailSubject = "Critical QT Server Down Alert: There are $CriticalErrorCount critical services down"
}

#always send email when 1 or more critical errors are found, or when the $IsEmailOn parm requests it regardless of error count
# The command line only accepts "Y"/"N", so "Y" must trigger the send; "Always" is
# still honoured in case $IsEmailOn is set that way elsewhere.
if ($CriticalErrorCount -gt 0 -or $IsEmailOn -eq "Y" -or $IsEmailOn -eq "Always") {
    send-mailmessage -from $fromemail -to $users -subject $emailSubject -BodyAsHTML -body $HTMLMessage -priority High -smtpServer $SMTPserver
    Write-Host "Alert Email Sent with Subject=$emailSubject"
    Add-Content $TraceFilename "$myDate TRC99 Alert Email Sent with Subject=$emailSubject"
}
Create a CSV like this, with a list of the servers to monitor.
Column 1 indicates whether the row is monitored: only rows whose value is "Active" are checked; rows with any other value are skipped.
Column 2 is an arbitrary environment name. Column 3 is a category of server functionality (could potentially be used to route email to the group in charge of those types of servers, e.g. IIS, BizTalk, SQL…)
Column 4 is Server/Machine Name.
Column 5 is the Windows Service name. Use “LanmanServer” just to know if the machine is up and running.
Column 6 indicates whether this should cause a critical error (value "Error") or just a warning (any other value) when the service is down.
"Active","PROD","SQL","SqlServer01","MSSQLSERVER","Error"
"Active","PROD","SQL","SqlServer01","SQLSERVERAGENT","Error"
"Active","PROD","SQL","SqlServer01","MSDTC","Error"
"Active","PROD","BizTalk","BizTalkServer01","MSDTC","Error"
"Active","QA","BizTalk","BizTalkServer03","LanmanServer","Error"
"Active","QA","BizTalk","BizTalkServer03","MSDTC","Error"
"Active","QA","BizTalk","BizTalkServer03","BTSSvc$Application_Default","Error"
Schedule a .bat or .cmd file such as the following, pointing to the desired CSV file as defined above.
powershell -command "& 'D:\Scripts\ServerMonitor.ps1'" d:\Scripts\ServerMonitorConfig.csv