Just finished setting up the monitoring of:
- Queries > 100000 visited entries as full record
- Queries > 1000ms as full record
- Queries per IP > 100 pro 600s as count per ClientIP and 10min block
- Queries pro Stunde > 3500 just count
Not sure if they are best practice or just current need but I thought a write-up could help somebody.
I started with a request from an AD administrator to see what “my” monitoring can do for him.
An internet search led me to https://learn.microsoft.com/en-US/troubleshoot/windows-server/identity/event1644reader-analyze-ldap-query-performance and a plan emerged.
In the end, I got him to set up an hourly drop of 50MB of 1644 events in *.evtx files, one per ADC, into a share (D:\ADEventLogs) on a Windows server with the Icinga2 agent and the Linuxfabrik monitoring plugins installed.
Next I mangled the PowerShell script by ripping out the Excel parts, rewriting the timestamps for SQLite compatibility, replacing | with ¦ for Icinga compatibility and removing the timestamp from the generated CSV filenames.
Function mcConvertClient ($Client)
{
    # Extracts one named capture group from a client string.
    #   $Client[0] - the client string to inspect
    #   $Client[1] - name of the capture group to return ('IP' or 'Port')
    # Returns 'Unknown' when no pattern matches. Patterns are tried in order and
    # every matching pattern overwrites the result, so the last match decides.
    # A matching pattern that has no group of the requested name yields $null.
    $mcPatterns = @(
        '(?<IP>\[[A-Fa-f0-9:%]{1,}\])\:(?<Port>([0-9]+))',                               # bracketed IPv6 followed by :port
        '((?<IP>(\d{1,3}\.){3}\d{1,3})\:(?<Port>[0-9]+))|(?<IP>(\d{1,3}\.){3}\d{1,3})',  # IPv4 with or without :port
        '(?<IP>([G-Z])\w+)'                                                              # word starting with a letter G-Z
    )
    $mcResult = 'Unknown'
    ForEach ($mcPattern in $mcPatterns)
    {
        # -match is case-insensitive and fills the automatic $matches table on success.
        If ($Client[0] -match $mcPattern)
        {
            $mcResult = $matches.($Client[1])
        }
    }
    $mcResult
}
#----Main---------
# Converts the "Directory Service" event 1644 records of every *.evtx file in
# $mcEventPath into one CSV per file, named 1644-<first 12 characters of the
# file's base name>.csv and written next to the *.evtx files.
# Out-File is defaulted to UTF-8 for every call in this session.
$PSDefaultParameterValues['Out-File:Encoding'] = 'utf8'
cls
Write-Host "Event1644ReaderIcinga: https://community.icinga.com/t/monitor-ldap-queries-on-active-directory-controllers/13066"
Write-Host "Started at $(Get-Date)"
$mcScriptPath = Split-Path ((Get-Variable MyInvocation -Scope 0).Value).MyCommand.Path # not referenced below
$mcEventPath = 'D:\ADEventLogs'
# Convert evtx to csv.
Get-ChildItem -Path $mcEventPath | Where-Object {$_.Extension -eq '.evtx'} | ForEach-Object { #Loop through *.evtx
    $mcFile = $_
    Write-Host ('Reading ',$mcFile.Name)
    # Errors are silenced on purpose: a file without matching events is handled by the else branch.
    $mcEvents = Get-WinEvent -FilterHashtable @{Path=$mcEventPath+'\'+$mcFile.Name; LogName="Directory Service"; id="1644" } -ErrorAction SilentlyContinue
    If ($null -ne $mcEvents)
    { #dump 1644 event to corresponding CSV
        # Use at most 12 characters (e.g. ADHOSTNAME01); shorter base names no longer make Substring throw.
        $mcBaseName = $mcFile.BaseName
        $mcOutFile = $mcEventPath+'\1644-'+$mcBaseName.Substring(0, [Math]::Min(12, $mcBaseName.Length))+'.csv'
        Write-Host (' Event 1644 found, generating', $mcOutFile)
        # Build a fresh object per event so that no value can carry over from the previous event.
        $mcRows = ForEach ($mcEvent in $mcEvents)
        { #Convert 1644 event to fields
            $mcProps = $mcEvent.Properties
            [pscustomobject]@{
                LDAPServer                       = $mcEvent.MachineName
                # Round-trip ("o") format in UTC.
                TimeGenerated                    = $mcEvent.TimeCreated.ToUniversalTime().ToString("o")
                ClientIP                         = (mcConvertClient($mcProps[4].Value,'IP'))
                ClientPort                       = (mcConvertClient($mcProps[4].Value,'Port'))
                StartingNode                     = $mcProps[0].Value
                # '|' is replaced by the broken bar; the cast keeps a missing filter from raising an error.
                Filter                           = ([string]$mcProps[1].Value).Replace('|','¦')
                # Fixed: store the property's value, not the EventProperty wrapper object.
                SearchScope                      = $mcProps[5].Value
                AttributeSelection               = $mcProps[6].Value
                ServerControls                   = $mcProps[7].Value
                VisitedEntries                   = $mcProps[2].Value
                ReturnedEntries                  = $mcProps[3].Value
                UsedIndexes                      = $mcProps[8].Value # KB 2800945 or later has extra data fields.
                PagesReferenced                  = $mcProps[9].Value
                PagesReadFromDisk                = $mcProps[10].Value
                PagesPreReadFromDisk             = $mcProps[11].Value
                CleanPagesModified               = $mcProps[12].Value
                DirtyPagesModified               = $mcProps[13].Value
                SearchTimeMS                     = $mcProps[14].Value
                AttributesPreventingOptimization = $mcProps[15].Value
            }
        }
        # Header plus all rows in a single write instead of re-opening the file for every event.
        $mcRows | ConvertTo-Csv -NoTypeInformation | Out-File $mcOutFile
    }
    else {
        Write-Host (' No event 1644 found.')
    }
}
# Double quotes so that $(Get-Date) is expanded instead of printed literally.
Write-Host "Script completed. $(Get-Date)"
As the AD admin drops the *.evtx files at minute 59, I scheduled the execution of the script at minute 00.
I then set up the Icinga services, utilizing the https://github.com/Linuxfabrik/monitoring-plugins/tree/main/check-plugins/csv-values check via the director.
/**
* Service Set: Server_Admin AD Event Log
*
* assign where "ad-eventlog" in host.vars.custom_tags
*/
// Applied to every host tagged "ad-eventlog". Runs the csv-values check against the
// CSV exported for ADHOSTNAME01. The warning query returns one row per
// (ClientIP, hour, 10-minute slot) that holds more than 100 queries among the
// entries of the last 1.5 hours.
apply Service "CSV Values - LDAP Query Time - ADHOSTNAME01 - Queries per IP > 100 pro 600s" {
    import "tpl-service-csv-values-windows"
    max_check_attempts = "1"
    check_interval = 70m
    retry_interval = 70m
    check_timeout = 30s
    notes = "Wenn von einer IP innerhalb 10 Min. mehr als 100 Abfragen kommen / Info Mail. IP Adresse und Anzahl Anfragen.\r\n\r\nLimitierungen:\r\n\r\nMeldungen kommen ca. 30min (Verarbeitungszeit ) bis 1h30min (ältester berücksichtigter Eintrag) verzögert. Dies ist bedingt durch Lieferung der Events um Minute 59 in Blöcken a 50MB, Job started Minute 00 und die Verarbeitung dauert ca. 30min.\r\nDeshalb \"datetime(TimeGenerated) >= datetime('now', '-1.5 Hour')\"\r\n\r\nWeiter sind die 100 Queries gezählt innerhalb von (Minuten / 10) Blöcken und nicht für jeden Eintrag zurück geschaut ob es jetzt für die letzten 10 Minuten > 100 Queries sind."
    assign where "ad-eventlog" in host.vars.custom_tags
    vars.criticality = "B"
    // Column list in the same order as the CSV header.
    vars.csv_values_windows_columns_query = "LDAPServer TEXT,TimeGenerated DATE,ClientIP TEXT,ClientPort INTEGER,StartingNode TEXT,Filter TEXT,SearchScope TEXT,AttributeSelection TEXT,ServerControls TEXT,VisitedEntries INTEGER,ReturnedEntries INTEGER,UsedIndexes TEXT,PagesReferenced TEXT,PagesReadFromDisk INTEGER,PagesPreReadFromDisk INTEGER,CleanPagesModified INTEGER,DirtyPagesModified INTEGER,SearchTimeMS INTEGER,AttributesPreventingOptimization TEXT"
    vars.csv_values_windows_filename = "D:\\ADEventLogs\\1644-ADHOSTNAME01.csv"
    vars.csv_values_windows_warning = "0"
    // The time limit belongs in WHERE so it is applied to each row before grouping.
    // In HAVING it was tested against an arbitrary row of each group, and the counts
    // covered every entry in the file.
    vars.csv_values_windows_warning_query = "select ClientIP, strftime('%H', TimeGenerated) as 'Hour (UTC)', (strftime('%M',TimeGenerated)/10) as 'TimeSlot (10min)', count(*) as Queries from data where datetime(TimeGenerated) >= datetime('now', '-1.5 Hour') group by ClientIP, strftime('%H', TimeGenerated), (strftime('%M', TimeGenerated) / 10) having Queries > 100 order by Queries DESC"
    vars.teams = [ "Server_Admin" ]
    zone = "master"
    import DirectorOverrideTemplate
}
// Applied to every host tagged "ad-eventlog". Runs the csv-values check against the
// CSV exported for ADHOSTNAME01. The warning query counts the entries of the last
// 1.5 hours; the warning threshold is 3500.
apply Service "CSV Values - LDAP Query Time - ADHOSTNAME01 - Queries pro Stunde > 3500" {
    import "tpl-service-csv-values-windows"
    max_check_attempts = "1"
    check_interval = 70m
    retry_interval = 70m
    check_timeout = 30s
    notes = "Wenn innerhalb 1h mehr als 3500 Abfragen auf einem DC stattfinden / Name DC und Anzahl Abfragen.\r\n\r\nLimitierungen:\r\nMeldungen kommen ca. 30min (Verarbeitungszeit ) bis 1h30min (ältester berücksichtigter Eintrag) verzögert. Dies ist bedingt durch Lieferung der Events um Minute 59 in Blöcken a 50MB, Job started Minute 00 und die Verarbeitung dauert ca. 30min.\r\nDeshalb \"datetime(TimeGenerated) >= datetime('now', '-1.5 Hour')\""
    assign where "ad-eventlog" in host.vars.custom_tags
    vars.criticality = "B"
    // Column list in the same order as the CSV header.
    vars.csv_values_windows_columns_query = "LDAPServer TEXT,TimeGenerated DATE,ClientIP TEXT,ClientPort INTEGER,StartingNode TEXT,Filter TEXT,SearchScope TEXT,AttributeSelection TEXT,ServerControls TEXT,VisitedEntries INTEGER,ReturnedEntries INTEGER,UsedIndexes TEXT,PagesReferenced TEXT,PagesReadFromDisk INTEGER,PagesPreReadFromDisk INTEGER,CleanPagesModified INTEGER,DirtyPagesModified INTEGER,SearchTimeMS INTEGER,AttributesPreventingOptimization TEXT"
    vars.csv_values_windows_filename = "D:\\ADEventLogs\\1644-ADHOSTNAME01.csv"
    vars.csv_values_windows_warning = "3500"
    // WHERE instead of HAVING: the rows are filtered before they are counted.
    // HAVING without GROUP BY is rejected by older SQLite versions, and where it is
    // accepted the count covers every entry in the file.
    vars.csv_values_windows_warning_query = "select count(*) as Queries from data where datetime(TimeGenerated) >= datetime('now', '-1.5 Hour')"
    vars.teams = [ "Server_Admin" ]
    zone = "master"
    import DirectorOverrideTemplate
}
// Applied to every host tagged "ad-eventlog". Runs the csv-values check against the
// CSV exported for ADHOSTNAME01. The warning query returns the full record of every
// entry from the last 1.5 hours with more than 100000 visited entries, highest first.
apply Service "CSV Values - LDAP Query Time - ADHOSTNAME01 - Queries > 100000 visited entries" {
import "tpl-service-csv-values-windows"
max_check_attempts = "1"
check_interval = 70m
retry_interval = 70m
check_timeout = 30s
notes = "Wenn Visited entries höher Anzahl 100000 ist / Info Mail. ganzer Inhalt des Eventlog Eintragens.\r\n\r\nLimitierungen:\r\nMeldungen kommen ca. 30min (Verarbeitungszeit ) bis 1h30min (ältester berücksichtigter Eintrag) verzögert. Dies ist bedingt durch Lieferung der Events um Minute 59 in Blöcken a 50MB, Job started Minute 00 und die Verarbeitung dauert ca. 30min.\r\nDeshalb \"datetime(TimeGenerated) >= datetime('now', '-1.5 Hour')\""
assign where "ad-eventlog" in host.vars.custom_tags
vars.criticality = "B"
// Column names and types for the CSV fields, listed in CSV column order.
vars.csv_values_windows_columns_query = "LDAPServer TEXT,TimeGenerated DATE,ClientIP TEXT,ClientPort INTEGER,StartingNode TEXT,Filter TEXT,SearchScope TEXT,AttributeSelection TEXT,ServerControls TEXT,VisitedEntries INTEGER,ReturnedEntries INTEGER,UsedIndexes TEXT,PagesReferenced TEXT,PagesReadFromDisk INTEGER,PagesPreReadFromDisk INTEGER,CleanPagesModified INTEGER,DirtyPagesModified INTEGER,SearchTimeMS INTEGER,AttributesPreventingOptimization TEXT"
vars.csv_values_windows_filename = "D:\\ADEventLogs\\1644-ADHOSTNAME01.csv"
// Threshold 0 — presumably any returned row triggers a warning; verify against the csv-values plugin documentation.
vars.csv_values_windows_warning = "0"
vars.csv_values_windows_warning_query = "select * from data where datetime(TimeGenerated) >= datetime('now', '-1.5 Hour') and VisitedEntries > 100000 order by VisitedEntries DESC"
vars.teams = [ "Server_Admin" ]
zone = "master"
// Imported last; presumably so per-host Director overrides take precedence — confirm.
import DirectorOverrideTemplate
}
// Applied to every host tagged "ad-eventlog". Runs the csv-values check against the
// CSV exported for ADHOSTNAME01. The warning query returns the full record of every
// entry from the last 1.5 hours whose SearchTimeMS exceeds 1000, slowest first.
apply Service "CSV Values - LDAP Query Time - ADHOSTNAME01 - Queries > 1000ms" {
import "tpl-service-csv-values-windows"
max_check_attempts = "1"
check_interval = 70m
retry_interval = 70m
check_timeout = 30s
notes = "Wenn eine Abfrage mehr als 1000 ms benötigt / Info Mail. ganzer Inhalt des Eventlog Eintrages.\r\n\r\nLimitierungen:\r\nMeldungen kommen ca. 30min (Verarbeitungszeit ) bis 1h30min (ältester berücksichtigter Eintrag) verzögert. Dies ist bedingt durch Lieferung der Events um Minute 59 in Blöcken a 50MB, Job started Minute 00 und die Verarbeitung dauert ca. 30min.\r\nDeshalb \"datetime(TimeGenerated) >= datetime('now', '-1.5 Hour')\""
assign where "ad-eventlog" in host.vars.custom_tags
vars.criticality = "B"
// Column names and types for the CSV fields, listed in CSV column order.
// NOTE(review): TimeGenerated is declared TEXT here, while the other services of this
// set declare it DATE — confirm which one is intended and align them.
vars.csv_values_windows_columns_query = "LDAPServer TEXT,TimeGenerated TEXT,ClientIP TEXT,ClientPort INTEGER,StartingNode TEXT,Filter TEXT,SearchScope TEXT,AttributeSelection TEXT,ServerControls TEXT,VisitedEntries INTEGER,ReturnedEntries INTEGER,UsedIndexes TEXT,PagesReferenced TEXT,PagesReadFromDisk INTEGER,PagesPreReadFromDisk INTEGER,CleanPagesModified INTEGER,DirtyPagesModified INTEGER,SearchTimeMS INTEGER,AttributesPreventingOptimization TEXT"
vars.csv_values_windows_filename = "D:\\ADEventLogs\\1644-ADHOSTNAME01.csv"
// Threshold 0 — presumably any returned row triggers a warning; verify against the csv-values plugin documentation.
vars.csv_values_windows_warning = "0"
vars.csv_values_windows_warning_query = "select * from data where datetime(TimeGenerated) >= datetime('now', '-1.5 Hour') and SearchTimeMS >1000 order by SearchTimeMS DESC"
vars.teams = [ "Server_Admin" ]
zone = "master"
// Imported last; presumably so per-host Director overrides take precedence — confirm.
import DirectorOverrideTemplate
}
Repeat and adapt above services for additional AD controllers.
I wrote a second script to trigger the checks at a precise time via the Icinga2 API’s actions/reschedule-check endpoint
and scheduled that at minute 30, as converting to CSV takes about 6-8min per *.evtx file. Because of this and the hourly drops, I limited the queries with having datetime(TimeGenerated) >= datetime('now', '-1.5 Hour') or where datetime(TimeGenerated) >= datetime('now', '-1.5 Hour').
Function scheduleIcingaCheck ($service)
{
    # Asks the Icinga2 API to reschedule the check of a single service.
    #   $service - service object name; the caller in this script passes '<host>!<service name>'.
    # Writes the request body to the host and emits whatever Invoke-RestMethod returns.
    # https://icinga.com/docs/icinga-2/latest/doc/12-icinga2-api/#reschedule-check
    $headers = @{
        'Accept'        = 'application/json'   # stray leading space removed from the value
        'Authorization' = 'Basic AUTH'         # placeholder credentials
    }
    # Content-Type is set once, through -ContentType below, instead of twice.
    # ConvertTo-Json escapes quotes and backslashes in the service name, which the
    # former string concatenation did not.
    $Body = [ordered]@{
        type    = 'Service'
        service = $service
        pretty  = $true
    } | ConvertTo-Json
    Write-Host($Body)
    Invoke-RestMethod -Uri 'https://icinga.example.com:5665/v1/actions/reschedule-check' -Method POST -Headers $headers -ContentType 'application/json' -Body $Body
}
#----Main---------
# Requests an immediate re-check of the four LDAP query services for every *.evtx
# file found in $mcEventPath. The first 12 characters of the file's base name,
# upper-cased, are used as the AD controller name inside the service name.
$PSDefaultParameterValues['Out-File:Encoding'] = 'utf8'
cls
Write-Host "Event1644ReaderIcinga: https://community.icinga.com/t/monitor-ldap-queries-on-active-directory-controllers/13066"
Write-Host "Started at $(Get-Date)"
$mcScriptPath = Split-Path ((Get-Variable MyInvocation -Scope 0).Value).MyCommand.Path # not referenced below
$mcEventPath = 'D:\ADEventLogs'
# These values are the same for every file, so they are built once before the loop.
$hostname = 'windows-with-icinga-agent.example.com'
# - more than 100 queries from one IP within 10 min / info mail with IP address and query count
# - a query takes longer than 1000 ms / info mail with the full event log entry
# - more than 3500 queries on one DC within 1 h / DC name and query count
# - visited entries above 100000 / info mail with the full event log entry
$service_prefix = $hostname + '!' + 'CSV Values - LDAP Query Time - '
$checks = @('Queries per IP > 100 pro 600s','Queries > 1000ms','Queries pro Stunde > 3500', 'Queries > 100000 visited entries')
# Request Check via REST-API
Get-ChildItem -Path $mcEventPath | Where-Object {$_.Extension -eq '.evtx'} | ForEach-Object { #Loop through *.evtx
    # Use at most 12 characters; shorter base names no longer make Substring throw.
    $mcBaseName = $_.BaseName
    $ad_hostname = $mcBaseName.Substring(0, [Math]::Min(12, $mcBaseName.Length))
    ForEach ($check in $checks) {
        $service = $service_prefix + $ad_hostname.ToUpper() + ' - ' + $check
        Write-Host (' Requesting Icinga Check: ' + $service)
        scheduleIcingaCheck($service)
    }
}
# Double quotes so that $(Get-Date) is expanded instead of printed literally.
Write-Host "Script completed. $(Get-Date)"
Thanks again @linuxfabrik for writing the csv-values check the way you did and not what I originally requested.