I use the Linuxfabrik `snmp` check plugin (monitoring-plugins/check-plugins/snmp at main · Linuxfabrik/monitoring-plugins · GitHub) with the following CSV definition files.
acm.csv
OID,Name,Re-Calc,Unit Label,WARN,CRIT,Show in 1st Line,Report Change as
SNMPv2-SMI::enterprises.6889.2.73.8.1.4.6.0,Server1 Duplication State,,,,,,
SNMPv2-SMI::enterprises.6889.2.73.8.1.4.9.0,Server2 Duplication State,,,,,,
SNMPv2-SMI::enterprises.6889.2.73.8.1.4.4.0,Server1 Name,,,,,,
SNMPv2-SMI::enterprises.6889.2.73.8.1.4.7.0,Server2 Name,,,,,,
SNMPv2-SMI::enterprises.6889.2.73.8.1.4.13.0,StandBY Server Refreshed,,,,,,
SNMPv2-SMI::enterprises.6889.2.73.8.1.20.6.0,License Limit,int(value),,,,,
SNMPv2-SMI::enterprises.6889.2.73.8.1.20.4.0,Licenses used,int(value),,,,,
,warnPercent,95,%,,,,
,critPercent,98,%,,,,
,License Usage,"round(values['Licenses used'] * 100.0 / values['License Limit'],1)",%,value > values['warnPercent'],value > values['critPercent'],True,
acm_freshness.csv
OID,Name,Re-Calc,Unit Label,WARN,CRIT,Show in 1st Line,Report Change as
SNMPv2-SMI::enterprises.6889.2.73.8.1.4.13.0,StandBY Server Refreshed,,,,,,WARN
acm_license.csv
OID,Name,Re-Calc,Unit Label,WARN,CRIT,Show in 1st Line,Report Change as
SNMPv2-SMI::enterprises.6889.2.73.8.1.20.6.0,License Limit,int(value),,,,,
SNMPv2-SMI::enterprises.6889.2.73.8.1.20.4.0,Licenses used,int(value),,,,,
,warnPercent,95,%,,,,
,critPercent,98,%,,,,
,License Usage,"round(values['Licenses used'] * 100.0 / values['License Limit'],1)",%,value > values['warnPercent'],value > values['critPercent'],True,
OID,Name,Re-Calc,Unit Label,WARN,CRIT,Show in 1st Line,Report Change as
SNMPv2-SMI::enterprises.6889.2.73.8.1.20.6.0,License Limit,int(value),,,,,
SNMPv2-SMI::enterprises.6889.2.73.8.1.20.4.0,Licenses used,int(value),,,,,
,warnPercent,95,%,,,,
,critPercent,98,%,,,,
,License Usage,"round(values['Licenses used'] * 100.0 / values['License Limit'],1)",%,value > values['warnPercent'],value > values['critPercent'],True,
acm_server.csv
OID,Name,Re-Calc,Unit Label,WARN,CRIT,Show in 1st Line,Report Change as
SNMPv2-SMI::enterprises.6889.2.73.8.1.4.6.0,Server1 Duplication State,,,,,,WARN
SNMPv2-SMI::enterprises.6889.2.73.8.1.4.9.0,Server2 Duplication State,,,,,,WARN
SNMPv2-SMI::enterprises.6889.2.73.8.1.4.4.0,Server1 Name,,,,,,
SNMPv2-SMI::enterprises.6889.2.73.8.1.4.7.0,Server2 Name,,,,,,
sbc_advanced_licence_in_use.csv
OID,Name,Re-Calc,Unit Label,WARN,CRIT,Show in 1st Line,Report Change as
SNMPv2-SMI::enterprises.6889.2.77.11.4.0,SBC Advanced License in Use,,,,,,WARN
sbc_std_licence_in_use.csv
OID,Name,Re-Calc,Unit Label,WARN,CRIT,Show in 1st Line,Report Change as
SNMPv2-SMI::enterprises.6889.2.77.11.2.0,SBC Standard License in Use,int(value),,value > 100,,True,
sbc_total_active_calls.csv
OID,Name,Re-Calc,Unit Label,WARN,CRIT,Show in 1st Line,Report Change as
SNMPv2-SMI::enterprises.6889.2.77.1.3.1.10.0,SBC Total Active Calls,,,,,,WARN
I have no clue how useful this is, as I don’t administer the Avaya system, but this is what they wanted.
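I also used some Icinga 2 DSL magic to aggregate the simple per-node checks across the cluster hosts: the command below reports OK as long as at least half of the cluster nodes report OK for the given service, and CRITICAL otherwise.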
/*
 * CheckCommand "116-cmd-min-halve"
 *
 * Aggregates one service over a list of cluster nodes and feeds the verdict
 * to the "dummy" plugin through its --state and --message arguments.
 *
 * Values read via macro():
 *   $116_cluster_nodes$               array of nodes passed to get_service()
 *   $116-cluster-min-halve-service$   name of the service looked up on each node
 *
 * Verdict:
 *   ok    at least as many nodes have the service in state 0 as not
 *   crit  fewer nodes have the service in state 0 than not
 *   unk   the node list did not resolve to an array
 *
 * A node on which the service does not exist is counted as "down".
 */
object CheckCommand "116-cmd-min-halve" {
  import "plugin-check-command"

  command = [ "/usr/lib64/nagios/plugins/dummy" ]
  timeout = 10s

  arguments += {
    "--message" = {
      required = false
      value = {{
        var cluster_nodes = macro("$116_cluster_nodes$")
        var min_halve_service_name = macro("$116-cluster-min-halve-service$")

        // for-in only accepts an array here; bail out with a readable
        // message instead of a script error when the macro is unset/scalar.
        if (typeof(cluster_nodes) != Array) {
          return "UNKNOWN: 116_cluster_nodes did not resolve to an array"
        }

        var up_count = 0
        var down_count = 0
        var details = ""

        // Single pass: count states and collect the per-node output.
        for (node in cluster_nodes) {
          var svc = get_service(node, min_halve_service_name)
          var svc_output = "service not found"

          if (svc == null) {
            // get_service() yields null for a non-existent service.
            down_count += 1
          } else {
            if (svc.state > 0) {
              down_count += 1
            } else {
              up_count += 1
            }

            // last_check_result is null until the first check has run.
            if (svc.last_check_result != null) {
              svc_output = svc.last_check_result.output
            } else {
              svc_output = "no check result yet"
            }
          }

          details += node + ": " + min_halve_service_name + ": " + svc_output + " "
        }

        var output_status = "CRITICAL: "
        if (up_count >= down_count) {
          output_status = "OK: "
        }

        var total = up_count + down_count

        // The label is single-quoted so that service names containing
        // spaces still form a valid performance data label.
        var perfdata = "'count_of_alive_" + min_halve_service_name + "'=" + up_count + ";" + string(total / 2 + 1) + ":;" + string(total / 2) + ":;0;" + string(total)

        var output = output_status + details + " | " + perfdata
        log(output)
        return output
      }}
    }

    "--state" = {{
      var cluster_nodes = macro("$116_cluster_nodes$")
      var min_halve_service_name = macro("$116-cluster-min-halve-service$")

      // Same guard as in --message: nothing sensible to count.
      if (typeof(cluster_nodes) != Array) {
        return "unk"
      }

      var up_count = 0
      var down_count = 0

      for (node in cluster_nodes) {
        var svc = get_service(node, min_halve_service_name)

        // A missing service counts as down, as does any non-zero state.
        if (svc == null || svc.state > 0) {
          down_count += 1
        } else {
          up_count += 1
        }
      }

      if (up_count >= down_count) {
        return "ok" // at least half of the nodes are up -> OK
      } else {
        return "crit" // fewer nodes up than down -> Critical
      }
    }}
  }
}