I don’t know check_cluster, but I did something similar with the Icinga 2 DSL.
This checks whether more than half of the services with the same name across all nodes of a cluster host are OK (exactly half gives WARNING, fewer than half gives CRITICAL):
object CheckCommand "116-cmd-more-then-halve" {
import "plugin-check-command"
command = [ "/usr/lib64/nagios/plugins/dummy" ]
timeout = 10s
arguments += {
"--message" = {
required = false
value = {{
var output_status = ""
var up_count = 0
var down_count = 0
var cluster_nodes = macro("$116_cluster_nodes$")
var more_then_halve_service_name = macro("$116-cluster-more-then-halve-service$")
for (node in cluster_nodes) {
if (get_service(node, more_then_halve_service_name).state > 0) {
down_count += 1
} else {
up_count += 1
}
}
if (up_count > down_count) {
output_status = "OK: "
}
if (up_count == down_count) {
output_status = "WARNING: "
}
if (up_count < down_count) {
output_status = "CRITICAL: "
}
var output = output_status
for (node in cluster_nodes) {
output += node + ": " + more_then_halve_service_name + ": " + get_service(node, more_then_halve_service_name).last_check_result.output + " "
}
output += " | count_of_alive_" + more_then_halve_service_name +"="+up_count+";" + string((up_count + down_count) / 2 + 1) + ":;" + string((up_count + down_count) / 2 ) + ":;0;" + string(up_count + down_count)
log(output)
return output
}}
}
"--state" = {{
/* Icinga 2 does not export DSL function bodies via API */
}}
}
}
The dummy check comes from Linuxfabrik’s monitoring-plugins and simply reports back whatever you send it. It was necessary because of a limitation in the Director.
It looks like this in the director:
The same applies to the host check: