Meta-Check: Fail if all other services fail

We have several cloud servers running which get dynamically started or stopped on demand to save money. Thus it’s normal that some servers are down.

Is it possible to check something like “check all those services, be OK if OK_SERVICES_COUNT > X”?
Is there a check that evaluates the results of a set of other checks?

Hi & welcome to the icinga community,

You can create a check as described here or use check_cluster.

1 Like

Perfect. Works for hosts and services. Thank you.

If someone is interested, this is my solution to monitor a hostgroup of dynamic servers:

Host:

object Host "fis-bamboo-agent-gitc-linux-cluster" {
  /*
   * Cluster host that aggregates the state of the other hosts in the
   * "bamboo-agents" hostgroup. It uses the "dummy" check command, whose
   * state and output are supplied by vars.dummy_state and vars.dummy_text
   * below (see the ITL documentation of the "dummy" CheckCommand).
   */
  check_command = "dummy"

  // Resulting state: 0 while at least one other member of "bamboo-agents"
  // has state 0 (UP), otherwise 2.
  vars.dummy_state = {{
    // Select all UP hosts in the "bamboo-agents" hostgroup except this one.
    // The names are compared with != rather than match(), because match()
    // interprets its first argument as a wildcard pattern.
    var up_nodes_filter = function(node) use(host) {
      "bamboo-agents" in node.groups && node.name != host.name && node.state == 0
    }
    var up_nodes = get_objects(Host).filter(up_nodes_filter)

    if (len(up_nodes) >= 1) {
      return 0
    } else {
      return 2
    }
  }}

  // Resulting output: a summary line "Cluster (<up>/<total> up):" followed
  // by one line per member host with the output of its last check.
  vars.dummy_text = {{
    // Select all hosts in the "bamboo-agents" hostgroup except this one.
    var nodes_filter = function(node) use(host) {
      "bamboo-agents" in node.groups && node.name != host.name
    }
    var nodes = get_objects(Host).filter(nodes_filter)
    var up_nodes = nodes.filter(node => node.state == 0)

    var output = "Cluster (" + len(up_nodes) + "/" + len(nodes) + " up):\n"
    for (node in nodes) {
      output += node.name + ": "
      // A host that has not been checked yet has no check result; print an
      // explicit placeholder instead of reading a field of a null value.
      if (node.last_check_result) {
        output += node.last_check_result.output
      } else {
        output += "(no check result yet)"
      }
      output += "\n"
    }

    return output
  }}
}

Services:

/*
 * Template for "cluster" services that aggregate one service across the
 * hosts of the "bamboo-agents" hostgroup. The name of the aggregated service
 * is this service's name with "-cluster" removed, e.g. a service
 * "memory-gitc-cluster" aggregates the "memory-gitc" services.
 * NOTE(review): this relies on macro("$name$") resolving to the name of the
 * service that imports the template - confirm against the macro documentation.
 */
template Service "template-service-cluster" {
  check_command = "dummy"

  // Resulting state: 0 while at least one other host in "bamboo-agents" has
  // the aggregated service in state 0 (OK), otherwise 2.
  vars.dummy_state = {{
    var service_name = macro("$name$").replace("-cluster", "")

    // Select all hosts in the "bamboo-agents" hostgroup, except the host this
    // service belongs to, on which the aggregated service is in state 0.
    var up_services_filter = function(node) use(host, service_name) {
      // Names are compared directly; match() would treat the host name as a
      // wildcard pattern.
      if (!("bamboo-agents" in node.groups) || node.name == host.name) {
        return false
      }

      // A host that does not have the service must not be counted as OK.
      var member_service = get_service(node.name, service_name)
      if (!member_service) {
        return false
      }

      return member_service.state == 0
    }
    var up_nodes = get_objects(Host).filter(up_services_filter)

    if (len(up_nodes) >= 1) {
      return 0
    } else {
      return 2
    }
  }}

  // Resulting output: a summary line "Cluster (<ok>/<total> up):" followed by
  // one line per member host with the last output of the aggregated service.
  vars.dummy_text = {{
    var service_name = macro("$name$").replace("-cluster", "")

    // Select all hosts in the "bamboo-agents" hostgroup, except the host this
    // service belongs to.
    var services_filter = function(node) use(host) {
      "bamboo-agents" in node.groups && node.name != host.name
    }

    // True when the aggregated service exists on the host and is in state 0.
    var up_service_filter = function(node) use(service_name) {
      var member_service = get_service(node.name, service_name)
      if (!member_service) {
        return false
      }

      return member_service.state == 0
    }

    // Text shown for one member host; falls back to a placeholder when the
    // service is missing or has not produced a check result yet.
    var describe_service = function(node) use(service_name) {
      var member_service = get_service(node.name, service_name)
      if (!member_service) {
        return "(service not found)"
      }
      if (!member_service.last_check_result) {
        return "(no check result yet)"
      }

      return member_service.last_check_result.output
    }

    var nodes = get_objects(Host).filter(services_filter)
    var up_nodes = nodes.filter(up_service_filter)

    var output = "Cluster (" + len(up_nodes) + "/" + len(nodes) + " up):\n"
    for (node in nodes) {
      output += node.name + ": " + describe_service(node) + "\n"
    }

    return output
  }}
}

// Cluster service for the process check. The imported template strips
// "-cluster" from the service name, so this presumably aggregates the
// "procs-bamboo-agent-gitc" services of the member hosts.
apply Service "procs-bamboo-agent-gitc-cluster" {
  import "template-service-cluster"

  // Attach only to the cluster host object.
  assign where host.name == "fis-bamboo-agent-gitc-linux-cluster"
}

// Cluster service for the disk space check. The imported template strips
// "-cluster" from the service name, so this presumably aggregates the
// "disk-space-gitc" services of the member hosts.
apply Service "disk-space-gitc-cluster" {
  import "template-service-cluster"

  // Attach only to the cluster host object.
  assign where host.name == "fis-bamboo-agent-gitc-linux-cluster"
}

// Cluster service for the memory check. The imported template strips
// "-cluster" from the service name, so this presumably aggregates the
// "memory-gitc" services of the member hosts.
apply Service "memory-gitc-cluster" {
  import "template-service-cluster"

  // Attach only to the cluster host object.
  assign where host.name == "fis-bamboo-agent-gitc-linux-cluster"
}