Calculating a state over multiple services

Hi,

Just got this

Calculating a state over multiple services (icinga.com)

I tried the example “as is” to understand it a little better, and got this error:

Exception occurred while checking ‘combined-host!combined-http’: Error: Argument is not a callable object.

Would appreciate a little bit of help to debug the example…

Hello @10RUPTiV!

For @nhilverling it worked so your config must be different. Please share it.

Best,
A/K

Like I said, I use the example as is!

// Create one dummy Host object named "http-host-<id>" for every id yielded by range(20).
for (id in range(20)) {
    object Host "http-host-" + id {
        check_command = "dummy"
        vars.dummy_state = 0
        // Evaluated by the `assign where` rule of the "http" apply rule
        vars.check_http = true

        // Needed for our combined service
        vars.http_cluster = "http-cluster-1"
    }
}

// Attach an "http" service, using the "random" check command, to every host
// whose vars.check_http evaluates to true.
apply Service "http" {
    check_command = "random"
    assign where host.vars.check_http
}

// Host that the aggregated "combined-http" service is attached to (via its host_name).
object Host "combined-host" {
   check_command = "dummy"
   vars.dummy_state = 0
}

// Map a numeric service state (0-3) to its name. Any other value falls through
// every branch, so nothing is explicitly returned for it.
// NOTE(review): this function is called from the dummy_text function of the
// "combined-http" service when the check executes — presumably it has to be
// reachable from that scope (e.g. registered in `globals`); confirm.
function stateToString(state) {
    if (state == 0) {
        return "OK"
    } else if (state == 1) {
        return "WARNING"
    } else if (state == 2) {
        return "CRITICAL"
    } else if (state == 3) {
        return "UNKNOWN"
    }
}

// Collect all services whose host has vars.http_cluster equal to `cluster`.
// Returns a dictionary with:
//   count         - number of matching services
//   serviceStates - per state ("0".."3") an array of "host!service" names
// NOTE(review): like stateToString, this is called from functions that run at
// check time — presumably it has to be reachable from that scope; confirm.
function getServiceStatesByHttpCluster(cluster) {
    // Prepare a dictionary for counting every service in our cluster and sorting them by state
    var services = {
        count = 0
        serviceStates = {
            "0" = [],
            "1" = [],
            "2" = [],
            "3" = [],
        }
    }

    // Iterate over every service object
    for (var service in (get_objects(Service))) {
        // Check if the http_cluster of the service's host matches our cluster
        if (service.host.vars.http_cluster == cluster) {
            // Increase our service count by one
            services.count += 1

            // Get the service's current state
            // NOTE(review): last_check_result is presumably null for a service that has not
            // been checked yet, which would leave `state` without a usable key — confirm.
            var state = service.last_check_result.state

            // Add the full service name ("host!service") to corresponding array in services.serviceStates
            services.serviceStates[state].add(service.host.name + "!" + service.name)
        }
    }

    // Return our "services" dictionary
    return services
}

// Aggregated service on "combined-host": its state and output are computed at check
// time from the states of all services belonging to vars.http_cluster.
object Service "combined-http" {
    check_command = "dummy"
    check_interval = 1m
    retry_interval = 30s
    host_name = "combined-host"

    // The http cluster we want to have combined states from (we've also set this variable on our services)
    vars.http_cluster = "http-cluster-1"
    // The minimum ratio of services that have to be in state OK (0.5 means at least 50% need to be OK)
    vars.ok_min_ratio = 0.5

    // Store our current service object in variable to use it in function scope below
    var service = this

    // Functions stored in the variables dummy_state and dummy_text are evaluated on every execute of the check.
    // dummy_state returns 2 (CRITICAL) when the OK ratio is below vars.ok_min_ratio, 0 (OK) otherwise.
    vars.dummy_state = function() use (service) {
        // Get our services dictionary by calling our previously defined function
        var services = getServiceStatesByHttpCluster(service.vars.http_cluster)

        // Calculate the ratio of services with state OK compared to the total amount of services
        // NOTE(review): `states` is not defined anywhere in this function; the dictionary
        // fetched above is bound to `services`.
        var ratio = services.serviceStates[0].len() / states.count

        // If the ratio is less than what we defined as our minimum, return CRITICAL as state, OK otherwise
        if (ratio < service.vars.ok_min_ratio) {
            return 2
        } else {
            return 0
        }
    }
    // dummy_text builds the plugin output: a summary line followed by the service names grouped by state.
    vars.dummy_text = function() use (service) {
        // Get our services dictionary by calling our previously defined function
        var services = getServiceStatesByHttpCluster(service.vars.http_cluster)

        // Calculate the ratio of services with state OK compared to the total amount of services
        // NOTE(review): `states` is not defined in this function either (see `services` above).
        var ratio = services.serviceStates[0].len() / states.count

        // Define an empty string variable which will later contain our status output
        var text = ""

        // If the ratio is less than what we defined as our minimum, add "CRITICAL: " to our output, "OK: " otherwise
        if (ratio < service.vars.ok_min_ratio) {
            text = "CRITICAL: "
        } else {
            text = "OK: "
        }

        // Add the amount of services in state OK and the total amount of services to our output (e.g. "5/20")
        text += services.serviceStates[0].len() + "/" + states.count + " OK\n"

        // Iterate over all state types and print the services with those states
        // NOTE(review): if range(0, 3) stops before 3, services in state 3 (UNKNOWN)
        // are never listed — confirm against the range() documentation.
        for (state in range(0, 3)) {
            // Check if we even have services with this state
            if (services.serviceStates[state].len() > 0) {
                // Add the state name to our output
                text += stateToString(state) + ":\n"

                // Iterate over all the services with this state and output their name
                for (serviceName in services.serviceStates[state]) {
                    text += serviceName + "\n"
                }
                text += "\n"
            }
        }

        // Return the final output
        return text
    }
}

At the beginning, I tried using it with our own values and got this error, so I decided to try the exact example to make sure it works, and got the same error too!

service.last_check_result (and so service.last_check_result.state) may be null if pending, so you need to append ||0 to that line to ensure the following add is a callable object.

Also I’d substitute function getServiceStatesByHttpCluster(cluster) with globals.getServiceStatesByHttpCluster = function(cluster).

1 Like

@Al2Klimov
it’s a little bit better now, but got a new error!


Exception occurred while checking 'combined-host!combined-http': Error: Error while evaluating expression: Tried to access undefined script variable 'states'
Location: in /var/lib/icinga2/api/zones/zone1/_etc/testcluster.conf: 86:55-86:60
/var/lib/icinga2/api/zones/zone1/_etc/testcluster.conf(84): 
/var/lib/icinga2/api/zones/zone1/_etc/testcluster.conf(85):         // Calculate the ratio of services with state OK compared to the total amount of services
/var/lib/icinga2/api/zones/zone1/_etc/testcluster.conf(86):         var ratio = services.serviceStates[0].len() / states.count
                                                                                                                                        ^^^^^^
/var/lib/icinga2/api/zones/zone1/_etc/testcluster.conf(87): 
/var/lib/icinga2/api/zones/zone1/_etc/testcluster.conf(88):         // If the ratio is less then what we defined as our minimum, return CRITICAL as state, OK otherwise

	(0) Resolving macros for string '$dummy_state$'
	(1) Executing check for object 'combined-host!combined-http'

That variable should be “services”.

Got another error after that…

@nhilverling is it possible to validate the example please ?

Noah doesn’t work for Icinga anymore, but we’ll have a look whether we can fix the blogpost :slight_smile:

1 Like

And which one exactly?

1 Like

With all the changes, I got

Exception occurred while checking ‘combined-host!combined-http’: Error: Argument is not a callable object.

testcluster.conf(119): text += stateToString(state) + ":

And the last version of the code is:

// Generate the example HTTP hosts: one "http-host-<n>" object per value of range(20).
for (hostId in range(20)) {
    object Host "http-host-" + hostId {
        // Needed for our combined service
        vars.http_cluster = "http-cluster-1"

        // Lets the "http" apply rule pick this host up
        vars.check_http = true

        vars.dummy_state = 0
        check_command = "dummy"
    }
}

// Every host with a true vars.check_http gets an "http" service whose
// result comes from the "random" check command.
apply Service "http" {
    assign where host.vars.check_http

    check_command = "random"
}

// Host that the aggregated "combined-http" service is attached to.
object Host "combined-host" {
    vars.dummy_state = 0
    check_command = "dummy"
}

// Translate a numeric service state into its name (0 = OK, 1 = WARNING,
// 2 = CRITICAL, 3 = UNKNOWN). For any other value no branch matches and
// nothing is explicitly returned.
// NOTE(review): this is called from the dummy_text function of "combined-http"
// at check time — presumably it must be reachable from that scope; confirm.
function stateToString(state) {
    if (state == 0) {
        return "OK"
    }

    if (state == 1) {
        return "WARNING"
    }

    if (state == 2) {
        return "CRITICAL"
    }

    if (state == 3) {
        return "UNKNOWN"
    }
}

// Summarise all services whose host has vars.http_cluster equal to `cluster`.
// Returns a dictionary with:
//   count         - number of matching services
//   serviceStates - per state ("0".."3") an array of "host!service" names
globals.getServiceStatesByHttpCluster = function(cluster) {
    // Result skeleton: a counter plus one (initially empty) name list per state
    var summary = {
        count = 0
        serviceStates = {
            "0" = [],
            "1" = [],
            "2" = [],
            "3" = [],
        }
    }

    for (var svc in get_objects(Service)) {
        // Skip every service whose host is not part of the requested cluster
        if (svc.host.vars.http_cluster != cluster) {
            continue
        }

        summary.count += 1

        // Falls back to 0 when the service yields no (truthy) state value
        var stateKey = svc.last_check_result.state || 0

        // Record the full service name ("host!service") under its state
        var fullName = svc.host.name + "!" + svc.name
        summary.serviceStates[stateKey].add(fullName)
    }

    return summary
}

// Aggregated service on "combined-host": its state and output are computed at check
// time from the states of all services belonging to vars.http_cluster.
//   - OK       when at least vars.ok_min_ratio of the cluster's services are OK
//   - CRITICAL when fewer are OK
//   - UNKNOWN  when no service belongs to the cluster at all
object Service "combined-http" {
    check_command = "dummy"
    check_interval = 1m
    retry_interval = 30s
    host_name = "combined-host"

    // The http cluster we want to have combined states from (we've also set this variable on our services)
    vars.http_cluster = "http-cluster-1"
    // The minimum ratio of services that have to be in state OK (0.5 means at least 50% need to be OK)
    vars.ok_min_ratio = 0.5

    // Store our current service object in variable to use it in function scope below
    var service = this

    // Functions stored in the variables dummy_state and dummy_text are evaluated on every execute of the check.
    // dummy_state returns the numeric state: 0 (OK), 2 (CRITICAL) or 3 (UNKNOWN).
    vars.dummy_state = function() use (service) {
        // Get our services dictionary by calling our previously defined function
        var services = getServiceStatesByHttpCluster(service.vars.http_cluster)

        // Without any matching service the ratio below would divide by zero,
        // so report UNKNOWN instead of failing with an exception
        if (services.count == 0) {
            return 3
        }

        // Calculate the ratio of services with state OK compared to the total amount of services
        var ratio = services.serviceStates[0].len() / services.count

        // If the ratio is less than what we defined as our minimum, return CRITICAL as state, OK otherwise
        if (ratio < service.vars.ok_min_ratio) {
            return 2
        } else {
            return 0
        }
    }
    // dummy_text returns the plugin output: a summary line followed by the
    // service names grouped by state.
    vars.dummy_text = function() use (service) {
        // Get our services dictionary by calling our previously defined function
        var services = getServiceStatesByHttpCluster(service.vars.http_cluster)

        // Same guard as in dummy_state: nothing to divide by, nothing to list
        if (services.count == 0) {
            return "UNKNOWN: No services found for HTTP cluster '" + service.vars.http_cluster + "'"
        }

        // Calculate the ratio of services with state OK compared to the total amount of services
        var ratio = services.serviceStates[0].len() / services.count

        // Define an empty string variable which will later contain our status output
        var text = ""

        // If the ratio is less than what we defined as our minimum, add "CRITICAL: " to our output, "OK: " otherwise
        if (ratio < service.vars.ok_min_ratio) {
            text = "CRITICAL: "
        } else {
            text = "OK: "
        }

        // Add the amount of services in state OK and the total amount of services to our output (e.g. "5/20")
        text += services.serviceStates[0].len() + "/" + services.count + " OK\n"

        // Iterate over all state types and print the services with those states.
        // range() stops before its end value, so 4 is needed to include state 3 (UNKNOWN).
        for (state in range(0, 4)) {
            // Check if we even have services with this state
            if (services.serviceStates[state].len() > 0) {
                // Add the state name to our output
                text += stateToString(state) + ":\n"

                // Iterate over all the services with this state and output their name
                for (serviceName in services.serviceStates[state]) {
                    text += serviceName + "\n"
                }
                text += "\n"
            }
        }

        // Return the final output
        return text
    }
}

1 Like

Looks like you have to do the globals thing also with the stateToString function.

2 Likes

thanks @Al2Klimov
it’s working now!

@Al2Klimov

The function works fine, but right now it always includes ALL services on the hosts that have

vars.http_cluster = “http-cluster-1”

Let’s say that, for an HTTP cluster, the SSH service doesn’t need to be OK on every host for the cluster to count as fully working…

is there an easy way to be able to specify which services NEED to be included instead of using ALL services ?

Consider modifying the check

if (service.host.vars.http_cluster == cluster) {

in getServiceStatesByHttpCluster by adding a condition on service.name.