No check results from hosts in new zone

I have created a second zone with 2 satellites.
Hosts where I set the new zone as “Cluster Zone” stay in pending state and “Check Source” is empty.

However, “icinga2 object list --type=Host” on the satellites does display the hosts.

The log on the satellites:

[2019-08-26 17:34:17 +0200] information/ApiListener: New client connection for identity 'master.lan' from [10.0.0.5]:43432
[2019-08-26 17:34:17 +0200] information/ApiListener: Requesting new certificate for this Icinga instance from endpoint 'master.lan'.
[2019-08-26 17:34:17 +0200] information/ApiListener: Sending config updates for endpoint 'master.lan' in zone 'master'.
[2019-08-26 17:34:17 +0200] information/ApiListener: Finished sending config file updates for endpoint 'master.lan' in zone 'master'.
[2019-08-26 17:34:17 +0200] information/ApiListener: Syncing runtime objects to endpoint 'master.lan'.
[2019-08-26 17:34:17 +0200] information/ApiListener: Finished syncing runtime objects to endpoint 'master.lan'.
[2019-08-26 17:34:17 +0200] information/ApiListener: Finished sending runtime config updates for endpoint 'master.lan' in zone 'master'.
[2019-08-26 17:34:17 +0200] information/ApiListener: Sending replay log for endpoint 'master.lan' in zone 'master'.
[2019-08-26 17:34:17 +0200] information/ApiListener: Finished sending replay log for endpoint 'master.lan' in zone 'master'.
[2019-08-26 17:34:17 +0200] information/ApiListener: Finished syncing endpoint 'master.lan' in zone 'master'.
[2019-08-26 17:34:17 +0200] information/ApiListener: Applying config update from endpoint 'master.lan' of zone 'master'.
[2019-08-26 17:34:17 +0200] information/ApiListener: Updating configuration file: /var/lib/icinga2/api/zones/director-global//.timestamp
[2019-08-26 17:34:17 +0200] information/ApiListener: Applying configuration file update for path '/var/lib/icinga2/api/zones/director-global' (17 Bytes). Received timestamp '2019-08-26 17:34:17 +0200' (1566833657.300291), Current timestamp '2019-08-26 17:30:24 +0200' (1566833424.234067).
[2019-08-26 17:34:17 +0200] information/ApiListener: Updating configuration file: /var/lib/icinga2/api/zones/management//.timestamp
[2019-08-26 17:34:17 +0200] information/ApiListener: Applying configuration file update for path '/var/lib/icinga2/api/zones/management' (17 Bytes). Received timestamp '2019-08-26 17:34:17 +0200' (1566833657.302284), Current timestamp '2019-08-26 17:30:24 +0200' (1566833424.236123).
[2019-08-26 17:34:33 +0200] information/RemoteCheckQueue: items: 0, rate: 0/s (6/min 30/5min 90/15min);
[2019-08-26 17:34:43 +0200] information/RemoteCheckQueue: items: 0, rate: 0/s (6/min 30/5min 90/15min);
[2019-08-26 17:34:53 +0200] information/RemoteCheckQueue: items: 0, rate: 0/s (18/min 90/5min 270/15min);

Can you share a host definition?

Can you also share your zones.conf and constants.conf for both the master and the satellites?

1 Like

Host definition (from Director (show resolved)):

object Host "sw1.lan" {
    address = "10.0.1.6"
    check_command = "hostalive"
    max_check_attempts = "3"
    check_interval = 1m
    retry_interval = 30s
    zone = "management"
}

master constants.conf:

const PluginDir = "/usr/lib/nagios/plugins"
const ManubulonPluginDir = "/usr/lib/nagios/plugins"
const PluginContribDir = "/usr/lib/nagios/plugins"
const NodeName = "master.lan"
const ZoneName = "master.lan"
const TicketSalt = "<salt>"

master zones.conf 1 (/etc/icinga2/zones.conf):

object Endpoint "master.lan" {
}

object Zone "master" {
        endpoints = [ "master.lan" ]
}

object Zone "global-templates" {
        global = true
}

object Zone "director-global" {
        global = true
}

master zones.conf 2 (/var/lib/icinga2/api/zones/master/director/zones.conf):

object Zone "production" {
    parent = "master"
    endpoints = [
        "icinga-prod-sat1.lan",
        "icinga-prod-sat2.lan"
    ]
}

object Zone "management" {
    parent = "master"
    endpoints = [
        "icinga-mgmt-sat1.lan",
        "icinga-mgmt-sat2.lan"
    ]
}

constants.conf on management zone satellite (same on all satellites):

const PluginDir = "/usr/lib/nagios/plugins"
const ManubulonPluginDir = "/usr/lib/nagios/plugins"
const PluginContribDir = "/usr/lib/nagios/plugins"
//const NodeName = "localhost"
const ZoneName = NodeName
const TicketSalt = ""

zones.conf on management zone satellite (/etc/icinga2/zones.conf):

# satelite
object Endpoint "icinga-mgmt-sat1.lan" {
  log_duration = 0
}

object Endpoint "icinga-mgmt-sat2.lan" {
  host = "10.0.1.52"
}

object Zone "management" {
  endpoints = [ "icinga-mgmt-sat1.lan", "icinga-mgmt-sat2.lan" ]
  parent = "master"
}

# master
object Endpoint "master.lan" {
  host = "10.0.0.5"
  log_duration = 0
}

object Zone "master" {
  endpoints = [ "master.lan" ]
}

# other
object Zone "global-templates" {
  global = true
}

object Zone "director-global" {
  global = true
}

host object on management zone satellite (icinga2 object list --type=Host):

Object 'sw1.lan' of type 'Host':
  % declared in '/var/lib/icinga2/api/zones/management/director/hosts.conf', lines 16:1-16:36
  * __name = "sw1.lan"
  * action_url = ""
  * address = "10.0.1.6"
    % = modified in '/var/lib/icinga2/api/zones/management/director/hosts.conf', lines 19:5-19:27
  * address6 = ""
  * check_command = "hostalive"
    % = modified in '/var/lib/icinga2/api/zones/director-global/director/host_templates.conf', lines 10:5-10:31
    % = modified in '/var/lib/icinga2/api/zones/director-global/director/host_templates.conf', lines 29:5-29:31
  * check_interval = 60
    % = modified in '/var/lib/icinga2/api/zones/director-global/director/host_templates.conf', lines 3:5-3:23
  * check_period = ""
  * check_timeout = null
  * command_endpoint = ""
  * display_name = "sw1.lan"
  * enable_active_checks = true
  * enable_event_handler = true
  * enable_flapping = false
  * enable_notifications = true
  * enable_passive_checks = true
  * enable_perfdata = true
  * event_command = ""
  * flapping_threshold = 0
  * flapping_threshold_high = 30
  * flapping_threshold_low = 25
  * groups = [ ]
  * icon_image = ""
  * icon_image_alt = ""
  * max_check_attempts = 3
    % = modified in '/var/lib/icinga2/api/zones/director-global/director/host_templates.conf', lines 2:5-2:28
  * name = "sw1.lan"
  * notes = ""
  * notes_url = ""
  * package = "_cluster"
  * retry_interval = 30
    % = modified in '/var/lib/icinga2/api/zones/director-global/director/host_templates.conf', lines 4:5-4:24
  * source_location
    * first_column = 1
    * first_line = 16
    * last_column = 36
    * last_line = 16
    * path = "/var/lib/icinga2/api/zones/management/director/hosts.conf"
  * templates = [ "sw1.lan", "host-snmp", "host", "_all" ]
    % = modified in '/var/lib/icinga2/api/zones/management/director/hosts.conf', lines 16:1-16:36
    % = modified in '/var/lib/icinga2/api/zones/director-global/director/host_templates.conf', lines 26:1-26:25
    % = modified in '/var/lib/icinga2/api/zones/director-global/director/host_templates.conf', lines 7:1-7:20
    % = modified in '/var/lib/icinga2/api/zones/director-global/director/host_templates.conf', lines 1:0-1:19
  * type = "Host"
  * vars
  ....
  * volatile = false
  * zone = "management"

As you can see, the host object seems to be instantiated correctly on the management zone satellite, and I cannot find any error messages in the logs.
But the master is not receiving any check data from the satellite.

Please also add an ApiUser on the satellite and query the REST API via /v1/objects/hosts for this specific host. Verify that it is actually executed.

If yes, check why the transport to the parent node fails, or if the health of both nodes is not good (e.g. lots of events in the json rpc cluster queues, or high latency on other objects).

If no, debug the satellite on its performance, checker feature enabled, etc.

Cheers,
Michael

Argh, the checker feature was disabled!
It works now, thanks.

By the way, does a parent need to initiate the connection to the satellites?

No, you can also just let the satellites connect to the parent zone. That is controlled via the Endpoint host attribute as documented here.