Hi all (again). I am not having a good day. After ‘fixing’ my previous issue (see here), I am left with my cluster-zone check not working for a newly built satellite/zone.
I’ve trawled through my config and compared it to my other HA satellite setups (n.b. this one is just a single satellite), and I am struggling to spot any issues. All checks on the host itself are working fine. The pki verify checks are successful.
icinga2 - The Icinga 2 network monitoring daemon (version: r2.13.1-1)
Platform version: 20.04.3 LTS (Focal Fossa)
Satellite zones.conf:
/*
* Generated by Icinga 2 node setup commands
* on 2021-08-23 18:09:50 +0000
*/
object Endpoint "lhr1-prod-svc-icinga1" {
}
object Endpoint "lhr1-prod-svc-icinga2" {
}
object Zone "master" {
endpoints = [ "lhr1-prod-svc-icinga1", "lhr1-prod-svc-icinga2" ]
}
object Endpoint "hav1-prod-svc-icingasat3" {
}
object Zone "satellite-gw" {
endpoints = [ "hav1-prod-svc-icingasat3" ]
parent = "master"
}
object Zone "global-templates" {
global = true
}
object Zone "director-global" {
global = true
}
Host object definition:
Object 'hav1-prod-svc-icingasat3' of type 'Host':
% declared in '/etc/icinga2/zones.d/satellite-gw/hav1-prod-svc-icingasat3.conf', lines 3:1-3:38
* __name = "hav1-prod-svc-icingasat3"
* action_url = ""
* address = "x.x.x.x"
% = modified in '/etc/icinga2/zones.d/satellite-gw/hav1-prod-svc-icingasat3.conf', lines 13:3-13:23
* address6 = ""
* check_command = "hostalive"
% = modified in '/etc/icinga2/zones.d/global-templates/templates.conf', lines 21:3-21:29
* check_interval = 60
% = modified in '/etc/icinga2/zones.d/global-templates/templates.conf', lines 16:3-16:21
* check_period = ""
* check_timeout = null
* command_endpoint = ""
* display_name = "hav1-prod-svc-icingasat3"
* enable_active_checks = true
* enable_event_handler = true
* enable_flapping = false
* enable_notifications = true
* enable_passive_checks = true
* enable_perfdata = true
* event_command = ""
* flapping_threshold = 0
* flapping_threshold_high = 30
* flapping_threshold_low = 25
* groups = [ ]
* icon_image = ""
* icon_image_alt = ""
* max_check_attempts = 5
% = modified in '/etc/icinga2/zones.d/global-templates/templates.conf', lines 15:3-15:24
* name = "hav1-prod-svc-icingasat3"
* notes = ""
* notes_url = ""
* package = "_etc"
* retry_interval = 30
% = modified in '/etc/icinga2/zones.d/global-templates/templates.conf', lines 17:3-17:22
* source_location
* first_column = 1
* first_line = 3
* last_column = 38
* last_line = 3
* path = "/etc/icinga2/zones.d/satellite-gw/hav1-prod-svc-icingasat3.conf"
* templates = [ "hav1-prod-svc-icingasat3", "generic-host" ]
% = modified in '/etc/icinga2/zones.d/satellite-gw/hav1-prod-svc-icingasat3.conf', lines 3:1-3:38
% = modified in '/etc/icinga2/zones.d/global-templates/templates.conf', lines 14:1-14:28
* type = "Host"
* vars
<redacted>
* volatile = false
* zone = "satellite-gw"
Endpoint object definition:
Object 'hav1-prod-svc-icingasat3' of type 'Endpoint':
% declared in '/etc/icinga2/zones.conf', lines 29:1-29:42
* __name = "hav1-prod-svc-icingasat3"
* host = "hav1-prod-svc-icingasat3"
% = modified in '/etc/icinga2/zones.conf', lines 30:3-30:35
* log_duration = 86400
* name = "hav1-prod-svc-icingasat3"
* package = "_etc"
* port = "5665"
* source_location
* first_column = 1
* first_line = 29
* last_column = 42
* last_line = 29
* path = "/etc/icinga2/zones.conf"
* templates = [ "hav1-prod-svc-icingasat3" ]
% = modified in '/etc/icinga2/zones.conf', lines 29:1-29:42
* type = "Endpoint"
* zone = ""
Zone definition:
Object 'satellite-gw' of type 'Zone':
% declared in '/etc/icinga2/zones.conf', lines 56:1-56:26
* __name = "satellite-gw"
* endpoints = [ "hav1-prod-svc-icingasat3" ]
% = modified in '/etc/icinga2/zones.conf', lines 57:3-57:44
* global = false
* name = "satellite-gw"
* package = "_etc"
* parent = "master"
% = modified in '/etc/icinga2/zones.conf', lines 58:3-58:19
* source_location
* first_column = 1
* first_line = 56
* last_column = 26
* last_line = 56
* path = "/etc/icinga2/zones.conf"
* templates = [ "satellite-gw" ]
% = modified in '/etc/icinga2/zones.conf', lines 56:1-56:26
* type = "Zone"
* zone = ""
Master 1 zones.conf (some objects removed):
object Endpoint "lhr1-prod-svc-icinga1" {
// That's us
}
object Endpoint "lhr1-prod-svc-icinga2" {
host = "lhr1-prod-svc-icinga2" // Actively connect to the second master.
}
object Endpoint "hav1-prod-svc-icingasat3" {
host = "hav1-prod-svc-icingasat3" // Actively connect to the fifth satellite.
}
object Zone "master" {
endpoints = [ "lhr1-prod-svc-icinga1", "lhr1-prod-svc-icinga2" ]
}
object Zone "satellite-gw" {
endpoints = [ "hav1-prod-svc-icingasat3" ]
parent = "master"
}
object Zone "global-templates" {
global = true
}
object Zone "director-global" {
global = true
}
Master 2 zones.conf (some objects removed):
object Endpoint "lhr1-prod-svc-icinga2" {
// That's us
}
object Endpoint "lhr1-prod-svc-icinga1" {
// First master already connects to us
}
object Endpoint "hav1-prod-svc-icingasat3" {
host = "hav1-prod-svc-icingasat3" // Actively connect to the fifth satellite.
}
object Zone "master" {
endpoints = [ "lhr1-prod-svc-icinga1", "lhr1-prod-svc-icinga2" ]
}
object Zone "satellite-gw" {
endpoints = [ "hav1-prod-svc-icingasat3" ]
parent = "master"
}
object Zone "global-templates" {
global = true
}
object Zone "director-global" {
global = true
}
Has anyone any pointers as to how I might troubleshoot this further?