2017-08-08 4 views
0

icinga2の監視では、サービスが一定時間停止している場合に、問題の通知をエスカレーションすることができます。サービスが復旧したときには、通知を1通だけ受け取りたいと考えています。通知のエスカレーションを使用している場合に、icinga2で確認応答(Acknowledgement)/OKの通知を1通だけ受け取るにはどうすればよいですか?

「service-test-down-1」と「service-test-down-2」の両方にすべてのタイプと状態を設定すると、サービスが正常に戻ったときに「OK」メッセージが2通届きます。OKメッセージとOK以外のメッセージを分離して以下のように設定すると、OKメッセージがまったく届きません。これは簡単にできるはずだと感じるのですが、解決の糸口をつかめていません。

apply Notification "service-test-down-1" to Service { // first escalation stage: problem notifications for services in the "tests" group
    command = "dispatch-service" 
    states = [ Warning, Critical, Unknown ] // non-OK states only; OK is left to "service-test-recovery"
    types = [ Problem, Custom, FlappingStart, FlappingEnd, 
      DowntimeStart, DowntimeEnd, DowntimeRemoved ] // Recovery and Acknowledgement are not listed here
    users = ["russ"] 
    period = "24x7" 
    assign where "tests" in service.groups 
    vars.priority = "medium" 
    times.begin = 0m // start of this stage's window; `icinga2 object list` reports 0
    times.end = 3m // end of this stage's window; reported as 180
    interval = 1m // reported as 60
} 

apply Notification "service-test-down-2" to Service { // second escalation stage: takes over where "service-test-down-1" ends (3m)
    command = "dispatch-service" 
    states = [ Warning, Critical, Unknown ] // non-OK states only; OK is left to "service-test-recovery"
    types = [ Problem, Custom, FlappingStart, FlappingEnd, 
      DowntimeStart, DowntimeEnd, DowntimeRemoved ] // Recovery and Acknowledgement are not listed here
    period = "24x7" 
    users = ["russ"] 
    assign where "tests" in service.groups 
    vars.priority = "medium" 
    times.begin = 3m // start of this stage's window; `icinga2 object list` reports 180
    times.end = 12h // end of this stage's window; reported as 43200
    interval = 2m // reported as 120
} 
apply Notification "service-test-recovery" to Service { // meant to deliver one OK/acknowledgement message for both escalation stages
    command = "dispatch-service" 
    states = [ OK ] 
    types = [ Acknowledgement, Recovery ] // NOTE(review): this thread reports that no OK message is ever sent with this setup; a Notification object that sent no Problem notification is not sent a Recovery
    users = ["russ"] 
    period = "24x7" 
    vars.priority = "medium" 
    assign where "tests" in service.groups 
    interval = 1 // bare number with no unit suffix; `icinga2 object list` reports 1
} 

apply Service "NotificationTest" { 
    // Test service that joins the "tests" service group, which the
    // "service-test-*" notification rules match on.
    enable_active_checks = true 
    check_command = "passive" 

    ignore where host.vars.noservices == true 
    groups += ["tests"] 
    assign where host.name == "icinga2.acceleration.net" 
    // Assigned exactly once. An earlier `max_check_attempts = 1` in this
    // block was overridden by this later assignment, so 5 is the value
    // that was already in effect.
    max_check_attempts = 5 
    check_interval = 5m 
    retry_interval = 5m 
} 

この構成は、icinga2では次のように表示されます:

~# icinga2 object list --name service-test-* 
Object 'icinga2.acceleration.net!NotificationTest!service-test-down-1' of type 'Notification': 
    % declared in '/opt/icinga2lib/lib.conf.d//test.conf', lines 2:1-2:51 
    * __name = "icinga2.acceleration.net!NotificationTest!service-test-down-1" 
    * command = "dispatch-service" 
    % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 3:3-3:30 
    * command_endpoint = "" 
    * host_name = "icinga2.acceleration.net" 
    % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 2:1-2:51 
    * interval = 60 
    % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 13:3-13:15 
    * name = "service-test-down-1" 
    * package = "_etc" 
    % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 2:1-2:51 
    * period = "24x7" 
    % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 8:3-8:17 
    * service_name = "NotificationTest" 
    % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 2:1-2:51 
    * states = [ "Warning", "Critical", "Unknown" ] 
    % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 4:3-4:41 
    * templates = [ "service-test-down-1" ] 
    % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 2:1-2:51 
    * times 
    * begin = 0 
     % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 11:3-11:18 
    * end = 180 
     % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 12:3-12:16 
    * type = "Notification" 
    * types = [ "Problem", "Custom", "FlappingStart", "FlappingEnd", "DowntimeStart", "DowntimeEnd", "DowntimeRemoved" ] 
    % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 5:3-6:57 
    * user_groups = null 
    * users = [ "russ" ] 
    % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 7:3-7:18 
    * vars 
    * priority = "medium" 
     % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 10:3-10:26 
    * zone = "" 

Object 'icinga2.acceleration.net!NotificationTest!service-test-down-2' of type 'Notification': 
    % declared in '/opt/icinga2lib/lib.conf.d//test.conf', lines 16:1-16:51 
    * __name = "icinga2.acceleration.net!NotificationTest!service-test-down-2" 
    * command = "dispatch-service" 
    % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 17:3-17:30 
    * command_endpoint = "" 
    * host_name = "icinga2.acceleration.net" 
    % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 16:1-16:51 
    * interval = 120 
    % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 27:3-27:15 
    * name = "service-test-down-2" 
    * package = "_etc" 
    % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 16:1-16:51 
    * period = "24x7" 
    % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 21:3-21:17 
    * service_name = "NotificationTest" 
    % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 16:1-16:51 
    * states = [ "Warning", "Critical", "Unknown" ] 
    % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 18:3-18:41 
    * templates = [ "service-test-down-2" ] 
    % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 16:1-16:51 
    * times 
    * begin = 180 
     % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 25:3-25:18 
    * end = 43200 
     % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 26:3-26:17 
    * type = "Notification" 
    * types = [ "Problem", "Custom", "FlappingStart", "FlappingEnd", "DowntimeStart", "DowntimeEnd", "DowntimeRemoved" ] 
    % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 19:3-20:57 
    * user_groups = null 
    * users = [ "russ" ] 
    % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 22:3-22:18 
    * vars 
    * priority = "medium" 
     % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 24:3-24:26 
    * zone = "" 

Object 'icinga2.acceleration.net!NotificationTest!service-test-recovery' of type 'Notification': 
    % declared in '/opt/icinga2lib/lib.conf.d//test.conf', lines 29:1-29:53 
    * __name = "icinga2.acceleration.net!NotificationTest!service-test-recovery" 
    * command = "dispatch-service" 
    % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 30:3-30:30 
    * command_endpoint = "" 
    * host_name = "icinga2.acceleration.net" 
    % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 29:1-29:53 
    * interval = 1 
    % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 37:3-37:14 
    * name = "service-test-recovery" 
    * package = "_etc" 
    % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 29:1-29:53 
    * period = "24x7" 
    % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 34:3-34:17 
    * service_name = "NotificationTest" 
    % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 29:1-29:53 
    * states = [ "OK" ] 
    % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 31:3-31:17 
    * templates = [ "service-test-recovery" ] 
    % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 29:1-29:53 
    * times = null 
    * type = "Notification" 
    * types = [ "Acknowledgement", "Recovery" ] 
    % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 32:3-32:39 
    * user_groups = null 
    * users = [ "russ" ] 
    % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 33:3-33:18 
    * vars 
    * priority = "medium" 
     % = modified in '/opt/icinga2lib/lib.conf.d//test.conf', lines 35:3-35:26 
    * zone = "" 

関連ドキュメントのリンク: https://www.icinga.com/docs/icinga2/latest/doc/03-monitoring-basics/#notification-escalations

クロスポスト先(icinga2の開発者から回答が寄せられています):https://github.com/Icinga/icinga2/issues/5478

答えて

0

答え(github上:https://github.com/Icinga/icinga2/issues/5478)は、通知のエスカレーションがある場合、icinga2の内部だけで復旧通知を1通にまとめる方法はない、ということです。

各エスカレーションは個別の通知オブジェクトであり、問題を通知したすべての通知オブジェクトには復旧メッセージが送信されます。PROBLEM通知を送信していない通知オブジェクトには、RECOVERY通知は送信されません(これは間違っているように思えますが、それはさておき)。

解決策は、メッセージを重複排除する通知プロキシを用意することです。ステートフルなプロキシにはしたくなかったので、通知対象のホスト/サービスにcurrent_escalationを設定する機能を作成しました。これにより、現在のエスカレーションだけが実際にRECOVERYメッセージを送信し、プロキシはステートレスのままになります。サンプルコードはgithubにあります。

関連する問題