From 05431985d5b08b0b2b74171e41dbb6642ed1f56a Mon Sep 17 00:00:00 2001 From: badra001 Date: Wed, 18 Dec 2024 10:20:51 -0500 Subject: [PATCH] add --- .../cloudwatch-alarms.md | 91 +++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 aws/proposals/dice-cloudwatch-datadog/cloudwatch-alarms.md diff --git a/aws/proposals/dice-cloudwatch-datadog/cloudwatch-alarms.md b/aws/proposals/dice-cloudwatch-datadog/cloudwatch-alarms.md new file mode 100644 index 00000000..bc6f840c --- /dev/null +++ b/aws/proposals/dice-cloudwatch-datadog/cloudwatch-alarms.md @@ -0,0 +1,91 @@ +# CloudWatch Alarms to Datadog Monitors + +```yaml +monitor: + - service: ECS + description: CPU Utilization + aws_metric: + datadog_metric: + groups: + - name: dice-mojo-common + filter: dice-mojo + tags: + environment: common + boc_program: dice + warning: + threshold: + critical: + threshold: + +``` + +# notification teams + +For each set of notification target groups, create a Datadog team. + +dice-{project}-{env}[-{service}] + +* {project} + * mojo + * centurion + * auth + * cumulus + +* {env} + * common + * prod + * dmz-stage + * dmz-prod + +* {service} + * where necessary to have a different group, use a short service name + * rds, for example, goes to a different group of people + +# sns + +For the same set of notification teams, create an SNS topic. +In that topic, subscribe ONLY the refactored lambda. + +The lambda will read the SNS message and determine who should be notified +from an SSM parameter. + +# services + +* ecs +* lambda +* alb +* rds + +# monitors + +For each service and metric, create a monitor. Each monitor has two thresholds, +warning and alert (critical). + +Each monitor will have a templated alert message. The warning will indicate +a warning of VALUE > THRESHOLD, and will include the specific team plus any +service-specific teams (if they exist). + +The critical will indicate the same detail and will be sent to the same DD teams +as well as to the SNS topic. 
+ +# + +# ssm parameter + +/apps/dice/datadog/notifications + {notification-group} # same as dd team + +```yaml +name: {notification-group} +alerts: + warn: false + critical: true + recovery: true +contacts: + - username: jbid + mail: email-address + telephonenumber: +1-xxx-xxx-xxxx + - username: + mail: + telephonenumber: +```