diff --git a/CHANGELOG.md b/CHANGELOG.md index 62adb86..286b41a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -47,3 +47,15 @@ - code 0.1.18 - lots of bug fixes - update to use launch_time as the timestamp in the TXT ecords to be able to reconstruct it to delete it + +* 0.2.0 -- 2022-03-08 + - code 0.2.0 + - SNS code prep + - SNS resource prep + - refactor the route53 API calls + - add better API timeouts + - new variables: + - sns_topic_name + - sqs_queue_name + - enable_sns + - enable_sqs diff --git a/README.md b/README.md index 1e3c14d..ab76264 100644 --- a/README.md +++ b/README.md @@ -28,11 +28,14 @@ No modules. | [aws_lambda_alias.lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_alias) | resource | | [aws_lambda_function.lambda](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_function) | resource | | [aws_lambda_permission.allow_cloudwatch](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/lambda_permission) | resource | +| [aws_sns_topic.topic](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sns_topic) | resource | +| [aws_sns_topic_policy.topic](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/sns_topic_policy) | resource | | [aws_arn.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/arn) | data source | | [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source | | [aws_iam_policy.lambda_policies](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy) | data source | | [aws_iam_policy_document.lambda_assume](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | | 
[aws_iam_policy_document.lambda_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | +| [aws_iam_policy_document.topic](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source | | [aws_region.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/region) | data source | ## Inputs @@ -43,11 +46,15 @@ No modules. | [account\_id](#input\_account\_id) | AWS Account ID (default will pull from current user) | `string` | `""` | no | | [component\_tags](#input\_component\_tags) | Additional tags for Components (s3, kms, ddb) | `map(map(string))` |
{
"ddb": {},
"kms": {},
"s3": {}
}
| no | | [create](#input\_create) | Flag to indicate whether to create the resources or not (default: true) | `bool` | `true` | no | -| [dynamodb\_table\_name](#input\_dynamodb\_table\_name) | Different DynamoDB table name to override default of var.name) | `string` | `null` | no | -| [lambda\_environment\_variables](#input\_lambda\_environment\_variables) | Map of lambda environment variables and values | `map(string)` |
{
"DNS_RR_TimeToLive": 60,
"DynamoDBName": null,
"HeritageIdentifier": "dynr53",
"HeritageTXTRecordPrefix": "_txt",
"SleepTime": 60,
"TagKeyCname": "boc:dns:cname",
"TagKeyHostName": "boc:dns:name",
"TagKeyZone": "boc:dns:zone"
}
| no | -| [lambda\_name](#input\_lambda\_name) | Different Lambda name to override default of var.name) | `string` | `null` | no | +| [dynamodb\_table\_name](#input\_dynamodb\_table\_name) | Different DynamoDB table name to override default of var.name | `string` | `null` | no | +| [enable\_sns](#input\_enable\_sns) | Enable use of SNS for reporting errors | `bool` | `false` | no | +| [enable\_sqs](#input\_enable\_sqs) | Enable use of SQS for SNS to send errors | `bool` | `false` | no | +| [lambda\_environment\_variables](#input\_lambda\_environment\_variables) | Map of lambda environment variables and values | `map(string)` |
{
"DNS_RR_TimeToLive": 60,
"DynamoDBName": null,
"HeritageIdentifier": "dynr53",
"HeritageTXTRecordPrefix": "_txt",
"MaxApiRetry": 10,
"SleepTime": 60,
"SnsEnable": false,
"SnsTopicArn": "",
"TagKeyCname": "boc:dns:cname",
"TagKeyHostName": "boc:dns:name",
"TagKeyZone": "boc:dns:zone"
}
| no | +| [lambda\_name](#input\_lambda\_name) | Different Lambda name to override default of var.name | `string` | `null` | no | | [name](#input\_name) | Name to use within all the created resources (default: inf-dynamic-route53) | `string` | `"inf-dynamic-route53"` | no | | [override\_prefixes](#input\_override\_prefixes) | Override built-in prefixes by component. This should be used primarily for common infrastructure things | `map(string)` | `{}` | no | +| [sns\_topic\_name](#input\_sns\_topic\_name) | Different SNS Topic name to override default of var.name | `string` | `null` | no | +| [sqs\_queue\_name](#input\_sqs\_queue\_name) | Different SQS queue name to override default of var.name | `string` | `null` | no | | [tags](#input\_tags) | AWS Tags to apply to appropriate resources | `map(string)` | `{}` | no | ## Outputs diff --git a/code/ddns-lambda.py b/code/ddns-lambda.py index 091d639..5e29838 100755 --- a/code/ddns-lambda.py +++ b/code/ddns-lambda.py @@ -52,7 +52,6 @@ Finally, it will clean up the DynamoDB entry if the instance is shutting down. 
""" import json -from lib2to3.pgen2.pgen import DFAState import sys import datetime import random @@ -73,12 +72,10 @@ LOGGER = logging.getLogger() ACCOUNT = None REGION = None -VERSION = '0.1.18' - -# Adjust the logging level [logging.INFO, logging.DEBUG, logging.WARNING, etc] -LOGGER.setLevel(logging.DEBUG) +VERSION = '0.2.0' # Read Env variables +DEBUG_LOG_LEVEL = os.environ.get('DebugLogLevel', 'INFO') SLEEPTIME = int(os.environ.get('SleepTime', '60')) DDBNAME = os.environ.get('DynamoDBName', 'inf-dynamic-route53') TAGKEY_CNAME = os.environ.get('TagKeyCname', 'boc:dns:cname') @@ -87,10 +84,29 @@ DNS_RR_TTL = int(os.environ.get('DNS_RR_TimeToLive', '60')) DNS_RR_TTL = 60 if DNS_RR_TTL == 0 else DNS_RR_TTL TF_MODULE_VERSION = os.environ.get('tf_module_version', '(unknown)') +MAX_API_RETRY = int(os.environ.get('MaxApiRetry', '10')) +SNS_TOPIC_ARN = os.environ.get('SnsTopicArn', '') +SNS_ENABLE = (os.environ.get('SnsEnable', 'False').lower() in [ + 'yes', 'y', 'true', '1']) and SNS_TOPIC_ARN != '' + # for CNAMEs TXT_RR_PREFIX = os.environ.get('HeritageTXTRecordPrefix', '_txt') HERITAGE_TAG = os.environ.get('HeritageIdentifier', 'dynr53') +# Adjust the logging level [DEBUG, INFO, WARNING, ERROR, CRITICAL] - read from the passed in env var +if DEBUG_LOG_LEVEL == 'DEBUG': + LOGGER.setLevel(logging.DEBUG) +elif DEBUG_LOG_LEVEL == 'INFO': + LOGGER.setLevel(logging.INFO) +elif DEBUG_LOG_LEVEL == 'WARNING': + LOGGER.setLevel(logging.WARNING) +elif DEBUG_LOG_LEVEL == 'ERROR': + LOGGER.setLevel(logging.ERROR) +elif DEBUG_LOG_LEVEL == 'CRITICAL': + LOGGER.setLevel(logging.CRITICAL) +else: + LOGGER.setLevel(logging.INFO) + print('Loading function v{} tf_module_version {}: {}'.format( VERSION, TF_MODULE_VERSION, datetime.datetime.now().time().isoformat())) @@ -103,6 +119,17 @@ def lineno(): # pragma: no cover return str(' - line number: ' + str(inspect.currentframe().f_back.f_lineno)) +def get_sns_client(): + """ + Get sns client + :return: + """ + try: + return 
boto3.client('sns') + except ClientError as err: + print("Unexpected error: %s" % err) + + def get_route53_client(): """ Get route53 client @@ -152,7 +179,8 @@ def lambda_handler( context, dynamodb_client=get_dynamodb_client(), compute=get_ec2_client(), - route53=get_route53_client() + route53=get_route53_client(), + sns_client=get_sns_client() ): @@ -202,7 +230,7 @@ def lambda_handler( LOGGER.debug("sleeping for maximum {} seconds {}".format(SLEEPTIME, lineno())) # wait increment and wait until maximum sleeptime - i = 1 + i = 0 while i < SLEEPTIME: LOGGER.debug("waiting count: %s", str(i) + lineno()) time.sleep(1) @@ -223,7 +251,8 @@ def lambda_handler( "%s", t_private_dns_name + "," + t_private_ip + "," + t_subnet_id + "," + t_vpc_id + lineno()) break except: - LOGGER.info("no instance data, repeat check: %s", lineno()) + LOGGER.info("instance: %s, no instance data, repeat check: %s", + instance_id, lineno()) # Remove response metadata from the response if 'ResponseMetadata' in instance: @@ -244,7 +273,7 @@ def lambda_handler( # Fetch item from DynamoDB LOGGER.debug("Fetching instance information from dynamodb %s", lineno()) instance = get_item_from_dynamodb_table(dynamodb_client, DDBNAME, instance_id) - LOGGER.debug("instance: %s", str(instance) + lineno()) + LOGGER.debug("instance info: %s", str(instance) + lineno()) # Get the instance tags and reorder them because we want a zone created before CNAME try: @@ -255,7 +284,7 @@ def lambda_handler( LOGGER.debug("tags are: %s", str(tags) + lineno()) LOGGER.debug("Get instance attributes %s", lineno()) - LOGGER.debug("instance: %s", str(instance) + lineno()) + LOGGER.debug("instance info: %s", str(instance) + lineno()) LOGGER.debug("type: %s", str(type(instance)) + lineno()) if instance and 'Reservations' in instance: LOGGER.debug("reservations: %s", str(instance['Reservations']) + lineno()) @@ -302,8 +331,8 @@ def lambda_handler( str(reversed_domain_prefix) + lineno()) # Set the reverse lookup zone 
reversed_lookup_zone = reversed_domain_prefix + 'in-addr.arpa.' - LOGGER.info("The reverse lookup zone for this instance is: %s", - str(reversed_lookup_zone)) + LOGGER.info("instance: %s, The reverse lookup zone is: %s", + instance_id, str(reversed_lookup_zone)) # Get VPC id vpc_id = instance['Reservations'][0]['Instances'][0]['VpcId'] @@ -322,7 +351,7 @@ def lambda_handler( exit() # These are collections of zones in Route 53. - hosted_zones = list_hosted_zones(route53) + hosted_zones = new_list_hosted_zones(route53, instance_id) LOGGER.debug("hosted_zones: %s", str(hosted_zones) + lineno()) private_hosted_zones = get_private_hosted_zones(hosted_zones) LOGGER.debug("private_hosted_zones: %s", str(list(private_hosted_zones)) + lineno()) @@ -334,7 +363,8 @@ def lambda_handler( # Check to see whether a reverse lookup zone for the instance # already exists. If it does, check to see whether # the reverse lookup zone is associated with the instance's VPC. - LOGGER.info("reversed_lookup_zone: %s", str(reversed_lookup_zone) + lineno()) + LOGGER.info("instance: %s, reversed_lookup_zone: %s", + instance_id, str(reversed_lookup_zone) + lineno()) reverse_zone = None for record in hosted_zones['HostedZones']: LOGGER.debug("record name: %s", str(record['Name']) + lineno()) @@ -348,34 +378,44 @@ def lambda_handler( LOGGER.debug("reverse_lookup_zone_id: %s", str( reverse_lookup_zone_id) + lineno()) - reverse_hosted_zone_properties = get_hosted_zone_properties( - route53, reverse_lookup_zone_id) - LOGGER.debug("reverse_hosted_zone_properties:" - " %s", str(reverse_hosted_zone_properties) + lineno()) + reverse_hosted_zone_properties = new_get_hosted_zone_properties( + route53, instance_id, reverse_lookup_zone_id) - if vpc_id in map(lambda x: x['VPCId'], reverse_hosted_zone_properties['VPCs']): - LOGGER.info("Reverse lookup zone %s is associated with VPC %s %s", - reverse_lookup_zone_id, vpc_id, lineno()) - reverse_zone_associated = True - else: - LOGGER.info("Reverse lookup zone %s 
is NOT associated with VPC %s %s", - reverse_lookup_zone_id, vpc_id, lineno()) + # need to check if the property is empty {} + if reverse_hosted_zone_properties == {}: + LOGGER.error("get_private_hosted_zone_properties returned no zone property", + reverse_lookup_zone_id + lineno()) reverse_zone_associated = False + else: + LOGGER.debug("reverse_hosted_zone_properties:" + " %s", str(reverse_hosted_zone_properties) + lineno()) + + if vpc_id in map(lambda x: x['VPCId'], reverse_hosted_zone_properties['VPCs']): + LOGGER.info("instance: %s, Reverse lookup zone %s is associated with VPC %s %s", + instance_id, reverse_lookup_zone_id, vpc_id, lineno()) + reverse_zone_associated = True + else: + LOGGER.info("instance: %s, Reverse lookup zone %s is NOT associated with VPC %s %s", + instance_id, reverse_lookup_zone_id, vpc_id, lineno()) + reverse_zone_associated = False else: LOGGER.info( - "No matching reverse lookup zone, PTR record will not be created %s", lineno()) + "instance: %s, No matching reverse lookup zone, PTR record will not be created %s", instance_id, lineno()) # Wait a random amount of time. This is a poor-mans back-off # if a lot of instances are launched all at once. # randomize shutdown/terminate more since it they have higher probability of collision # route 53 has 5 ChangeResourceRecordSets API/s limit. SDK has built-in retry but only up to 5. if state == 'running': - # up to 11 seconds with min of 1 - time.sleep(random.random() * 10 + 1) + # up to 5 seconds with min of 1 + wait_time = random.random() * 5 + 1 else: - # up to 20 seconds with min of 1 - time.sleep(random.random() * 20 + 1) + # up to 5 seconds with min of 1 + wait_time = random.random() * 5 + 1 + + LOGGER.debug("waiting random seconds, %s", str(wait_time) + lineno()) + time.sleep(wait_time) # Is there a DHCP option set? 
# Get DHCP option set configuration @@ -389,7 +429,15 @@ def lambda_handler( LOGGER.debug("dhcp_configurations: %s", str(get_dhcp_configurations) + lineno()) except BaseException as err: - LOGGER.info("No DHCP option set assigned to this VPC %s\n", str(err) + lineno()) + LOGGER.error("instance: %s, No DHCP option set assigned to this VPC %s\n", + instance_id, str(err) + lineno()) + if SNS_ENABLE: + sns_msg = {} + sns_msg['instance_id'] = instance_id + sns_msg['client'] = 'ec2' + sns_msg['boto3_method'] = 'describe_vpcs' + sns_msg['message'] = 'No DHCP option set assigned to this VPC: ' + vpc_id + publish_to_sns(get_sns_client(), json.dumps(sns_msg)) exit() # Look to see whether there's a DHCP option set assigned to @@ -413,7 +461,7 @@ def lambda_handler( has_dhcp_dns_zone_associated_vpc = True LOGGER.debug("private_hosted_zone_name already valid: %s", str( private_hosted_zone_name) + lineno()) - elif is_valid_zone(route53, private_hosted_zone_name, hosted_zones, vpc_id, private_hosted_zone_collection,): + elif is_valid_zone(route53, instance_id, private_hosted_zone_name, hosted_zones, vpc_id, private_hosted_zone_collection,): has_dhcp_dns_zone_associated_vpc = True valid_dns_zones.append(private_hosted_zone_name) @@ -434,7 +482,7 @@ def lambda_handler( LOGGER.debug("Zone Tag key: %s", tag.get('Key') + lineno()) # pause 1s to spread out API calls - time.sleep(1) + # time.sleep(1) custom_zone_name = tag.get('Value').lstrip().lower() # add a trailing period if it does not have it. 
@@ -450,7 +498,7 @@ def lambda_handler( custom_zone_name) + lineno()) zone_tag_hosted_zone_name = custom_zone_name has_valid_zone_tag = True - elif is_valid_zone(route53, custom_zone_name, hosted_zones, vpc_id, private_hosted_zone_collection): + elif is_valid_zone(route53, instance_id, custom_zone_name, hosted_zones, vpc_id, private_hosted_zone_collection): zone_tag_hosted_zone_name = custom_zone_name valid_dns_zones.append(zone_tag_hosted_zone_name) LOGGER.debug("zone_tag_hosted_zone_name: %s", str( @@ -461,7 +509,7 @@ def lambda_handler( LOGGER.debug("CNAME Tag key: %s", tag.get('Key') + lineno()) # pause 1s to spread out API calls - time.sleep(1) + # time.sleep(1) if is_valid_hostname(tag.get('Value')): @@ -489,7 +537,7 @@ def lambda_handler( has_valid_cname_tag = True LOGGER.debug("cname_domain_suffix already valid: %s", str( cname_domain_suffix) + lineno()) - elif is_valid_zone(route53, cname_domain_suffix, hosted_zones, vpc_id, private_hosted_zone_collection): + elif is_valid_zone(route53, instance_id, cname_domain_suffix, hosted_zones, vpc_id, private_hosted_zone_collection): LOGGER.debug("cname domain is valid: %s", cname_domain_suffix + lineno()) valid_dns_zones.append(cname_domain_suffix) @@ -502,7 +550,7 @@ def lambda_handler( LOGGER.debug("Custom Hostname Tag key: %s", tag.get('Key') + lineno()) # pause 1s to spread out API calls - time.sleep(1) + # time.sleep(1) if is_valid_hostname(tag.get('Value')): LOGGER.debug("Custom hostname of %s is valid %s", @@ -524,7 +572,7 @@ def lambda_handler( LOGGER.debug("Name Tag key: %s", tag.get('Key') + lineno()) # pause 1s to spread out API calls - time.sleep(1) + # time.sleep(1) # if name exist, split into hostname/domain if is_valid_hostname(tag.get('Value')): @@ -559,7 +607,7 @@ def lambda_handler( has_valid_Name_tag_zonename = True LOGGER.debug("name_domain_suffix already valid: %s", str( name_domain_suffix) + lineno()) - elif is_valid_zone(route53, name_domain_suffix, hosted_zones, vpc_id, 
private_hosted_zone_collection): + elif is_valid_zone(route53, instance_id, name_domain_suffix, hosted_zones, vpc_id, private_hosted_zone_collection): valid_dns_zones.append(name_domain_suffix) has_valid_Name_tag_zonename = True @@ -574,35 +622,41 @@ def lambda_handler( # determine correct A/PTR record to be created based upon the boolean values from the tags above if has_valid_hostname_tag and has_valid_zone_tag: - LOGGER.info("custom hostname tag and custom zone tag valid.") + LOGGER.info( + "instance: %s, custom hostname tag and custom zone tag valid.", instance_id) final_private_hostname = custom_host_name final_hosted_zone_name = zone_tag_hosted_zone_name elif has_valid_hostname_tag and not (has_valid_zone_tag) and has_dhcp_dns_zone_associated_vpc: # 3 - LOGGER.info("custom hostname tag valid only.") + LOGGER.info("instance: %s, custom hostname tag valid only.", instance_id) final_private_hostname = custom_host_name final_hosted_zone_name = private_hosted_zone_name elif has_valid_Name_tag_hostname and has_valid_Name_tag_zonename: - LOGGER.info("Name tag hostname valid and Name tag zonename valid.") + LOGGER.info( + "instance: %s, Name tag hostname valid and Name tag zonename valid.", instance_id) final_private_hostname = name_host final_hosted_zone_name = name_domain_suffix elif has_valid_Name_tag_hostname and has_valid_zone_tag: - LOGGER.info("Name tag hostname valid and custom zone tag valid.") + LOGGER.info( + "instance: %s, Name tag hostname valid and custom zone tag valid.", instance_id) final_private_hostname = name_host final_hosted_zone_name = zone_tag_hosted_zone_name elif has_valid_Name_tag_hostname and has_dhcp_dns_zone_associated_vpc: - LOGGER.info("Name tag hostname valid and DHCP zone is valid.") + LOGGER.info( + "instance: %s, Name tag hostname valid and DHCP zone is valid.", instance_id) final_private_hostname = name_host final_hosted_zone_name = private_hosted_zone_name elif has_valid_zone_tag and not (has_valid_hostname_tag) and 
not(has_valid_Name_tag_hostname): - LOGGER.info("custom zone tag valid but no custom hostname, using IP address.") + LOGGER.info( + "instance: %s, custom zone tag valid but no custom hostname, using IP address.", instance_id) final_private_hostname = private_host_name final_hosted_zone_name = zone_tag_hosted_zone_name elif has_dhcp_dns_zone_associated_vpc: - LOGGER.info("no custom tags - use default.") + LOGGER.info("instance: %s, no custom tags - use default.", instance_id) final_private_hostname = private_host_name final_hosted_zone_name = private_hosted_zone_name - else: # none of the use-casem and no suitable zone to create the A record - LOGGER.info("No DHCP Associated for VPC and no custom tags. Exiting Script") + else: # none of the use-case and no suitable zone to create the A record + LOGGER.info( + "instance: %s, No DHCP Associated for VPC and no custom tags. Exiting Script", instance_id) # nothing to do, exit out script caller_response.append( 'No DHCP Associated for VPC and no custom tags. Exiting Script') @@ -610,8 +664,8 @@ def lambda_handler( # put together the FQDN of the dns name... final_private_dns_name = final_private_hostname + '.' 
+ final_hosted_zone_name - LOGGER.info("final hostname for A and PTR record: %s", - str(final_private_dns_name) + lineno()) + LOGGER.info("instance: %s, final hostname for A and PTR record: %s", + instance_id, str(final_private_dns_name) + lineno()) # Get the PHZ ID for the Zone final_hosted_zone_id = get_zone_id(final_hosted_zone_name, hosted_zones) @@ -633,6 +687,11 @@ def lambda_handler( heritage_value = format_heritage(heritage) heritage_value = '"{}"'.format(heritage_value) if len( heritage_value) else heritage_value + + LOGGER.debug("heritage value:" + " %s", str(heritage_value) + lineno()) + + delete_records = True get_rr = False # Create OR Delete the A / PTR Record @@ -640,87 +699,144 @@ def lambda_handler( # create the records try: LOGGER.debug("Creating resource records %s", lineno()) - create_resource_record( + create_response = create_resource_record( route53, + instance_id, final_hosted_zone_id, final_private_hostname, final_hosted_zone_name, 'A', private_ip ) + append_msg = 'A record in zone id: ' + \ + str(final_hosted_zone_id) + \ + ' for hosted zone ' + \ + str(final_private_hostname) + '.' + \ + str(final_hosted_zone_name) + \ + ' with value: ' + \ + str(private_ip) + if create_response == 'success': + LOGGER.info("instance: %s, Created %s", + instance_id, append_msg + lineno()) + caller_response.append('Created ' + append_msg) + else: + caller_response.append(create_response) + caller_response.append('Failed to create ' + append_msg) + LOGGER.error('Failed to create A record: %s', create_response) + except BaseException as err: + LOGGER.error("instance: %s, unexpected error. %s\n", + instance_id, str(err) + lineno()) - caller_response.append('Created A record in zone id: ' + - str(final_hosted_zone_id) + - ' for hosted zone ' + - str(final_private_hostname) + '.' 
+ - str(final_hosted_zone_name) + - ' with value: ' + - str(private_ip)) - + try: if len(heritage) > 0: - LOGGER.debug("Creating heritage TXT resource records %s", lineno()) - create_resource_record( + LOGGER.debug("Creating heritage TXT resource records %s", + final_private_hostname + lineno()) + create_response = create_resource_record( route53, + instance_id, final_hosted_zone_id, final_private_hostname, final_hosted_zone_name, 'TXT', heritage_value ) + append_msg = 'TXT record in zone id: ' + \ + str(final_hosted_zone_id) + \ + ' for hosted zone ' + \ + str(final_private_hostname) + '.' + \ + str(final_hosted_zone_name) + \ + ' with value: ' + \ + str(heritage_value) + + if create_response == 'success': + LOGGER.info("instance: %s, Created %s", + instance_id, append_msg + lineno()) + caller_response.append('Created ' + append_msg) + else: + caller_response.append(create_response) + caller_response.append('Failed to create ' + append_msg) + LOGGER.error('Failed to create TXT record: %s', create_response) - caller_response.append('Created TXT record in zone id: ' + - str(final_hosted_zone_id) + - ' for hosted zone ' + - str(final_private_hostname) + '.' + - str(final_hosted_zone_name) + - ' with value: ' + - str(heritage_value)) + except BaseException as err: + LOGGER.error("instance: %s, unexpected error. 
%s\n", + instance_id, str(err) + lineno()) + try: if reverse_zone_associated: - create_resource_record( + create_response = create_resource_record( route53, + instance_id, reverse_lookup_zone_id, reversed_ip_address, 'in-addr.arpa', 'PTR', final_private_dns_name ) + append_msg = 'PTR record in zone id: ' + \ + str(reverse_lookup_zone_id) + \ + ' for hosted zone ' + \ + str(reversed_ip_address) + \ + 'in-addr.arpa with value: ' + \ + str(final_private_dns_name) + if create_response == 'success': + LOGGER.info("instance: %s, Created %s", + instance_id, append_msg + lineno()) + caller_response.append('Created ' + append_msg) + else: + caller_response.append(create_response) + caller_response.append('Failed to create ' + append_msg) + LOGGER.error('Failed to create PTR record: %s', create_response) - caller_response.append('Created PTR record in zone id: ' + - str(reverse_lookup_zone_id) + - ' for hosted zone ' + - str(reversed_ip_address) + - 'in-addr.arpa with value: ' + - str(final_private_dns_name)) - - if len(heritage) > 0: - create_resource_record( - route53, - reverse_lookup_zone_id, - reversed_ip_address, - 'in-addr.arpa', - 'TXT', - heritage_value - ) + except BaseException as err: + LOGGER.error("instance: %s, unexpected error. 
%s\n", + instance_id, str(err) + lineno()) - caller_response.append('Created TXT reverse record in zone id: ' + - str(reverse_lookup_zone_id) + - ' for hosted zone ' + - str(reversed_ip_address) + - 'in-addr.arpa with value: ' + - str(heritage_value)) + try: + if reverse_zone_associated and len(heritage) > 0: + LOGGER.debug("Creating heritage TXT resource records %s", + reversed_ip_address + lineno()) + create_response = create_resource_record( + route53, + instance_id, + reverse_lookup_zone_id, + reversed_ip_address, + 'in-addr.arpa', + 'TXT', + heritage_value + ) + append_msg = 'TXT reverse record in zone id: ' + \ + str(reverse_lookup_zone_id) + \ + ' for hosted zone ' + \ + str(reversed_ip_address) + \ + 'in-addr.arpa with value: ' + \ + str(heritage_value) + + if create_response == 'success': + LOGGER.info("instance: %s, Created %s", + instance_id, append_msg + lineno()) + caller_response.append('Created ' + append_msg) + else: + caller_response.append(create_response) + caller_response.append('Failed to create ' + append_msg) + LOGGER.error('Failed to create TXT record: %s', create_response) except BaseException as err: - LOGGER.info("unexpected error. %s\n", str(err) + lineno()) + LOGGER.error("instance: %s, unexpected error. %s\n", + instance_id, str(err) + lineno()) # elif state == 'terminated': else: # not running so delete the records + # delete A record try: + LOGGER.debug("Deleting A record %s", final_private_hostname + lineno()) + response_text = 'Delete A record in zone id: ' + str(final_hosted_zone_id) + \ + ' for hosted zone ' + str(final_private_hostname) + \ + '.' 
+ str(final_hosted_zone_name) + ' with value: ' + \ + str(private_ip) - # pause 1 before deleting to avoid API limit - time.sleep(1) - delete_resource_record( + delete_response = new_delete_resource_record( route53, + instance_id, final_hosted_zone_id, final_private_hostname, final_hosted_zone_name, @@ -728,19 +844,36 @@ def lambda_handler( private_ip ) - caller_response.append('Deleted A record in zone id: ' + - str(final_hosted_zone_id) + - ' for hosted zone ' + - str(final_private_hostname) + '.' + - str(final_hosted_zone_name) + - ' with value: ' + - str(private_ip)) + if delete_response == 'NoSuchHostedZone': + delete_records = False + caller_response.append("Failed, no such zone: " + response_text) + elif delete_response == 'InvalidChangeBatch-RecordNotFound': + caller_response.append("Failed, Record Not Found: " + response_text) + elif delete_response == 'InvalidChangeBatch-RecordDoNotMatch': + delete_records = False + caller_response.append( + "Failed, requested delete do not match existing record: " + response_text) + elif delete_response == {}: + delete_records = False + caller_response.append( + "Failed, could NOT delete Record: " + response_text) + else: + caller_response.append("Success: " + response_text) + LOGGER.info("instance: %s, Success: %s", + instance_id, response_text + lineno()) + except BaseException as err: + delete_records = False + LOGGER.error("instance: %s, unexpected error. 
%s\n", + instance_id, str(err) + lineno()) + # delete TXT record associated with A record + try: # pause 1 before deleting to avoid API limit if get_rr: - time.sleep(1) - heritage_value = get_resource_record( + # time.sleep(1) + heritage_value = new_get_resource_record( route53, + instance_id, final_hosted_zone_id, final_private_hostname, final_hosted_zone_name, @@ -748,27 +881,58 @@ def lambda_handler( heritage_value ) if len(heritage) > 0: - delete_resource_record( + LOGGER.debug("Deleting heritage TXT resource records %s", + final_private_hostname + lineno()) + response_text = 'Delete TXT record in zone id: ' + str(final_hosted_zone_id) + \ + ' for hosted zone ' + str(final_private_hostname) + \ + '.' + str(final_hosted_zone_name) + ' with value: ' + \ + str(heritage_value) + + delete_response = new_delete_resource_record( route53, + instance_id, final_hosted_zone_id, final_private_hostname, final_hosted_zone_name, 'TXT', heritage_value ) + if delete_response == 'NoSuchHostedZone': + delete_records = False + caller_response.append("Failed, no such zone: " + response_text) + elif delete_response == 'InvalidChangeBatch-RecordNotFound': + caller_response.append("Failed, Record Not Found: " + response_text) + elif delete_response == 'InvalidChangeBatch-RecordDoNotMatch': + delete_records = False + caller_response.append( + "Failed, requested delete do not match existing record: " + response_text) + elif delete_response == {}: + delete_records = False + caller_response.append( + "Failed, could NOT delete Record: " + response_text) + else: + caller_response.append("Success: " + response_text) + LOGGER.info("instance: %s, Success: %s", + instance_id, response_text + lineno()) + except BaseException as err: + delete_records = False + LOGGER.error("instance: %s, unexpected error. %s\n", + instance_id, str(err) + lineno()) - caller_response.append('Deleted TXT record in zone id: ' + - str(final_hosted_zone_id) + - ' for hosted zone ' + - str(final_private_hostname) + '.' 
+ - str(final_hosted_zone_name) + - ' with value: ' + - str(heritage_value)) - + # delete PTR record + try: # pause 1 before deleting to avoid API limit - time.sleep(1) - delete_resource_record( + # time.sleep(1) + + LOGGER.debug("Deleting PTR record %s", reversed_ip_address + lineno()) + response_text = 'Delete PTR record in zone id: ' + str(reverse_lookup_zone_id) + \ + ' for hosted zone ' + str(reversed_ip_address) + \ + str(private_dns_name) + ' with value: ' + \ + str(final_private_dns_name) + + delete_response = new_delete_resource_record( route53, + instance_id, reverse_lookup_zone_id, reversed_ip_address, 'in-addr.arpa', @@ -776,19 +940,35 @@ def lambda_handler( final_private_dns_name ) - caller_response.append('Deleted PTR record in zone id: ' + - str(reverse_lookup_zone_id) + - ' for hosted zone ' + - str(reversed_ip_address) + - str(private_dns_name) + - ' with value: ' + - str(final_private_dns_name)) + if delete_response == 'NoSuchHostedZone': + delete_records = False + caller_response.append("Failed, no such zone: " + response_text) + elif delete_response == 'InvalidChangeBatch-RecordNotFound': + caller_response.append("Failed, Record Not Found: " + response_text) + elif delete_response == 'InvalidChangeBatch-RecordDoNotMatch': + delete_records = False + caller_response.append( + "Failed, requested delete do not match existing record: " + response_text) + elif delete_response == {}: + delete_records = False + caller_response.append( + "Failed, could NOT delete Record: " + response_text) + else: + caller_response.append("Success: " + response_text) + LOGGER.info("instance: %s, Success: %s", + instance_id, response_text + lineno()) + except BaseException as err: + delete_records = False + LOGGER.error("instance: %s, unexpected error. 
%s\n", + instance_id, str(err) + lineno()) + try: if get_rr: # pause 1 before deleting to avoid API limit - time.sleep(1) - heritage_value = get_resource_record( + # time.sleep(1) + heritage_value = new_get_resource_record( route53, + instance_id, reverse_lookup_zone_id, reversed_ip_address, 'in-addr.arpa', @@ -796,25 +976,42 @@ def lambda_handler( heritage_value ) if len(heritage) > 0: - delete_resource_record( + LOGGER.debug("Deleting heritage TXT resource records %s", + reversed_ip_address + lineno()) + response_text = 'Delete TXT record in zone id: ' + str(reverse_lookup_zone_id) + \ + ' for hosted zone ' + str(reversed_ip_address) + str(private_dns_name) + \ + ' with value: ' + str(heritage_value) + + delete_response = new_delete_resource_record( route53, + instance_id, reverse_lookup_zone_id, reversed_ip_address, 'in-addr.arpa', 'TXT', heritage_value ) - - caller_response.append('Deleted TXT record in zone id: ' + - str(reverse_lookup_zone_id) + - ' for hosted zone ' + - str(reversed_ip_address) + - str(private_dns_name) + - ' with value: ' + - str(heritage_value)) - + if delete_response == 'NoSuchHostedZone': + delete_records = False + caller_response.append("Failed, no such zone: " + response_text) + elif delete_response == 'InvalidChangeBatch-RecordNotFound': + caller_response.append("Failed, Record Not Found: " + response_text) + elif delete_response == 'InvalidChangeBatch-RecordDoNotMatch': + delete_records = False + caller_response.append( + "Failed, requested delete do not match existing record: " + response_text) + elif delete_response == {}: + delete_records = False + caller_response.append( + "Failed, could NOT delete Record: " + response_text) + else: + caller_response.append("Success: " + response_text) + LOGGER.info("instance: %s, Success: %s", + instance_id, response_text + lineno()) except BaseException as err: - LOGGER.debug("%s", str(err) + lineno()) + delete_records = False + LOGGER.error("instance: %s, unexpected error. 
%s\n", + instance_id, str(err) + lineno()) # Create the CNAME record only if it has passed the check if has_valid_cname_tag: @@ -835,52 +1032,85 @@ def lambda_handler( LOGGER.debug("cname_domain_suffix_id:" " %s", str(cname_domain_suffix_id) + lineno()) - create_resource_record( + create_response = create_resource_record( route53, + instance_id, cname_domain_suffix_id, cname_host_name, cname_domain_suffix, 'CNAME', final_private_dns_name ) + append_msg = 'CNAME record in zone id: ' + \ + str(cname_domain_suffix_id) + \ + ' for hosted zone ' + \ + str(cname_host_name) + '.' + \ + str(cname_domain_suffix) + \ + ' with value: ' + \ + str(final_private_dns_name) + + if create_response == 'success': + LOGGER.info("instance: %s, Created %s", + instance_id, append_msg + lineno()) + caller_response.append('Created ' + append_msg) + else: + caller_response.append(create_response) + caller_response.append('Failed to create ' + append_msg) + LOGGER.error('Failed to create CNAME record: %s', create_response) + except BaseException as err: + LOGGER.error("instance: %s, unexpected error. %s\n", + instance_id, str(err) + lineno()) - caller_response.append('Created CNAME record in zone id: ' + - str(cname_domain_suffix_id) + - ' for hosted zone ' + - str(cname_host_name) + '.' + - str(cname_domain_suffix) + - ' with value: ' + - str(final_private_dns_name)) - + try: if len(heritage) > 0: - create_resource_record( + LOGGER.debug("Creating heritage TXT resource records %s", + TXT_RR_PREFIX + '.' + cname_host_name + lineno()) + create_response = create_resource_record( route53, + instance_id, cname_domain_suffix_id, TXT_RR_PREFIX + '.' + cname_host_name, cname_domain_suffix, 'TXT', heritage_value ) - - caller_response.append('Created TXT for CNAME record in zone id: ' + - str(cname_domain_suffix_id) + - ' for hosted zone ' + - str(TXT_RR_PREFIX) + '.' + - str(cname_host_name) + '.' 
+ - str(cname_domain_suffix) + - ' with value: ' + - str(heritage_value)) + append_msg = 'TXT for CNAME record in zone id: ' + \ + str(cname_domain_suffix_id) + \ + ' for hosted zone ' + \ + str(TXT_RR_PREFIX) + '.' + \ + str(cname_host_name) + '.' + \ + str(cname_domain_suffix) + \ + ' with value: ' + \ + str(heritage_value) + + if create_response == 'success': + LOGGER.info("instance: %s, Created %s", + instance_id, append_msg + lineno()) + caller_response.append('Created ' + append_msg) + else: + caller_response.append(create_response) + caller_response.append('Failed to create ' + append_msg) + LOGGER.error( + 'Failed to create TXT fpr CNAME record: %s', create_response) except BaseException as err: - LOGGER.debug("%s", str(err) + lineno()) + LOGGER.error("instance: %s, unexpected error. %s\n", + instance_id, str(err) + lineno()) + else: + # delete the CNAME record try: - LOGGER.debug( - "deleting resource record %s", lineno()) + LOGGER.debug("deleting CNAME record %s", lineno()) + response_text = 'Delete CNAME record in zone id: ' + str(cname_domain_suffix_id) + \ + ' for hosted zone ' + str(cname_host_name) + '.' + \ + str(cname_domain_suffix) + ' with value: ' + \ + str(final_private_dns_name) + # pause 1 before deleting to avoid API limit - time.sleep(1) - delete_resource_record( + # time.sleep(1) + delete_response = new_delete_resource_record( route53, + instance_id, cname_domain_suffix_id, cname_host_name, cname_domain_suffix, @@ -888,17 +1118,34 @@ def lambda_handler( final_private_dns_name ) - caller_response.append('Deleted CNAME record in zone id: ' + - str(cname_domain_suffix_id) + - ' for hosted zone ' + - str(cname_host_name) + '.' 
+ - str(cname_domain_suffix) + - ' with value: ' + - str(final_private_dns_name)) + if delete_response == 'NoSuchHostedZone': + delete_records = False + caller_response.append("Failed, no such zone: " + response_text) + elif delete_response == 'InvalidChangeBatch-RecordNotFound': + caller_response.append("Failed, Record Not Found: " + response_text) + elif delete_response == 'InvalidChangeBatch-RecordDoNotMatch': + delete_records = False + caller_response.append( + "Failed, requested delete do not match existing record: " + response_text) + elif delete_response == {}: + delete_records = False + caller_response.append( + "Failed, could NOT delete Record: " + response_text) + else: + caller_response.append("Success: " + response_text) + LOGGER.info("instance: %s, Success: %s", + instance_id, response_text + lineno()) + except BaseException as err: + delete_records = False + LOGGER.error("instance: %s, unexpected error. %s\n", + instance_id, str(err) + lineno()) + # delete the CNAME txt record + try: if get_rr: - heritage_value = get_resource_record( + heritage_value = new_get_resource_record( route53, + instance_id, cname_domain_suffix_id, TXT_RR_PREFIX + '.' + cname_host_name, cname_domain_suffix, @@ -906,8 +1153,16 @@ def lambda_handler( heritage_value ) if len(heritage) > 0: - delete_resource_record( + LOGGER.debug("Deleting heritage TXT resource records %s", + TXT_RR_PREFIX + '.' + cname_host_name + lineno()) + response_text = 'Delete TXT for CNAME record in zone id: ' + str(cname_domain_suffix_id) \ + + ' for hosted zone ' + str(TXT_RR_PREFIX) + '.' + str(cname_host_name) \ + + '.' + str(cname_domain_suffix) + ' with value: ' \ + + str(heritage_value) + + delete_response = new_delete_resource_record( route53, + instance_id, cname_domain_suffix_id, TXT_RR_PREFIX + '.' 
+ cname_host_name, cname_domain_suffix, @@ -915,28 +1170,49 @@ def lambda_handler( heritage_value ) - caller_response.append('Deleted TXT for CNAME record in zone id: ' + - str(cname_domain_suffix_id) + - ' for hosted zone ' + - str(TXT_RR_PREFIX) + '.' + - str(cname_host_name) + '.' + - str(cname_domain_suffix) + - ' with value: ' + - str(heritage_value)) - + if delete_response == 'NoSuchHostedZone': + delete_records = False + caller_response.append("Failed, no such zone: " + response_text) + elif delete_response == 'InvalidChangeBatch-RecordNotFound': + caller_response.append( + "Failed, Record Not Found: " + response_text) + elif delete_response == 'InvalidChangeBatch-RecordDoNotMatch': + delete_records = False + caller_response.append( + "Failed, requested delete do not match existing record: " + response_text) + elif delete_response == {}: + delete_records = False + caller_response.append( + "Failed, could NOT delete Record: " + response_text) + else: + caller_response.append("Success: " + response_text) + LOGGER.info("instance: %s, Success: %s", + instance_id, response_text + lineno()) except BaseException as err: - LOGGER.debug("%s", str(err) + lineno()) + delete_records = False + LOGGER.error("instance: %s, unexpected error. 
%s\n", + instance_id, str(err) + lineno()) # Clean up DynamoDB after deleting records if state != 'running': - delete_item_from_dynamodb_table(dynamodb_client, DDBNAME, instance_id) - caller_response.insert(0, 'Successfully removed recordsets') - return caller_response - - caller_response.insert(0, 'Successfully created recordsets') + # only if all records were succesfully deleted + if delete_records: + delete_item_from_dynamodb_table(dynamodb_client, DDBNAME, instance_id) + LOGGER.info("instance: %s, deleted the item from DynamoDB: %s", + instance_id, DDBNAME + lineno()) + caller_response.insert(0, 'Successfully removed recordsets') + return caller_response + else: + LOGGER.info("instance: %s, not all records deleted, leaving item in DynamoDB: %s", + instance_id, DDBNAME + lineno()) + caller_response.insert( + 0, 'Failed to remove recordsets, leaving DynamoDB item for instance: ' + instance_id) + return caller_response - return caller_response + else: + caller_response.insert(0, 'Successfully created recordsets') + return caller_response def get_cname_from_tags(tags): @@ -970,16 +1246,51 @@ def get_instances(client, instance_id): LOGGER.info("unexpected error. %s\n", str(err) + lineno()) -def list_hosted_zones(client): +# def list_hosted_zones(client): +# """ +# Get route53 hosted zones +# :param client: +# :return: +# """ +# try: +# return client.list_hosted_zones() +# except ClientError as err: +# LOGGER.info("unexpected error. %s\n", str(err) + lineno()) + +def new_list_hosted_zones(client, instance_id): """ Get route53 hosted zones :param client: + :param instance_id: :return: """ - try: - return client.list_hosted_zones() - except ClientError as err: - LOGGER.info("unexpected error. %s\n", str(err) + lineno()) + + i = 0 + hosted_zones = {} + # retry to handle errors in the possible API call + while i < MAX_API_RETRY: + try: + hosted_zones = client.list_hosted_zones() + LOGGER.debug("list_hosted_zones returned without error. 
%s", lineno()) + break + except ClientError as err: + error_message = str(err) + if "(Throttling)" in str(err): + LOGGER.debug( + "list_hosted_zones throttled due to API limit, retrying: %s", str(err) + lineno()) + else: + LOGGER.info("instance: %s, unexpected error. %s\n", + instance_id, error_message + lineno()) + i += 1 + LOGGER.info("instance: %s, list_hosted_zones returned error, waiting before retry. %s", + instance_id, str(i) + lineno()) + time.sleep(i) + + if hosted_zones == {}: + LOGGER.error("instance: %s, list_hosted_zones returned error. Timed out. %s", + instance_id, str(i) + lineno()) + + return hosted_zones def list_tables(client): @@ -1009,7 +1320,8 @@ def delete_item_from_dynamodb_table(client, table, instance_id): 'InstanceId': {'S': instance_id} }) except ClientError as err: - LOGGER.info("unexpected error. %s\n", str(err) + lineno()) + LOGGER.error("instance: %s, unexpected error. %s\n", + instance_id, str(err) + lineno()) def put_item_in_dynamodb_table(client, table, instance_id, instance_attributes): @@ -1033,7 +1345,8 @@ def put_item_in_dynamodb_table(client, table, instance_id, instance_attributes): } ) except ClientError as err: - LOGGER.info("unexpected error. %s\n", str(err) + lineno()) + LOGGER.error("instance: %s, unexpected error. %s\n", + instance_id, str(err) + lineno()) def get_item_from_dynamodb_table(client, table, instance_id): @@ -1068,7 +1381,8 @@ def get_item_from_dynamodb_table(client, table, instance_id): return json.loads(item) return None except ClientError as err: - LOGGER.info("unexpected error. %s\n", str(err) + lineno()) + LOGGER.error("instance: %s, unexpected error. %s\n", + instance_id, str(err) + lineno()) def get_private_hosted_zone_collection(private_hosted_zones): @@ -1149,7 +1463,59 @@ def get_dynamodb_table(client, table_name): LOGGER.info("unexpected error. 
%s\n", str(err) + lineno()) -def change_resource_recordset(client, zone_id, host_name, hosted_zone_name, record_type, value): +# def change_resource_recordset(client, zone_id, host_name, hosted_zone_name, record_type, value): +# """ +# Change resource recordset +# :param client: +# :param zone_id: +# :param host_name: +# :param hosted_zone_name: +# :param value: +# :return: +# """ +# try: +# response = client.change_resource_record_sets( +# HostedZoneId=zone_id, +# ChangeBatch={ +# "Comment": "Updated by Lambda DDNS", +# "Changes": [ +# { +# "Action": "UPSERT", +# "ResourceRecordSet": { +# "Name": host_name + hosted_zone_name, +# "Type": record_type, +# "TTL": DNS_RR_TTL, +# "ResourceRecords": [ +# { +# "Value": value +# }, +# ] +# } +# }, +# ] +# } +# ) + +# LOGGER.debug("response: %s", str(response) + lineno()) +# return response +# except ClientError as err: +# LOGGER.debug("Error creating resource record: %s", str(err) + lineno()) +# error_message = str(err) + +# if "conflicts with other records" in error_message: +# LOGGER.debug( +# "Can not create dns record because of duplicates: %s", str(err) + lineno()) +# return 'Duplicate resource record' +# elif "conflicting RRSet" in error_message: +# LOGGER.debug( +# "Can not create dns record because of duplicates: %s", str(err) + lineno()) +# return 'Conflicting resource record' +# else: +# LOGGER.info("unexpected error. 
%s\n", str(err) + lineno()) +# return 'Unexpected error: ' + str(err) + + +def new_change_resource_recordset(client, instance_id, zone_id, host_name, hosted_zone_name, record_type, value): """ Change resource recordset :param client: @@ -1159,10 +1525,15 @@ def change_resource_recordset(client, zone_id, host_name, hosted_zone_name, reco :param value: :return: """ - try: - response = client.change_resource_record_sets( - HostedZoneId=zone_id, - ChangeBatch={ + + i = 0 + update_response = {} + # retry to handle errors in the possible API call + while i < MAX_API_RETRY: + try: + LOGGER.debug("Creating %s record %s in zone %s" + " %s", record_type, host_name, hosted_zone_name, lineno()) + change_batch = { "Comment": "Updated by Lambda DDNS", "Changes": [ { @@ -1180,31 +1551,63 @@ def change_resource_recordset(client, zone_id, host_name, hosted_zone_name, reco }, ] } - ) - LOGGER.debug("response: %s", str(response) + lineno()) - return response - except ClientError as err: - LOGGER.debug("Error creating resource record: %s", str(err) + lineno()) - error_message = str(err) + LOGGER.debug("change_resource_record_sets change_batch: %s", + json.dumps(change_batch) + lineno()) + update_response = client.change_resource_record_sets( + HostedZoneId=zone_id, + ChangeBatch=change_batch + ) - if "conflicts with other records" in error_message: - LOGGER.debug( - "Can not create dns record because of duplicates: %s", str(err) + lineno()) - return 'Duplicate resource record' - elif "conflicting RRSet" in error_message: - LOGGER.debug( - "Can not create dns record because of duplicates: %s", str(err) + lineno()) - return 'Conflicting resource record' - else: - LOGGER.info("unexpected error. 
%s\n", str(err) + lineno()) - return 'Unexpected error: ' + str(err) + LOGGER.debug("change_resource_record_sets UPSERT returned without error - response: %s", + str(update_response) + lineno()) + break + # return response + except ClientError as err: + if 'NoSuchHostedZone' in str(err) and 'No hosted zone found with ID' in str(err): + LOGGER.error("Hosted zone not found error: %s", str(err) + lineno()) + update_response = "NoSuchHostedZone" + break + elif 'InvalidChangeBatch' in str(err) and 'is not permitted in zone' in str(err): + LOGGER.error( + "Cannot create record - most likely wrong zone name specified: %s", str(err) + lineno()) + update_response = "InvalidChangeBatch-WrongZoneName" + break + elif "(Throttling)" in str(err): + LOGGER.debug("change_resource_record_sets UPSERT throttled due to API limit, retrying: %s", str( + err) + lineno()) + else: + LOGGER.info("instance: %s, unexpected error. %s\n", + instance_id, str(err) + lineno()) + i += 1 + LOGGER.info("instance: %s, change_resource_record_sets UPSERT returned error, waiting before retry. %s", + instance_id, str(i) + lineno()) + time.sleep(i) -def create_resource_record(client, zone_id, host_name, hosted_zone_name, record_type, value): + if update_response == {}: + if SNS_ENABLE: + try: + sns_msg = {} + sns_msg['instance_id'] = instance_id + sns_msg['client'] = 'route53' + sns_msg['boto3_method'] = 'change_resource_record_sets' + sns_msg['message'] = 'change_resource_record_sets could not UPSERT record' + sns_msg['change_resource_record_sets'] = { + 'HostedZoneId': zone_id, 'ChangeBatch': change_batch} + publish_to_sns(get_sns_client(), json.dumps(sns_msg)) + except: + LOGGER.info("instance: %s, error: %s", instance_id, + str(sys.exc_info()[0]) + lineno()) + + return update_response + + +def create_resource_record(client, instance_id, zone_id, host_name, hosted_zone_name, record_type, value): """ This function creates resource records in the hosted zone passed by the calling function. 
:param client: + :param instance_id: :param zone_id: :param host_name: :param hosted_zone_name: @@ -1224,19 +1627,37 @@ def create_resource_record(client, zone_id, host_name, hosted_zone_name, record_ hosted_zone_name, value, lineno()) # To prevent rate throttling - time.sleep(1) + # time.sleep(1) - response = change_resource_recordset( + create_response = new_change_resource_recordset( client, + instance_id, zone_id, host_name, hosted_zone_name, record_type, value ) + if create_response == 'NoSuchHostedZone': + LOGGER.debug("DNS Record create failed: %s", + str(create_response) + lineno()) + msg = 'NoSuchHostedZone: ' + str(create_response) + elif create_response == 'InvalidChangeBatch-WrongZoneName': + LOGGER.debug("DNS Record create failed: %s", + str(create_response) + lineno()) + msg = 'InvalidChangeBatch-WrongZoneName: ' + str(create_response) + elif create_response == {}: + LOGGER.debug("DNS Record create failed: %s", + str(create_response) + lineno()) + msg = 'DNS Recored Create Failed: ' + str(create_response) + else: + LOGGER.debug("DNS Record create success: %s", + str(create_response) + lineno()) + msg = 'success' + + LOGGER.debug("response: %s", str(create_response) + lineno()) + return msg - LOGGER.debug("response: %s", str(response) + lineno()) - return response except ClientError as err: LOGGER.debug("Error creating resource record: %s", str(err) + lineno()) if 'is not permitted as it conflicts with other records ' \ @@ -1245,10 +1666,50 @@ def create_resource_record(client, zone_id, host_name, hosted_zone_name, record_ "of duplicates: %s", str(err) + lineno()) -def get_resource_record(client, zone_id, host_name, hosted_zone_name, record_type, unused=None): +# def get_resource_record(client, zone_id, host_name, hosted_zone_name, record_type, unused=None): +# """ +# This function getts resource records from the hosted zone passed by the calling function. 
+# :param str client: +# :param str zone_id: +# :param str host_name: +# :param str hosted_zone_name: +# :param str record_type: +# :param str unused: Placeholder for same calling parameters as delete_resource_record(); unused +# :return str value: Value of record if found, None if not +# """ +# value = None +# try: +# LOGGER.debug("Getting %s record %s in zone %s" +# " %s", record_type, host_name, hosted_zone_name, lineno()) +# if host_name[-1] != '.': +# host_name = host_name + '.' +# response = client.list_resource_record_sets( +# HostedZoneId=zone_id, +# StartRecordName=host_name, +# StartRecordType=record_type, +# MaxItems=1) + +# if len(response) > 0: +# rr_set = response['ResourceRecordSets'][0] +# if rr_set['Name'] == host_name and rr_set['Type'] == record_type: +# value = rr_set['ResourceRecords'][0]['Value'] + +# except ClientError as err: +# if 'Not Found' in str(err): +# LOGGER.debug("Get record not found error: %s", str(err) + lineno()) + +# if 'InvalidChangeBatch' in str(err) and 'it was not found' in str(err): +# LOGGER.debug("Get record not found error: %s", str(err) + lineno()) + +# LOGGER.info("Get record unexpected error. %s\n", str(err) + lineno()) + +# return value + +def new_get_resource_record(client, instance_id, zone_id, host_name, hosted_zone_name, record_type, unused=None): """ This function getts resource records from the hosted zone passed by the calling function. :param str client: + :param str instance_id: :param str zone_id: :param str host_name: :param str hosted_zone_name: @@ -1256,39 +1717,109 @@ def get_resource_record(client, zone_id, host_name, hosted_zone_name, record_typ :param str unused: Placeholder for same calling parameters as delete_resource_record(); unused :return str value: Value of record if found, None if not """ + + i = 0 value = None - try: - LOGGER.debug("Getting %s record %s in zone %s" - " %s", record_type, host_name, hosted_zone_name, lineno()) - if host_name[-1] != '.': - host_name = host_name + '.' 
- response = client.list_resource_record_sets( - HostedZoneId=zone_id, - StartRecordName=host_name, - StartRecordType=record_type, - MaxItems=1) - if len(response) > 0: - rr_set = response['ResourceRecordSets'][0] - if rr_set['Name'] == host_name and rr_set['Type'] == record_type: - value = rr_set['ResourceRecords'][0]['Value'] + while i < MAX_API_RETRY: + try: + LOGGER.debug("Getting %s record %s in zone %s" + " %s", record_type, host_name, hosted_zone_name, lineno()) + if host_name[-1] != '.': + host_name = host_name + '.' + response = client.list_resource_record_sets( + HostedZoneId=zone_id, + StartRecordName=host_name, + StartRecordType=record_type, + MaxItems=1) + + if len(response) > 0: + rr_set = response['ResourceRecordSets'][0] + if rr_set['Name'] == host_name and rr_set['Type'] == record_type: + value = rr_set['ResourceRecords'][0]['Value'] - except ClientError as err: - if 'Not Found' in str(err): - LOGGER.debug("Get record not found error: %s", str(err) + lineno()) + LOGGER.debug( + "list_resource_record_sets returned without error. %s", lineno()) + break + + except ClientError as err: + if 'Not Found' in str(err): + LOGGER.debug("list_resource_record_sets not found error: %s", + str(err) + lineno()) - if 'InvalidChangeBatch' in str(err) and 'it was not found' in str(err): - LOGGER.debug("Get record not found error: %s", str(err) + lineno()) + if 'InvalidChangeBatch' in str(err) and 'it was not found' in str(err): + LOGGER.debug("list_resource_record_sets not found error: %s", + str(err) + lineno()) - LOGGER.info("Get record unexpected error. %s\n", str(err) + lineno()) + LOGGER.info("instance: %s, list_resource_record_sets unexpected error. %s\n", + instance_id, str(err) + lineno()) + + i += 1 + LOGGER.info("instance: %s, list_resource_record_sets returned error, waiting before retry. 
%s", + instance_id, str(i) + lineno()) + time.sleep(i) return value -def delete_resource_record(client, zone_id, host_name, hosted_zone_name, record_type, value): +# def delete_resource_record(client, zone_id, host_name, hosted_zone_name, record_type, value): +# """ +# This function deletes resource records from the hosted zone passed by the calling function. +# :param client: +# :param zone_id: +# :param host_name: +# :param hosted_zone_name: +# :param record_type: +# :param value: +# :return: +# """ +# try: +# LOGGER.debug("Deleting %s record %s in zone %s" +# " %s", record_type, host_name, hosted_zone_name, lineno()) +# if host_name[-1] != '.': +# host_name = host_name + '.' +# response = client.change_resource_record_sets( +# HostedZoneId=zone_id, +# ChangeBatch={ +# "Comment": "Updated by Lambda DDNS", +# "Changes": [ +# { +# "Action": "DELETE", +# "ResourceRecordSet": { +# "Name": host_name + hosted_zone_name, +# "Type": record_type, +# "TTL": DNS_RR_TTL, +# "ResourceRecords": [ +# { +# "Value": value +# }, +# ] +# } +# } +# ] +# } +# ) + +# LOGGER.debug("delete record response: %s", str(response) + lineno()) +# return response + +# except ClientError as err: +# if 'Not Found' in str(err): +# LOGGER.debug("Record not found error: %s", str(err) + lineno()) +# return + +# if 'InvalidChangeBatch' in str(err) and 'it was not found' in str(err): +# LOGGER.debug("Record not found error: %s", str(err) + lineno()) +# return + +# LOGGER.info("unexpected error. %s\n", str(err) + lineno()) + + +def new_delete_resource_record(client, instance_id, zone_id, host_name, hosted_zone_name, record_type, value): """ This function deletes resource records from the hosted zone passed by the calling function. 
:param client: + :param instance_id: :param zone_id: :param host_name: :param hosted_zone_name: @@ -1296,14 +1827,19 @@ def delete_resource_record(client, zone_id, host_name, hosted_zone_name, record_ :param value: :return: """ - try: - LOGGER.debug("Deleting %s record %s in zone %s" - " %s", record_type, host_name, hosted_zone_name, lineno()) - if host_name[-1] != '.': - host_name = host_name + '.' - response = client.change_resource_record_sets( - HostedZoneId=zone_id, - ChangeBatch={ + + i = 0 + delete_response = {} + # retry to handle errors in the possible API call + while i < MAX_API_RETRY: + + try: + LOGGER.debug("Deleting %s record %s in zone %s" + " %s", record_type, host_name, hosted_zone_name, lineno()) + if host_name[-1] != '.': + host_name = host_name + '.' + + change_batch = { "Comment": "Updated by Lambda DDNS", "Changes": [ { @@ -1321,21 +1857,61 @@ def delete_resource_record(client, zone_id, host_name, hosted_zone_name, record_ } ] } - ) - LOGGER.debug("delete record response: %s", str(response) + lineno()) - return response + LOGGER.debug("change_resource_record_sets change_batch: %s", + json.dumps(change_batch) + lineno()) + delete_response = client.change_resource_record_sets( + HostedZoneId=zone_id, + ChangeBatch=change_batch + ) - except ClientError as err: - if 'Not Found' in str(err): - LOGGER.debug("Record not found error: %s", str(err) + lineno()) - return + LOGGER.debug("change_resource_record_sets DELETE returned without error - response: %s", + str(delete_response) + lineno()) + break - if 'InvalidChangeBatch' in str(err) and 'it was not found' in str(err): - LOGGER.debug("Record not found error: %s", str(err) + lineno()) - return + except ClientError as err: + + if 'NoSuchHostedZone' in str(err) and 'No hosted zone found with ID' in str(err): + LOGGER.debug("Hosted zone not found error: %s", str(err) + lineno()) + delete_response = "NoSuchHostedZone" + break + elif 'InvalidChangeBatch' in str(err) and 'it was not found' in str(err): + 
LOGGER.debug("Record not found error: %s", str(err) + lineno()) + delete_response = "InvalidChangeBatch-RecordNotFound" + break + elif 'InvalidChangeBatch' in str(err) and 'values provided do not match the current values' in str(err): + LOGGER.debug("Record do not match current value error: %s", + str(err) + lineno()) + delete_response = "InvalidChangeBatch-RecordDoNotMatch" + break + elif '(Throttling)' in str(err): + LOGGER.debug("change_resource_record_sets DELETE throttled due to API limit, retrying: %s", str( + err) + lineno()) + else: + LOGGER.info("instance: %s, unexpected error. %s\n", + instance_id, str(err) + lineno()) - LOGGER.info("unexpected error. %s\n", str(err) + lineno()) + i += 1 + LOGGER.info("instance: %s, change_resource_record_sets DELETE returned error, waiting before retry. %s", + instance_id, str(i) + lineno()) + time.sleep(i) + + if (delete_response == {} or delete_response == "InvalidChangeBatch-RecordDoNotMatch"): + if SNS_ENABLE: + try: + sns_msg = {} + sns_msg['instance_id'] = instance_id + sns_msg['client'] = 'route53' + sns_msg['boto3_method'] = 'change_resource_record_sets' + sns_msg['message'] = 'change_resource_record_sets could not DELETE record' + sns_msg['change_resource_record_sets'] = { + 'HostedZoneId': zone_id, 'ChangeBatch': change_batch} + publish_to_sns(get_sns_client(), json.dumps(sns_msg)) + except: + LOGGER.info("instance: %s, error: %s", instance_id, + str(sys.exc_info()[0]) + lineno()) + + return delete_response def get_zone_id(zone_name, hosted_zones, private_zone=True): @@ -1389,11 +1965,13 @@ def is_valid_hostname(hostname): LOGGER.info("unexpected error. 
%s\n", str(sys.exc_info()[0]) + lineno()) -def is_valid_zone(route53, zonename, hosted_zones, vpc_id, private_hosted_zone_collection): +def is_valid_zone(route53, instance_id, zonename, hosted_zones, vpc_id, private_hosted_zone_collection): """ This function checks to see whether the zone "name" entered is valid (PHZ zone exists and is associated with the VPC where instance is lauched in) + :param route53: + :param instance_id: :param zonename: :param vpc_id: :param route53: @@ -1415,26 +1993,33 @@ def is_valid_zone(route53, zonename, hosted_zones, vpc_id, private_hosted_zone_c LOGGER.debug("hosted_zone_id: %s", hosted_zone_id + lineno()) - private_hosted_zone_properties = get_hosted_zone_properties( + private_hosted_zone_properties = new_get_hosted_zone_properties( route53, + instance_id, hosted_zone_id ) - LOGGER.debug("private_hosted_zone_properties:" - " %s", str(private_hosted_zone_properties) + lineno()) - - # check if the VPC is associated with the PHZ - if vpc_id in map(lambda x: x['VPCId'], private_hosted_zone_properties['VPCs']): - LOGGER.debug("Privated Hosted Zone associated with VPC: %s", - zonename + lineno()) - return True + + # need to check if the property is empty {} + if private_hosted_zone_properties == {}: + LOGGER.error( + "get_private_hosted_zone_properties returned no zone property", hosted_zone_id + lineno()) else: - LOGGER.debug("Private Hosted Zone is NOT associated with vpc: %s", - zonename + lineno()) + LOGGER.debug("private_hosted_zone_properties:" + " %s", str(private_hosted_zone_properties) + lineno()) + + # check if the VPC is associated with the PHZ + if vpc_id in map(lambda x: x['VPCId'], private_hosted_zone_properties['VPCs']): + LOGGER.debug("Privated Hosted Zone associated with VPC: %s", + zonename + lineno()) + return True + else: + LOGGER.debug("Private Hosted Zone is NOT associated with vpc: %s", + zonename + lineno()) else: LOGGER.debug("Domain Name does not match Private Hosted Zones: %s", zonename + lineno()) - # if 
returned with True, return false + # all other cases, return false return False except: @@ -1627,24 +2212,65 @@ def is_dns_support_enabled(client, vpc_id): LOGGER.info("unexpected error. %s\n", str(sys.exc_info()[0]) + lineno()) -def get_hosted_zone_properties(client, zone_id): +# def get_hosted_zone_properties(client, zone_id): +# """ +# Get hosted zone properties +# :param client: +# :param zone_id: +# :return: +# """ +# try: +# LOGGER.debug('getting hosted zone properties: zone_id: %s', +# str(zone_id) + lineno()) +# hosted_zone_properties = client.get_hosted_zone(Id=zone_id) +# LOGGER.debug('hosted_zone_properties: %s', str( +# hosted_zone_properties) + lineno()) +# if 'ResponseMetadata' in hosted_zone_properties: +# hosted_zone_properties.pop('ResponseMetadata') +# return hosted_zone_properties +# except: +# LOGGER.info("unexpected error. %s\n", str(sys.exc_info()[0]) + lineno()) + + +def new_get_hosted_zone_properties(client, instance_id, zone_id): """ Get hosted zone properties :param client: + :param instance_id: :param zone_id: :return: """ - try: - LOGGER.debug('getting hosted zone properties: zone_id: %s', - str(zone_id) + lineno()) - hosted_zone_properties = client.get_hosted_zone(Id=zone_id) - LOGGER.debug('hosted_zone_properties: %s', str( - hosted_zone_properties) + lineno()) - if 'ResponseMetadata' in hosted_zone_properties: - hosted_zone_properties.pop('ResponseMetadata') - return hosted_zone_properties - except: - LOGGER.info("unexpected error. 
%s\n", str(sys.exc_info()[0]) + lineno()) + + i = 0 + hosted_zone_properties = {} + # retry to handle errors in the possible API call + while i < MAX_API_RETRY: + try: + LOGGER.debug('getting hosted zone properties: zone_id: %s', + str(zone_id) + lineno()) + hosted_zone_properties = client.get_hosted_zone(Id=zone_id) + LOGGER.debug('hosted_zone_properties: %s', str( + hosted_zone_properties) + lineno()) + if 'ResponseMetadata' in hosted_zone_properties: + hosted_zone_properties.pop('ResponseMetadata') + + LOGGER.debug("get_hosted_zone returned without error. %s", lineno()) + break + except ClientError as err: + error_message = str(err) + if "(Throttling)" in str(err): + LOGGER.debug( + "get_hosted_zone throttled due to API limit, retrying: %s", str(err) + lineno()) + else: + LOGGER.info("instance: %s, unexpected error. %s\n", + instance_id, error_message + lineno()) + + i += 1 + LOGGER.info("instance: %s, get_hosted_zone returned error, waiting before retry. %s", + instance_id, str(i) + lineno()) + time.sleep(i) + + return hosted_zone_properties def get_subnet_cidr_block(client, subnet_id): @@ -1762,3 +2388,29 @@ def parse_heritage(info): return kv_results except: return {} + + +def publish_to_sns(client, message): + """ + Publish a simple message to the specified SNS topic + :param client: + :param account: + :param region: + :param message: + :return: + """ + + LOGGER.debug("Sending SNS message: %s to SNSTopic %s", + str(message), SNS_TOPIC_ARN + lineno()) + + if SNS_TOPIC_ARN != '': + try: + response = client.publish( + TopicArn=SNS_TOPIC_ARN, + Message=str(message) + ) + LOGGER.debug("sns response: %s", str(response) + lineno()) + except ClientError as err: + LOGGER.debug("Unexpected error: %s", str(err) + lineno()) + else: + LOGGER.debug("No SNS Topic specified, ignoring") diff --git a/role.tf b/role.tf index d4bd1f0..bca3db4 100644 --- a/role.tf +++ b/role.tf @@ -80,6 +80,16 @@ data "aws_iam_policy_document" "lambda_policy" { ] resources = [var.create ? 
aws_dynamodb_table.table[0].arn : null]
   }
+  dynamic "statement" {
+    for_each = var.create && var.enable_sns ? toset(["1"]) : toset([]) # enable_sns is a bool: no numeric comparison
+    iterator = s
+    content {
+      sid       = "SNSLambdaAccess"
+      effect    = "Allow"
+      actions   = ["sns:Get*", "sns:Publish*"]
+      resources = [var.create && var.enable_sns ? aws_sns_topic.topic[0].arn : ""]
+    }
+  }
 }
 
 data "aws_iam_policy_document" "lambda_assume" {
diff --git a/sns.tf b/sns.tf
new file mode 100644
index 0000000..2992e4f
--- /dev/null
+++ b/sns.tf
@@ -0,0 +1,55 @@
+locals {
+  sns_name = var.sns_topic_name != null ? var.sns_topic_name : local.name
+}
+
+resource "aws_sns_topic" "topic" {
+  count        = var.create && var.enable_sns ? 1 : 0
+  name         = local.sns_name
+  display_name = "dynr53"
+}
+
+resource "aws_sns_topic_policy" "topic" {
+  count  = var.create && var.enable_sns ? 1 : 0
+  arn    = var.create && var.enable_sns ? aws_sns_topic.topic[0].arn : ""
+  policy = data.aws_iam_policy_document.topic.json
+}
+
+# is this too much?
+data "aws_iam_policy_document" "topic" {
+  policy_id = local.sns_name
+  statement {
+    sid    = "SNSPermissions"
+    effect = "Allow"
+    principals {
+      type        = "AWS"
+      identifiers = ["*"]
+    }
+    actions = [
+      "sns:Subscribe",
+      "sns:SetTopicAttributes",
+      "sns:RemovePermission",
+      "sns:Receive",
+      "sns:Publish",
+      "sns:ListSubscriptionsByTopic",
+      "sns:GetTopicAttributes",
+      "sns:DeleteTopic",
+      "sns:AddPermission",
+    ]
+    condition {
+      test     = "StringEquals"
+      variable = "AWS:SourceOwner"
+      values   = [local.account_id]
+    }
+    resources = aws_sns_topic.topic[*].arn # topic uses count: splat yields [] when the topic is not created
+  }
+}
+##
+## {
+## "Sid": "sns",
+## "Effect": "Allow",
+## "Action": {
+## "sns:Get*",
+## "sns:Publish"
+## "Resource":
+## }
+##
diff --git a/sqs.tf b/sqs.tf
new file mode 100644
index 0000000..e69de29
diff --git a/variables.tf b/variables.tf
index 060eafb..b011b4f 100644
--- a/variables.tf
+++ b/variables.tf
@@ -5,13 +5,25 @@ variable "name" {
 }
 
 variable "dynamodb_table_name" {
-  description = "Different DynamoDB table name to override default of var.name)"
+  description = "Different DynamoDB table name to override default of var.name"
   type        = string
   default     = null
 }
 
 variable "lambda_name" {
-  description = "Different Lambda name to override default of var.name)"
+  description = "Different Lambda name to override default of var.name"
+  type        = string
+  default     = null
+}
+
+variable "sns_topic_name" {
+  description = "Different SNS Topic name to override default of var.name"
+  type        = string
+  default     = null
+}
+
+variable "sqs_queue_name" {
+  description = "Different SQS queue name to override default of var.name"
   type        = string
   default     = null
 }
@@ -26,7 +38,22 @@ variable "lambda_environment_variables" {
     TagKeyZone              = "boc:dns:zone"
     TagKeyHostName          = "boc:dns:name"
     DNS_RR_TimeToLive       = 60
+    MaxApiRetry             = 10
+    SnsTopicArn             = ""
+    SnsEnable               = false
     HeritageTXTRecordPrefix = "_txt"
     HeritageIdentifier      = "dynr53"
   }
 }
+
+variable "enable_sns" {
+  description = "Enable use of SNS for reporting errors"
+  type        = bool
+  default     = false
+}
+
+variable "enable_sqs" {
+  description = "Enable use of SQS for SNS to send errors"
+  type        = bool
+  default     = false
+}