From 15fda843e5176e692985ec379c2ee10b2c73372f Mon Sep 17 00:00:00 2001 From: "Gangwoo \"Peter\" Cho" Date: Mon, 28 Mar 2022 09:04:13 -0400 Subject: [PATCH] added ddb item put to use json.dumps to convert to proper string format, added retry logic for ec2 api calls --- code/ddns-lambda.py | 403 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 314 insertions(+), 89 deletions(-) diff --git a/code/ddns-lambda.py b/code/ddns-lambda.py index d630eea..c72bff5 100755 --- a/code/ddns-lambda.py +++ b/code/ddns-lambda.py @@ -72,7 +72,7 @@ LOGGER = logging.getLogger() ACCOUNT = None REGION = None -VERSION = '0.2.1' +VERSION = '0.2.2' # Read Env variables DEBUG_LOG_LEVEL = os.environ.get('DebugLogLevel', 'INFO') @@ -273,8 +273,11 @@ def lambda_handler( LOGGER.debug("instance: %s", str(instance) + lineno()) instance = remove_empty_from_dict(instance) instance_dump = json.dumps(instance, default=json_serial) - instance_attributes = json.loads(instance_dump) - LOGGER.debug("instance_attributes: %s", str(instance_attributes) + lineno()) + + # dont' change to dictionary. Keep it as string to be written to DDB + # instance_attributes = json.loads(instance_dump) + instance_attributes = instance_dump + LOGGER.info("instance_attributes: %s", str(instance_attributes) + lineno()) LOGGER.debug("trying to put instance information in " "dynamo table %s", str(instance_attributes) + lineno()) put_item_in_dynamodb_table(dynamodb_client, DDBNAME, @@ -282,9 +285,9 @@ def lambda_handler( LOGGER.debug("done putting item in dynamo table %s", lineno()) else: # Fetch item from DynamoDB - LOGGER.debug("Fetching instance information from dynamodb %s", lineno()) + LOGGER.info("Fetching instance information from dynamodb %s", lineno()) instance = get_item_from_dynamodb_table(dynamodb_client, DDBNAME, instance_id) - LOGGER.debug("instance info: %s", str(instance) + lineno()) + LOGGER.info("instance attributes: %s", str(instance) + lineno()) # Get the instance tags and reorder them because we want a zone created before CNAME try: @@ -305,6 +308,7 @@ def lambda_handler( LOGGER.debug("reservations:" " %s", str(instance['Reservations'][0]['Instances'][0]) + lineno()) elif instance is None: + LOGGER.error("No instance found for: %s, exiting script", str(instance_id)) caller_response.append( 'No instance {} found for event state {}. Exiting script.'.format(instance_id, state)) return caller_response @@ -323,6 +327,8 @@ def lambda_handler( LOGGER.debug("private_dns_name: %s", str(private_dns_name) + lineno()) LOGGER.debug("private_host_name: %s", str(private_host_name) + lineno()) except: + LOGGER.error("Unable to extract IP Address or DNS name from instance: %s, exiting script", + str(instance_id) + lineno()) caller_response.append( 'Unable to extract IP Address or DNS name from instance {} for state {}. Exiting script.'.format(instance_id, state)) return caller_response @@ -330,7 +336,7 @@ def lambda_handler( # Get the subnet mask of the instance subnet_id = instance['Reservations'][0]['Instances'][0]['SubnetId'] LOGGER.debug("subnet_id: %s", str(subnet_id) + lineno()) - cidr_block = get_subnet_cidr_block(compute, subnet_id) + cidr_block = get_subnet_cidr_block(compute, instance_id, subnet_id) LOGGER.debug("cidr_block: %s", str(cidr_block) + lineno()) subnet_mask = int(cidr_block.split('/')[-1]) LOGGER.debug("subnet_mask: %s", str(subnet_mask) + lineno()) @@ -360,7 +366,7 @@ def lambda_handler( vpc_id = instance['Reservations'][0]['Instances'][0]['VpcId'] # Are DNS Hostnames and DNS Support enabled? - if is_dns_hostnames_enabled(compute, vpc_id): + if is_dns_hostnames_enabled(compute, instance_id, vpc_id): LOGGER.debug("DNS hostnames enabled for %s", str(vpc_id) + lineno()) else: LOGGER.error("DNS hostnames disabled for %s. You have to enable DNS hostnames to use Route 53 private hosted zones. %s", vpc_id, lineno()) @@ -376,7 +382,7 @@ def lambda_handler( publish_to_sns(sns_client, json.dumps(sns_msg)) return caller_response - if is_dns_support_enabled(compute, vpc_id): + if is_dns_support_enabled(compute, instance_id, vpc_id): LOGGER.debug("DNS support enabled for %s", str(vpc_id) + lineno()) else: LOGGER.error("DNS support disabled for %s. You have to enabled DNS support to use Route 53 private hosted zones. %s", vpc_id, lineno()) @@ -465,9 +471,9 @@ def lambda_handler( try: LOGGER.debug("trying to get dhcp option set id %s", lineno()) - dhcp_options_id = get_dhcp_option_set_id_for_vpc(compute, vpc_id) + dhcp_options_id = get_dhcp_option_set_id_for_vpc(compute, instance_id, vpc_id) LOGGER.debug("dhcp_options_id: %s", str(dhcp_options_id) + lineno()) - dhcp_configurations = get_dhcp_configurations(compute, dhcp_options_id) + dhcp_configurations = get_dhcp_configurations(compute, instance_id, dhcp_options_id) LOGGER.debug("dhcp_configurations: %s", str(get_dhcp_configurations) + lineno()) except BaseException as err: @@ -700,7 +706,7 @@ def lambda_handler( final_private_hostname = private_host_name final_hosted_zone_name = private_hosted_zone_name else: # none of the use-case and no suitable zone to create the A record - LOGGER.info( + LOGGER.error( "instance: %s, No DHCP Associated for VPC and no custom tags. Exiting Script", instance_id) # nothing to do, exit out script caller_response.append( @@ -891,20 +897,25 @@ def lambda_handler( if delete_response == 'NoSuchHostedZone': delete_records = False caller_response.append("Failed, no such zone: " + response_text) + LOGGER.info("instance: %s, NoSuchHostedZone: %s", + instance_id, response_text + lineno()) elif delete_response == 'InvalidChangeBatch-RecordNotFound': caller_response.append("Failed, Record Not Found: " + response_text) + LOGGER.info("instance: %s, InvalidChangeBatch-RecordNotFound: %s", + instance_id, response_text + lineno()) elif delete_response == 'InvalidChangeBatch-RecordDoNotMatch': delete_records = False - caller_response.append( - "Failed, requested delete do not match existing record: " + response_text) + caller_response.append("Failed, requested delete do not match existing record: " + response_text) + LOGGER.info("instance: %s, InvalidChangeBatch-RecordDoNotMatch: %s", + instance_id, response_text + lineno()) elif delete_response == {}: delete_records = False - caller_response.append( - "Failed, could NOT delete Record: " + response_text) + caller_response.append("Failed, could NOT delete Record: " + response_text) + LOGGER.info("instance: %s, Failed, could NOT delete Record: %s", + instance_id, response_text + lineno()) else: + LOGGER.info("instance: %s, Success: %s",instance_id, response_text + lineno()) caller_response.append("Success: " + response_text) - LOGGER.info("instance: %s, Success: %s", - instance_id, response_text + lineno()) except BaseException as err: delete_records = False LOGGER.error("instance: %s, unexpected error. %s\n", @@ -944,16 +955,24 @@ def lambda_handler( if delete_response == 'NoSuchHostedZone': delete_records = False caller_response.append("Failed, no such zone: " + response_text) + LOGGER.info("instance: %s, NoSuchHostedZone: %s", + instance_id, response_text + lineno()) elif delete_response == 'InvalidChangeBatch-RecordNotFound': caller_response.append("Failed, Record Not Found: " + response_text) + LOGGER.info("instance: %s, InvalidChangeBatch-RecordNotFound: %s", + instance_id, response_text + lineno()) elif delete_response == 'InvalidChangeBatch-RecordDoNotMatch': delete_records = False caller_response.append( "Failed, requested delete do not match existing record: " + response_text) + LOGGER.info("instance: %s, InvalidChangeBatch-RecordDoNotMatch: %s", + instance_id, response_text + lineno()) elif delete_response == {}: delete_records = False caller_response.append( "Failed, could NOT delete Record: " + response_text) + LOGGER.info("instance: %s, Failed Could NOT delete Record: %s", + instance_id, response_text + lineno()) else: caller_response.append("Success: " + response_text) LOGGER.info("instance: %s, Success: %s", @@ -987,16 +1006,24 @@ def lambda_handler( if delete_response == 'NoSuchHostedZone': delete_records = False caller_response.append("Failed, no such zone: " + response_text) + LOGGER.info("instance: %s, NoSuchHostedZone: %s", + instance_id, response_text + lineno()) elif delete_response == 'InvalidChangeBatch-RecordNotFound': caller_response.append("Failed, Record Not Found: " + response_text) + LOGGER.info("instance: %s, InvalidChangeBatch-RecordNotFound: %s", + instance_id, response_text + lineno()) elif delete_response == 'InvalidChangeBatch-RecordDoNotMatch': delete_records = False caller_response.append( "Failed, requested delete do not match existing record: " + response_text) + LOGGER.info("instance: %s, InvalidChangeBatch-RecordDoNotMatch: %s", + instance_id, response_text + lineno()) elif delete_response == {}: delete_records = False caller_response.append( "Failed, could NOT delete Record: " + response_text) + LOGGER.info("instance: %s, Failed could NOT delete Record: %s", + instance_id, response_text + lineno()) else: caller_response.append("Success: " + response_text) LOGGER.info("instance: %s, Success: %s", @@ -1038,16 +1065,24 @@ def lambda_handler( if delete_response == 'NoSuchHostedZone': delete_records = False caller_response.append("Failed, no such zone: " + response_text) + LOGGER.info("instance: %s, NoSuchHostedZone: %s", + instance_id, response_text + lineno()) elif delete_response == 'InvalidChangeBatch-RecordNotFound': caller_response.append("Failed, Record Not Found: " + response_text) + LOGGER.info("instance: %s, InvalidChangeBatch-RecordNotFound: %s", + instance_id, response_text + lineno()) elif delete_response == 'InvalidChangeBatch-RecordDoNotMatch': delete_records = False caller_response.append( "Failed, requested delete do not match existing record: " + response_text) + LOGGER.info("instance: %s, InvalidChangeBatch-RecordDoNotMatch: %s", + instance_id, response_text + lineno()) elif delete_response == {}: delete_records = False caller_response.append( "Failed, could NOT delete Record: " + response_text) + LOGGER.info("instance: %s, Failed could NOT delete Record: %s", + instance_id, response_text + lineno()) else: caller_response.append("Success: " + response_text) LOGGER.info("instance: %s, Success: %s", @@ -1165,16 +1200,24 @@ def lambda_handler( if delete_response == 'NoSuchHostedZone': delete_records = False caller_response.append("Failed, no such zone: " + response_text) + LOGGER.info("instance: %s, NoSuchHostedZone: %s", + instance_id, response_text + lineno()) elif delete_response == 'InvalidChangeBatch-RecordNotFound': caller_response.append("Failed, Record Not Found: " + response_text) + LOGGER.info("instance: %s, InvalidChangeBatch-RecordNotFound: %s", + instance_id, response_text + lineno()) elif delete_response == 'InvalidChangeBatch-RecordDoNotMatch': delete_records = False caller_response.append( "Failed, requested delete do not match existing record: " + response_text) + LOGGER.info("instance: %s, InvalidChangeBatch-RecordDoNotMatch: %s", + instance_id, response_text + lineno()) elif delete_response == {}: delete_records = False caller_response.append( "Failed, could NOT delete Record: " + response_text) + LOGGER.info("instance: %s, Failed could NOT delete Record: %s", + instance_id, response_text + lineno()) else: caller_response.append("Success: " + response_text) LOGGER.info("instance: %s, Success: %s", @@ -1217,17 +1260,25 @@ def lambda_handler( if delete_response == 'NoSuchHostedZone': delete_records = False caller_response.append("Failed, no such zone: " + response_text) + LOGGER.info("instance: %s, NoSuchHostedZone: %s", + instance_id, response_text + lineno()) elif delete_response == 'InvalidChangeBatch-RecordNotFound': caller_response.append( "Failed, Record Not Found: " + response_text) + LOGGER.info("instance: %s, InvalidChangeBatch-RecordNotFound: %s", + instance_id, response_text + lineno()) elif delete_response == 'InvalidChangeBatch-RecordDoNotMatch': delete_records = False caller_response.append( "Failed, requested delete do not match existing record: " + response_text) + LOGGER.info("instance: %s, InvalidChangeBatch-RecordDoNotMatch: %s", + instance_id, response_text + lineno()) elif delete_response == {}: delete_records = False caller_response.append( "Failed, could NOT delete Record: " + response_text) + LOGGER.info("instance: %s, Failed could NOT delete Record: %s", + instance_id, response_text + lineno()) else: caller_response.append("Success: " + response_text) LOGGER.info("instance: %s, Success: %s", @@ -1255,6 +1306,8 @@ def lambda_handler( return caller_response else: + LOGGER.info("instance: %s, Successfully created recordsets. %s", + instance_id, lineno()) caller_response.insert(0, 'Successfully created recordsets') return caller_response @@ -1284,11 +1337,29 @@ def get_instances(client, instance_id): Get ec2 instance information :return: """ - try: - return client.describe_instances(InstanceIds=[instance_id]) - except ClientError as err: - LOGGER.info("unexpected error. %s\n", str(err) + lineno()) + i = 0 + instance_data = {} + while i < MAX_API_RETRY: + + try: + instance_data = client.describe_instances(InstanceIds=[instance_id]) + LOGGER.debug("%s", str(instance_data) + lineno()) + break + except ClientError as err: + + if 'An error occurred (RequestLimitExceeded)' in str(err): + LOGGER.info("instance: %s, describe_instances RequestLimitExceeded, %s", + instance_id, str(err) + lineno()) + else: + LOGGER.error("unexpected error. %s\n", str(err) + lineno()) + + i += 1 + LOGGER.info("instance: %s, describe_instances returned RequestLimitExceeded, waiting before retry. %s", + instance_id, str(i) + lineno()) + time.sleep(i) + + return instance_data # def list_hosted_zones(client): # """ @@ -1333,6 +1404,20 @@ def new_list_hosted_zones(client, instance_id): if hosted_zones == {}: LOGGER.error("instance: %s, list_hosted_zones returned error. Timed out. %s", instance_id, str(i) + lineno()) + if SNS_ENABLE: + try: + sns_msg = {} + sns_msg['instance_id'] = instance_id + sns_msg['account_id'] = get_caller_account_id() + sns_msg['client'] = 'route53' + sns_msg['boto3_method'] = 'list_hosted_zones' + sns_msg['message'] = 'list_hosted_zones timed out' + publish_to_sns(get_sns_client(), json.dumps(sns_msg)) + LOGGER.info("instance: %s, sending sns message %s", + json.dumps(sns_msg) + lineno()) + except: + LOGGER.info("instance: %s, error: %s", instance_id, + str(sys.exc_info()[0]) + lineno()) return hosted_zones @@ -1418,11 +1503,20 @@ def get_item_from_dynamodb_table(client, table, instance_id): if 'Item' in item: LOGGER.debug("returned item:" " %s", str(item['Item']['InstanceAttributes']['S']) + lineno()) - item = item['Item']['InstanceAttributes']['S'].replace("'", '"') - item = item.replace(" True,", ' "True",') - item = item.replace(" False,", ' "False",') - LOGGER.debug("item: %s", str(item) + lineno()) - return json.loads(item) + + instance_attribute = item['Item']['InstanceAttributes']['S'] + + # these 7 lines are handling for legacy DDB items created prior how items were written + instance_attribute = instance_attribute.replace("'", '"') + instance_attribute = instance_attribute.replace(" True,", ' "True",') + instance_attribute = instance_attribute.replace(" True}", ' "True"}') + instance_attribute = instance_attribute.replace(" True,", ' "True",') + instance_attribute = instance_attribute.replace(" False,", ' "False",') + instance_attribute = instance_attribute.replace(" False,", ' "False",') + instance_attribute = instance_attribute.replace(" False,", ' "False",') + + LOGGER.debug("item: %s", str(instance_attribute) + lineno()) + return json.loads(instance_attribute) return None except ClientError as err: LOGGER.error("instance: %s, unexpected error. %s\n", @@ -1467,30 +1561,44 @@ def get_private_hosted_zones(hosted_zones): LOGGER.info("unexpected error. %s\n", str(sys.exc_info()[0]) + lineno()) -def get_dhcp_option_set_id_for_vpc(client, vpc_id): +def get_dhcp_option_set_id_for_vpc(client, instance_id, vpc_id): """ Get the dhcp option set from vpc :param client: :param vpc_id: :return: """ - try: - option_sets = {} - results = client.describe_vpcs() + i = 0 + while i < MAX_API_RETRY: - for item in results['Vpcs']: + try: + + option_sets = {} + results = client.describe_vpcs() + for item in results['Vpcs']: + if 'DhcpOptionsId' in item: + option_sets[str(item['VpcId'])] = item['DhcpOptionsId'] + else: + option_sets[str(item['VpcId'])] = None + option_set_for_vpc = option_sets[vpc_id] + LOGGER.debug("option set for vpc: %s", + str(option_set_for_vpc) + lineno()) + break + except ClientError as err: - if 'DhcpOptionsId' in item: - option_sets[str(item['VpcId'])] = item['DhcpOptionsId'] + if 'An error occurred (RequestLimitExceeded)' in str(err): + LOGGER.info("instance: %s, describe_vpcs RequestLimitExceeded, %s", + instance_id, str(err) + lineno()) else: - option_sets[str(item['VpcId'])] = None - - return option_sets[vpc_id] + LOGGER.error("unexpected error. %s\n", str(err) + lineno()) - except ClientError as err: - LOGGER.info("unexpected error. %s\n", str(err) + lineno()) + i += 1 + LOGGER.info("instance: %s, describe_vpcs returned RequestLimitExceeded, waiting before retry. %s", + instance_id, str(i) + lineno()) + time.sleep(i) + return option_set_for_vpc def get_dynamodb_table(client, table_name): """ @@ -1606,7 +1714,6 @@ def new_change_resource_recordset(client, instance_id, zone_id, host_name, hoste LOGGER.debug("change_resource_record_sets UPSERT returned without error - response: %s", str(update_response) + lineno()) break - # return response except ClientError as err: if 'NoSuchHostedZone' in str(err) and 'No hosted zone found with ID' in str(err): LOGGER.error("Hosted zone not found error: %s", str(err) + lineno()) @@ -1629,6 +1736,10 @@ def new_change_resource_recordset(client, instance_id, zone_id, host_name, hoste instance_id, str(i) + lineno()) time.sleep(i) + if i >= MAX_API_RETRY: + LOGGER.error("instance: %s, change_resource_record_sets exceeded max retry of %s", + instance_id, MAX_API_RETRY + lineno()) + if update_response == {}: if SNS_ENABLE: try: @@ -1641,6 +1752,8 @@ def new_change_resource_recordset(client, instance_id, zone_id, host_name, hoste sns_msg['change_resource_record_sets'] = { 'HostedZoneId': zone_id, 'ChangeBatch': change_batch} publish_to_sns(get_sns_client(), json.dumps(sns_msg)) + LOGGER.info("instance: %s, sending sns message %s", + json.dumps(sns_msg) + lineno()) except: LOGGER.info("instance: %s, error: %s", instance_id, str(sys.exc_info()[0]) + lineno()) @@ -1804,6 +1917,24 @@ def new_get_resource_record(client, instance_id, zone_id, host_name, hosted_zone instance_id, str(i) + lineno()) time.sleep(i) + if i >= MAX_API_RETRY: + LOGGER.error("instance: %s, list_resource_record_sets exceeded max retry of %s", + instance_id, MAX_API_RETRY + lineno()) + if SNS_ENABLE: + try: + sns_msg = {} + sns_msg['instance_id'] = instance_id + sns_msg['account_id'] = get_caller_account_id() + sns_msg['client'] = 'route53' + sns_msg['boto3_method'] = 'list_resource_record_sets' + sns_msg['message'] = 'list_resource_record_sets timed out' + publish_to_sns(get_sns_client(), json.dumps(sns_msg)) + LOGGER.info("instance: %s, sending sns message %s", + json.dumps(sns_msg) + lineno()) + except: + LOGGER.info("instance: %s, error: %s", instance_id, + str(sys.exc_info()[0]) + lineno()) + return value @@ -1941,6 +2072,10 @@ def new_delete_resource_record(client, instance_id, zone_id, host_name, hosted_z instance_id, str(i) + lineno()) time.sleep(i) + if i >= MAX_API_RETRY: + LOGGER.error("instance: %s, change_resource_record_sets exceeded max retry of %s", + instance_id, MAX_API_RETRY + lineno()) + if (delete_response == {} or delete_response == "InvalidChangeBatch-RecordDoNotMatch"): if SNS_ENABLE: try: @@ -1953,6 +2088,8 @@ def new_delete_resource_record(client, instance_id, zone_id, host_name, hosted_z sns_msg['change_resource_record_sets'] = { 'HostedZoneId': zone_id, 'ChangeBatch': change_batch} publish_to_sns(get_sns_client(), json.dumps(sns_msg)) + LOGGER.info("instance: %s, sending sns message %s", + json.dumps(sns_msg) + lineno()) except: LOGGER.info("instance: %s, error: %s", instance_id, str(sys.exc_info()[0]) + lineno()) @@ -2072,34 +2209,50 @@ def is_valid_zone(route53, instance_id, zonename, hosted_zones, vpc_id, private_ LOGGER.info("unexpected error. %s\n", str(sys.exc_info()[0]) + lineno()) -def get_dhcp_configurations(client, dhcp_options_id): +def get_dhcp_configurations(client, instance_id, dhcp_options_id): """ This function returns the names of the zones/domains that are in the option set. :param client: :param dhcp_options_id: :return: """ - try: - zone_names = [] - response = client.describe_dhcp_options( - DhcpOptionsIds=[ - str(dhcp_options_id) - ] - ) - LOGGER.debug("response: %s", str(response) + lineno()) - dhcp_configurations = response['DhcpOptions'][0]['DhcpConfigurations'] - LOGGER.debug("dhcp_configurations: %s", str(dhcp_configurations) + lineno()) - for configuration in dhcp_configurations: - if configuration['Key'] == 'domain-name': # only if the key is domain-name - for item in configuration['Values']: - LOGGER.debug("item: %s", str(item) + lineno()) - zone_names.append(str(item['Value']) + '.') - LOGGER.debug("zone name: %s", str(zone_names) + lineno()) - return zone_names - except: - LOGGER.info("unexpected error. %s\n", str(sys.exc_info()[0]) + lineno()) + i = 0 + while i < MAX_API_RETRY: + + try: + zone_names = [] + + response = client.describe_dhcp_options( + DhcpOptionsIds=[ + str(dhcp_options_id) + ] + ) + LOGGER.debug("response: %s", str(response) + lineno()) + dhcp_configurations = response['DhcpOptions'][0]['DhcpConfigurations'] + LOGGER.debug("dhcp_configurations: %s", str(dhcp_configurations) + lineno()) + for configuration in dhcp_configurations: + if configuration['Key'] == 'domain-name': # only if the key is domain-name + for item in configuration['Values']: + LOGGER.debug("item: %s", str(item) + lineno()) + zone_names.append(str(item['Value']) + '.') + LOGGER.debug("zone name: %s", str(zone_names) + lineno()) + break + except ClientError as err: + + if 'An error occurred (RequestLimitExceeded)' in str(err): + LOGGER.info("instance: %s, describe_dhcp_options RequestLimitExceeded, %s", + instance_id, str(err) + lineno()) + else: + LOGGER.error("unexpected error. %s\n", str(err) + lineno()) + + i += 1 + LOGGER.info("instance: %s, describe_dhcp_options returned RequestLimitExceeded, waiting before retry. %s", + instance_id, str(i) + lineno()) + time.sleep(i) + + return zone_names def new_reverse_list(ip_list): """ @@ -2222,42 +2375,77 @@ def remove_empty_from_dict(dictionary): LOGGER.info("unexpected error. %s\n", str(sys.exc_info()[0]) + lineno()) -def is_dns_hostnames_enabled(client, vpc_id): +def is_dns_hostnames_enabled(client, instance_id, vpc_id): """ Whether dns hostnames is enabled :param client: :param vpc_id: :return: """ - try: - response = client.describe_vpc_attribute( - Attribute='enableDnsHostnames', - VpcId=vpc_id - ) - LOGGER.debug("%s", str(response) + lineno()) - return response['EnableDnsHostnames']['Value'] - except: - LOGGER.info("unexpected error. %s\n", str(sys.exc_info()[0]) + lineno()) + i = 0 + while i < MAX_API_RETRY: + try: + response = client.describe_vpc_attribute( + Attribute='enableDnsHostnames', + VpcId=vpc_id + ) + + LOGGER.debug("%s", str(response) + lineno()) + dns_hostname_enabled = response['EnableDnsHostnames']['Value'] + break + + except ClientError as err: + + if 'An error occurred (RequestLimitExceeded)' in str(err): + LOGGER.info("instance: %s, describe_vpc_attribute RequestLimitExceeded, %s", + instance_id, str(err) + lineno()) + else: + LOGGER.error("unexpected error. %s\n", str(err) + lineno()) + + i += 1 + LOGGER.info("instance: %s, describe_vpc_attribute returned RequestLimitExceeded, waiting before retry. %s", + instance_id, str(i) + lineno()) + time.sleep(i) + + return dns_hostname_enabled -def is_dns_support_enabled(client, vpc_id): +def is_dns_support_enabled(client, instance_id, vpc_id): """ Whether dns support is enabled :param client: :param vpc_id: :return: """ - try: - response = client.describe_vpc_attribute( - Attribute='enableDnsSupport', - VpcId=vpc_id - ) - LOGGER.debug('response2: %s', str(response) + lineno()) - return response['EnableDnsSupport']['Value'] - except: - LOGGER.info("unexpected error. %s\n", str(sys.exc_info()[0]) + lineno()) + i = 0 + while i < MAX_API_RETRY: + + try: + response = client.describe_vpc_attribute( + Attribute='enableDnsSupport', + VpcId=vpc_id + ) + + LOGGER.debug('response2: %s', str(response) + lineno()) + dns_suppport_enabled = response['EnableDnsSupport']['Value'] + break + + except ClientError as err: + + if 'An error occurred (RequestLimitExceeded)' in str(err): + LOGGER.info("instance: %s, describe_vpc_attribute RequestLimitExceeded, %s", + instance_id, str(err) + lineno()) + else: + LOGGER.error("unexpected error. %s\n", str(err) + lineno()) + + i += 1 + LOGGER.info("instance: %s, describe_vpc_attribute returned RequestLimitExceeded, waiting before retry. %s", + instance_id, str(i) + lineno()) + time.sleep(i) + + return dns_suppport_enabled # def get_hosted_zone_properties(client, zone_id): @@ -2318,25 +2506,62 @@ def new_get_hosted_zone_properties(client, instance_id, zone_id): instance_id, str(i) + lineno()) time.sleep(i) + if hosted_zone_properties == {}: + LOGGER.error("instance: %s, get_hosted_zone exceeded max retry of %s", + instance_id, MAX_API_RETRY + lineno()) + if SNS_ENABLE: + try: + sns_msg = {} + sns_msg['instance_id'] = instance_id + sns_msg['account_id'] = get_caller_account_id() + sns_msg['client'] = 'route53' + sns_msg['boto3_method'] = 'get_hosted_zone' + sns_msg['message'] = 'get_hosted_zone timed out' + publish_to_sns(get_sns_client(), json.dumps(sns_msg)) + LOGGER.info("instance: %s, sending sns message %s", + json.dumps(sns_msg) + lineno()) + except: + LOGGER.info("instance: %s, error: %s", instance_id, + str(sys.exc_info()[0]) + lineno()) + + return hosted_zone_properties -def get_subnet_cidr_block(client, subnet_id): +def get_subnet_cidr_block(client, instance_id, subnet_id): """ Get subnect cidr block :param client: :param subnet_id: :return: """ - try: - response = client.describe_subnets( - SubnetIds=[ - subnet_id - ] - ) - return response['Subnets'][0]['CidrBlock'] - except: - LOGGER.info("unexpected error. %s\n", str(sys.exc_info()[0]) + lineno()) + + + i = 0 + while i < MAX_API_RETRY: + try: + response = client.describe_subnets( + SubnetIds=[ + subnet_id + ] + ) + cidr_block = response['Subnets'][0]['CidrBlock'] + LOGGER.debug("%s", str(cidr_block) + lineno()) + break + except ClientError as err: + + if 'An error occurred (RequestLimitExceeded)' in str(err): + LOGGER.error("instance: %s, describe_subnets RequestLimitExceeded, %s", + instance_id, str(err) + lineno()) + else: + LOGGER.error("unexpected error. %s\n", str(err) + lineno()) + + i += 1 + LOGGER.info("instance: %s, describe_subnets returned RequestLimitExceeded, waiting before retry. %s", + instance_id, str(i) + lineno()) + time.sleep(i) + + return cidr_block def initialize_heritage(application_name, version='null', items={}):