Skip to content

Commit cd011af

Browse files
lza-upgrade: Improve network drift detection script (#1245)
* fix multi-line f-strings. fix not parsing all VPCs * add detection of modified route entries * linting
1 parent c818f69 commit cd011af

File tree

2 files changed

+125
-12
lines changed

2 files changed

+125
-12
lines changed

reference-artifacts/Custom-Scripts/lza-upgrade/tools/network-drift-detection/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ This section details drift in subnets and their route tables. Careful inspection
7474

7575
|Key|Description|Notes and upgrade impact|
7676
|---|-----------|------------------------|
77+
|route_table_entries_mismatches|Difference in route entries between ASEA config and AWS account|Route entries may have been modified manually; **the changes will be overwritten during the upgrade**. Note: the script doesn't handle all route target types, so manual verification is still recommended.|
7778
|route_tables_not_deployed|Route tables found in the ASEA config, but not in the AWS account|These route tables may have been manually removed and **will be re-created during the upgrade**|
7879
|route_tables_not_in_config|Route tables not found in the ASEA config, but are present in the AWS account|This is for information, these route tables won't be modified during the upgrade. See note below.|
7980
|subnet_route_table_mismatches|There is a configuration difference between the ASEA config and the current state of the route table|These route tables may have been manually modified, **the changes will be overwritten during the upgrade**|

reference-artifacts/Custom-Scripts/lza-upgrade/tools/network-drift-detection/lza-upgrade-check.py

Lines changed: 124 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
import boto3
1010
from botocore.exceptions import ClientError
1111

12+
# Configure the root logger only when the caller opts in through the LOGLEVEL
# environment variable; otherwise the logging module's defaults stay in effect.
if "LOGLEVEL" in os.environ:
    logging.basicConfig(
        level=os.environ["LOGLEVEL"],
        format='%(levelname)s:%(message)s',
    )
1215
logger = logging.getLogger(__name__)
1316

1417

@@ -139,8 +142,7 @@ def process_vpc_config(account, vpc, vpc_dict):
139142
def flatten_subnet_config(vpc_name, subnets):
140143
"""Takes subnet object from ASEA config and generate list of subnets to be created per AZ"""
141144
return [
142-
{"Name": f"{subnet['name']}_{vpc_name}_az{d['az']}_net",
143-
"route-table": f"{d['route-table']}_rt"}
145+
{"Name": f"{subnet['name']}_{vpc_name}_az{d['az']}_net", "route-table": f"{d['route-table']}_rt"} # nopep8
144146
for subnet in subnets
145147
for d in subnet["definitions"]
146148
if not d.get('disabled', False)
@@ -278,8 +280,7 @@ def get_transit_gateway_route_tables(ec2_client, tgw_id: str) -> List[Dict]:
278280
blackhole_routes = get_transit_gateway_routes(
279281
ec2_client, tgwrt["TransitGatewayRouteTableId"], "blackhole")
280282
except Exception as e:
281-
logger.error(f"Failed to get routes for table {
282-
tgwrt['TransitGatewayRouteTableId']}: {str(e)}")
283+
logger.error(f"Failed to get routes for table {tgwrt['TransitGatewayRouteTableId']}: {str(e)}") # nopep8
283284
active_routes = []
284285

285286
name = next((tag["Value"] for tag in tgwrt.get("Tags", [])
@@ -322,8 +323,7 @@ def get_transit_gateway_routes(ec2_client, tgwrt_id: str, state: str) -> List[Di
322323
"""
323324
valid_states = ['active', 'blackhole', 'deleted', 'deleting', 'pending']
324325
if state not in valid_states:
325-
raise ValueError(f"Invalid route state. Must be one of: {
326-
', '.join(valid_states)}")
326+
raise ValueError(f"Invalid route state. Must be one of: {', '.join(valid_states)}") # nopep8
327327

328328
try:
329329
response = ec2_client.search_transit_gateway_routes(
@@ -376,10 +376,12 @@ def get_vpc_route_tables(ec2_client, vpcId):
376376
r = {"Name": name,
377377
"RouteTableId": rt["RouteTableId"],
378378
"VpcId": rt["VpcId"],
379+
"Main": any([asso["Main"] for asso in rt["Associations"] if "Main" in asso]),
379380
"SubnetAssociations": [asso["SubnetId"] for asso in rt["Associations"] if "SubnetId" in asso],
380381
"Routes": rt["Routes"],
381382
"RawResponse": rt
382383
}
384+
383385
rt_list.append(r)
384386

385387
return rt_list
@@ -447,6 +449,7 @@ def analyze_vpcs(vpc_from_config, account_list, role_to_assume, region):
447449
"subnets_not_deployed": [],
448450
"subnets_not_associated": [],
449451
"subnet_route_table_mismatches": [],
452+
"route_table_entries_mismatches": []
450453
}
451454
vpc_details = {}
452455

@@ -474,9 +477,12 @@ def analyze_vpcs(vpc_from_config, account_list, role_to_assume, region):
474477
if f"{rt['name']}_rt" == drt["Name"]]
475478
if len(crt) == 0:
476479
logger.warning(
477-
f"Route table {drt['Name']} exists in VPC {dv} but not in config")
478-
drift["route_tables_not_in_config"].append(
479-
{"RouteTable": drt["Name"], "Vpc": dv})
480+
f"Route table {drt['Name']} exists in VPC {dv} but not in config. {'(Main)' if drt['Main'] else ''}")
481+
482+
# Do not add to drift if it's the main route table and there are no subnet associations
483+
if not drt['Main'] or len(drt['SubnetAssociations']) > 0:
484+
drift["route_tables_not_in_config"].append(
485+
{"RouteTable": drt["Name"], "Vpc": dv})
480486
continue
481487

482488
# check if all route tables from the config exist in the environment
@@ -490,6 +496,16 @@ def analyze_vpcs(vpc_from_config, account_list, role_to_assume, region):
490496
drift["route_tables_not_deployed"].append(
491497
{"RouteTable": crt['name'], "Vpc": dv})
492498
continue
499+
elif len(drt) > 0:
500+
if len(drt) > 1:
501+
logger.error(
502+
f"More than one route table named {crt['name']} is deployed! LZA upgrade already executed?")
503+
504+
# matching config and deployed route, compare the entries
505+
rteDrift = compare_route_table(crt, drt[0])
506+
if len(rteDrift) > 0:
507+
drift["route_table_entries_mismatches"].append(
508+
{"RouteTable": crt['name'], "Vpc": dv, "Entries": rteDrift})
493509

494510
# check if there are more subnets than in the config
495511
d_subnets = get_vpc_subnets(client, deployed_vpcs[dv])
@@ -536,7 +552,104 @@ def analyze_vpcs(vpc_from_config, account_list, role_to_assume, region):
536552
vpc_details[dv] = {
537553
"Account": account, "RouteTables": d_rtables, "Subnets": d_subnets}
538554

539-
return {"Drift": drift, "VpcDetails": vpc_details}
555+
return {"Drift": drift, "VpcDetails": vpc_details}
556+
557+
558+
def compare_route_table(crt, drt):
    """
    Compare entries of configured and deployed route table

    crt: configured route table in ASEA config. Reads 'name' and 'routes';
         every route is expected to provide 'destination' and 'target'.
    drt: deployed route table in AWS VPC. Reads 'Routes'.

    Returns a list of {"Route": <destination>, "Reason": <text>} dicts, one
    per detected difference. An empty list means no drift was detected.

    Not compared: gateway endpoint routes in the config (targets s3 and
    dynamodb), deployed routes without a 'DestinationCidrBlock', deployed
    routes whose GatewayId is "local", and VPC peering routes on either side.
    """
    drift = []

    # ignoring gateway endpoint routes (S3 and DynamoDB) and local subnet routes
    # `or []` tolerates a config that carries an explicit `routes: null`
    config_routes = [r for r in (crt.get('routes') or [])
                     if r['target'].lower() not in ('s3', 'dynamodb')]
    deployed_routes = [r for r in (drt.get('Routes') or [])
                       if 'DestinationCidrBlock' in r and r.get("GatewayId", "") != "local"]

    if len(config_routes) != len(deployed_routes):
        logger.warning(
            f"Different number of routes in config and deployed route table for {crt['name']}")

    # check if all route entries in config matches what is deployed
    for config_route in config_routes:
        destination = config_route['destination']
        target = config_route['target']

        if target.lower() == "pcx":
            logger.warning(
                f"Route {destination} is a VPC peering route. Skipping check")
            continue

        matches = [r for r in deployed_routes
                   if destination == r['DestinationCidrBlock']]

        if not matches:
            logger.warning(f"Route {destination} exists in config but not found in deployed route table")  # nopep8
            drift.append(
                {"Route": destination, "Reason": "Not found in deployed route table"})
            continue

        if len(matches) > 1:
            # this should not be possible!
            logger.error(f"More than one route with destination {destination} is deployed!")  # nopep8
            drift.append(
                {"Route": destination,
                 "Reason": f"More than one route with destination {destination} found"})
            continue

        verdict = _check_route_target(target, matches[0])
        if verdict is None:
            # Keep each message on a single line: a replacement field broken
            # across lines in a single-quoted f-string is a SyntaxError
            # before Python 3.12.
            logger.error(f"Route target {target} is not supported!")
            drift.append(
                {"Route": destination, "Reason": f"Route target {target} is not supported!"})
            continue

        label, matched = verdict
        if not matched:
            logger.warning(f"Route {destination} not matched to {label}")
            drift.append(
                {"Route": destination, "Reason": f"Not matched to {label}"})

    # check if there are route entries deployed that are not in the config
    for deployed_route in deployed_routes:
        cidr = deployed_route['DestinationCidrBlock']

        if 'VpcPeeringConnectionId' in deployed_route:
            logger.warning(
                f"Route {cidr} is a VPC peering route. Skipping check")
            continue

        if not any(r['destination'] == cidr for r in config_routes):
            logger.warning(f"Route {cidr} exists in deployed route table but not found in config")  # nopep8
            drift.append({"Route": cidr, "Reason": "Not found in config"})

    return drift


def _check_route_target(target, deployed_route):
    """
    Check one deployed route against the target type named in the config.

    Returns (label, matched): label is the text used in drift messages and
    matched tells whether the deployed route carries the attribute expected
    for that target type. Returns None when the target type is not handled.
    """
    gateway_id = deployed_route.get("GatewayId", "")

    if target == "IGW":
        return "IGW", gateway_id.startswith("igw-")
    if target == "TGW":
        return "TGW", "TransitGatewayId" in deployed_route
    if target.startswith("NFW_"):
        # matched on a GatewayId that starts with "vpce-"
        return "NFW VPCE", gateway_id.startswith("vpce-")
    if target.startswith("NATGW_"):
        return "NATGW", "NatGatewayId" in deployed_route
    if target == "VGW":
        return "VGW", gateway_id.startswith("vgw-")
    if target.lower() == "firewall":
        return "firewall instance", "InstanceId" in deployed_route
    return None
540653

541654

542655
def get_tgw_from_config(asea_config, region):
@@ -697,8 +810,7 @@ def main():
697810
accel_prefix = args.accel_prefix
698811
asea_config_path = args.raw_config_path
699812
output_path = args.output_dir
700-
role_to_assume = args.role_to_assume if args.role_to_assume else f"{
701-
accel_prefix}-PipelineRole"
813+
role_to_assume = args.role_to_assume if args.role_to_assume else f"{accel_prefix}-PipelineRole" # nopep8
702814
parameter_table = f"{accel_prefix}-Parameters"
703815
shared_network_key = 'shared-network'
704816
home_region = args.home_region

0 commit comments

Comments
 (0)