-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathemergency_repair.py
More file actions
116 lines (96 loc) · 4.42 KB
/
emergency_repair.py
File metadata and controls
116 lines (96 loc) · 4.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import copy
import os
import re

from bs4 import BeautifulSoup
# Pages to repair, relative to base_dir.
target_files = [
    'posts/best-noise-cancelling-headphones-2026.html',
    'posts/best-podcast-microphones-2026.html',
    'posts/best-ultrawide-monitors-2026.html',
]
# Page whose header and favicon links are copied into every target page.
ref_file = 'posts/shure-sm7b-review.html'
base_dir = r"C:\Users\PMLS\Desktop\Youtube Shorts\b2b_blog"

# Query parameter every Amazon link must carry.
AFFILIATE_PARAM = 'tag=techstackglob-20'
# rel tokens every Amazon link must carry.
REQUIRED_RELS = ('nofollow', 'noopener', 'sponsored')
# Compiled once; both patterns are reused for every page.
AMAZON_HREF = re.compile(r'amazon\.com')
CTA_BUTTON_CLASS = re.compile(r'btn-(primary|secondary|tertiary)')


def is_icon_rel(rel_value):
    """Return True when a ``rel`` value names a favicon-style link.

    Used as a bs4 attribute filter, so it may be called with ``None`` for
    links that have no ``rel``. The test is a substring match, which also
    covers ``apple-touch-icon``.
    """
    return bool(rel_value) and 'icon' in rel_value


def ensure_affiliate_tag(href: str) -> str:
    """Return *href* with the affiliate tag present in its query string.

    An href that already carries the tag is returned unchanged. Otherwise
    the tag becomes the first query parameter. Any ``#fragment`` is split
    off first so the query is inserted before it rather than inside it.
    """
    if f'?{AFFILIATE_PARAM}' in href or f'&{AFFILIATE_PARAM}' in href:
        return href
    url, hash_sep, fragment = href.partition('#')
    if '?' in url:
        url = url.replace('?', f'?{AFFILIATE_PARAM}&', 1)
    else:
        url = f'{url}?{AFFILIATE_PARAM}'
    return url + hash_sep + fragment


def merged_rel(current_rel):
    """Return the corrected ``rel`` string, or None if nothing is missing.

    *current_rel* may be a whitespace-separated string or a list of tokens.
    When any required token is absent, the result is the sorted union of the
    existing and required tokens joined by single spaces.
    """
    if isinstance(current_rel, str):
        tokens = current_rel.split()
    else:
        tokens = list(current_rel or [])
    if all(required in tokens for required in REQUIRED_RELS):
        return None
    return " ".join(sorted(set(tokens) | set(REQUIRED_RELS)))


def repair_page(soup, ref_header, ref_favicons):
    """Apply every repair step to *soup* in place.

    Args:
        soup: parsed target page; modified in place.
        ref_header: header tag from the reference page, or None.
        ref_favicons: icon ``<link>`` tags from the reference page.

    Returns:
        A tuple ``(links_fixed, buttons_removed, favicons_standardized)``
        where the first two are counts and the last is a bool.
    """
    # 1. Navigation Restoration
    # Swap the header directly. Going through a placeholder <div> and
    # soup.find("div") would hit the first <div> in the document instead.
    existing_header = soup.find('header', class_='glass-header')
    if existing_header is not None and ref_header is not None:
        # copy.copy is the supported way to clone a tag; Tag has no .copy().
        existing_header.replace_with(copy.copy(ref_header))

    # 2. Affiliate Link Integrity: ensure tag, target and rel on Amazon links
    links_fixed = 0
    for link in soup.find_all('a', href=AMAZON_HREF):
        changed = False

        href = link.get('href', '')
        tagged_href = ensure_affiliate_tag(href)
        if tagged_href != href:
            link['href'] = tagged_href
            changed = True

        if link.get('target') != '_blank':
            link['target'] = '_blank'
            changed = True

        new_rel = merged_rel(link.get('rel', []))
        if new_rel is not None:
            link['rel'] = new_rel
            changed = True

        if changed:
            links_fixed += 1

    # 3. Remove non-affiliate CTA buttons
    buttons_removed = 0
    for btn in soup.find_all('a', class_=CTA_BUTTON_CLASS):
        if 'amazon.com' not in btn.get('href', ''):
            btn.decompose()
            buttons_removed += 1

    # 4. Favicon Restore: drop existing icon links, then insert the reference set
    for old_fav in soup.find_all('link', rel=is_icon_rel):
        old_fav.decompose()

    favicons_standardized = False
    head = soup.find('head')
    if head is not None and ref_favicons:
        # The inserted tags are <link>, so the last <meta> stays the same
        # for the whole loop and can be looked up once.
        metas = head.find_all('meta')
        anchor = metas[-1] if metas else None
        # Each tag is inserted at the same spot, so walking the reference
        # list backwards leaves the icons in their original order.
        for fav in reversed(ref_favicons):
            if anchor is not None:
                anchor.insert_after(copy.copy(fav))
            else:
                head.insert(0, copy.copy(fav))
        favicons_standardized = True

    # Bump v1 to v5
    for icon_link in soup.find_all('link', rel=is_icon_rel):
        icon_href = icon_link.get('href')  # tolerate icon links with no href
        if icon_href and '?v=1' in icon_href:
            icon_link['href'] = icon_href.replace('?v=1', '?v=5')

    # 5. Mobile toggle fix: make sure script.js is loaded at the end of <body>
    if not soup.find('script', src='../script.js') and soup.body is not None:
        soup.body.append(soup.new_tag('script', src='../script.js'))

    return links_fixed, buttons_removed, favicons_standardized


def main():
    """Repair every page in target_files and print a summary of the changes."""
    # Read reference file for header and favicon
    with open(os.path.join(base_dir, ref_file), 'r', encoding='utf-8') as f:
        ref_soup = BeautifulSoup(f, 'html.parser')
    ref_header = ref_soup.find('header', class_='glass-header')
    ref_favicons = ref_soup.find_all('link', rel=is_icon_rel)

    affiliate_links_fixed = 0
    non_affiliate_buttons_removed = 0
    favicon_standardized_count = 0

    for file_path in target_files:
        full_path = os.path.join(base_dir, file_path)
        with open(full_path, 'r', encoding='utf-8') as f:
            soup = BeautifulSoup(f.read(), 'html.parser')

        links_fixed, buttons_removed, favicons_done = repair_page(
            soup, ref_header, ref_favicons
        )
        affiliate_links_fixed += links_fixed
        non_affiliate_buttons_removed += buttons_removed
        favicon_standardized_count += int(favicons_done)

        # Write out without pretty-printing: prettify() adds whitespace
        # between inline elements, which changes how the page renders.
        with open(full_path, 'w', encoding='utf-8') as f:
            f.write(soup.decode(formatter="html"))

    print(f"Affiliate Links Fixed: {affiliate_links_fixed}")
    print(f"Non-Affiliate Buttons Removed: {non_affiliate_buttons_removed}")
    print(f"Favicon Standardized Pages: {favicon_standardized_count}")


if __name__ == "__main__":
    main()