-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerate_sitemap.py
More file actions
63 lines (49 loc) · 2.39 KB
/
generate_sitemap.py
File metadata and controls
63 lines (49 loc) · 2.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import os
from datetime import datetime
# Absolute path of the directory that contains this script. It is the root
# that gets scanned for HTML files and where sitemap.xml/.nojekyll are written.
BASE_DIR = os.path.abspath(os.path.dirname(__file__))
# Site origin (no trailing slash) that is prefixed to every sitemap URL.
BASE_URL = "https://techstackglobal.github.io"
def get_html_files(base_dir=None, skip_dirs=None):
    """Collect the paths of all ``.html`` files under a directory tree.

    Args:
        base_dir: Directory to scan. Defaults to the module-level
            ``BASE_DIR`` when omitted, which preserves the original
            zero-argument behaviour.
        skip_dirs: Iterable of directory *names* to prune from the walk.
            A directory is skipped when its name matches, at any depth.
            Defaults to the project's tooling/VCS/scratch directories.

    Returns:
        A list of file paths (each ``os.path.join(root, name)``) in
        ``os.walk`` order; callers that need a stable order should sort it.
    """
    root_dir = BASE_DIR if base_dir is None else base_dir
    if skip_dirs is None:
        skip_dirs = (
            '.git', '.vscode', 'node_modules', '.gemini', '.agent',
            '.planning', '.venv', 'tmp', 'tools', 'blogging_project',
        )
    pruned = frozenset(skip_dirs)

    html_files = []
    for root, dirs, files in os.walk(root_dir):
        # Assign in place: os.walk only honours pruning when the very list
        # object it handed out is mutated.
        dirs[:] = [d for d in dirs if d not in pruned]
        html_files.extend(
            os.path.join(root, name)
            for name in files
            if name.endswith('.html')
        )
    return html_files
def _sitemap_priority(rel_path):
    """Return the ``<priority>`` string for a site-relative page path."""
    # The two hub pages take precedence over the "posts/" rule, matching the
    # original ordering where this check ran last and overwrote the others.
    if rel_path in ("blog.html", "amazon-stack.html"):
        return "0.9"
    if "posts/" in rel_path:
        return "0.7"
    return "0.8"


def _sitemap_entry(loc, lastmod, changefreq, priority):
    """Render one ``<url>`` element; *loc* must already be XML-escaped."""
    return (
        ' <url>\n'
        f' <loc>{loc}</loc>\n'
        f' <lastmod>{lastmod}</lastmod>\n'
        f' <changefreq>{changefreq}</changefreq>\n'
        f' <priority>{priority}</priority>\n'
        ' </url>\n'
    )


def main():
    """Regenerate ``sitemap.xml`` and an empty ``.nojekyll`` in ``BASE_DIR``.

    The sitemap lists the site root first (daily, priority 1.0) followed by
    every HTML file returned by ``get_html_files()``, sorted by path, except
    ``404.html``, ``thank-you.html``, ``index.html`` and any path containing
    ``google``. Every entry uses today's local date as ``<lastmod>``.

    Page paths are percent-encoded and the resulting URL is XML-escaped, as
    the sitemap protocol requires, so file names containing characters such
    as ``&``, spaces or non-ASCII text no longer yield an invalid document.
    Paths made only of unreserved characters are emitted unchanged.
    """
    # Imported here so the function carries its own dependencies.
    from urllib.parse import quote
    from xml.sax.saxutils import escape

    date_str = datetime.now().strftime('%Y-%m-%d')
    sitemap_path = os.path.join(BASE_DIR, 'sitemap.xml')

    # XML prolog plus the <urlset> root with the sitemap 0.9 namespace and
    # its XSD schema location.
    parts = [
        '<?xml version="1.0" encoding="UTF-8"?>\n',
        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" '
        'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
        'xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 '
        'http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">\n',
        # The site root stands in for index.html, which is skipped below.
        _sitemap_entry(escape(f"{BASE_URL}/"), date_str, "daily", "1.0"),
    ]

    excluded = {'404.html', 'thank-you.html', 'index.html'}
    for path in sorted(get_html_files()):
        # Normalise Windows separators so the path can be used in a URL.
        rel_path = os.path.relpath(path, BASE_DIR).replace('\\', '/')
        # NOTE(review): the 'google' substring test presumably targets Google
        # site-verification files, but it also drops any page whose path
        # merely contains "google" - confirm this is intended.
        if rel_path in excluded or 'google' in rel_path:
            continue
        # quote() leaves "/" intact; escape() then handles XML entities.
        loc = escape(f"{BASE_URL}/{quote(rel_path)}")
        parts.append(
            _sitemap_entry(loc, date_str, "weekly", _sitemap_priority(rel_path))
        )
    parts.append('</urlset>')

    # Binary mode keeps "\n" line endings on every platform.
    with open(sitemap_path, 'wb') as f:
        f.write(''.join(parts).encode('utf-8'))

    # Also ensure .nojekyll exists (created or truncated to an empty file).
    nojekyll_path = os.path.join(BASE_DIR, '.nojekyll')
    with open(nojekyll_path, 'w'):
        pass

    print("GSD Advanced Sitemap and .nojekyll refreshed.")
# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()