diff --git a/save_ebook.py b/save_ebook.py index b1992df..da3b451 100644 --- a/save_ebook.py +++ b/save_ebook.py @@ -2,6 +2,7 @@ # cd d:\tom\scripts # py save_ebook.py +import re import requests twig_base = "https://twigserial.wordpress.com" @@ -679,11 +680,36 @@ # out.write(r.text) # out.write("\n\n" + "="*50 + "\n\n") +def extract_chapter_content(html): + title_match = re.search(r'

(.*?)

', html, re.DOTALL) + title = title_match.group(0) if title_match else "" + content_match = re.search( + r'
(.*?)
', + html, + re.DOTALL, + ) + content = content_match.group(1).strip() if content_match else "" + if content: + content = re.sub( + r'