Skip to content

Commit 8a7190f

Browse files
committed
Replace more bs4.find functions with select
1 parent: 5d873a3 · commit: 8a7190f

File tree

13 files changed: 142 additions and 117 deletions.

docs/conf.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -200,4 +200,6 @@ def setup(app):
200200
intersphinx_mapping = {
201201
'python': ('https://docs.python.org/3', None),
202202
'aiohttp': ('https://aiohttp.readthedocs.io/en/stable/', None),
203+
'beautifulsoup4': ('https://www.crummy.com/software/BeautifulSoup/bs4/doc/', None),
204+
'bs4': ('https://www.crummy.com/software/BeautifulSoup/bs4/doc/', None),
203205
}

tibiapy/parsers/bazaar.py

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
from tibiapy.models import (AchievementEntry, AjaxPaginator, Auction, AuctionFilters, BestiaryEntry, BlessingEntry,
1414
CharacterBazaar, CharmEntry, FamiliarEntry, Familiars, ItemEntry, ItemSummary, MountEntry,
1515
Mounts, OutfitEntry, OutfitImage, Outfits, SalesArgument, SkillEntry)
16-
from tibiapy.utils import (convert_line_breaks, parse_form_data, parse_integer, parse_pagination,
16+
from tibiapy.utils import (clean_text, convert_line_breaks, get_rows, parse_form_data, parse_integer, parse_pagination,
1717
parse_tibia_datetime, parse_tibiacom_content, try_enum)
1818

1919
results_pattern = re.compile(r"Results: (\d+)")
@@ -264,8 +264,8 @@ def _parse_auction(cls, auction_row: bs4.Tag, auction_id=0) -> Auction:
264264

265265
dates_containers = auction_row.select_one("div.ShortAuctionData")
266266
start_date_tag, end_date_tag, *_ = dates_containers.select("div.ShortAuctionDataValue")
267-
builder.auction_start(parse_tibia_datetime(start_date_tag.text.replace("\xa0", " ")))
268-
builder.auction_end(parse_tibia_datetime(end_date_tag.text.replace("\xa0", " ")))
267+
builder.auction_start(parse_tibia_datetime(clean_text(start_date_tag)))
268+
builder.auction_end(parse_tibia_datetime(clean_text(end_date_tag)))
269269
bids_container = auction_row.select_one("div.ShortAuctionDataBidRow")
270270
bid_tag = bids_container.select_one("div.ShortAuctionDataValue")
271271
bid_type_tag = bids_container.select("div.ShortAuctionDataLabel")[0]
@@ -324,7 +324,7 @@ def _parse_data_table(cls, table: bs4.Tag) -> Dict[str, str]:
324324
:class:`dict`
325325
A mapping containing the table's data.
326326
"""
327-
rows = table.select("tr")
327+
rows = get_rows(table)
328328
data = {}
329329
for row in rows:
330330
name = row.select_one("span").text
@@ -343,7 +343,7 @@ def _parse_skills_table(cls, builder: AuctionDetailsBuilder, table):
343343
table: :class:`bs4.Tag`
344344
The table containing the character's skill.
345345
"""
346-
rows = table.select("tr")
346+
rows = get_rows(table)
347347
skills = []
348348
for row in rows:
349349
cols = row.select("td")
@@ -364,7 +364,7 @@ def _parse_blessings_table(cls, builder: AuctionDetailsBuilder, table):
364364
The table containing the character's blessings.
365365
"""
366366
table_content = table.select_one("table.TableContent")
367-
_, *rows = table_content.select("tr")
367+
_, *rows = get_rows(table_content)
368368
blessings = []
369369
for row in rows:
370370
cols = row.select("td")
@@ -389,7 +389,7 @@ def _parse_single_column_table(cls, table: bs4.Tag):
389389
A list with the contents of each row.
390390
"""
391391
table_content = table.select("table.TableContent")[-1]
392-
_, *rows = table_content.select("tr")
392+
_, *rows = get_rows(table_content)
393393
ret = []
394394
for row in rows:
395395
col = row.select_one("td")
@@ -411,7 +411,7 @@ def _parse_charms_table(cls, builder: AuctionDetailsBuilder, table):
411411
The table containing the charms.
412412
"""
413413
table_content = table.select_one("table.TableContent")
414-
_, *rows = table_content.select("tr")
414+
_, *rows = get_rows(table_content)
415415
charms = []
416416
for row in rows:
417417
cols = row.select("td")
@@ -434,7 +434,7 @@ def _parse_achievements_table(cls, builder: AuctionDetailsBuilder, table: bs4.Ta
434434
The table containing the achievements.
435435
"""
436436
table_content = table.select_one("table.TableContent")
437-
_, *rows = table_content.select("tr")
437+
_, *rows = get_rows(table_content)
438438
achievements = []
439439
for row in rows:
440440
col = row.select_one("td")
@@ -457,7 +457,7 @@ def _parse_bestiary_table(cls, builder: AuctionDetailsBuilder, table: bs4.Tag, b
457457
The table containing the bestiary information.
458458
"""
459459
table_content = table.select_one("table.TableContent")
460-
_, *rows = table_content.select("tr")
460+
_, *rows = get_rows(table_content)
461461
bestiary = []
462462
for row in rows:
463463
cols = row.select("td")
@@ -497,7 +497,7 @@ def _parse_general_table(cls, builder: AuctionDetailsBuilder, table):
497497
cls._parse_skills_table(builder, content_containers[1])
498498

499499
additional_stats = cls._parse_data_table(content_containers[2])
500-
builder.creation_date(parse_tibia_datetime(additional_stats.get("creation_date", "").replace("\xa0", " ")))
500+
builder.creation_date(parse_tibia_datetime(clean_text(additional_stats.get("creation_date", ""))))
501501
builder.experience(parse_integer(additional_stats.get("experience", "0")))
502502
builder.gold(parse_integer(additional_stats.get("gold", "0")))
503503
builder.achievement_points(parse_integer(additional_stats.get("achievement_points", "0")))

tibiapy/parsers/character.py

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,8 @@
1010
from tibiapy.errors import InvalidContent
1111
from tibiapy.models import (Achievement, Character, AccountBadge, AccountInformation, OtherCharacter, DeathParticipant,
1212
Death, GuildMembership, CharacterHouse)
13-
from tibiapy.utils import (parse_popup, parse_tibia_date, parse_tibia_datetime, parse_tibiacom_content, split_list,
13+
from tibiapy.utils import (get_rows, parse_popup, parse_tibia_date, parse_tibia_datetime, parse_tibiacom_content,
14+
split_list,
1415
try_enum, parse_link_info, clean_text, parse_integer)
1516

1617
if TYPE_CHECKING:
@@ -100,8 +101,8 @@ def _parse_account_information(cls, rows):
100101
cols_raw = row.select("td")
101102
cols = [ele.text.strip() for ele in cols_raw]
102103
field, value = cols
103-
field = field.replace("\xa0", "_").replace(" ", "_").replace(":", "").lower()
104-
value = value.replace("\xa0", " ")
104+
field = clean_text(field).replace(" ", "_").replace(":", "").lower()
105+
value = clean_text(value)
105106
acc_info[field] = value
106107

107108
created = parse_tibia_datetime(acc_info["created"])
@@ -127,7 +128,7 @@ def _parse_achievements(cls, rows):
127128
field, value = cols
128129
grade = str(field).count("achievement-grade-symbol")
129130
name = value.text.strip()
130-
secret_image = value.find("img")
131+
secret_image = value.select_one("img")
131132
secret = False
132133
if secret_image:
133134
secret = True
@@ -247,7 +248,7 @@ def _parse_house_column(cls, builder: CharacterBuilder, column: bs4.Tag):
247248
m = house_regexp.search(house_text)
248249
paid_until = m.group(1)
249250
paid_until_date = parse_tibia_date(paid_until)
250-
house_link_tag = column.find("a")
251+
house_link_tag = column.select_one("a")
251252
house_link = parse_link_info(house_link_tag)
252253
builder.add_house(
253254
CharacterHouse(
@@ -264,7 +265,7 @@ def _parse_guild_column(cls, builder: CharacterBuilder, column: bs4.Tag):
264265
guild_link = column.select_one("a")
265266
value = clean_text(column)
266267
rank = value.split("of the")[0]
267-
builder.guild_membership(GuildMembership(name=guild_link.text.replace("\xa0", " "), rank=rank.strip()))
268+
builder.guild_membership(GuildMembership(name=clean_text(guild_link), rank=rank.strip()))
268269

269270
@classmethod
270271
def _parse_deaths(cls, builder: CharacterBuilder, rows):
@@ -326,7 +327,7 @@ def _parse_killer(cls, killer):
326327
traded = False
327328
summon = None
328329
if traded_label in killer:
329-
name = killer.replace("\xa0", " ").replace(traded_label, "").strip()
330+
name = clean_text(killer).replace(traded_label, "").strip()
330331
traded = True
331332
player = True
332333

@@ -359,7 +360,7 @@ def _parse_other_characters(cls, rows):
359360
continue
360361

361362
name, world, status, *__ = cols
362-
_, *name = name.replace("\xa0", " ").split(" ")
363+
_, *name = clean_text(name).split(" ")
363364
name = " ".join(name)
364365
traded = False
365366
if traded_label in name:
@@ -408,6 +409,6 @@ def _parse_tables(cls, parsed_content):
408409
title = table.select_one("td").text.strip()
409410
offset = 1
410411

411-
output[title] = table.select("tr")[offset:]
412+
output[title] = get_rows(table)[offset:]
412413

413414
return output

tibiapy/parsers/creature.py

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ class BoostedCreaturesParser:
3434

3535
@classmethod
3636
def _parse_boosted_platform(cls, parsed_content: bs4.BeautifulSoup, tag_id: str):
37-
img = parsed_content.find("img", attrs={"id": tag_id})
37+
img = parsed_content.select_one(f"#{tag_id}")
3838
name = BOOSTED_ALT.sub("", img["title"]).strip()
3939
image_url = img["src"]
4040
identifier = image_url.split("/")[-1].replace(".gif", "")
@@ -94,13 +94,13 @@ def from_content(cls, content: str) -> BoostableBosses:
9494
"""
9595
try:
9696
parsed_content = parse_tibiacom_content(content)
97-
boosted_creature_table = parsed_content.find("div", {"class": "TableContainer"})
98-
boosted_creature_text = boosted_creature_table.find("div", {"class": "Text"})
97+
boosted_creature_table = parsed_content.select_one("div.TableContainer")
98+
boosted_creature_text = boosted_creature_table.select_one("div.Text")
9999
if not boosted_creature_text or "Boosted" not in boosted_creature_text.text:
100100
raise InvalidContent("content is not from the boostable bosses section.")
101101

102-
boosted_boss_tag = boosted_creature_table.find("b")
103-
boosted_boss_image = boosted_creature_table.find("img")
102+
boosted_boss_tag = boosted_creature_table.select_one("b")
103+
boosted_boss_image = boosted_creature_table.select_one("img")
104104
image_url = urllib.parse.urlparse(boosted_boss_image["src"])
105105
boosted_boss = BossEntry(name=boosted_boss_tag.text,
106106
identifier=os.path.basename(image_url.path).replace(".gif", ""))
@@ -110,7 +110,7 @@ def from_content(cls, content: str) -> BoostableBosses:
110110
entries = []
111111
for entry_container in entries_container:
112112
name = entry_container.text.strip()
113-
image = entry_container.find("img")
113+
image = entry_container.select_one("img")
114114
image_url = urllib.parse.urlparse(image["src"])
115115
identifier = os.path.basename(image_url.path).replace(".gif", "")
116116
entries.append(BossEntry(name=name, identifier=identifier))
@@ -188,7 +188,7 @@ def from_content(cls, content: str) -> CreaturesSection:
188188
if not boosted_creature_text or "Boosted" not in boosted_creature_text.text:
189189
raise InvalidContent("content is not from the creatures section.")
190190

191-
boosted_creature_link = boosted_creature_table.find("a")
191+
boosted_creature_link = boosted_creature_table.select_one("a")
192192
url = urllib.parse.urlparse(boosted_creature_link["href"])
193193
query = urllib.parse.parse_qs(url.query)
194194
boosted_creature = CreatureEntry(name=boosted_creature_link.text, identifier=query["race"][0])
@@ -231,17 +231,17 @@ def from_content(cls, content: str) -> Optional[Creature]:
231231
pagination_container, content_container = (
232232
parsed_content.find_all("div", style=lambda v: v and "position: relative" in v)
233233
)
234-
title_container, description_container = content_container.find_all("div")
235-
title = title_container.find("h2")
234+
title_container, description_container = content_container.select("div")
235+
title = title_container.select_one("h2")
236236
name = title.text.strip()
237237

238-
img = title_container.find("img")
238+
img = title_container.select_one("img")
239239
img_url = img["src"]
240240
race = img_url.split("/")[-1].replace(".gif", "")
241241
builder = CreatureBuilder().name(name).identifier(race)
242242

243243
convert_line_breaks(description_container)
244-
paragraph_tags = description_container.find_all("p")
244+
paragraph_tags = description_container.select("p")
245245
paragraphs = [p.text for p in paragraph_tags]
246246
builder.description("\n".join(paragraphs[:-2]).strip())
247247
hp_text = paragraphs[-2]

tibiapy/parsers/forum.py

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,8 @@
1212
from tibiapy.models import (AnnouncementEntry, BoardEntry, CMPost, CMPostArchive, ForumAnnouncement, ForumAuthor,
1313
ForumBoard, ForumEmoticon, ForumPost, ForumSection, ForumThread, GuildMembership, LastPost,
1414
ThreadEntry)
15-
from tibiapy.utils import (convert_line_breaks, parse_form_data, parse_integer, parse_link_info, parse_pagination,
15+
from tibiapy.utils import (clean_text, convert_line_breaks, get_rows, parse_form_data, parse_integer, parse_link_info,
16+
parse_pagination,
1617
parse_tables_map, parse_tibia_datetime, parse_tibia_forum_datetime, parse_tibiacom_content,
1718
split_list, try_enum)
1819

@@ -83,16 +84,16 @@ def from_content(cls, content: str) -> CMPostArchive:
8384

8485
inner_table_container = table.select_one("div.InnerTableContainer")
8586
inner_table = inner_table_container.select_one("table")
86-
inner_table_rows = inner_table.select("tr")
87+
inner_table_rows = get_rows(inner_table)
8788
inner_table_rows = [e for e in inner_table_rows if e.parent == inner_table]
8889
table_content = inner_table_container.select_one("table.TableContent")
8990

90-
header_row, *rows = table_content.select("tr")
91+
header_row, *rows = get_rows(table_content)
9192

9293
for row in rows:
9394
columns = row.select("td")
9495
date_column = columns[0]
95-
date = parse_tibia_datetime(date_column.text.replace("\xa0", " "))
96+
date = parse_tibia_datetime(clean_text(date_column))
9697
board_thread_column = columns[1]
9798
convert_line_breaks(board_thread_column)
9899
board, thread = board_thread_column.text.splitlines()
@@ -414,7 +415,7 @@ def from_content(cls, content: str) -> Optional[ForumBoard]:
414415
builder.current_page(pages)
415416
builder.total_pages(total)
416417

417-
*thread_rows, times_row = tables[-1].select("tr")
418+
*thread_rows, times_row = get_rows(tables[-1])
418419
for thread_row in thread_rows[1:]:
419420
columns = thread_row.select("td")
420421
entry = cls._parse_thread_row(columns)
@@ -424,7 +425,7 @@ def from_content(cls, content: str) -> Optional[ForumBoard]:
424425
builder.add_entry(entry)
425426

426427
if len(tables) > 1:
427-
announcement_rows = tables[0].select("tr")
428+
announcement_rows = get_rows(tables[0])
428429
for announcement_row in announcement_rows[1:]:
429430
author_link, title_link = announcement_row.select("a")
430431
author = author_link.text.strip()
@@ -724,13 +725,13 @@ def _parse_column(cls, last_post_column, offset=1):
724725
permalink_tag = last_post_info.select_one("a")
725726
permalink_info = parse_link_info(permalink_tag)
726727
post_id = int(permalink_info["query"]["postid"])
727-
date_text = last_post_info.text.replace("\xa0", " ").strip()
728+
date_text = clean_text(last_post_info)
728729
last_post_date = parse_tibia_forum_datetime(date_text, offset)
729730

730731
last_post_author_tag = last_post_column.select_one("font")
731732
author_link = last_post_author_tag.select_one("a")
732733
deleted = author_link is None
733-
author = last_post_author_tag.text.replace("by", "", 1).replace("\xa0", " ").strip()
734+
author = clean_text(last_post_author_tag).replace("by", "", 1)
734735
traded = False
735736
if "(traded)" in author:
736737
author = author.replace("(traded)", "").strip()

0 commit comments

Comments
 (0)