about | summary | refs | log | tree | commit | diff | stats
path: root/scripts
diff options
context:
space:
mode:
author Pinapelz <yukais@pinapelz.com> 2025-11-08 21:19:53 -0800
committer Pinapelz <yukais@pinapelz.com> 2025-11-08 21:19:53 -0800
commit 69e11de80c00a12d2c35d55ddfffa40550713442 (patch)
tree 1c94cda8383f0caf3b1ec5372dd6c3944a444743 /scripts
parent e967cc93ac85a07082da67211513815ec7f2eab3 (diff)
taiko: handle pagination in donder hiroba
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/taiko/taiko_donder_hiroba_export.py 160
1 files changed, 92 insertions, 68 deletions
diff --git a/scripts/taiko/taiko_donder_hiroba_export.py b/scripts/taiko/taiko_donder_hiroba_export.py
index 9b3773b..dd32f3b 100644
--- a/scripts/taiko/taiko_donder_hiroba_export.py
+++ b/scripts/taiko/taiko_donder_hiroba_export.py
@@ -92,86 +92,108 @@ def get_play_hist(token: str, chart_data):
"""
Fetch and parse Donder Hiroba play history page.
Extracts scores, difficulty, ranks, and performance breakdowns.
+ Handles pagination by going through all pages until duplicate results are found.
"""
- play_hist_page = requests.get(PLAY_HISTORY_URL, cookies={"_token_v2": token}, headers=headers)
- soup = BeautifulSoup(play_hist_page.text, "html.parser")
+ all_results = []
+ page = 1
+ previous_page_titles = set()
- results = []
- scores = soup.find_all(class_="scoreUser")
- current_time_ms = int(time.time() * 1000)
+ while True:
+ page_url = f"{PLAY_HISTORY_URL}?page={page}" if page > 1 else PLAY_HISTORY_URL
+ print(f"[INFO] Fetching page {page}...")
+ play_hist_page = requests.get(page_url, cookies={"_token_v2": token}, headers=headers)
+ soup = BeautifulSoup(play_hist_page.text, "html.parser")
+ scores = soup.find_all(class_="scoreUser")
- for s in scores:
- title_tag = s.find("h2")
- title = title_tag.text.strip() if title_tag else None
+ if not scores:
+ print(f"[INFO] No scores found on page {page}. Ending pagination.")
+ break
- total_score_tag = s.find("div", class_="scoreScore")
- total_score = total_score_tag.text.strip().replace("点", "") if total_score_tag else None
+ current_page_titles = set()
+ page_results = []
- # Skip unknown songs
- if not title or chart_data.get(title) is None:
- print(f"[WARN] {title} is unknown in chart_data. Skipping.")
- continue
+ for s in scores:
+ title_tag = s.find("h2")
+ title = title_tag.text.strip() if title_tag else None
- # Extract difficulty, crown, and lamp (rank icons)
- difficulty = crown = lamp = None
- score_element = s.find("div", class_="playDataArea", attrs={"style": True})
- img_tags = score_element.find_all("img") if score_element else []
+ total_score_tag = s.find("div", class_="scoreScore")
+ total_score = total_score_tag.text.strip().replace("点", "") if total_score_tag else None
- for img in img_tags:
- src = img["src"].split("/")[-1]
- if src in DIFFICULTY_MAP:
- difficulty = DIFFICULTY_MAP[src]
- elif src in CROWN_MAP:
- crown = CROWN_MAP[src]
- elif src in LAMP_MAP:
- lamp = LAMP_MAP[src]
-
- # Extract detailed score data (judgements, combo, pound)
- judgements = {}
- combo = pound = None
+ # Skip unknown songs
+ if not title or chart_data.get(title) is None:
+ print(f"[WARN] {title} is unknown in chart_data. Skipping.")
+ continue
- score_data_area = s.find("div", class_="scoreDataArea")
- if score_data_area:
- score_elements = score_data_area.find_all("div", class_="playDataArea", recursive=True)
- for el in score_elements:
- img = el.find("img", class_="score_name")
- val_tag = el.find("div", class_="playDataScore")
- if not img or not val_tag:
- continue
+ current_page_titles.add(title)
+ difficulty = crown = lamp = None
+ score_element = s.find("div", class_="playDataArea", attrs={"style": True})
+ img_tags = score_element.find_all("img") if score_element else []
+ for img in img_tags:
src = img["src"].split("/")[-1]
- value = val_tag.get_text(strip=True).replace("回", "")
- if not value.isdigit():
- continue
- value = int(value)
+ if src in DIFFICULTY_MAP:
+ difficulty = DIFFICULTY_MAP[src]
+ elif src in CROWN_MAP:
+ crown = CROWN_MAP[src]
+ elif src in LAMP_MAP:
+ lamp = LAMP_MAP[src]
+
+ judgements = {}
+ combo = pound = None
- if "score_name_good" in src:
- judgements["good"] = value
- elif "score_name_ok" in src:
- judgements["ok"] = value
- elif "score_name_ng" in src:
- judgements["bad"] = value
- elif "score_name_combo" in src:
- combo = value
- elif "score_name_pound" in src:
- pound = value
+ score_data_area = s.find("div", class_="scoreDataArea")
+ if score_data_area:
+ score_elements = score_data_area.find_all("div", class_="playDataArea", recursive=True)
+ for el in score_elements:
+ img = el.find("img", class_="score_name")
+ val_tag = el.find("div", class_="playDataScore")
+ if not img or not val_tag:
+ continue
- result_entry = {
- "title": title,
- "timestamp": current_time_ms,
- "artist": chart_data[title]["artist"],
- "difficulty": difficulty,
- "level": int(chart_data[title].get(difficulty.lower(), 0)) if difficulty else None,
- "crown_rank": crown,
- "score_rank": lamp,
- "score": int(total_score) if total_score and total_score.isdigit() else total_score,
- "judgements": judgements,
- "optional": {
- "combo": combo,
- "pound": pound
+ src = img["src"].split("/")[-1]
+ value = val_tag.get_text(strip=True).replace("回", "")
+ if not value.isdigit():
+ continue
+ value = int(value)
+
+ if "score_name_good" in src:
+ judgements["good"] = value
+ elif "score_name_ok" in src:
+ judgements["ok"] = value
+ elif "score_name_ng" in src:
+ judgements["bad"] = value
+ elif "score_name_combo" in src:
+ combo = value
+ elif "score_name_pound" in src:
+ pound = value
+
+ result_entry = {
+ "title": title,
+ "timestamp": 0,
+ "artist": chart_data[title]["artist"],
+ "difficulty": difficulty,
+ "level": int(chart_data[title].get(difficulty.lower(), 0)) if difficulty else None,
+ "crown_rank": crown,
+ "score_rank": lamp,
+ "score": int(total_score) if total_score and total_score.isdigit() else total_score,
+ "judgements": judgements,
+ "optional": {
+ "combo": combo,
+ "pound": pound
+ }
}
- }
- results.append(result_entry)
+ page_results.append(result_entry)
+ if page > 1 and current_page_titles.issubset(previous_page_titles):
+ print(f"[INFO] Page {page} contains duplicate results. Stopping pagination.")
+ break
+
+ all_results.extend(page_results)
+ print(f"[INFO] Page {page} processed: {len(page_results)} scores found")
+
+ previous_page_titles.update(current_page_titles)
+ page += 1
+
+ print(f"[INFO] Total scores collected: {len(all_results)} across {page - 1} pages")
return {
"meta": {
@@ -179,11 +201,13 @@ def get_play_hist(token: str, chart_data):
"playtype": "Single",
"service": "Donder Hiroba Export"
},
- "scores": results,
+ "scores": all_results,
}
if __name__ == "__main__":
+ print("[ALERT!] Please first refresh your scores on Donder Hiroba so that it has the latest info. Visit: https://donderhiroba.jp/score_list.php and click on the top right\n\n")
+ print("!Your token will change after doing this!")
parser = argparse.ArgumentParser(
prog="taiko_donder_hiroba_export.py",
description="Exports Taiko no Tatsujin scores from Donder Hiroba into a Mirage compatible JSON",
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage