# taiko_donder_hiroba_export.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237

import requests
from bs4 import BeautifulSoup
import json
import time
import argparse
import os


# Genre slugs of the song list site; each one is requested as "<slug>.php".
SONG_CATEGORIES = ["pops", "kids", "anime", "vocaloid", "game", "variety", "classic", "namco"]
SONG_LIST_BASE_URL = "https://taiko.namco-ch.net/taiko/en/songlist/"
# Browser-like User-Agent sent with every HTTP request this script makes.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"
}
PLAY_HISTORY_URL = "https://donderhiroba.jp/history_recent_score.php"
# Order of the difficulty cells in a song-list table row. "support" only keeps
# the indices aligned with the table; its level is never stored.
DIFFICULTIES = ["support", "easy", "normal", "hard", "oni", "ura_oni"]

# Course icon file name -> difficulty label. The lower-cased label is used as
# the lookup key into a chart entry, so it must match a DIFFICULTIES name.
DIFFICULTY_MAP = {
    "icon_course02_1_640.png": "EASY",
    "icon_course02_2_640.png": "NORMAL",
    "icon_course02_3_640.png": "HARD",
    "icon_course02_4_640.png": "ONI",
    # This entry used to repeat the "_4_" key; a dict literal keeps only the
    # last value for a repeated key, so every ONI play was labelled URA_ONI.
    # NOTE(review): "_5_" is assumed from the 1-4 numbering of the other
    # course icons - confirm against a live Donder Hiroba page.
    "icon_course02_5_640.png": "URA_ONI",
}

# Crown icon file name -> value exported as "crown_rank".
CROWN_MAP = {
    "crown_02_640.png": "FULL COMBO",
    "crown_03_640.png": "CLEAR",
    "crown_04_640.png": "DONDERFUL COMBO",
}

# Score-rank icon file name -> value exported as "score_rank".
LAMP_MAP = {
    "best_score_rank_2_640.png": "IKI 1",
    "best_score_rank_3_640.png": "IKI 2",
    "best_score_rank_4_640.png": "IKI 3",
    "best_score_rank_5_640.png": "MIYABI 1",
    "best_score_rank_6_640.png": "MIYABI 2",
    "best_score_rank_7_640.png": "MIYABI 3",
    "best_score_rank_8_640.png": "KIWAMI",
}

def load_chart_cache():
    """
    Read the chart cache from taiko_charts.json in the working directory
    and return its content as a dict.
    """
    with open("taiko_charts.json") as cache_file:
        cached = json.load(cache_file)
    return dict(cached)

def build_taiko_chart_metadata():
    """
    Scrape chart levels from the song list site and cache them on disk.

    Unfortunately Donder Hiroba doesn't store any data about the level, so it
    is fetched here instead: one page per entry in SONG_CATEGORIES.

    Returns:
        dict mapping song title -> chart entry. A chart entry holds one key per
        difficulty name in DIFFICULTIES (except "support") whose value is the
        level text of the table cell, or None when the cell is empty or "-",
        plus an "artist" key. Songs sharing a title overwrite each other.

    Raises:
        Exception: if a category page has no <tbody> element.

    Side effects:
        Overwrites taiko_charts.json in the working directory.
    """
    chart_data = {}
    # The base URL already ends in "/"; strip it so the path has no "//".
    base_url = SONG_LIST_BASE_URL.rstrip("/")
    for category in SONG_CATEGORIES:
        url = f"{base_url}/{category}.php"
        print(f"[DATA] Getting {category} category charts")
        # Without a timeout a stalled connection would hang the script forever.
        resp = requests.get(url, headers=headers, timeout=30)
        soup = BeautifulSoup(resp.text, 'html.parser')
        table = soup.find("tbody")
        if table is None:
            raise Exception(f"Unable to fetch chart data for {category}")

        for row in table.find_all("tr"):
            cols = row.find_all("td")
            # Rows without one cell per difficulty are not song rows.
            if len(cols) < len(DIFFICULTIES):
                continue

            song_metadata = row.find_all("th")
            if not song_metadata:
                continue

            title_th = song_metadata[0]
            artist_tag = title_th.find("p")
            song_artist = artist_tag.get_text(strip=True) if artist_tag else ""

            # Drop the nested <p>/<span> tags so only the bare title text is left.
            for tag in title_th.find_all(["p", "span"]):
                tag.decompose()
            song_title = title_th.get_text(strip=True)

            curr_song = {}
            for diff_name, cell in zip(DIFFICULTIES, cols):
                if diff_name == "support":
                    continue
                # Strip so that surrounding whitespace cannot hide the "-" marker.
                level_text = cell.get_text(strip=True)
                curr_song[diff_name] = None if level_text in ("", "-") else level_text

            curr_song["artist"] = song_artist
            chart_data[song_title] = curr_song

    with open("taiko_charts.json", "w") as f:
        print("Writing charts to cache. Delete this file when new charts come out!")
        json.dump(chart_data, f)
    return chart_data

def _icon_file(img):
    """Return the file-name part of an <img> src, or "" when src is missing."""
    return (img.get("src") or "").split("/")[-1]


def _classify_icons(icon_files):
    """Translate icon file names into a (difficulty, crown, lamp) triple."""
    difficulty = crown = lamp = None
    for src in icon_files:
        if src in DIFFICULTY_MAP:
            difficulty = DIFFICULTY_MAP[src]
        elif src in CROWN_MAP:
            crown = CROWN_MAP[src]
        elif src.startswith("crown_") and crown is None:
            # NOTE(review): assumes any other "crown_*" icon marks a failed
            # play - confirm against a live page.
            crown = "NOT CLEAR"
        elif src in LAMP_MAP:
            lamp = LAMP_MAP[src]
    return difficulty, crown, lamp


def _parse_counters(score_block):
    """Read the judgement counts, combo and pound values of one score block."""
    judgements = {}
    combo = pound = None

    score_data_area = score_block.find("div", class_="scoreDataArea")
    if not score_data_area:
        return judgements, combo, pound

    for el in score_data_area.find_all("div", class_="playDataArea", recursive=True):
        img = el.find("img", class_="score_name")
        val_tag = el.find("div", class_="playDataScore")
        if not img or not val_tag:
            continue

        src = _icon_file(img)
        value = val_tag.get_text(strip=True).replace("回", "")
        if not value.isdigit():
            continue
        value = int(value)

        # The label image's file name tells which counter this value is.
        if "score_name_good" in src:
            judgements["good"] = value
        elif "score_name_ok" in src:
            judgements["ok"] = value
        elif "score_name_ng" in src:
            judgements["bad"] = value
        elif "score_name_combo" in src:
            combo = value
        elif "score_name_pound" in src:
            pound = value
    return judgements, combo, pound


def _parse_score_block(score_block):
    """Extract every field of one "scoreUser" block that needs no chart data."""
    title_tag = score_block.find("h2")
    title = title_tag.text.strip() if title_tag else None

    total_score_tag = score_block.find("div", class_="scoreScore")
    total_score = total_score_tag.text.strip().replace("点", "") if total_score_tag else None

    score_element = score_block.find("div", class_="playDataArea", attrs={"style": True})
    img_tags = score_element.find_all("img") if score_element else []
    difficulty, crown, lamp = _classify_icons(_icon_file(img) for img in img_tags)

    judgements, combo, pound = _parse_counters(score_block)
    return {
        "title": title,
        "score": total_score,
        "difficulty": difficulty,
        "crown": crown,
        "lamp": lamp,
        "judgements": judgements,
        "combo": combo,
        "pound": pound,
    }


def _chart_level(chart, difficulty):
    """Return the chart level as an int: 0 if the key is absent, None if unusable."""
    if not difficulty:
        return None
    try:
        return int(chart.get(difficulty.lower(), 0))
    except (TypeError, ValueError):
        # The cache stores None for charts that do not exist, and the level
        # text is not guaranteed to be numeric.
        return None


def get_play_hist(token: str, chart_data):
    """
    Fetch and parse the Donder Hiroba play history.

    Pages are requested one after another (the first without a query string,
    later ones with "?page=N") until a page has no "scoreUser" element or its
    parsed content is identical to a page already seen. Entries whose title is
    missing from chart_data are skipped with a warning.

    Args:
        token: value sent as the "_token_v2" cookie.
        chart_data: dict mapping song title -> chart entry with an "artist"
            key and one level per lower-cased difficulty name.

    Returns:
        dict with a "meta" section and a "scores" list. Each score holds title,
        timestamp (always 0), artist, difficulty, level, crown_rank,
        score_rank, score, judgements and optional combo/pound.
    """
    all_results = []
    # Parsed content of every page processed so far. Whole pages are compared
    # because a title-only comparison stops too early when songs are replayed.
    seen_pages = []
    page = 1

    while True:
        page_url = f"{PLAY_HISTORY_URL}?page={page}" if page > 1 else PLAY_HISTORY_URL
        print(f"[INFO] Fetching page {page}...")
        # Without a timeout a stalled connection would hang the script forever.
        play_hist_page = requests.get(
            page_url, cookies={"_token_v2": token}, headers=headers, timeout=30
        )
        soup = BeautifulSoup(play_hist_page.text, "html.parser")
        scores = soup.find_all(class_="scoreUser")

        if not scores:
            print(f"[INFO] No scores found on page {page}. Ending pagination.")
            break

        raw_entries = [_parse_score_block(s) for s in scores]
        if raw_entries in seen_pages:
            print(f"[INFO] Page {page} contains duplicate results. Stopping pagination.")
            break
        seen_pages.append(raw_entries)

        page_results = []
        for raw in raw_entries:
            title = raw["title"]
            chart = chart_data.get(title) if title else None

            # Skip unknown songs
            if chart is None:
                print(f"[WARN] {title} is unknown in chart_data. Skipping.")
                continue

            total_score = raw["score"]
            page_results.append({
                "title": title,
                "timestamp": 0,
                "artist": chart["artist"],
                "difficulty": raw["difficulty"],
                "level": _chart_level(chart, raw["difficulty"]),
                "crown_rank": raw["crown"],
                "score_rank": raw["lamp"],
                "score": int(total_score) if total_score and total_score.isdigit() else total_score,
                "judgements": raw["judgements"],
                "optional": {
                    "combo": raw["combo"],
                    "pound": raw["pound"]
                }
            })

        all_results.extend(page_results)
        print(f"[INFO] Page {page} processed: {len(page_results)} scores found")
        page += 1

    print(f"[INFO] Total scores collected: {len(all_results)} across {page - 1} pages")

    return {
        "meta": {
            "game": "taiko",
            "playtype": "Single",
            "service": "Donder Hiroba Export"
        },
        "scores": all_results,
    }


if __name__ == "__main__":
    # Script entry point: read the token, make sure chart metadata is
    # available (from cache or freshly scraped), then export the play history.
    print("[ALERT!] Please first refresh your scores on Donder Hiroba so that it has the latest info. Visit: https://donderhiroba.jp/score_list.php and click on the top right\n\n")
    print("!Your token will change after doing this!")

    parser = argparse.ArgumentParser(
        description="Exports Taiko no Tatsujin scores from Donder Hiroba into a Mirage compatible JSON",
        prog="taiko_donder_hiroba_export.py",
    )
    parser.add_argument("-t", "--token", help="Donder Hiroba _token_v2. See README for instructions on how to get this!")
    args = parser.parse_args()
    # Fall back to an interactive prompt when no token was given on the CLI.
    args.token = args.token or input("Please enter your Donder Hiroba _token_v2: ")

    cache_path = "taiko_charts.json"
    one_week_seconds = 7 * 24 * 60 * 60
    chart_data = {}
    if not os.path.exists(cache_path):
        print("No chart cache found, generating...")
        chart_data = build_taiko_chart_metadata()
    else:
        modified_at = os.path.getmtime(cache_path)
        if time.time() - modified_at <= one_week_seconds:
            print("Using cached chart data")
            chart_data = load_chart_cache()
        else:
            print("Chart cache is older than 1 week, regenerating...")
            chart_data = build_taiko_chart_metadata()

    score_data = get_play_hist(args.token, chart_data)
    with open("mirage_donder_hiroba_export.json", "w") as export_file:
        json.dump(score_data, export_file)