usc: nautica to maps.db to seeds.jsonseeds-nautica

author: Pinapelz <yukais@pinapelz.com> 2025-11-11 03:25:29 -0800
committer: Pinapelz <yukais@pinapelz.com> 2025-11-11 03:25:29 -0800
commit: ea4415b705c41c9a43c423b89b4fc4083229df5e (patch)
tree: fc8f26e7cec65a1f919f36f1e3e9c38ae325ea05
parent: 2cc32a69293c56cbc039ad8830e4d5b6ebedf029 (diff)
4 files changed, 279 insertions, 6 deletions
diff --git a/usc/seeds/nautica/README.md b/usc/seeds/nautica/README.md
index e69de29..721b5a9 100644
--- a/usc/seeds/nautica/README.md
+++ b/usc/seeds/nautica/README.md
@@ -0,0 +1,31 @@
+# Nautica (ksm.dev) to maps.db
+This is a "seed" generation helper for Tachi. It generates the `maps.db` file, which can be used to generate a new seed file. The instructions here assume that you already have some understanding of how Tachi works (since you're presumably running your own instance).
+
+For reference, see https://github.com/zkrising/Tachi/blob/main/seeds/scripts/rerunners/usc/add-usc-converts.js
+
+This pulls **ALL** charts from Nautica and produces the minimal requirements for seed generation in Tachi. Please run responsibly since this requires downloading all charts and producing a `SHA-1` hash for them.
+
+
+```bash
+python nautica_to_maps.py --start-page <START_PAGE> --db <PATH_TO_DB>
+```
+- START_PAGE default = 1
+- PATH_TO_DB default = maps.db
+
+
+As an alternative, I provide the one I generated on Nov. 10, 2025 (`maps-nov-10-2025.db`).
+
+# Merging into Tachi
+First, move the generated `maps.db` into Tachi's environment.
+
+Tachi relies on fuzzy matching of title and artist, which doesn't work well for Nautica since many people re-chart the same songs. In these cases the charts will have the same title and artist.
+
+To work around this, a custom script can be used to finally convert `maps.db` into seeds. This code is largely based off of the usc-converts re-runner.
+
+If there is a collision in `songs-usc.json` where the name and artist of a song already exists, the string `(<EFFECTOR_NAME> Nautica Edition)` is appended to the title.
+
+To run this script, place it in `seeds/scripts/single-use/usc` (you need to create the usc folder if it doesn't already exist in single-use). Then within Tachi's environment run `node add-usc-nautica.js -f /nautica -d maps.db`.
+
+Then just load the seeds and you're good to go!
+
+*It is still TECHNICALLY possible to have collisions, but it is unlikely that two effectors with the same name create charts for the same song. In those cases you should manually resolve the difference after running the script.*
diff --git a/usc/seeds/nautica/add-usc-nautica.js b/usc/seeds/nautica/add-usc-nautica.js
new file mode 100644
index 0000000..d1a359e
--- /dev/null
+++ b/usc/seeds/nautica/add-usc-nautica.js
@@ -0,0 +1,143 @@
+const { Command } = require("commander");
+const sqlite3 = require("better-sqlite3");
+const fs = require("fs");
+const {
+	CreateChartID,
+	ReadCollection,
+	WriteCollection,
+	GetFreshSongIDGenerator,
+} = require("../../util");
+
+// Merges the charts stored in a maps.db file into songs-usc.json and charts-usc.json.
+// Usage: node add-usc-nautica.js -f <path_filter> -d <maps.db> [--debug]
+const program = new Command();
+program.requiredOption("-d, --db <maps.db>");
+program.requiredOption("-f, --filter <path_filter>");
+program.option("--debug", "Show debug logs", false);
+program.parse(process.argv);
+const options = program.opts();
+
+const DEBUG = options.debug;
+const db = sqlite3(options.db);
+// Bind the filter as a parameter: when it was interpolated into the SQL text, a quote in
+// the argument could break or rewrite the statement.
+const dbRows = db.prepare("SELECT * FROM Charts WHERE path LIKE ?").all(`%${options.filter}%`);
+db.close();
+console.log(`Found ${dbRows.length} charts.`);
+
+const songs = ReadCollection("songs-usc.json");
+const charts = ReadCollection("charts-usc.json");
+// folderid -> songID for folders that were already resolved during this run.
+const folderIdToSongId = new Map();
+// hashSHA1 -> first chart carrying it, so the per-row duplicate check is a lookup, not a scan.
+const chartsByHash = new Map();
+for (const c of charts) {
+	if (!chartsByHash.has(c.data.hashSHA1)) {
+		chartsByHash.set(c.data.hashSHA1, c);
+	}
+}
+
+let newSongs = 0;
+let newCharts = 0;
+
+const getFreshSongID = GetFreshSongIDGenerator("usc");
+const log = DEBUG ? console.log : () => undefined;
+
+/** Builds the disambiguated title used when a song's title/artist pair is already taken. */
+function makeUscEditionName(title, effector) {
+	return `${title} (${effector} Nautica Edition)`;
+}
+
+/** Appends a new song document to `songs` and returns its freshly generated ID. */
+function addSong(title, altTitles, artist, folderName) {
+	const id = getFreshSongID();
+	songs.push({ id, title, altTitles, artist, data: {}, searchTerms: [folderName] });
+	newSongs++;
+	return id;
+}
+
+/** Returns the song ID for the chart's folder, creating the song when it is not known yet. */
+function resolveSongID(chart) {
+	if (folderIdToSongId.has(chart.folderid)) {
+		return folderIdToSongId.get(chart.folderid);
+	}
+	const splitPath = chart.path.replaceAll("\\", "/").split("/");
+	const folderName = splitPath[splitPath.length - 2];
+	let songID;
+	if (!songs.some((s) => s.title === chart.title && s.artist === chart.artist)) {
+		songID = addSong(chart.title, [], chart.artist, folderName);
+		log(`Added song ${songID} for folder ${folderName}.`);
+	} else {
+		// Title/artist pair already taken: file the chart under an effector-specific title.
+		const uscTitle = makeUscEditionName(chart.title, chart.effector || "Unknown Effector");
+		const existingUsc = songs.find((s) => s.title === uscTitle && s.artist === chart.artist);
+		if (existingUsc) {
+			songID = existingUsc.id;
+			log(`Found existing USC edition song for ${chart.title}.`);
+		} else {
+			songID = addSong(uscTitle, [chart.title], chart.artist, folderName);
+			log(`Duplicate name detected → created USC edition song ${uscTitle} (ID ${songID}).`);
+		}
+	}
+	folderIdToSongId.set(chart.folderid, songID);
+	return songID;
+}
+
+for (const chart of dbRows) {
+	// Skip rows whose chart file (same SHA-1) is already present in the collection.
+	const existingChart = chartsByHash.get(chart.hash);
+	if (existingChart) {
+		log(
+			`Chart ${chart.title} ${existingChart.difficulty} already exists (hash match, internalId=${chart.internalId}).`
+		);
+		continue;
+	}
+
+	const songID = resolveSongID(chart);
+
+	// Add the chart once per playtype unless that song/playtype/difficulty slot is occupied.
+	for (const playtype of ["Controller", "Keyboard"]) {
+		const duplicate = charts.find(
+			(c) =>
+				c.songID === songID &&
+				c.playtype === playtype &&
+				c.difficulty === chart.diff_shortname &&
+				c.isPrimary === true
+		);
+		if (duplicate) {
+			log(
+				`Skipped duplicate chart: ${chart.title} ${chart.diff_shortname} (${playtype}, songID=${songID}, internalId=${chart.internalId}).`
+			);
+			continue;
+		}
+
+		const newChart = {
+			chartID: CreateChartID(),
+			data: {
+				effector: chart.effector,
+				hashSHA1: chart.hash,
+				isOfficial: false,
+				tableFolders: [],
+			},
+			difficulty: chart.diff_shortname,
+			isPrimary: true,
+			level: chart.level.toString(),
+			levelNum: 0,
+			playtype,
+			songID,
+			versions: [],
+		};
+		charts.push(newChart);
+		// Keep the index current so a later row with the same hash is still skipped.
+		chartsByHash.set(chart.hash, newChart);
+		newCharts++;
+		log(
+			`Added chart ${chart.title} ${chart.diff_shortname} (${playtype}, internalId=${chart.internalId}).`
+		);
+	}
+}
+
+console.log(`Added ${newSongs} new songs and ${newCharts} new charts.`);
+
+WriteCollection("songs-usc.json", songs);
+WriteCollection("charts-usc.json", charts);
diff --git a/usc/seeds/nautica/maps-nov-10-2025.db b/usc/seeds/nautica/maps-nov-10-2025.db
new file mode 100644
index 0000000..99693b1
--- /dev/null
+++ b/usc/seeds/nautica/maps-nov-10-2025.db
diff --git a/usc/seeds/nautica/nautica_to_maps.py b/usc/seeds/nautica/nautica_to_maps.py
index f62c144..0a727e5 100644
--- a/usc/seeds/nautica/nautica_to_maps.py
+++ b/usc/seeds/nautica/nautica_to_maps.py
@@ -13,6 +13,10 @@ import argparse
 import requests
 import json
 import sqlite3
+import zipfile
+from pathlib import Path
+import hashlib
+import shutil
 import os
 
 headers = {
@@ -38,6 +42,14 @@ DIFF_NAME_MAP = {
     3: "EXH",
     4: "INF"
 }
+
+USC_DIFFICULTY_MAP = {  # value of a .ksh "difficulty=" line -> short difficulty name
+    "light": "NOV",
+    "challenge": "ADV",
+    "extended": "EXH",
+    "infinite": "INF"
+}
+
 def create_maps_db_if_not_exists(filepath: str):
     if os.path.exists(filepath):
         print("[DB] Maps DB already exists, skipping creation")
@@ -73,10 +85,79 @@ def chart_already_processed(db_path: str, internal_id: str):
     conn.close()
     return result is not None
 
-def download_and_generate_charts():
-    pass
+def find_all_ksh_files(path):
+    """List every regular ``*.ksh`` file below ``path``, ignoring ``__MACOSX`` folders."""
+    candidates = Path(path).rglob("*.ksh")
+    return [entry for entry in candidates if entry.is_file() and "__MACOSX" not in entry.parts]
+
+
+def get_ksh_difficulty(path):
+    """Return the value of the first ``difficulty=`` line in a .ksh file, or None."""
+    try:
+        # utf-8-sig drops a leading BOM; errors="replace" keeps bytes that are not valid
+        # UTF-8 from aborting the scan before the difficulty line is reached.
+        with open(path, "r", encoding="utf-8-sig", errors="replace") as f:
+            hits = (s for s in map(str.strip, f) if s.startswith("difficulty="))
+            return next((s.split("=", 1)[1] for s in hits), None)
+    except OSError:
+        return None
+
+def compute_sha1(path: Path) -> str:
+    """Return the lowercase hexadecimal SHA-1 digest of the file at ``path``."""
+    sha1 = hashlib.sha1()
+    with path.open("rb") as f:
+        # Stream in 64 KiB blocks so the file is never held in memory as a whole.
+        while chunk := f.read(0x10000):
+            sha1.update(chunk)
+    return sha1.hexdigest()
+
+def download_and_generate_chart_hash(download_url: str):
+    """Download a chart ZIP and return ``{difficulty short name: SHA-1 hex}``, or None on failure."""
+    chart_file_name = download_url.split("/")[-1]
+    try:
+        print(f"[DOWNLOAD] Downloading Chart Data {chart_file_name}")
+        # The timeout stops a stalled server from hanging the run; `with` releases the stream.
+        with requests.get(download_url, stream=True, timeout=60) as response:
+            response.raise_for_status()
+            with open("chart.zip", "wb") as f:
+                for chunk in response.iter_content(chunk_size=8192):
+                    f.write(chunk)
+        print("[DOWNLOAD] Download complete")
+    except Exception as e:
+        print(f"[DOWNLOAD] ERROR! Failed to download {chart_file_name}. EXCEPTION: {e}")
+        return None
+    os.makedirs("working", exist_ok=True)
+    try:
+        with zipfile.ZipFile("chart.zip", 'r') as zip_ref:
+            zip_ref.extractall("working")
+    except Exception:
+        print("[ERROR] Unable to extract, bad ZIP file? Skipping...")
+        return None
+    print("[EXTRACT] Successfully extracted chart data")
+    processed_charts = {}
+    for chart_path in find_all_ksh_files("working"):
+        print(f"[HASH] Now generating hash for {chart_path}")
+        usc_diff_name = USC_DIFFICULTY_MAP.get(get_ksh_difficulty(chart_path))
+        if not usc_diff_name:
+            # Missing or unrecognised difficulty: skip this chart instead of raising KeyError.
+            print("[ERROR] No difficulty found in KSM chart. This may be an invalid chart")
+            continue
+        processed_charts[usc_diff_name] = compute_sha1(chart_path)
+    return processed_charts
+
+def create_row_db(data):
+    """Insert one chart row into the Charts table; return True when a row was written."""
+    conn = sqlite3.connect(db_path)  # db_path: module-level global assigned under __main__
+    try:
+        # Named placeholders bind directly from the keys of the row dict.
+        cursor = conn.execute("INSERT INTO Charts (internalId, hash, title, artist, effector, level, diff_shortname, path, folderid) VALUES (:internalId, :hash, :title, :artist, :effector, :level, :diff_shortname, :path, :folderid)", data)
+        conn.commit()
+        print("[DB] Wrote row to DB")
+        return cursor.rowcount > 0  # an INSERT yields no rows, so fetchone() was always None
+    finally:
+        conn.close()
 
-def get_charts_from_page(page_num: int, db_path: str):
+def process_chart_page(page_num: int, db_path: str):
     charts = []
     response = requests.get(NAUTICA_URL + "?page="+str(page_num), headers=headers)
     resp_page_obj = json.loads(response.text)
@@ -85,10 +166,19 @@ def get_charts_from_page(page_num: int, db_path: str):
         title = entry["title"]
         artist = entry["artist"]
         download_url = entry["cdn_download_url"]
+        if os.path.exists("working"):
+            shutil.rmtree("working")
+            print("[CLEANUP] Removed existing working directory")
+        if os.path.exists("chart.zip"):
+            os.remove("chart.zip")
+            print("[CLEANUP] Removed existing chart.zip")
+        hash_data = download_and_generate_chart_hash(download_url)
+        if hash_data is None:
+            continue
         for chart in entry["charts"]:
             difficulty = int(chart["difficulty"])
             level = chart["level"]
-            if chart_already_processed(chart["id"]):
+            if chart_already_processed(db_path, chart["id"]):
                 print(f"[SKIP] {title} - {difficulty} already exists. Skipping...")
                 continue
             effector = chart["effector"]
@@ -96,11 +186,12 @@ def get_charts_from_page(page_num: int, db_path: str):
             charts.append({
                 "internalId": chart["id"],
                 "title": title,
+                "hash": hash_data[diff_shortname],
                 "artist": artist,
                 "effector": effector,
                 "level": level,
                 "diff_shortname": diff_shortname,
-                "path": "/charts/blahblah",
+                "path": f"/nautica/chart.ksh",
                 "folderid": folder_id
             })
         folder_id += 1
@@ -113,9 +204,17 @@ if __name__ == "__main__":
         description="Converts ALL charts on Nautica (ksm.dev) to a USC maps.db file",
     )
     parser.add_argument("--db", help="Path to existing maps.db if none-specified this script will search in current working dir or create a new one")
+    parser.add_argument("--start-page", help="Start from this page on ksm.dev", default=1)
     args = parser.parse_args()
     db_path = args.db
     if not db_path:
         db_path = "maps.db"
     create_maps_db_if_not_exists(db_path)
-    print(get_charts_from_page(1, db_path))
+    num_pages = get_nautica_num_pages()
+    start_page = int(args.start_page)
+    print(f"Found {num_pages} to process...")
+    for i in range(start_page, num_pages + 1):
+        print(f"[PROGRESS] {i}/{num_pages + 1} COMPLETED")
+        charts = process_chart_page(i, db_path)
+        for chart in charts:
+            create_row_db(chart)
author	Pinapelz <yukais@pinapelz.com>	2025-11-11 03:25:29 -0800
committer	Pinapelz <yukais@pinapelz.com>	2025-11-11 03:25:29 -0800
commit	ea4415b705c41c9a43c423b89b4fc4083229df5e (patch)
tree	fc8f26e7cec65a1f919f36f1e3e9c38ae325ea05
parent	2cc32a69293c56cbc039ad8830e4d5b6ebedf029 (diff)