Khinsider to Webdav

author: Pinapelz <donaldshan1@outlook.com> 2026-04-05 14:22:45 -0700
committer: Pinapelz <donaldshan1@outlook.com> 2026-04-05 14:22:45 -0700
commit: 8faf54c678ec1fb15dc6ac696a814384e0746335 (patch)
tree: 704bbd53ad1ba30a60e27f4f12fd87e531cfe267 /Khinsider_To_WebDAV.ipynb
parent: 68a99b3607c614bad511fbf8aa25e3d0bb8c6dc4 (diff)
1 files changed, 501 insertions, 0 deletions
diff --git a/Khinsider_To_WebDAV.ipynb b/Khinsider_To_WebDAV.ipynb
new file mode 100644
index 0000000..5755dcb
--- /dev/null
+++ b/Khinsider_To_WebDAV.ipynb
@@ -0,0 +1,501 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "authorship_tag": "ABX9TyOwX80jyPXCpuCbpRs2azko",
+      "include_colab_link": true
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "<a href=\"https://colab.research.google.com/github/pinapelz/nc-media-tools/blob/main/Khinsider_To_WebDAV.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Khinsider to WebDAV\n",
+        "Download an album from Khinsider and upload it to a WebDAV server."
+      ],
+      "metadata": {
+        "id": "gKrjW5sGQcQT"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Khinsider\n",
+        "Uses obskyr's khinsider.py to download video game music. The module below has been modified to run in a notebook environment.\n",
+        "\n",
+        "https://github.com/obskyr/khinsider/blob/master/khinsider.py"
+      ],
+      "metadata": {
+        "id": "jI0Dpv6-Q6fT"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from __future__ import print_function, unicode_literals\n",
+        "\n",
+        "import os\n",
+        "import re\n",
+        "import sys\n",
+        "from functools import wraps\n",
+        "from itertools import chain\n",
+        "\n",
+        "# Install required libraries\n",
+        "!pip install requests beautifulsoup4\n",
+        "\n",
+        "try:\n",
+        "    from urllib.parse import unquote, urljoin, urlsplit\n",
+        "except ImportError: # Python 2\n",
+        "    from urlparse import unquote, urljoin, urlsplit\n",
+        "\n",
+        "from os import getcwd\n",
+        "\n",
+        "import requests\n",
+        "from bs4 import BeautifulSoup\n",
+        "\n",
+        "BASE_URL = 'https://downloads.khinsider.com/'\n",
+        "\n",
+        "# Suppress printing\n",
+        "class Silence(object):\n",
+        "    \"\"\"Context manager that discards anything written to stdout and stderr.\n",
+        "\n",
+        "    On entry both streams are pointed at os.devnull; on exit the original\n",
+        "    streams are restored and the devnull handles are closed.\n",
+        "    \"\"\"\n",
+        "\n",
+        "    def __enter__(self):\n",
+        "        self._stdout = sys.stdout\n",
+        "        self._stderr = sys.stderr\n",
+        "        # Keep the sinks so __exit__ can close them instead of leaking two\n",
+        "        # file handles every time the context manager is used.\n",
+        "        self._sinks = (open(os.devnull, 'w'), open(os.devnull, 'w'))\n",
+        "        sys.stdout, sys.stderr = self._sinks\n",
+        "\n",
+        "    def __exit__(self, *_):\n",
+        "        sys.stdout = self._stdout\n",
+        "        sys.stderr = self._stderr\n",
+        "        for sink in self._sinks:\n",
+        "            sink.close()\n",
+        "\n",
+        "\n",
+        "# --- Helper functions ---\n",
+        "\n",
+        "FILENAME_INVALID_RE = re.compile(r'[<>:\"/\\\\|?*]')\n",
+        "def to_valid_filename(s):\n",
+        "    \"\"\"Return `s` turned into a name that is safe to use for a file.\n",
+        "\n",
+        "    Trailing spaces and dots are stripped first. If what remains is empty,\n",
+        "    a relative-path token or a reserved device name, an underscore is\n",
+        "    appended; otherwise every character matched by FILENAME_INVALID_RE is\n",
+        "    replaced with '-'.\n",
+        "    \"\"\"\n",
+        "    reservedNames = {\n",
+        "        '', '.', '..', '~', 'CON', 'PRN', 'AUX', 'NUL',\n",
+        "        'COM1', 'COM2', 'COM3', 'COM4', 'COM5', 'COM6', 'COM7', 'COM8', 'COM9',\n",
+        "        'LPT1', 'LPT2', 'LPT3', 'LPT4', 'LPT5', 'LPT6', 'LPT7', 'LPT8', 'LPT9',\n",
+        "    }\n",
+        "\n",
+        "    trimmed = s.rstrip(' .')\n",
+        "    if trimmed in reservedNames:\n",
+        "        return trimmed + '_'\n",
+        "\n",
+        "    return FILENAME_INVALID_RE.sub('-', trimmed)\n",
+        "\n",
+        "def unicodePrint(*args, **kwargs):\n",
+        "    \"\"\"print() wrapper that first makes text arguments encodable.\n",
+        "\n",
+        "    Each text argument is round-tripped through the stdout encoding (UTF-8\n",
+        "    when stdout reports none) with errors='replace', so characters the\n",
+        "    encoding cannot represent are substituted instead of raising. Other\n",
+        "    arguments and all keyword arguments are handed to print() unchanged.\n",
+        "    \"\"\"\n",
+        "    textType = str if sys.version_info[0] > 2 else unicode\n",
+        "    codec = sys.stdout.encoding or 'utf-8'\n",
+        "\n",
+        "    printable = []\n",
+        "    for arg in args:\n",
+        "        if isinstance(arg, textType):\n",
+        "            arg = arg.encode(codec, 'replace').decode(codec)\n",
+        "        printable.append(arg)\n",
+        "\n",
+        "    print(*printable, **kwargs)\n",
+        "\n",
+        "def lazyProperty(func):\n",
+        "    \"\"\"Decorator: a read-only property whose value is computed once per instance.\n",
+        "\n",
+        "    The result of the first access is stored on the instance under\n",
+        "    '_lazy_<function name>' and returned from there on later accesses.\n",
+        "    \"\"\"\n",
+        "    cacheName = '_lazy_' + func.__name__\n",
+        "\n",
+        "    def getter(self):\n",
+        "        if hasattr(self, cacheName):\n",
+        "            return getattr(self, cacheName)\n",
+        "        setattr(self, cacheName, func(self))\n",
+        "        return getattr(self, cacheName)\n",
+        "\n",
+        "    return property(wraps(func)(getter))\n",
+        "\n",
+        "def getSoup(*args, **kwargs):\n",
+        "    \"\"\"Fetch a page with requests.get and return it parsed by toSoup.\n",
+        "\n",
+        "    All arguments are forwarded to requests.get. A 10 second timeout is\n",
+        "    applied unless the caller passes its own `timeout`, so a stalled\n",
+        "    connection raises requests.Timeout instead of hanging the notebook.\n",
+        "    \"\"\"\n",
+        "    kwargs.setdefault('timeout', 10)\n",
+        "    r = requests.get(*args, **kwargs)\n",
+        "    return toSoup(r)\n",
+        "\n",
+        "REMOVE_RE = re.compile(br\"^</td>\\s*$\", re.MULTILINE)\n",
+        "BAD_AMPERSAND_RE = re.compile(br\"&#([^0-9x]|x[^0-9A-Fa-f])\")\n",
+        "def toSoup(r):\n",
+        "    \"\"\"Parse the body of response `r` into a BeautifulSoup tree.\n",
+        "\n",
+        "    Two problems in khinsider's HTML are patched in the raw bytes first:\n",
+        "    lines consisting only of '</td>' are removed, and a '&#' that is not\n",
+        "    followed by a digit or a hex reference gets its ampersand escaped.\n",
+        "    \"\"\"\n",
+        "    content = r.content\n",
+        "    # Fix errors in khinsider's HTML.\n",
+        "    content = REMOVE_RE.sub(b'', content)\n",
+        "    # The replacement has to be a raw literal: in a plain bytes literal a\n",
+        "    # backslash followed by 1 is the octal escape for byte 0x01, which would\n",
+        "    # drop the captured character instead of re-inserting it.\n",
+        "    content = BAD_AMPERSAND_RE.sub(br'&amp;#\\1', content)\n",
+        "\n",
+        "    with Silence():\n",
+        "        return BeautifulSoup(content, 'html.parser')\n",
+        "\n",
+        "def getAppropriateFile(song, formatOrder):\n",
+        "    \"\"\"Choose which of `song`'s files to download.\n",
+        "\n",
+        "    `formatOrder` lists lowercase extensions, most preferred first; the\n",
+        "    first file matching the earliest possible entry is returned. When\n",
+        "    `formatOrder` is None, or no file matches, the song's first file is\n",
+        "    returned.\n",
+        "    \"\"\"\n",
+        "    def extensionOf(candidate):\n",
+        "        return os.path.splitext(candidate.filename)[1][1:].lower()\n",
+        "\n",
+        "    if formatOrder is not None:\n",
+        "        for wanted in formatOrder:\n",
+        "            for candidate in song.files:\n",
+        "                if extensionOf(candidate) == wanted:\n",
+        "                    return candidate\n",
+        "\n",
+        "    return song.files[0]\n",
+        "\n",
+        "def friendlyDownloadFile(file, path, index, total, verbose=False):\n",
+        "    \"\"\"Download `file` into the directory `path`, retrying on network errors.\n",
+        "\n",
+        "    `index` and `total` are only used to label progress messages. `file`\n",
+        "    may be None, in which case it is skipped. Returns True if the file was\n",
+        "    downloaded or already existed at the target location, and False if it\n",
+        "    was skipped or if all three attempts failed with a connection error or\n",
+        "    timeout.\n",
+        "    \"\"\"\n",
+        "    numberStr = \"{}/{}\".format(\n",
+        "        str(index).zfill(len(str(total))),\n",
+        "        str(total)\n",
+        "    )\n",
+        "\n",
+        "    # A missing file has to be skipped whether or not we are verbose; only\n",
+        "    # the message is optional. Falling through would fail on file.filename.\n",
+        "    if file is None:\n",
+        "        if verbose:\n",
+        "            print(\"Song {} is nonexistent (404: Not Found). Skipping over.\".format(numberStr), file=sys.stderr)\n",
+        "        return False\n",
+        "\n",
+        "    encoding = 'utf-8'\n",
+        "    filename = file.filename.encode(encoding, 'replace').decode(encoding)\n",
+        "\n",
+        "    byTheWay = \"\"\n",
+        "    if filename != file.filename:\n",
+        "        byTheWay = \" (replaced characters not in the filesystem's \\\"{}\\\" encoding)\".format(encoding)\n",
+        "\n",
+        "    filename = to_valid_filename(filename)\n",
+        "    path = os.path.join(path, filename)\n",
+        "\n",
+        "    if not os.path.exists(path):\n",
+        "        if verbose:\n",
+        "            unicodePrint(\"Downloading {}: {}{}...\".format(numberStr, filename, byTheWay))\n",
+        "        # Up to three attempts; the for/else branch runs only if none of\n",
+        "        # them reached the `break`.\n",
+        "        for triesElapsed in range(3):\n",
+        "            if verbose and triesElapsed:\n",
+        "                unicodePrint(\"Couldn't download {}. Trying again...\".format(filename), file=sys.stderr)\n",
+        "            try:\n",
+        "                file.download(path)\n",
+        "            except (requests.ConnectionError, requests.Timeout):\n",
+        "                pass\n",
+        "            else:\n",
+        "                break\n",
+        "        else:\n",
+        "            if verbose:\n",
+        "                unicodePrint(\"Couldn't download {}. Skipping over.\".format(filename), file=sys.stderr)\n",
+        "            return False\n",
+        "    else:\n",
+        "        if verbose:\n",
+        "            unicodePrint(\"Skipping over {}: {}{}. Already exists.\".format(numberStr, filename, byTheWay))\n",
+        "\n",
+        "    return True\n",
+        "\n",
+        "class KhinsiderError(Exception):\n",
+        "    \"\"\"General error type for this module; parent of NonexistentSongError.\"\"\"\n",
+        "\n",
+        "class NonexistentSongError(KhinsiderError):\n",
+        "    \"\"\"Raised when requesting a song page ends up on the site's 404 page.\"\"\"\n",
+        "\n",
+        "class SoundtrackError(Exception):\n",
+        "    \"\"\"Error concerning one particular soundtrack.\n",
+        "\n",
+        "    The soundtrack passed to the constructor is kept on the `soundtrack`\n",
+        "    attribute.\n",
+        "    \"\"\"\n",
+        "\n",
+        "    def __init__(self, soundtrack):\n",
+        "        self.soundtrack = soundtrack\n",
+        "\n",
+        "class NonexistentSoundtrackError(SoundtrackError, ValueError):\n",
+        "    \"\"\"Raised when the site answers \"No such album\" for a soundtrack.\"\"\"\n",
+        "\n",
+        "    def __str__(self):\n",
+        "        # IDs longer than 80 characters are left out of the message.\n",
+        "        if len(self.soundtrack.id) <= 80:\n",
+        "            quoted = '\"{}\" '.format(self.soundtrack.id)\n",
+        "        else:\n",
+        "            quoted = \"\"\n",
+        "        return \"The soundtrack {}does not exist.\".format(quoted)\n",
+        "\n",
+        "class NonexistentFormatsError(SoundtrackError, ValueError):\n",
+        "    \"\"\"Raised when a soundtrack offers none of the requested formats.\n",
+        "\n",
+        "    `requestedFormats` holds the extensions that were asked for.\n",
+        "    \"\"\"\n",
+        "\n",
+        "    def __init__(self, soundtrack, requestedFormats):\n",
+        "        super(NonexistentFormatsError, self).__init__(soundtrack)\n",
+        "        self.requestedFormats = requestedFormats\n",
+        "\n",
+        "    def __str__(self):\n",
+        "        # IDs longer than 80 characters are left out of the message.\n",
+        "        if len(self.soundtrack.id) <= 80:\n",
+        "            quoted = '\"{}\" '.format(self.soundtrack.id)\n",
+        "        else:\n",
+        "            quoted = \"\"\n",
+        "        wanted = ['\"{}\"'.format(extension) for extension in self.requestedFormats]\n",
+        "        return \"The soundtrack {}is not available in the requested formats ({}).\".format(\n",
+        "            quoted, \", \".join(wanted))\n",
+        "\n",
+        "class Soundtrack(object):\n",
+        "    \"\"\"A soundtrack (album) on KHInsider, identified by its ID.\n",
+        "\n",
+        "    Everything except `id` and `url` is read from the album page on first\n",
+        "    access and cached on the instance afterwards.\n",
+        "\n",
+        "    Properties:\n",
+        "    * id:     The soundtrack's unique ID, used at the end of its URL.\n",
+        "    * url:    The full URL of the soundtrack.\n",
+        "    * name:   The textual title of the soundtrack.\n",
+        "    * availableFormats: A list of the formats the soundtrack is available in.\n",
+        "    * songs:  A list of Song objects representing the songs in the soundtrack.\n",
+        "    * images: A list of File objects representing the images in the soundtrack.\n",
+        "    \"\"\"\n",
+        "\n",
+        "    def __init__(self, soundtrackId):\n",
+        "        self.id = soundtrackId\n",
+        "        self.url = urljoin(BASE_URL, 'game-soundtracks/album/' + self.id)\n",
+        "\n",
+        "    def __repr__(self):\n",
+        "        return \"<{}: {}>\".format(self.__class__.__name__, self.id)\n",
+        "\n",
+        "    def _isLoaded(self, property):\n",
+        "        \"\"\"Tell whether the lazy property named `property` is already cached.\"\"\"\n",
+        "        cacheName = '_lazy_' + property\n",
+        "        return hasattr(self, cacheName)\n",
+        "\n",
+        "    @lazyProperty\n",
+        "    def _contentSoup(self):\n",
+        "        \"\"\"The album page's 'pageContent' element.\n",
+        "\n",
+        "        Raises NonexistentSoundtrackError when its first paragraph reads\n",
+        "        \"No such album\".\n",
+        "        \"\"\"\n",
+        "        page = getSoup(self.url)\n",
+        "        content = page.find(id='pageContent')\n",
+        "        firstParagraph = content.find('p')\n",
+        "        if firstParagraph.string == \"No such album\":\n",
+        "            raise NonexistentSoundtrackError(self)\n",
+        "        return content\n",
+        "\n",
+        "    @lazyProperty\n",
+        "    def name(self):\n",
+        "        # First non-blank string inside the page's first <h2>.\n",
+        "        heading = self._contentSoup.find('h2')\n",
+        "        return next(heading.stripped_strings)\n",
+        "\n",
+        "    @lazyProperty\n",
+        "    def availableFormats(self):\n",
+        "        # Every header cell of the song list that is not one of the fixed\n",
+        "        # columns names a format; with none left, 'mp3' is assumed.\n",
+        "        fixedColumns = {\"\", \"Track\", \"Song Name\", \"Download\", \"Size\"}\n",
+        "        headerRow = self._contentSoup.find('table', id='songlist').find('tr')\n",
+        "        formats = []\n",
+        "        for cell in headerRow(['th', 'td']):\n",
+        "            heading = cell.get_text(strip=True)\n",
+        "            if heading not in fixedColumns:\n",
+        "                formats.append(heading.lower())\n",
+        "        return formats or ['mp3']\n",
+        "\n",
+        "    @lazyProperty\n",
+        "    def songs(self):\n",
+        "        # One Song per song-list row that contains no <th> cell, built from\n",
+        "        # the first link found in the row.\n",
+        "        songTable = self._contentSoup.find('table', id='songlist')\n",
+        "        links = [row.find('a') for row in songTable('tr') if not row.find('th')]\n",
+        "        return [Song(urljoin(self.url, link['href'])) for link in links]\n",
+        "\n",
+        "    @lazyProperty\n",
+        "    def images(self):\n",
+        "        # Links wrapping an <img> inside the first table of the page content.\n",
+        "        firstTable = self._contentSoup.find('table')\n",
+        "        if not firstTable:\n",
+        "            return []\n",
+        "        hrefs = [link['href'] for link in firstTable('a') if link.find('img')]\n",
+        "        return [File(urljoin(self.url, href)) for href in hrefs]\n",
+        "\n",
+        "    def download(self, path='', makeDirs=True, formatOrder=None, verbose=False):\n",
+        "        \"\"\"Download every song and image of the soundtrack into `path`.\n",
+        "\n",
+        "        `path` is resolved against the current working directory and, if\n",
+        "        `makeDirs` is true, created when it does not exist. `formatOrder`\n",
+        "        is an optional list of extensions in order of preference;\n",
+        "        NonexistentFormatsError is raised when the soundtrack offers none\n",
+        "        of them. Returns True only if no file was skipped or given up on.\n",
+        "        \"\"\"\n",
+        "        targetDir = os.path.join(getcwd(), path)\n",
+        "        targetDir = os.path.abspath(os.path.realpath(targetDir))\n",
+        "\n",
+        "        if formatOrder:\n",
+        "            formatOrder = [extension.lower() for extension in formatOrder]\n",
+        "            if set(self.availableFormats).isdisjoint(formatOrder):\n",
+        "                raise NonexistentFormatsError(self, formatOrder)\n",
+        "\n",
+        "        if verbose and not self._isLoaded('songs'):\n",
+        "            print(\"Getting song list...\")\n",
+        "\n",
+        "        pending = []\n",
+        "        for song in self.songs:\n",
+        "            try:\n",
+        "                chosen = getAppropriateFile(song, formatOrder)\n",
+        "            except NonexistentSongError:\n",
+        "                # None marks a song whose page was missing.\n",
+        "                chosen = None\n",
+        "            pending.append(chosen)\n",
+        "        pending.extend(self.images)\n",
+        "        totalFiles = len(pending)\n",
+        "\n",
+        "        if makeDirs and not os.path.isdir(targetDir):\n",
+        "            os.makedirs(os.path.abspath(os.path.realpath(targetDir)))\n",
+        "\n",
+        "        failures = 0\n",
+        "        for fileNumber, file in enumerate(pending, 1):\n",
+        "            if not friendlyDownloadFile(file, targetDir, fileNumber, totalFiles, verbose):\n",
+        "                failures += 1\n",
+        "\n",
+        "        return failures == 0\n",
+        "\n",
+        "class Song(object):\n",
+        "    \"\"\"A song on KHInsider.\n",
+        "\n",
+        "    Properties:\n",
+        "    * url:   The full URL of the song page.\n",
+        "    * name:  The name of the song.\n",
+        "    * files: A list of the song's files - there may be several if the song\n",
+        "             is available in more than one format.\n",
+        "    \"\"\"\n",
+        "\n",
+        "    def __init__(self, url):\n",
+        "        self.url = url\n",
+        "\n",
+        "    def __repr__(self):\n",
+        "        return \"<{}: {}>\".format(self.__class__.__name__, self.url)\n",
+        "\n",
+        "    @lazyProperty\n",
+        "    def _soup(self):\n",
+        "        \"\"\"Parsed song page, fetched once with a 10 second timeout.\n",
+        "\n",
+        "        Raises NonexistentSongError if the request ends up on a URL whose\n",
+        "        last path segment is '404'.\n",
+        "        \"\"\"\n",
+        "        r = requests.get(self.url, timeout=10)\n",
+        "        if r.url.rsplit('/', 1)[-1] == '404':\n",
+        "            raise NonexistentSongError(\"Nonexistent song page (404).\")\n",
+        "        # Parse the response already in hand; going through getSoup(self.url)\n",
+        "        # here would download the same page a second time.\n",
+        "        return toSoup(r)\n",
+        "\n",
+        "    @lazyProperty\n",
+        "    def name(self):\n",
+        "        # Position-based lookup: the second <b> inside the page's third <p>.\n",
+        "        return self._soup('p')[2]('b')[1].get_text()\n",
+        "\n",
+        "    @lazyProperty\n",
+        "    def files(self):\n",
+        "        # Download links are absolute URLs whose path starts with\n",
+        "        # /soundtracks/ or /ost/.\n",
+        "        anchors = self._soup('a', href=re.compile(r'^https?://[^/]+/(?:soundtracks|ost)/.+$'))\n",
+        "        return [File(urljoin(self.url, a['href'])) for a in anchors]\n",
+        "\n",
+        "class File(object):\n",
+        "    \"\"\"A downloadable file belonging to a soundtrack on KHInsider.\n",
+        "\n",
+        "    Properties:\n",
+        "    * url:      The full URL of the file.\n",
+        "    * filename: The last path segment of the URL, percent-decoded.\n",
+        "    \"\"\"\n",
+        "\n",
+        "    def __init__(self, url):\n",
+        "        self.url = url\n",
+        "\n",
+        "        # Coerce to the native string type before splitting, falling back\n",
+        "        # to UTF-8 bytes if that conversion fails.\n",
+        "        try:\n",
+        "            nativeUrl = str(url)\n",
+        "        except UnicodeError:\n",
+        "            nativeUrl = url.encode('utf-8')\n",
+        "        lastSegment = nativeUrl.rsplit(str('/'), 1)[-1]\n",
+        "        self.filename = unquote(lastSegment)\n",
+        "        # Decode only if unquote() handed back something with .decode().\n",
+        "        try:\n",
+        "            self.filename = self.filename.decode('utf-8')\n",
+        "        except AttributeError:\n",
+        "            pass\n",
+        "\n",
+        "    def __repr__(self):\n",
+        "        return \"<{}: {}>\".format(self.__class__.__name__, self.url)\n",
+        "\n",
+        "    def download(self, path):\n",
+        "        \"\"\"Fetch the file (10 second timeout) and write its bytes to `path`.\"\"\"\n",
+        "        response = requests.get(self.url, timeout=10)\n",
+        "        payload = response.content\n",
+        "        with open(path, 'wb') as outFile:\n",
+        "            outFile.write(payload)\n",
+        "\n",
+        "def download_soundtrack(soundtrack_id, path='', make_dirs=True, format_order=None, verbose=False):\n",
+        "    \"\"\"Download the soundtrack whose ID is `soundtrack_id`.\n",
+        "\n",
+        "    When `path` is None the files go into a directory named after the\n",
+        "    soundtrack's title; any other value is used as given. The remaining\n",
+        "    arguments are passed on to Soundtrack.download, whose result is\n",
+        "    returned.\n",
+        "    \"\"\"\n",
+        "    soundtrack = Soundtrack(soundtrack_id)\n",
+        "    title = soundtrack.name  # Loads the album page in advance.\n",
+        "    if path is None:\n",
+        "        path = to_valid_filename(title)\n",
+        "    if verbose:\n",
+        "        print(f\"Downloading to \\\"{path}\\\".\")\n",
+        "    return soundtrack.download(path, make_dirs, format_order, verbose)\n",
+        "\n",
+        "class SearchError(KhinsiderError):\n",
+        "    \"\"\"Raised by search_soundtrack when the result page has no album tables.\"\"\"\n",
+        "\n",
+        "\n",
+        "def search_soundtrack(term):\n",
+        "    \"\"\"Search KHInsider for `term`.\n",
+        "\n",
+        "    Normally returns a list of two lists of Soundtrack objects: album name\n",
+        "    results first, song name results second. If the search URL resolves\n",
+        "    directly to an album page, a one-element list holding that Soundtrack\n",
+        "    is returned instead. Raises SearchError, carrying the text of the\n",
+        "    page's first paragraph, when the page has no result tables.\n",
+        "    \"\"\"\n",
+        "    r = requests.get(urljoin(BASE_URL, 'search'), params={'search': term})\n",
+        "    path = urlsplit(r.url).path\n",
+        "    # NOTE(review): this shortcut returns a flat list, which is a different\n",
+        "    # shape from the two-list result below - callers must handle both.\n",
+        "    if path.split('/', 2)[1] == 'game-soundtracks':\n",
+        "        return [Soundtrack(path.rsplit('/', 1)[-1])]\n",
+        "\n",
+        "    soup = toSoup(r)\n",
+        "\n",
+        "    tables = soup('table', class_='albumList')\n",
+        "    if not tables:\n",
+        "        raise SearchError(soup.find('p').get_text(strip=True))\n",
+        "\n",
+        "    soundtracks = [soundtracksInSearchTable(table) for table in tables]\n",
+        "    # With a single table, the page text decides whether it holds song\n",
+        "    # results or album results; the other slot is filled with an empty list.\n",
+        "    if len(soundtracks) == 1:\n",
+        "        if \"song\" in soup.find(id='pageContent').find('p').get_text():\n",
+        "            soundtracks.insert(0, [])\n",
+        "        else:\n",
+        "            soundtracks.append([])\n",
+        "\n",
+        "    return soundtracks\n",
+        "\n",
+        "def soundtracksInSearchTable(table):\n",
+        "    \"\"\"Build Soundtrack objects from the rows of one search result table.\n",
+        "\n",
+        "    The first row is skipped. Each soundtrack's ID is the last path segment\n",
+        "    of the link in the row's second cell, and the link text is stored as\n",
+        "    its cached name so reading `name` later needs no page fetch.\n",
+        "    \"\"\"\n",
+        "    links = [row('td')[1].find('a') for row in table('tr')[1:]]\n",
+        "    idsAndTitles = [\n",
+        "        (link['href'].split('/')[-1], link.get_text(strip=True))\n",
+        "        for link in links\n",
+        "    ]\n",
+        "\n",
+        "    soundtracks = []\n",
+        "    for soundtrackId, title in idsAndTitles:\n",
+        "        found = Soundtrack(soundtrackId)\n",
+        "        found._lazy_name = title\n",
+        "        soundtracks.append(found)\n",
+        "    return soundtracks\n",
+        "\n",
+        "def print_search_results(search_results):\n",
+        "    \"\"\"Print search results grouped under album-title and song-name headings.\n",
+        "\n",
+        "    `search_results` is a pair of lists of soundtracks: album title matches\n",
+        "    first, song name matches second. Each line shows the ID, dot padding so\n",
+        "    the names line up, and the name. Empty groups are left out; if both\n",
+        "    are empty only a blank line is printed.\n",
+        "    \"\"\"\n",
+        "    # default=0 keeps max() from raising ValueError when there are no results.\n",
+        "    pad_len = max((len(x.id) for x in chain(*search_results)), default=0)\n",
+        "    result_str = \"\"\n",
+        "    has_previous_list = False\n",
+        "    for heading, soundtracks in zip((\"Album title results:\", \"Song name results:\"), search_results):\n",
+        "        if soundtracks:\n",
+        "            # Blank line between the two groups, but not before the first.\n",
+        "            if has_previous_list:\n",
+        "                result_str += \"\\n\"\n",
+        "            result_str += heading + \"\\n\"\n",
+        "            for soundtrack in soundtracks:\n",
+        "                result_str += \"{} {}. {}\\n\".format(soundtrack.id, '.' * (pad_len - len(soundtrack.id)), soundtrack.name)\n",
+        "            has_previous_list = True\n",
+        "    print(result_str)\n",
+        "\n",
+        "\n",
+        "ALBUM_NAME = \"\" # @param {type: \"string\"}\n",
+        "# With an empty ID, Soundtrack would request the bare\n",
+        "# 'game-soundtracks/album/' URL, so stop early with a clear message.\n",
+        "if not ALBUM_NAME.strip():\n",
+        "  raise ValueError(\"ALBUM_NAME is empty. Enter the album ID from the end of the Khinsider album URL.\")\n",
+        "# Each album gets its own folder under output/.\n",
+        "DOWNLOAD_PATH = \"output/\"+ALBUM_NAME\n",
+        "if download_soundtrack(ALBUM_NAME, path=DOWNLOAD_PATH, format_order=['mp3'], verbose=True):\n",
+        "  print(\"Download was successful! Go ahead and proceed\")\n",
+        "else:\n",
+        "  print(\"Download was unsuccessful. Please check the logs above\")"
+      ],
+      "metadata": {
+        "id": "L2TvwtMkQaD6",
+        "cellView": "form"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# Connect\n",
+        "Connect and see if there are any problems with your credentials"
+      ],
+      "metadata": {
+        "id": "9N7JL34GGHaZ"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!pip install webdavclient3\n",
+        "from getpass import getpass\n",
+        "\n",
+        "from webdav3.client import Client\n",
+        "\n",
+        "HOSTNAME = \"\" # @param {type: \"string\"}\n",
+        "USERNAME = \"\" # @param {type: \"string\"}\n",
+        "PASSWORD = \"\" # @param {type: \"string\"}\n",
+        "# Leave enabled unless the server uses a certificate that cannot be\n",
+        "# verified (e.g. self-signed): without verification the login above is\n",
+        "# sent to a server whose identity has not been checked.\n",
+        "VERIFY_SSL = True # @param {type: \"boolean\"}\n",
+        "\n",
+        "# Prompt when the form field is left blank, so the password does not have\n",
+        "# to be typed into a field that is saved with the notebook.\n",
+        "if not PASSWORD:\n",
+        "  PASSWORD = getpass(\"WebDAV password: \")\n",
+        "\n",
+        "options = {\n",
+        " 'webdav_hostname': HOSTNAME,\n",
+        " 'webdav_login':    USERNAME,\n",
+        " 'webdav_password': PASSWORD\n",
+        "}\n",
+        "client = Client(options)\n",
+        "client.verify = VERIFY_SSL\n",
+        "# Listing the remote root doubles as the connection/credentials check.\n",
+        "files1 = client.list()\n",
+        "files1"
+      ],
+      "metadata": {
+        "id": "aU33YKZUF-JN"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "TARGET_PATH = \"Music/Video Game OST/\" # @param {type: \"string\"}\n",
+        "import os\n",
+        "# Every directory directly under output/ is uploaded to a folder of the\n",
+        "# same name below TARGET_PATH.\n",
+        "folders = [\n",
+        "  f for f in os.listdir(\"output\")\n",
+        "  if os.path.isdir(os.path.join(\"output\", f))\n",
+        "]\n",
+        "for folder in folders:\n",
+        "  print(\"Processing \" + folder)\n",
+        "  target_path = os.path.join(TARGET_PATH, folder)\n",
+        "  client.upload_sync(remote_path=target_path, local_path=os.path.join(\"output\", folder))"
+      ],
+      "metadata": {
+        "id": "hTJxy2y21HRo"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}
+\ No newline at end of file
author	Pinapelz <donaldshan1@outlook.com>	2026-04-05 14:22:45 -0700
committer	Pinapelz <donaldshan1@outlook.com>	2026-04-05 14:22:45 -0700
commit	8faf54c678ec1fb15dc6ac696a814384e0746335 (patch)
tree	704bbd53ad1ba30a60e27f4f12fd87e531cfe267 /Khinsider_To_WebDAV.ipynb
parent	68a99b3607c614bad511fbf8aa25e3d0bb8c6dc4 (diff)