diff options
Diffstat (limited to 'sega')
| -rw-r--r-- | sega/chuni_intl.py | 31 | ||||
| -rw-r--r-- | sega/maimaidx_intl.py | 3 |
2 files changed, 33 insertions, 1 deletion
diff --git a/sega/chuni_intl.py b/sega/chuni_intl.py
index a6c4b5f..ea8cd7f 100644
--- a/sega/chuni_intl.py
+++ b/sega/chuni_intl.py
@@ -1,6 +1,7 @@
 import re
 from datetime import datetime, timedelta, timezone
 from enum import Enum
+import json
 from urllib.parse import urljoin
 
 from bs4 import BeautifulSoup
@@ -94,6 +95,36 @@ def make_image_extractor(version: ParserVersion):
     else:
         raise ValueError("Unknown Parser Version")
 
+def parse_chuni_intl_api_route(raw_api_data: str, identifier: str, limit: int):
+    route_data = json.loads(raw_api_data)
+    route_data = route_data[:limit]
+    entries = []
+    for post_data in route_data:
+        date_str = post_data["date"]
+        dt = datetime.strptime(date_str, "%Y.%m.%d").replace(tzinfo=timezone(timedelta(hours=9)))
+        timestamp = int(dt.timestamp())
+        full_image_url = post_data["thumbnail"]
+        content = post_data["desc"]
+        headline = post_data["title"]
+        url = post_data["permalink"]
+        images = [{
+            "image": full_image_url,
+            "link": None
+        }]
+        entry = {
+            "date": date_str,
+            "identifier": identifier,
+            "type": None,
+            "timestamp": timestamp,
+            "headline": headline,
+            "content": content,
+            "url": url,
+            "images": images,
+            "is_ai_summary": False
+        }
+        entries.append(entry)
+    return entries
+
 parse_chuni_intl_news_site = make_chuni_intl_parser(
     "CHUNITHM_INTL",
     ParserVersion.ALPHA
diff --git a/sega/maimaidx_intl.py b/sega/maimaidx_intl.py
index 073a211..34eaa43 100644
--- a/sega/maimaidx_intl.py
+++ b/sega/maimaidx_intl.py
@@ -49,8 +49,9 @@ def make_maimaidx_intl_parser(identifier: str, parser: ParserVersion):
     if parser == ParserVersion.ALPHA:
         return alpha_parser
 
-def parse_maimaidx_intl_api_route(raw_api_data: str, identifier: str):
+def parse_maimaidx_intl_api_route(raw_api_data: str, identifier: str, limit: int):
     route_data = json.loads(raw_api_data)
+    route_data = route_data[:limit]
     entries = []
     for post_data in route_data:
         date_data = post_data["date"]
