aboutsummaryrefslogtreecommitdiffstats
path: root/sega/ongeki_jp.py
blob: 587f3581a70e7451271a6e3f4d58081ed308f48e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from bs4 import BeautifulSoup
from datetime import datetime
import time

def parse_ongeki_refresh_news_site(html: str) -> list:
    """Parse a news-listing page into ``ONGEKI_JPN_REFRESH`` entry dicts.

    Every ``li.p-news__listChild`` element in *html* produces one entry.
    Missing sub-elements or attributes never abort the parse; the
    corresponding fields simply fall back to ``None`` / ``""`` / ``[]``.

    Args:
        html: Raw HTML of the news listing page.

    Returns:
        A list of dicts, in document order, each with the keys ``date``,
        ``identifier``, ``type``, ``timestamp``, ``headline``, ``content``,
        ``url`` and ``images``.
    """
    soup = BeautifulSoup(html, "html.parser")
    # Category labels that are reported as "no type" in the output.
    untyped_labels = ("GAME", "CARDMAKER")
    items = []

    for li in soup.select("li.p-news__listChild"):
        a_tag = li.select_one("a.p-news__listLink")
        # .get() instead of [] so an anchor without href does not raise.
        url = a_tag.get("href") if a_tag else None

        img_tag = li.select_one(".p-news__listThumb img")
        image_url = img_tag.get("src") if img_tag else None
        image_alt = img_tag.get("alt", "") if img_tag else ""
        # The thumbnail links to the same target as the entry itself.
        image_link = url if image_url else None

        # The upper text line is parsed as "<date> / <type>"; the type part
        # is optional.
        date_text = None
        type_text = None
        upper_tag = li.select_one(".p-news__listTextUpper")
        if upper_tag:
            parts = upper_tag.text.strip().split("/")
            date_text = parts[0].strip()
            if len(parts) > 1:
                type_text = parts[-1].strip()

        timestamp = None
        if date_text:
            try:
                dt = datetime.strptime(date_text, "%Y.%m.%d %a")
                # mktime interprets the naive datetime in the local timezone.
                timestamp = int(time.mktime(dt.timetuple()))
            except (ValueError, OverflowError):
                # Unparseable or out-of-range date: keep the raw text in
                # "date" and leave the timestamp unset.
                timestamp = None

        # NOTE(review): the text of `.p-news__listTextUnder` is not emitted;
        # "headline" is always None and the image alt text is used as
        # "content" -- confirm this is intended.
        entry = {
            "date": date_text,
            "identifier": "ONGEKI_JPN_REFRESH",
            "type": type_text if type_text not in untyped_labels else None,
            "timestamp": timestamp,
            "headline": None,
            "content": image_alt,
            "url": url,
            "images": [{
                "image": image_url,
                "link": image_link,
            }] if image_url else [],
        }

        items.append(entry)

    return items
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage