From 4583d95b06071b637cf6794f332b099d30b9a5c3 Mon Sep 17 00:00:00 2001
From: Pinapelz <yukais@pinapelz.com>
Date: Tue, 22 Apr 2025 17:36:13 -0700
Subject: add support for mus_plus

---
 community/museca_plus.py | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 community/museca_plus.py

(limited to 'community/museca_plus.py')

diff --git a/community/museca_plus.py b/community/museca_plus.py
new file mode 100644
index 0000000..81c7313
--- /dev/null
+++ b/community/museca_plus.py
@@ -0,0 +1,41 @@
+from bs4 import BeautifulSoup
+from datetime import datetime
+from urllib.parse import urljoin
+import time
+import re
+
+def parse_museca_plus_news_site(html: str) -> list:
+    soup = BeautifulSoup(html, "html.parser")
+    news_posts = []
+    base_url = "https://museca.plus/"
+    for p in soup.select("div.subcontainer.center.text > p"):
+        text = p.get_text(strip=True, separator=' ')
+        date_match = re.search(r'(\d{4}-\d{2}-\d{2})', text)
+        if not date_match:
+            continue
+        date_str = date_match.group(1)
+        try:
+            dt = datetime.strptime(date_str, "%Y-%m-%d")
+            timestamp = int(time.mktime(dt.timetuple()))
+        except ValueError:
+            continue
+        images = []
+        for img in p.find_all("img"):
+            img_url = urljoin(base_url, img.get("src"))
+            parent_a = img.find_parent("a")
+            images.append({"image": img_url, "link": None})
+
+        content = p.get_text(separator=' ', strip=True)
+
+        news_posts.append({
+            'date': date_str,
+            'identifier': 'MUSECA_PLUS',
+            'type': None,
+            'timestamp': timestamp,
+            'headline': None,
+            'content': content,
+            'url': None,
+            'images': images
+        })
+
+    return news_posts
-- 
cgit v1.2.3