1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
|
import os
import json
import requests
import mimetypes
import xml.etree.ElementTree as ET
from xml.dom import minidom
from datetime import datetime, timezone
from constants import RSS_FEED_URL
def build_rss_from_news_feed(title: str, description: str, json_file_path: str, output_path: str, limit: int = 12):
"""
Build RSS from an existing JSON file containing news_posts.
Reads the JSON, extracts posts, and generates a valid RSS XML.
"""
if not os.path.exists(json_file_path):
raise FileNotFoundError(f"JSON file not found: {json_file_path}")
with open(json_file_path, "r", encoding="utf-8") as f:
data = json.load(f)
news_feeds = data.get("news_posts", [])[:limit]
file_name = os.path.basename(output_path)
url_to_feed = f"{RSS_FEED_URL}/{file_name}"
rss = ET.Element("rss", {
"version": "2.0",
"xmlns:atom": "http://www.w3.org/2005/Atom"
})
channel = ET.SubElement(rss, "channel")
ET.SubElement(channel, "title").text = title
ET.SubElement(channel, "description").text = description
ET.SubElement(channel, "link").text = url_to_feed
ET.SubElement(channel, "{http://www.w3.org/2005/Atom}link", {
"href": url_to_feed,
"rel": "self",
"type": "application/rss+xml"
})
for post in news_feeds:
item = ET.SubElement(channel, "item")
# Title
post_title = post.get("headline") or post.get("en_headline") or post.get("content", "")[:50]
ET.SubElement(item, "title").text = post_title
# Link
ET.SubElement(item, "link").text = post.get("url") or "https://arcade.moekyun.me"
# Description (JP + EN combined)
jp_content = post.get("content", "")
en_headline = post.get("en_headline")
en_content = post.get("en_content")
desc_parts = []
if jp_content:
desc_parts.append(jp_content.strip().replace("\n", "<br/>"))
if en_headline or en_content:
desc_parts.append("<hr/><b>English Translation</b><br/>")
if en_headline:
desc_parts.append(f"<i>{en_headline.strip()}</i><br/>")
if en_content:
desc_parts.append(en_content.strip().replace("\n", "<br/>"))
desc_combined = "\n".join(desc_parts)
# Placeholder for CDATA
desc_el = ET.SubElement(item, "description")
desc_el.text = f"__CDATA_PLACEHOLDER__{desc_combined}__END__"
# pubDate
if "timestamp" in post and post["timestamp"]:
pub_date = datetime.fromtimestamp(
post["timestamp"], timezone.utc
).strftime("%a, %d %b %Y %H:%M:%S +0000")
ET.SubElement(item, "pubDate").text = pub_date
# First image enclosure (if any)
images = post.get("images", [])
if images:
image_url = images[0].get("image")
if image_url:
mime = mimetypes.guess_type(image_url)[0] or "application/octet-stream"
length = "0"
try:
r = requests.head(image_url, timeout=5, allow_redirects=True)
if "Content-Length" in r.headers:
length = r.headers["Content-Length"]
except Exception:
pass
ET.SubElement(item, "enclosure", url=image_url, type=mime, length=length)
# Serialize XML
rough_xml = ET.tostring(rss, encoding="utf-8", xml_declaration=True)
# Replace placeholders with real CDATA
final_xml = rough_xml.decode("utf-8").replace(
"__CDATA_PLACEHOLDER__", "<![CDATA[").replace("__END__", "]]>"
)
# Pretty print
dom = minidom.parseString(final_xml)
with open(output_path, "w", encoding="utf-8") as f:
f.write(dom.toprettyxml(indent=" "))
|