fix: cdata to wrap cleanly

author: Pinapelz <yukais@pinapelz.com> 2025-10-02 20:49:35 -0700
committer: Pinapelz <yukais@pinapelz.com> 2025-10-02 20:49:35 -0700
commit: d9197178248156cc8c09824a8e7e54cc64539805 (patch)
tree: 405efc6e4bea40c28b0b8ed9f632bb0d797099bd
parent: 7461f283233b571a3012c07df55974563318a64d (diff)
1 files changed, 25 insertions, 12 deletions
diff --git a/feed.py b/feed.py
index a8bda0e..6a6d0ef 100644
--- a/feed.py
+++ b/feed.py
@@ -3,13 +3,10 @@ import json
 import requests
 import mimetypes
 import xml.etree.ElementTree as ET
+from xml.dom import minidom
 from datetime import datetime, timezone
 from constants import RSS_FEED_URL
 
-def _wrap_cdata(text: str) -> str:
-    if text is None:
-        return ""
-    return f"<![CDATA[{text}]]>"
 
 def build_rss_from_news_feed(title: str, description: str, json_file_path: str, output_path: str, limit: int = 12):
     """
@@ -43,19 +40,22 @@ def build_rss_from_news_feed(title: str, description: str, json_file_path: str,
 
     for post in news_feeds:
         item = ET.SubElement(channel, "item")
+
         # Title
         post_title = post.get("headline") or post.get("en_headline") or post.get("content", "")[:50]
         ET.SubElement(item, "title").text = post_title
+
         # Link
         ET.SubElement(item, "link").text = post.get("url") or "https://arcade.moekyun.me"
-        # Description (combine JP + EN if available)
+
+        # Description (JP + EN combined)
         jp_content = post.get("content", "")
         en_headline = post.get("en_headline")
         en_content = post.get("en_content")
+
         desc_parts = []
         if jp_content:
             desc_parts.append(jp_content.strip().replace("\n", "<br/>"))
-
         if en_headline or en_content:
             desc_parts.append("<hr/><b>English Translation</b><br/>")
             if en_headline:
@@ -64,11 +64,16 @@ def build_rss_from_news_feed(title: str, description: str, json_file_path: str,
                 desc_parts.append(en_content.strip().replace("\n", "<br/>"))
 
         desc_combined = "\n".join(desc_parts)
-        ET.SubElement(item, "description").text = _wrap_cdata(desc_combined)
+
+        # Placeholder for CDATA
+        desc_el = ET.SubElement(item, "description")
+        desc_el.text = f"__CDATA_PLACEHOLDER__{desc_combined}__END__"
 
         # pubDate
         if "timestamp" in post and post["timestamp"]:
-            pub_date = datetime.fromtimestamp(post["timestamp"], timezone.utc).strftime("%a, %d %b %Y %H:%M:%S +0000")
+            pub_date = datetime.fromtimestamp(
+                post["timestamp"], timezone.utc
+            ).strftime("%a, %d %b %Y %H:%M:%S +0000")
             ET.SubElement(item, "pubDate").text = pub_date
 
         # First image enclosure (if any)
@@ -86,7 +91,15 @@ def build_rss_from_news_feed(title: str, description: str, json_file_path: str,
                     pass
                 ET.SubElement(item, "enclosure", url=image_url, type=mime, length=length)
 
-    # Write out
-    tree = ET.ElementTree(rss)
-    ET.indent(tree, space="  ")
-    tree.write(output_path, encoding="utf-8", xml_declaration=True)
+    # Serialize XML
+    rough_xml = ET.tostring(rss, encoding="utf-8", xml_declaration=True)
+
+    # Replace placeholders with real CDATA
+    final_xml = rough_xml.decode("utf-8").replace(
+        "__CDATA_PLACEHOLDER__", "<![CDATA[").replace("__END__", "]]>"
+    )
+
+    # Pretty print
+    dom = minidom.parseString(final_xml)
+    with open(output_path, "w", encoding="utf-8") as f:
+        f.write(dom.toprettyxml(indent="  "))
author	Pinapelz <yukais@pinapelz.com>	2025-10-02 20:49:35 -0700
committer	Pinapelz <yukais@pinapelz.com>	2025-10-02 20:49:35 -0700
commit	d9197178248156cc8c09824a8e7e54cc64539805 (patch)
tree	405efc6e4bea40c28b0b8ed9f632bb0d797099bd
parent	7461f283233b571a3012c07df55974563318a64d (diff)