about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Pinapelz <yukais@pinapelz.com> 2025-10-02 21:38:13 -0700
committer Pinapelz <yukais@pinapelz.com> 2025-10-02 21:39:14 -0700
commit 8d422f6224d5f02ac8a21428c951398a637e7e69 (patch)
tree 8539bbd702c0f0f2ebb9f7ff8b68c587aa298275
parent 9b6920e633cfc98900d1cb817c8f59ab6b88af3a (diff)
fix: adjustments to CDATA processing for RSS feeds
-rw-r--r-- feed.py 34
1 files changed, 21 insertions, 13 deletions
diff --git a/feed.py b/feed.py
index 6a6d0ef..617be3e 100644
--- a/feed.py
+++ b/feed.py
@@ -57,7 +57,7 @@ def build_rss_from_news_feed(title: str, description: str, json_file_path: str,
if jp_content:
desc_parts.append(jp_content.strip().replace("\n", "<br/>"))
if en_headline or en_content:
- desc_parts.append("<hr/><b>English Translation</b><br/>")
+ desc_parts.append("<br/>――――――――――――――――<br/><b>English Translation</b><br/>")
if en_headline:
desc_parts.append(f"<i>{en_headline.strip()}</i><br/>")
if en_content:
@@ -65,11 +65,9 @@ def build_rss_from_news_feed(title: str, description: str, json_file_path: str,
desc_combined = "\n".join(desc_parts)
- # Placeholder for CDATA
desc_el = ET.SubElement(item, "description")
- desc_el.text = f"__CDATA_PLACEHOLDER__{desc_combined}__END__"
+ desc_el.text = desc_combined
- # pubDate
if "timestamp" in post and post["timestamp"]:
pub_date = datetime.fromtimestamp(
post["timestamp"], timezone.utc
@@ -91,15 +89,25 @@ def build_rss_from_news_feed(title: str, description: str, json_file_path: str,
pass
ET.SubElement(item, "enclosure", url=image_url, type=mime, length=length)
- # Serialize XML
- rough_xml = ET.tostring(rss, encoding="utf-8", xml_declaration=True)
+ # Convert to string for CDATA processing
+ xml_str = ET.tostring(rss, encoding="unicode", method="xml")
- # Replace placeholders with real CDATA
- final_xml = rough_xml.decode("utf-8").replace(
- "__CDATA_PLACEHOLDER__", "<![CDATA[").replace("__END__", "]]>"
- )
+ # Process the XML string to wrap description content in CDATA
+ import re
+
+ def replace_description(match):
+ content = match.group(1)
+ # Unescape the XML entities that were escaped by ET
+ content = content.replace('&lt;', '<')
+ content = content.replace('&gt;', '>')
+ content = content.replace('&amp;', '&')
+ content = content.replace('&quot;', '"')
+ content = content.replace('&apos;', "'")
+ return '<description><![CDATA[' + content + ']]></description>'
+ xml_str = re.sub(r'<description>([^<]*)</description>', replace_description, xml_str)
+ dom = minidom.parseString(xml_str)
+ pretty_xml = dom.toprettyxml(indent=" ")
+ pretty_xml = '\n'.join([line for line in pretty_xml.split('\n') if line.strip()])
- # Pretty print
- dom = minidom.parseString(final_xml)
with open(output_path, "w", encoding="utf-8") as f:
- f.write(dom.toprettyxml(indent=" "))
+ f.write(pretty_xml)
send patches to the email below
yukais@pinapelz.com
include the subject [PATCH repo_name]
pinapelz.com
homepage