blob: e4b1f49b2fe21aabf69ac74609a575b6b2efa042 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from datetime import datetime
import time
def get_carousel_posts(html: str):
    """Turn the image slides of a page's carousel into news-post records.

    The markup is parsed with ``html.parser``. The first
    ``<div class="carousel-inner">`` is located, and every
    ``<div class="carousel-item">`` beneath it is inspected. A slide
    contributes one record when its first ``<img>`` has a non-empty ``src``.

    Args:
        html: Raw HTML of the page to scan.

    Returns:
        A list of dicts, one per qualifying slide, in document order.
        An empty list is returned when no ``carousel-inner`` container exists.
        Every record carries the same ``date`` / ``timestamp`` pair, captured
        once per call, and a single-entry ``images`` list whose URL is the
        ``src`` resolved against the site's base URL.
    """
    document = BeautifulSoup(html, 'html.parser')
    site_root = "https://dxplus.chilundui.com/"

    container = document.find('div', class_='carousel-inner')
    if not container:
        return []

    # The page exposes no per-slide date, so every record is stamped with
    # the moment of this call (read once, shared by all records).
    stamp_text = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    stamp_epoch = int(time.time())

    posts = []
    for slide in container.find_all('div', class_='carousel-item'):
        image = slide.find('img')
        # Skip slides with no <img>, or an <img> lacking a usable src.
        if not image or not image.get('src'):
            continue

        # urljoin leaves absolute src values alone and anchors relative ones.
        absolute_src = urljoin(site_root, image['src'])
        record = {
            "date": stamp_text,
            "identifier": "REFLEC_BEAT_DELUXE_PLUS",
            "type": None,
            "timestamp": stamp_epoch,
            "url": None,
            "headline": None,
            "content": "[お知らせ] ANNOUNCEMENT FROM REFLECT BEAT DELUXE PLUS",
            "images": [{"image": absolute_src, "link": None}],
            'is_ai_summary': False,
        }
        posts.append(record)

    return posts
|