"""
Generic format for a news entry. All keys are considered to be nullable
{
    'date': JST date of news post,
    'identifier': unique identifier for the game (usually some deriv. of the title),
    'type': Type of post if available, otherwise if not provided it will be None (aka Generic news)
    'timestamp': Unixtime of date above,
    'headline': Headline,
    'content': All text content of news,
    'url': URL to full post if available,
    'images': [
    {
        'image': URL to image,
        'link': If there's an associated href. Else None
        }

    ],
    'is_ai_summary': boolean
}
"""

from site_scraper import SiteScraper, download_site_as_html
import konami.eamuse_app as eamuse_app
import bemani.sdvx as sound_voltex
import bemani.iidx as iidx
import bemani.ddr as ddr
import sega.chuni_jp as chunithm_jp
import bemani.polaris_chord as polaris_chord
import sega.chuni_intl as chuni_intl
import sega.maimaidx_jp as maimaidx_jp
import sega.maimaidx_intl as maimaidx_intl
import sega.ongeki_jp as ongeki_jp
import sega.idac as idac
import taito.music_diver as music_diver
import taito.street_fighter as street_fighter
import bandai_namco.taiko as taiko
import bandai_namco.wmmt as wmmt
import community.disc as disc
import community.wacca_plus.wacca_plus as wac_plus
import community.museca_plus as mus_plus
import community.rbdx as rbdx
import constants
import translate
import summarizer

from datetime import datetime

def _attach_llm_summaries(news_posts: list, game_name: str):
    for post in news_posts:
        image_urls = [img["image"] for img in post.get("images", []) if "image" in img]
        if image_urls:
            headline, content = summarizer.generate_headline_and_content_from_images(image_urls, game_name)
            if headline is None and content is None:
                datetime_str = datetime.now().strftime("%H:%M:%S")
                post["headline"] = f"{game_name} Update"
                post["content"] = f"573-UPDATES has found a news post for {game_name} at {datetime_str}, please refer to the image for more details!"
                post["is_ai_summary"] = False
            post["headline"] = headline
            post["content"] = content
            post["is_ai_summary"] = True


def get_news(news_url: str, version=None) -> list:
    if news_url == constants.SOUND_VOLTEX_EXCEED_GEAR_NEWS_SITE:
        site_data = download_site_as_html(news_url)
        news_posts = sorted(sound_voltex.parse_exceed_gear_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
        news_posts = translate.add_translate_text_to_en(news_posts, overrides=[("ボルテ", "SDVX")])

    elif news_url == constants.IIDX_PINKY_CRUSH_NEWS_SITE:
        site_data = download_site_as_html(news_url)
        news_posts = sorted(iidx.parse_pinky_crush_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
        news_posts = translate.add_translate_text_to_en(news_posts, iidx.KEY_TERMS_TL)

    elif news_url == constants.POLARIS_CHORD_NEWS_SITE:
        site_data = download_site_as_html(news_url)
        news_posts = sorted(polaris_chord.parse_polaris_chord_news_site(site_data, constants.POLARIS_CHORD_RECENT_NEWS_LIMIT), key=lambda x: x['timestamp'], reverse=True)
        news_posts = translate.add_translate_text_to_en(news_posts, iidx.KEY_TERMS_TL)

    elif news_url == constants.EAMUSE_APP_API_ROUTE:
        site_data = download_site_as_html(news_url+"/?uuid_to="+version+"&format=json")
        match version:
            case constants.IIDX_EAMUSE_APP_ID:
                news_posts= sorted(eamuse_app.parse_news_api_route(site_data, "IIDX_EAMUSEMENT", constants.EAMUSE_POST_SITE), key=lambda x: x['timestamp'], reverse=True)
                news_posts = translate.add_translate_text_to_en(news_posts, iidx.KEY_TERMS_TL)
            case constants.DDR_EAMUSE_APP_ID:
                news_posts= sorted(eamuse_app.parse_news_api_route(site_data, "DDR_EAMUSEMENT", constants.EAMUSE_POST_SITE), key=lambda x: x['timestamp'], reverse=True)
                news_posts = translate.add_translate_text_to_en(news_posts)
            case constants.SDVX_EAMUSE_APP_ID:
                news_posts= sorted(eamuse_app.parse_news_api_route(site_data, "SOUND_VOLTEX_EAMUSEMENT", constants.EAMUSE_POST_SITE ), key=lambda x: x['timestamp'], reverse=True)
                news_posts = translate.add_translate_text_to_en(news_posts)
            case constants.JUBEAT_EAMUSE_APP_ID:
                news_posts= sorted(eamuse_app.parse_news_api_route(site_data, "JUBEAT_EAMUSEMENT", constants.EAMUSE_POST_SITE), key=lambda x: x['timestamp'], reverse=True)
                news_posts = translate.add_translate_text_to_en(news_posts)
            case constants.POPN_MUSIC_EAMUSE_APP_ID:
                news_posts= sorted(eamuse_app.parse_news_api_route(site_data, "POPN_MUSIC_EAMUSEMENT", constants.EAMUSE_POST_SITE), key=lambda x: x['timestamp'], reverse=True)
                news_posts = translate.add_translate_text_to_en(news_posts)
            case constants.GITADORA_EAMUSE_APP_ID:
                news_posts= sorted(eamuse_app.parse_news_api_route(site_data, "GITADORA_EAMUSEMENT", constants.EAMUSE_POST_SITE), key=lambda x: x['timestamp'], reverse=True)
                news_posts = translate.add_translate_text_to_en(news_posts)
            case constants.NOSTALGIA_EAMUSE_APP_ID:
                news_posts= sorted(eamuse_app.parse_news_api_route(site_data, "NOSTALGIA_EAMUSEMENT", constants.EAMUSE_POST_SITE), key=lambda x: x['timestamp'], reverse=True)
                news_posts = translate.add_translate_text_to_en(news_posts)
            case constants.DANCE_RUSH_APP_ID:
                news_posts= sorted(eamuse_app.parse_news_api_route(site_data, "DANCE_RUSH_EAMUSEMENT", constants.EAMUSE_POST_SITE), key=lambda x: x['timestamp'], reverse=True)
                news_posts = translate.add_translate_text_to_en(news_posts)
            case constants.DANCE_AROUND_APP_ID:
                news_posts= sorted(eamuse_app.parse_news_api_route(site_data, "DANCE_AROUND_EAMUSEMENT", constants.EAMUSE_POST_SITE), key=lambda x: x['timestamp'], reverse=True)
                news_posts = translate.add_translate_text_to_en(news_posts)
            case _:
                raise ValueError("Cannot find provided e-amuse app gameId", version)

    elif news_url == constants.DDR_WORLD_NEWS_SITE:
        scraper = SiteScraper(headless=True)
        site_data = scraper.get_page_source(news_url)
        scraper.close()
        news_posts = sorted(ddr.parse_ddr_world_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
        news_posts = translate.add_translate_text_to_en(news_posts)

    elif news_url == constants.CHUNITHM_JP_NEWS_SITE:
        site_data = download_site_as_html(news_url)
        if version in [ constants.CHUNITHM_VERSION.VERSE, constants.CHUNITHM_VERSION.X_VERSE ]:
            news_posts = sorted(chunithm_jp.parse_chuni_jp_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
            news_posts = translate.add_translate_text_to_en(news_posts)
            if constants.CHUNI_RECURSIVE_IMAGE:
                for i in range(len(news_posts)):
                    if not news_posts[i]["url"]:
                        continue
                    post_site_data = download_site_as_html(news_posts[i]["url"])
                    post_images = chunithm_jp.parse_chuni_jp_post_images(post_site_data)
                    news_posts[i]["images"].extend([image for image in post_images if not any(existing_image['image'] == image['image'] for existing_image in news_posts[i]["images"])])

    elif news_url == constants.CHUNITHM_INTL_NEWS_SITE:
        site_data = download_site_as_html(news_url)
        news_posts = sorted(chuni_intl.parse_chuni_intl_api_route(site_data, "CHUNITHM_INTL", constants.CHUNITHM_INTL_RECENT_NEWS_LIMIT), key=lambda x: x['timestamp'], reverse=True)
        if constants.CHUNI_RECURSIVE_IMAGE:
            for i in range(len(news_posts)):
                if not news_posts[i]["url"]:
                    continue
                post_site_data = download_site_as_html(news_posts[i]["url"])
                post_images = chuni_intl.parse_chuni_intl_post_images(post_site_data)
                news_posts[i]["images"].extend([image for image in post_images if not any(existing_image['image'] == image['image'] for existing_image in news_posts[i]["images"])])


    elif news_url == constants.MAIMAIDX_JP_NEWS_SITE:
        site_data = download_site_as_html(news_url)
        if version in [ constants.MAIMAIDX_VERSION.PRISM_PLUS, constants.MAIMAIDX_VERSION.CIRCLE ]:
            news_posts = sorted(maimaidx_jp.parse_maimaidx_jp_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
            news_posts = translate.add_translate_text_to_en(news_posts)

    elif news_url == constants.MAIMAIDX_INTL_NEWS_SITE:
        site_data = download_site_as_html(news_url)
        news_posts = sorted(maimaidx_intl.parse_maimaidx_intl_api_route(site_data, "MAIMAIDX_INTL", constants.MAIMAIDX_INTL_RECENT_NEWS_LIMIT), key=lambda x: x['timestamp'], reverse=True)
        _attach_llm_summaries(news_posts, "maimai DX International")

    elif news_url == constants.ONGEKI_JP_NEWS_SITE:
        site_data = download_site_as_html(news_url)
        if version == constants.ONGEKI_VERSION.REFRESH:
            news_posts = sorted(ongeki_jp.parse_ongeki_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
            news_posts = translate.add_translate_text_to_en(news_posts)

    elif news_url == constants.IDAC_NEWS_SITE:
        site_data = download_site_as_html(news_url)
        news_posts = sorted(idac.parse_idac_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
        for news in news_posts:
            promo_image_url = idac.get_promo_image(download_site_as_html(news["url"]))
            news["images"] = [{'image': promo_image_url, 'link': None}]
        news_posts = translate.add_translate_text_to_en(news_posts)

    elif news_url == constants.MUSIC_DIVER_NEWS:
        api_data = download_site_as_html(news_url)
        news_posts = sorted(music_diver.parse_music_diver_news_json(api_data), key=lambda x: x['timestamp'], reverse=True)

    elif news_url == constants.STREET_FIGHTER_NEWS_SITE:
        site_data = download_site_as_html(news_url)
        news_posts = sorted(street_fighter.parse_sf_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)
        news_posts = translate.add_translate_text_to_en(news_posts)


    elif news_url == constants.TAIKO_BLOG_SITE:
        site_data = download_site_as_html(news_url)
        news_posts = sorted(taiko.parse_taiko_blog_site(site_data), key=lambda x: x['timestamp'], reverse=True)
        news_posts = translate.add_translate_text_to_en(news_posts)

    elif news_url == constants.WANGAN_MAXI_GENERIC:
        news_posts = []
        na_site_data = download_site_as_html(constants.WANGAN_MAXI_NA_NEWS_SITE, response_encoding="utf-8")
        prelim_na_news_data = wmmt.get_wmmt_na_news_post_links(na_site_data)
        for data in prelim_na_news_data:
            post_site_data = download_site_as_html(data["url"])
            news = wmmt.parse_wmmt_na_news(post_site_data, data)
            if news is not None:
                news_posts.append(news)
        asia_oce_site_data = download_site_as_html(constants.WANGAN_MAXI_ASIA_OCE_NEWS_SITE, response_encoding="utf-8")
        prelim_asia_oce_news_data = wmmt.get_wmmt_asia_oce_news_post_links(asia_oce_site_data)
        for data in prelim_asia_oce_news_data:
            post_site_data = download_site_as_html(data["url"])
            news = wmmt.parse_wmmt_asia_oce_news(post_site_data, data)
            if news is not None:
                news_posts.append(news)
        jp_site_data = download_site_as_html(constants.WANGAN_MAXI_JP_NEWS_SITE, response_encoding="utf-8")
        prelim_jp_news_data = wmmt.get_wmmt_jp_news_post_links(jp_site_data)
        jp_news = []
        for data in prelim_jp_news_data:
            post_site_data = download_site_as_html(data["url"], response_encoding="utf-8")
            news = wmmt.parse_wmmt_jp_news(post_site_data, data)
            if news is not None:
                jp_news.append(news)
        jp_news = translate.add_translate_text_to_en(jp_news)
        news_posts.extend(jp_news)
        news_posts = sorted(news_posts, key=lambda x: x['timestamp'], reverse=True)
        return news_posts


    elif news_url == constants.WACCA_PLUS_MAGIC_STRING:
        if not wac_plus.check_is_generation_possible():
            news_posts = []
        else:
            messages = disc.fetch_messages(constants.WACCA_PLUS_MAGIC_STRING)
            news_posts = sorted(wac_plus.parse_announcement_messages(messages), key=lambda x: x['timestamp'], reverse=True)

    elif news_url == constants.MUSECA_PLUS_NEWS_SITE:
        site_data = download_site_as_html(news_url)
        news_posts = sorted(mus_plus.parse_museca_plus_news_site(site_data), key=lambda x: x['timestamp'], reverse=True)

    elif news_url == constants.RB_DELUXE_PLUS_NEWS:
        site_data = download_site_as_html(news_url)
        news_posts = rbdx.get_carousel_posts(site_data)
        _attach_llm_summaries(news_posts, "REFLEC BEAT PLUS DELUXE")

    else:
        news_posts = []
    return news_posts