// MarkdownTranslator implementation: turns Markdown text into an HTML string
// (header from buildHTMLHeader, a sidebar, the translated body, then
// buildHTMLFooter).
//
// NOTE(review): this copy of the file appears to have lost its original line
// breaks, and angle-bracket spans look stripped (the #include targets, the
// std::vector template arguments, and markup inside the string literals).
// Restore from the original before building; the code below is left untouched.
//
// Overview of the definitions, in file order:
//   MarkdownTranslator()   - sets title to "WikiMD2HTML" and cssPath to
//                            "styles/carbon.css".
//   prescanHeaders(stream) - remembers the stream position, seeks to 0, matches
//                            every line against headerRegexStr and appends
//                            "<level>:<content>" to `headers` (level is the
//                            length of capture 1, content is capture 2), then
//                            clears the stream state and seeks back.
//   processMetadata(lines) - splits each line at the first ':' (lines without
//                            one are skipped) and strips leading spaces/tabs
//                            from the value. "title" sets `title`;
//                            "external-name" stores a pending name;
//                            "external-url" after a name calls
//                            addExternalMenuItem(name, url) when both are
//                            non-empty, otherwise a warning goes to std::cout;
//                            any other key logs an error to std::cerr.
//   translate(content)     - if the first line is "---", collects lines up to
//                            the next "---" and hands them to processMetadata,
//                            otherwise seeks back to 0. Then calls
//                            prescanHeaders, emits the header and sidebar, and
//                            walks the remaining lines: ":::figure" ... ":::"
//                            blocks go to processFigureBlock, "```" fenced
//                            blocks go to processCodeBlock (text after the
//                            opening fence is the language), everything else
//                            goes through processLine.
//   generateSideBar(out)   - writes the title, one entry per externalMenuLinks
//                            item, a "Table of Contents" label, and one entry
//                            per string in `headers`, with an indentation
//                            prefix that grows with the header level.
//   createAnchorId(text)   - lowercases, replaces ' ' with '-', then drops every
//                            character that is neither alphanumeric nor '-'.
//   processLine(line)      - tries processHeaders first; if the line came back
//                            unchanged, applies processBold, processItalic and
//                            processLinks, and wraps the result with
//                            processParagraph when it is non-empty and does not
//                            start with '<'. Always appends "\n".
//   processHeaders/Bold/Italic/Links
//                          - regex match/replace using headerRegexStr,
//                            boldRegexStr, italicRegexStr and linkRegexStr.
//   processParagraph(text) - returns "" for empty text, otherwise the text
//                            between a fixed prefix and suffix.
//   processSingleFigure    - searches with imageRegexStr; capture 1 is used as
//                            alt, capture 2 as src, capture 4 (when matched) as
//                            a title attribute; returns the input unchanged when
//                            there is no match.
//   processFigureBlock     - lines starting with "![" go to processSingleFigure,
//                            lines starting with "Caption:" contribute the text
//                            after the first 8 characters, other non-empty
//                            lines are emitted as content.
//   htmlEscape(text)       - free function; substitutes the characters
//                            & < > " ' and copies everything else.
//   processCodeBlock       - emits htmlEscape(line) followed by '\n' for every
//                            line, between an opening and a closing literal.
//
// NOTE(review): observations from the visible code, to confirm and fix in the
// original source:
//   * translate(): `language` is only assigned when the opening fence is longer
//     than 3 characters and is never reset, so a later fence without a language
//     still carries the previous block's value.
//   * translate(): `std::cout << language << std::endl;` runs on every opening
//     fence - looks like leftover debug output.
//   * translate(): the ":::figure" test runs before the code-block state
//     checks, so such a line inside a fenced block switches to IN_FIGURE.
//   * processMetadata(): `title = value` is executed a second time after the
//     key chain; the loop also copies each line by value.
//   * createAnchorId(): plain `char` is passed to ::tolower / std::isalnum;
//     those functions expect a value representable as unsigned char (or EOF).
//   * prescanHeaders() appends to `headers` and nothing visible here clears it;
//     presumably a second translate() call would see the earlier entries too.
#include "markdown_translator.hpp" #include #include #include MarkdownTranslator::MarkdownTranslator() : title("WikiMD2HTML"), cssPath("styles/carbon.css") { } MarkdownTranslator::~MarkdownTranslator() { } void MarkdownTranslator::prescanHeaders(std::stringstream& markdownStream) { auto savedPos = markdownStream.tellg(); markdownStream.seekg(0); std::regex headerRegex(headerRegexStr); std::smatch matches; std::string line; while (std::getline(markdownStream, line)) { if (std::regex_match(line, matches, headerRegex)) { int level = matches[1].length(); std::string content = matches[2]; headers.push_back(std::to_string(level) + ":" + content); } } markdownStream.clear(); markdownStream.seekg(savedPos); } void MarkdownTranslator::processMetadata(const std::vector& lines){ // format of keys -> key: value int externalLinkBuilderState{0}; std::string currExternalLinkName; std::string currExternalLinkUrl; for(std::string currentLine : lines){ size_t colonPos = currentLine.find(':'); if (colonPos != std::string::npos) { std::string key = currentLine.substr(0, colonPos); std::string value = currentLine.substr(colonPos + 1); value.erase(0, value.find_first_not_of(" \t")); if (key == "title") { title = value; } else if (key == "external-name") { currExternalLinkName = value; externalLinkBuilderState = 1; } else if (key == "external-url") { if (externalLinkBuilderState != 0) { currExternalLinkUrl = value; if (!currExternalLinkName.empty() && !currExternalLinkUrl.empty()) { addExternalMenuItem(currExternalLinkName, currExternalLinkUrl); currExternalLinkName = ""; currExternalLinkUrl = ""; externalLinkBuilderState = 0; } } else{ std::cout << "[WARNING] Unexpected external-url \"" << value << "\" found with no matching external-name. Ignoring..."; } } else { std::cerr << "ERROR OCCURED IN METADATA. 
UNKNOWN KEY" << std::endl; } if (key == "title") { title = value; } } } } std::string MarkdownTranslator::translate(const std::string& markdownContent) { std::stringstream htmlOutput; std::stringstream markdownStream(markdownContent); std::string line; std::string currentLine; // Parse metadata section if it exists bool metadataExists{false}; std::vector metadataLines; if (std::getline(markdownStream, currentLine) && currentLine == "---") { metadataExists = true; while (std::getline(markdownStream, currentLine)) { if (currentLine == "---") { break; } metadataLines.push_back(currentLine); } } else { markdownStream.seekg(0); } // Process and apply metadata to curr object if(metadataExists) processMetadata(metadataLines); // Pre-scan for headers before generating sidebar prescanHeaders(markdownStream); htmlOutput << buildHTMLHeader(title); generateSideBar(htmlOutput); // Main content container htmlOutput << "
\n"; htmlOutput << "
\n"; // Process each line of markdown // State of current parse MarkdownTranslator::ParseState parseState = MarkdownTranslator::ParseState::REGULAR; std::vector figureLines; std::vector codeblockLines; std::string language; while (std::getline(markdownStream, line)) { // Start of figure block if(line.find(":::figure") == 0){ parseState = MarkdownTranslator::ParseState::IN_FIGURE; figureLines.clear(); continue; } if(parseState == MarkdownTranslator::ParseState::IN_FIGURE && line == ":::"){ // End of figure block parseState = MarkdownTranslator::ParseState::REGULAR; htmlOutput << " " << processFigureBlock(figureLines) << "\n"; continue; } else if(parseState == MarkdownTranslator::ParseState::IN_FIGURE){ figureLines.push_back(line); continue; } if(line.find("```") == 0 && parseState != MarkdownTranslator::ParseState::IN_CODEBLOCK){ parseState = MarkdownTranslator::ParseState::IN_CODEBLOCK; codeblockLines.clear(); if(line.length() > 3) { language = line.substr(3); } std::cout << language << std::endl; continue; } else if(parseState == MarkdownTranslator::ParseState::IN_CODEBLOCK && line == "```"){ // End of figure block parseState = MarkdownTranslator::ParseState::REGULAR; htmlOutput << " " << processCodeBlock(language, codeblockLines) << "\n"; continue; } else if(parseState == MarkdownTranslator::ParseState::IN_CODEBLOCK){ codeblockLines.push_back(line); continue; } htmlOutput << " " << processLine(line); } htmlOutput << buildHTMLFooter(); return htmlOutput.str(); } void MarkdownTranslator::generateSideBar(std::stringstream& output) { output << "
\n"; output << "
\n"; output << "

" + title + "

\n"; output << "
\n"; output << "
    \n"; if(externalMenuLinks.size() > 0){ for(const ExternalMenuItem& menuItem : externalMenuLinks){ output << "
  • "+menuItem.name+"
  • \n"; } } output << "

    Table of Contents

    \n"; output << "
      \n"; for (const auto& header : headers) { size_t separatorPos = header.find(':'); if (separatorPos != std::string::npos) { int level = std::stoi(header.substr(0, separatorPos)); std::string content = header.substr(separatorPos + 1); // Create anchor ID from header content std::string anchorId = createAnchorId(content); // Add indentation based on header level std::string indentation = " "; for (int i = 1; i < level; ++i) { indentation += " "; } output << indentation << "
    • " << content << "
    • \n"; } } output << "
    \n"; output << "
\n"; output << "
\n"; } std::string MarkdownTranslator::createAnchorId(const std::string& text) { std::string id = text; // Convert to lowercase std::transform(id.begin(), id.end(), id.begin(), ::tolower); // Replace spaces with hyphens std::replace(id.begin(), id.end(), ' ', '-'); // Remove any non-alphanumeric characters except hyphens id.erase( std::remove_if( id.begin(), id.end(), [](char c) { return !(std::isalnum(c) || c == '-'); } ), id.end() ); return id; } std::string MarkdownTranslator::processLine(const std::string& line) { std::string processed = line; processed = processHeaders(processed); if (processed == line) { processed = processBold(processed); processed = processItalic(processed); processed = processLinks(processed); if (!processed.empty() && processed[0] != '<') { processed = processParagraph(processed); } } return processed + "\n"; } std::string MarkdownTranslator::processHeaders(const std::string& line) { // Check for H1-H6 std::regex headerRegex(headerRegexStr); std::smatch matches; if (std::regex_match(line, matches, headerRegex)) { int level = matches[1].length(); std::string content = matches[2]; std::string anchorId = createAnchorId(content); return "" + content + ""; } return line; } std::string MarkdownTranslator::processBold(const std::string& text) { // Replace **text** or __text__ with text std::string result = text; std::regex boldRegex(boldRegexStr); result = std::regex_replace(result, boldRegex, "$1$2"); return result; } std::string MarkdownTranslator::processItalic(const std::string& text) { // Replace *text* or _text_ with text std::string result = text; std::regex italicRegex(italicRegexStr); result = std::regex_replace(result, italicRegex, "$1$2"); return result; } std::string MarkdownTranslator::processLinks(const std::string& text) { // Replace [text](url) with text std::string result = text; std::regex linkRegex(linkRegexStr); result = std::regex_replace(result, linkRegex, "$1"); return result; } std::string 
MarkdownTranslator::processParagraph(const std::string& text) { if (text.empty()) return ""; return "

" + text + "

"; } std::string MarkdownTranslator::processSingleFigure(const std::string& text) { // Extract image details using regex std::regex imageRegex(imageRegexStr); std::smatch matches; if (std::regex_search(text, matches, imageRegex)) { std::string alt = matches[1].str(); std::string src = matches[2].str(); std::string title = matches.size() > 4 && matches[4].matched ? " title=\"" + matches[4].str() + "\"" : ""; return "\"""; } return text; } std::string MarkdownTranslator::processFigureBlock(const std::vector& lines){ std::stringstream html; std::string imageHtml; std::string caption; html << "
\n"; // Process each line in the figure block for (const auto& line : lines) { if (line.find("![") == 0) { // Process the image imageHtml = processSingleFigure(line); html << " " << imageHtml << "\n"; } else if (line.find("Caption:") == 0) { // Extract caption caption = line.substr(8); html << "
" << caption << "
\n"; } else if (!line.empty()) { // Process any other content html << "

" << line << "

\n"; } } html << "
"; return html.str(); } std::string htmlEscape(const std::string& text) { std::string escaped; escaped.reserve(text.size()); for (char c : text) { switch (c) { case '&': escaped += "&"; break; case '<': escaped += "<"; break; case '>': escaped += ">"; break; case '"': escaped += """; break; case '\'': escaped += "'"; break; default: escaped += c; break; } } return escaped; } std::string MarkdownTranslator::processCodeBlock( const std::string& language, const std::vector& lines ) { std::stringstream html; html << "
";
    for (const auto& line : lines)
        html << htmlEscape(line) << '\n';
    html << "
"; return html.str(); }