import { renderToStaticMarkup } from "react-dom/server";
import { nanoid } from "nanoid";
import { RAGAgent, getCitationFilePath } from "../../api";
import { CitationRegistry, Citation } from "./CitationRegistry";

/** Result of parsing an answer: the rendered HTML plus the citations collected for it. */
type HtmlParsedAnswer = {
  /** The generated HTML fragments joined into a single string. */
  answerHtml: string;
  /** Citations gathered while the answer was parsed. */
  citations: Citation[];
};

/** Lookup from citation text to the URL that was extracted for it. */
type UrlMap = Map<string, string>;

/**
 * Appends a `[Section N]` marker after section references in legislation answers.
 *
 * Two shapes are recognised (case-insensitively):
 *  - inline references such as `Section 12` or `Sections 12A`
 *  - bold headings such as `**12 Some title**`
 *
 * Content produced by any agent other than `RAGAgent.Legislation` is returned unchanged.
 *
 * @param selectedAgent The agent that produced the content.
 * @param content The raw answer text.
 * @returns The content with section markers inserted.
 */
export function annotateSectionsForLegislation(selectedAgent: RAGAgent, content: string): string {
  if (selectedAgent !== RAGAgent.Legislation) {
    return content;
  }

  // Alternative 1: "Section(s) <number>[letter]"; alternative 2: "**<number>[letter] <title>**".
  const sectionPattern = /(Sections? (\d+[a-zA-Z]?))|\*\*(\d+[a-zA-Z]?)\s(.+?)\*\*/gi;

  return content.replace(
    sectionPattern,
    (
      whole: string,
      inlineReference?: string,
      inlineNumber?: string,
      headingNumber?: string,
      headingTitle?: string
    ): string => {
      // Inline reference: keep the original wording and attach the marker directly after it.
      if (inlineReference) {
        return `${inlineReference}[Section ${inlineNumber}]`;
      }

      // Bold heading: re-emit the heading and follow it with a space-separated marker.
      if (headingNumber && headingTitle) {
        return `**${headingNumber} ${headingTitle}** [Section ${headingNumber}]`;
      }

      return whole;
    }
  );
}

/**
 * Pulls citations that start a line back onto the preceding line.
 *
 * A line break (plus any following whitespace) that is immediately followed by
 * `[citation]` or `[citation](url)` is collapsed into a single space.
 *
 * @param text The text to normalise.
 * @returns The text with leading-line citations joined to the previous line.
 */
export function normalizeCitationLines(text: string): string {
  const citationOnOwnLine = /\n\s*(\[[^\]]+\](?:\([^)]+\))?)/g;

  return text.replace(citationOnOwnLine, (_whole: string, citation: string) => ` ${citation}`);
}

/**
 * Converts HTML superscript citations into `[title]` markers and records their URLs.
 *
 * `<sup><a href="URL" title="TITLE">…</a></sup>` becomes `[TITLE]`, and `TITLE -> URL`
 * is stored in `urlMap` unless that title is already present (first URL wins).
 * Any other `<sup><a …>…</a></sup>` element is removed entirely.
 *
 * @param text The text that may contain HTML citations.
 * @param urlMap Citation-text-to-URL lookup; mutated in place.
 * @returns The text with HTML citations replaced or removed.
 */
export function extractHtmlCitations(text: string, urlMap: UrlMap): string {
  const titledCitation = /<sup><a\s+href="([^"]+)"\s+title="([^"]+)">[^<]+<\/a><\/sup>/g;

  // A function replacer inserts the title verbatim; a string replacement would
  // expand `$&`, `$$`, "$`" and `$'` sequences that happen to occur in a title.
  const withMarkers = text.replace(titledCitation, (_whole: string, url: string, title: string) => {
    if (!urlMap.has(title)) {
      urlMap.set(title, url);
    }
    return `[${title}]`;
  });

  // Whatever <sup><a> elements remain lack the href+title shape and carry no usable citation.
  const untitledCitation = /<sup><a[^>]*>[^<]+<\/a><\/sup>/g;
  return withMarkers.replace(untitledCitation, "");
}

/**
 * Strips HTML from the text.
 *
 * `<sup>…</sup>` elements are dropped together with their content (on a single
 * line), after which every remaining tag-like sequence is removed while its
 * surrounding text is kept.
 *
 * @param text The text to clean.
 * @returns The text without HTML tags.
 */
export function cleanupHtmlTags(text: string): string {
  const supElement = /<sup>.*?<\/sup>/g;
  // Matches an opening or closing tag, including one left unterminated at end of input.
  const anyTag = /<\/?[^>]+(>|$)/g;

  const withoutSupElements = text.replace(supElement, "");
  return withoutSupElements.replace(anyTag, "");
}

/**
 * Replaces markdown links with bare citation markers and records their URLs.
 *
 * Each `[label](url)` becomes `[label]` (label trimmed), and `label -> url`
 * (both trimmed) is written to `urlMap`, overwriting any existing entry.
 *
 * @param text The text that may contain markdown links.
 * @param urlMap Citation-text-to-URL lookup; mutated in place.
 * @returns The text with every markdown link reduced to `[label]`.
 */
export function extractMarkdownLinks(text: string, urlMap: UrlMap): string {
  const markdownLink = /\[([^\]]+)\]\(([^)]+)\)/g;

  // One in-place pass with a function replacer: each link is rewritten exactly
  // where it was matched, and `$` sequences in a label are inserted literally.
  return text.replace(markdownLink, (_whole: string, label: string, url: string) => {
    const citationText = label.trim();
    urlMap.set(citationText, url.trim());
    return `[${citationText}]`;
  });
}

/**
 * Records URLs that follow a bracketed citation, optionally separated by whitespace.
 *
 * For every `[label] (url)` occurrence, `label -> url` (both trimmed) is added to
 * `urlMap` unless the label is already present. The text itself is not modified.
 *
 * @param text The text to scan.
 * @param urlMap Citation-text-to-URL lookup; mutated in place.
 * @returns The input text, unchanged.
 */
export function extractInlineUrls(text: string, urlMap: UrlMap): string {
  const bracketFollowedByUrl = /\[([^\]]+)\]\s*\(([^)]+)\)/g;

  for (const [, label, url] of text.matchAll(bracketFollowedByUrl)) {
    const key = label.trim();
    const value = url.trim();
    if (!urlMap.has(key)) {
      urlMap.set(key, value);
    }
  }

  return text;
}

/**
 * Hides a citation that is still being streamed.
 *
 * While streaming, if the last bracket in the text is an opening `[` (i.e. no
 * `]` follows it), everything from that `[` onwards is cut off. Otherwise, or
 * when not streaming, the text is returned as is.
 *
 * @param text The text received so far.
 * @param isStreaming Whether the answer is still streaming.
 * @returns The text without a trailing, unterminated citation.
 */
export function handleStreamingCitations(text: string, isStreaming: boolean): string {
  if (!isStreaming) {
    return text;
  }

  const lastOpening = text.lastIndexOf("[");
  const lastClosing = text.lastIndexOf("]");

  // An opening bracket positioned after the final closing bracket is unterminated.
  return lastOpening > lastClosing ? text.slice(0, lastOpening) : text;
}

/**
 * Formats a non-citation piece of text for HTML output.
 *
 * - Line breaks become ` <br />`, unless the text contains a markdown-style
 *   heading marker (one to seven `#` characters followed by whitespace), in
 *   which case line breaks are left alone.
 * - Literal `(url)` placeholders and triple-backtick fences are removed.
 * - Each pair of consecutive spaces becomes two non-breaking spaces.
 *
 * @param part The text fragment to format.
 * @returns The formatted fragment.
 */
export function formatTextPart(part: string): string {
  const containsHeading = /#{1,7}\s+/.test(part);

  // Fragments with headings keep their raw newlines; everything else gets explicit breaks.
  const withLineBreaks = containsHeading ? part : part.replace(/\n/g, " <br />");

  const withoutPlaceholders = withLineBreaks.replaceAll("(url)", "").replace(/```/g, "");

  return withoutPlaceholders.replace(/ {2}/g, "&nbsp;&nbsp;");
}

/**
 * Normalises the raw text found inside a citation bracket.
 *
 * Applied in order: drop a leading `(` and a trailing `)`, drop a leading
 * `citation:` / `citations:` prefix (case-insensitive), drop the first
 * remaining `:`, drop every `^`, and finally trim leading whitespace.
 *
 * @param part The raw citation text.
 * @returns The cleaned citation text.
 */
export function cleanCitationPart(part: string): string {
  const withoutParens = part.replace(/^\(/, "").replace(/\)$/, "");
  const withoutPrefix = withoutParens.replace(/^citations?:\s*/i, "");
  // A string pattern removes only the first colon.
  const withoutFirstColon = withoutPrefix.replace(":", "");
  const withoutCarets = withoutFirstColon.replaceAll("^", "");

  return withoutCarets.trimStart();
}

/**
 * Derives the display text of a USU citation from its filename.
 *
 * Filenames shaped like `usu_id-<digits>--<name>[.html]` yield `<name>`;
 * anything else is returned unchanged.
 *
 * Example: "usu_id-27657--FT - Residential tenancy enquiries (CC).html" -> "FT - Residential tenancy enquiries (CC)"
 *
 * @param citationText The raw citation text (typically a filename).
 * @returns The extracted name, or the original text when the pattern does not match.
 */
export function extractUSUCitationText(citationText: string): string {
  const parsed = /usu_id-\d+--(.+?)(\.html)?$/.exec(citationText);

  return parsed?.[1] || citationText;
}

/**
 * Resolves a cleaned citation string to a citation ID, creating a new citation
 * when no existing one matches.
 *
 * Lookup order, as implemented below:
 *  1. Only when not streaming and `nextPart` starts with `__CONVERSATION_ID__:`:
 *     the conversation-specific registry, then the global registry.
 *  2. The local `citations` array.
 *  3. Otherwise a new citation is created and appended to `citations`.
 *
 * @param trimmedPart The cleaned citation text; empty values and values of 3
 *   characters or fewer are rejected.
 * @param citations Local citation list for the current answer; may be appended to.
 * @param urlMap Citation-text-to-URL lookup, queried with `trimmedPart` as the key.
 * @param nextPart The text that follows the citation. It is searched for an
 *   inline `(http…)` URL and, when it starts with `__CONVERSATION_ID__:`, is
 *   treated as the carrier of a conversation ID.
 * @param selectedAgent The agent that produced the answer; selects USU filename
 *   handling and the label added to the citation text.
 * @param isStreaming When true, no label is added and the registry is neither
 *   read nor written by this function.
 * @returns The citation ID, or `null` when the citation text is rejected.
 */
export function createOrGetCitation(
  trimmedPart: string, 
  citations: Citation[], 
  urlMap: UrlMap, 
  nextPart: string | undefined,
  selectedAgent: RAGAgent,
  isStreaming: boolean = false
): string | null {
  if (!trimmedPart || trimmedPart.length <= 3) {
    return null; // Skip empty or invalid citations
  }

  let citationId: string | null = null;
  // Process USU citations to extract the relevant part of the filename
  let processedCitation = trimmedPart;
  if (selectedAgent === RAGAgent.USU) {
    processedCitation = extractUSUCitationText(trimmedPart);
  }
  
  // While streaming the unlabeled text is used; otherwise the agent-specific label is appended.
  const labeledCitation = isStreaming ? processedCitation : addLabelToCitation(processedCitation, selectedAgent);
  
  // Registry lookups happen only when not streaming and nextPart carries the conversation-ID marker
  if (!isStreaming && nextPart && typeof nextPart === 'string' && nextPart.startsWith('__CONVERSATION_ID__:')) {
    // NOTE(review): split(':')[1] keeps only the text between the first and second colon —
    // confirm conversation IDs never contain ':'.
    const conversationId = nextPart.split(':')[1];
    
    // First check in conversation-specific registry (labeled text first, then unlabeled)
    const existingConversationCitation = CitationRegistry.getCitationByText(labeledCitation, conversationId) || 
                                        CitationRegistry.getCitationByText(processedCitation, conversationId);
    
    if (existingConversationCitation) {
      // If found in conversation registry, add to local citations array if not already there
      const localCitationIndex = citations.findIndex(c => c.id === existingConversationCitation.id);
      if (localCitationIndex === -1) {
        citations.push(existingConversationCitation);
      }
      return existingConversationCitation.id;
    }
    
    // If not found in conversation registry, check global registry
    const existingGlobalCitation = CitationRegistry.getGlobalCitationByText(labeledCitation) || 
                                  CitationRegistry.getGlobalCitationByText(processedCitation);
    
    if (existingGlobalCitation) {
      // Create a new citation with the same content but a new ID for this conversation
      const newCitationForConversation = {
        ...existingGlobalCitation,
        id: nanoid(), // Generate a new ID to avoid conflicts across conversations
        conversationId
      };
      
      // NOTE(review): this copy is pushed to the local array only; this branch does not
      // register it with CitationRegistry, and it returns before the local-array lookup
      // below — verify repeated references do not produce duplicate entries.
      citations.push(newCitationForConversation);
      return newCitationForConversation.id;
    }
  }
  
  // Check in local citations array (matching either the labeled or the unlabeled text)
  const existingCitationIndex = citations.findIndex(citation => 
    citation.text === labeledCitation || citation.text === processedCitation
  );

  if (existingCitationIndex === -1) {
    citationId = nanoid();
    // URL resolution order: urlMap entry, then inline URL in nextPart, then getCitationFilePath
    const markdownUrl = urlMap.get(trimmedPart);
    // Look for inline URL in the next part if no markdown URL found
    const inlineUrl = !markdownUrl && nextPart ? nextPart.match(/\(\s*(https?:\/\/[^)]+)\s*\)/) : null;
    const citationUrl = markdownUrl || (inlineUrl ? inlineUrl[1] : getCitationFilePath(trimmedPart, selectedAgent));
    const newCitation = { id: citationId, text: labeledCitation, url: citationUrl };
    citations.push(newCitation);
    
    // Also add to conversation registry if not streaming and conversationId is provided
    if (!isStreaming && nextPart && typeof nextPart === 'string' && nextPart.startsWith('__CONVERSATION_ID__:')) {
      const conversationId = nextPart.split(':')[1];
      const citationWithConversationId = {
        ...newCitation,
        conversationId
      };
      CitationRegistry.addOrUpdateCitation(citationWithConversationId, conversationId);
    }
  } else {
    // Reuse the ID of the citation already present in the local array
    citationId = citations[existingCitationIndex].id;
  }

  return citationId;
}

/**
 * Appends an agent-specific source label to a citation's text.
 *
 * The result has the form `<citationText> | <label>`. Agents without a label
 * leave the citation text unchanged.
 *
 * @param citationText The citation text to label.
 * @param agentType The agent whose label should be used.
 * @returns The labeled citation text, or the original text when no label applies.
 */
export function addLabelToCitation(citationText: string, agentType: RAGAgent): string {
  const labelByAgent: Record<RAGAgent, string | null> = {
    [RAGAgent.Legislation]: "Residential Tenancy Act 2010",
    [RAGAgent.FairTrading]: "NSW Government",
    [RAGAgent.USU]: null
  };

  const sourceLabel = labelByAgent[agentType];
  if (!sourceLabel) {
    return citationText;
  }

  return `${citationText} | ${sourceLabel}`;
}

/**
 * Renders the inline superscript marker for a citation as static HTML.
 *
 * The anchor carries the citation ID in `data-citation-index` and in its `id`
 * (`inline-citation-<id>`); the visible number is `index + 1`.
 *
 * @param citationId The ID of the citation being referenced.
 * @param index Zero-based position of the citation; displayed one-based.
 * @returns The marker's HTML markup.
 */
export function renderCitation(citationId: string, index: number): string {
  const displayNumber = index + 1;

  const marker = (
    <a
      className="supContainer"
      data-citation-index={citationId}
      key={`citation-${index}`}
      id={`inline-citation-${citationId}`}
    >
      <sup>{displayNumber}</sup>
    </a>
  );

  return renderToStaticMarkup(marker);
}

/**
 * Splits the text on `[citation]` markers and converts every piece to an HTML fragment.
 *
 * Plain-text pieces are passed through `formatTextPart`. Each bracketed piece is
 * split on commas; every entry is cleaned, resolved to a citation (creating it if
 * needed, which may append to `citations`) and rendered as a superscript marker.
 * Entries that do not resolve to a citation produce no output.
 *
 * @param text The pre-processed answer text.
 * @param citations Citation list for this answer; mutated as citations are resolved.
 * @param urlMap Citation-text-to-URL lookup.
 * @param selectedAgent The agent that produced the answer.
 * @param isStreaming Whether the answer is still streaming.
 * @returns One HTML fragment per piece, in document order.
 */
export function processCitations(
  text: string,
  citations: Citation[],
  urlMap: UrlMap,
  selectedAgent: RAGAgent,
  isStreaming: boolean = false
): string[] {
  // Splitting on a capturing pattern interleaves the pieces: even positions hold
  // plain text, odd positions hold the content of a bracket pair.
  const segments = text.split(/\[([^\]]+)\]/g);
  const fragments: string[] = [];

  for (let position = 0; position < segments.length; position++) {
    const segment = segments[position];

    if (position % 2 === 0) {
      fragments.push(formatTextPart(segment));
      continue;
    }

    // The plain-text segment after the bracket is handed to citation resolution.
    const followingText = segments[position + 1];
    let markers = "";

    for (const rawReference of segment.split(",")) {
      const citationId = createOrGetCitation(
        cleanCitationPart(rawReference),
        citations,
        urlMap,
        followingText,
        selectedAgent,
        isStreaming
      );

      if (!citationId) {
        continue;
      }

      const displayIndex = citations.findIndex(entry => entry.id === citationId);
      markers += renderCitation(citationId, displayIndex);
    }

    fragments.push(markers);
  }

  return fragments;
}

/**
 * Converts a raw answer into HTML with inline citation markers.
 *
 * The answer is run through a fixed pipeline: legislation section annotation,
 * citation line normalisation, HTML citation extraction, HTML tag cleanup,
 * markdown link extraction, inline URL extraction, streaming truncation and
 * finally citation rendering.
 *
 * @param answer The answer text to parse
 * @param isStreaming Whether the answer is streaming
 * @param selectedAgent The selected RAG agent
 * @param conversationId Optional conversation ID to associate citations with
 * @returns The rendered HTML together with the citations collected for this answer
 */
export function parseAnswerToHtml(answer: string, isStreaming: boolean, selectedAgent: RAGAgent, conversationId?: string): HtmlParsedAnswer {
  // Each call starts with its own citation list and URL lookup.
  const citations: Citation[] = [];
  const urlMap: UrlMap = new Map<string, string>();

  const annotated = annotateSectionsForLegislation(selectedAgent, answer.trim());
  const normalized = normalizeCitationLines(annotated);
  const htmlCitationsExtracted = extractHtmlCitations(normalized, urlMap);
  const tagFree = cleanupHtmlTags(htmlCitationsExtracted);
  const markdownLinksExtracted = extractMarkdownLinks(tagFree, urlMap);
  const inlineUrlsRecorded = extractInlineUrls(markdownLinksExtracted, urlMap);
  const renderable = handleStreamingCitations(inlineUrlsRecorded, isStreaming);

  const fragments = processCitations(renderable, citations, urlMap, selectedAgent, isStreaming);

  if (conversationId) {
    // Tag every collected citation with the conversation it belongs to.
    citations.forEach(citation => {
      citation.conversationId = conversationId;
    });

    // The registry is only updated once the answer has finished streaming.
    if (!isStreaming) {
      CitationRegistry.updateCitationsForConversation(citations, conversationId);
    }
  }

  return {
    answerHtml: fragments.join(""),
    citations
  };
}
