
// import DOMWalkers from '../dom-walkers';
// import * as Utils from '../flux/models/utils';

import { flatten } from 'ramda';
import DOMWalkers from './dom';
import { escapeRegExp } from './strings';

/**
 * Options accepted by `UnquoteEmail.unquote`.
 */
interface IUnquoteOptions {
  // When true, `unquote` falls back to the original (un-stripped) message if
  // removing the quoted elements leaves the body with no text content.
  keepIfWholeBodyIsQuote?: boolean,
}

// XPath result type passed to the `doc.evaluate` calls below that read only
// `singleNodeValue` (i.e. the first matching node).
const { FIRST_ORDERED_NODE_TYPE } = XPathResult;

/**
 * Collects everything that follows `node` in the document: the following
 * siblings of `node`, then the following siblings of its parent, and so on up
 * the `parentNode` chain.
 *
 * @param node The node to start from (it is not included in the result).
 * @returns `nodes` — the following nodes in the order visited, and `text` —
 *   the concatenation of their `textContent` values.
 */
function textAndNodesAfterNode(node: HTMLElement) {
  const nodes: HTMLElement[] = [];
  let text = '';

  // Outer loop climbs towards the root; inner loop sweeps rightwards across
  // the siblings at each level.
  for (let ancestor: Node | null = node; ancestor; ancestor = ancestor.parentNode) {
    for (let following = ancestor.nextSibling; following; following = following.nextSibling) {
      text += following.textContent;
      nodes.push(following as HTMLElement);
    }
  }

  return { text, nodes };
}

/**
 * Sometimes the trailing signature of an email is not placed inside a quote
 * block. Left alone, that makes the quote detector strip nothing, because a
 * signature after a quote looks just like someone writing regular inline text
 * after some quoted text (which is allowed).
 *
 * Walking the document backwards, this locates the first node that either is
 * a known quote element or directly follows one. If the whitespace-stripped
 * text found after that quote also appears inside the quote's own text, the
 * nodes after the quote are returned so they can be treated as quoted too.
 *
 * @param doc The parsed message document.
 * @param quoteElements Nodes already identified as quoted text.
 * @returns The nodes following the quote when they repeat text from the
 *   quote; otherwise an empty array.
 */
function unwrappedSignatureDetector(doc: Document, quoteElements: Node[]): HTMLElement[] {
  const withoutWhitespace = (value: string) => value.replace(/\s/g, '');

  for (const node of DOMWalkers.walkBackwards(doc)) {
    // Work out which quote block (if any) this node points us at.
    let focusNode = node;
    if (!(node && quoteElements.includes(node))) {
      const preceding = node.previousSibling;
      if (!preceding || !quoteElements.includes(preceding)) {
        continue;
      }
      focusNode = preceding;
    }

    // Only the first candidate is ever examined; every path below returns.
    const { text, nodes } = textAndNodesAfterNode(focusNode as HTMLElement);
    const maybeSig = withoutWhitespace(text);
    if (maybeSig.length === 0) {
      return [];
    }

    const quotedText = withoutWhitespace(focusNode.textContent || '');
    return quotedText.search(escapeRegExp(maybeSig)) >= 0 ? nodes : [];
  }

  return [];
}

/*
 * There are semi-common cases where, immediately before a blockquote, we
 * encounter a string like: "On Thu … so and so … wrote:". This belongs to the
 * quote but is usually left behind as a loose collection of nodes. To limit
 * false positives we only look for such a string when it immediately precedes
 * the blockquoted section. By the time this function runs the last blockquote
 * has already been removed, so the text we want sits at the end of the
 * document.
 *
 * Returns the nodes making up that attribution line (possibly none).
 */
function quoteStringDetector(doc: Document): HTMLElement[] {
  const attributionNodes = [];
  let foundWroteSuffix = false;

  for (const node of DOMWalkers.walkBackwards(doc)) {
    if (node.nodeType === Node.DOCUMENT_NODE) {
      continue;
    }

    const value = node.nodeValue;
    const hasVisibleText =
      node.nodeType === Node.TEXT_NODE && (value?.trim().length || 0) > 0;

    // Elements and blank text nodes are only collected once we are inside the
    // attribution line.
    if (!hasVisibleText) {
      if (foundWroteSuffix) {
        attributionNodes.push(node);
      }
      continue;
    }

    if (foundWroteSuffix) {
      attributionNodes.push(node);
      if (value && /On \S/gim.test(value)) {
        // This means we've reached the beginning of the quoted string.
        return attributionNodes as HTMLElement[];
      }
      continue;
    }

    if (!(value && /wrote:\s*$/gim.test(value))) {
      // There is some other text between the end of the content (adjacent to
      // the blockquote) and any quote string, so no text should be removed.
      return attributionNodes as HTMLElement[];
    }

    foundWroteSuffix = true;
    attributionNodes.push(node);
    if (/On \S/gim.test(value)) {
      // The beginning of the quoted string may be in the same node
      return attributionNodes as HTMLElement[];
    }
  }

  return attributionNodes as HTMLElement[];
}

/**
 * Reports whether a node's text is effectively empty: after trimming it is
 * either '' or made up solely of dashes such as '---' (often left over from
 * signature / confidentiality notice removal).
 */
const isEmptyishTextContent = ($el: HTMLElement) => {
  const content = ($el.textContent || '').trim();
  if (content.length === 0) {
    return true;
  }
  return /^-+$/.test(content);
};

/**
 * Reports whether an image is at most 1x1 in size. We want to avoid hiding
 * quoted text when the user has added an image beneath it, but only if that
 * image is more than 1px in size.
 *
 * Each dimension is read from the HTML attribute first, then from the inline
 * style (with "px" removed); a dimension that is not specified at all counts
 * as 10000.
 */
const looksLikeTrackingPixel = (img: HTMLElement) => {
  const dimension = (name: 'width' | 'height') =>
    Number(img.getAttribute(name) || (img.style[name] || '').replace('px', '') || 10000);

  const w = dimension('width');
  const h = dimension('height');
  return w <= 1 && h <= 1;
};

/**
 * Detects and strips quoted text (previous messages in a thread, trailing
 * footers, "On … wrote:" attribution lines) from an HTML email body.
 *
 * All members are static; the two public entry points are `hasQuotedHTML`
 * and `unquote`.
 */
export class UnquoteEmail {
  /**
   * Parses an HTML string with `DOMParser`.
   *
   * @param content HTML source of the message.
   * @returns The parsed document, or `undefined` if `parseFromString` threw
   *   (the error is logged).
   */
  private static parseHTML(content: string) {
    const parser = new DOMParser();
    let doc: Document | undefined = undefined;
    try {
      doc = parser.parseFromString(content, 'text/html');
    } catch (err) {
      // TODO: report error
      // TODO: maybe return some document that says we couldn't render the message?
      console.log('Error parsing message', err)
    }
    return doc;
  }

  /**
   * Reports whether the message contains any quoted text that `unquote`
   * would remove.
   *
   * @param html HTML source of the message.
   * @returns `true` if at least one removable quote element was found;
   *   `false` otherwise, including when the HTML could not be parsed.
   */
  static hasQuotedHTML(html: string) {
    const doc = this.parseHTML(html);
    if (!doc) {
      return false;
    }
    this.removeImagesStrippedByAnotherClient(doc);
    this.removeTrailingFootersAndWhitespace(doc);
    return this.findQuoteElements(doc).length > 0;
  }


  /**
   * Returns the message HTML with quoted text removed.
   *
   * @param content HTML source of the message.
   * @param options See `IUnquoteOptions`; defaults to no options.
   * @returns The stripped HTML. `content` itself is returned unchanged when
   *   it cannot be parsed. Otherwise the result is serialized by
   *   `outputHTMLFor`, which also forces links to open in a new tab.
   */
  static unquote(content: string, options: IUnquoteOptions = {}) {
    const doc = this.parseHTML(content);
    if (!doc) {
      return content
    }
    this.removeImagesStrippedByAnotherClient(doc);
    this.removeTrailingFootersAndWhitespace(doc);
    for (const el of this.findQuoteElements(doc)) {
      if (el) {
        el.remove();
      }
    }

    // It's possible that the entire body was quoted text anyway and we've removed everything.
    if (options.keepIfWholeBodyIsQuote) {
      if (!doc.body || !doc.children[0] || (doc.body.textContent?.trim().length || 0) === 0) {
        return this.outputHTMLFor(this.parseHTML(content) as Document, { initialHTML: content });
      }
    }

    if (!doc.body) {
      return this.outputHTMLFor(this.parseHTML('') as Document, { initialHTML: content });
    }

    // With the quote blocks gone, a dangling "On … wrote:" line may now sit
    // at the end of the document; remove it too (but never the body itself).
    for (const el of quoteStringDetector(doc)) {
      if (el && el !== doc.body) {
        el.remove();
      }
    }

    // after removing all the quoted text, delete any whitespace that appeared between blocks
    // so the email doesn't end with <br> x 50
    this.removeUnnecessaryWhitespace(doc);

    return this.outputHTMLFor(doc, { initialHTML: content });
  }


  /**
   * Removes every `<img>` whose `alt` text contains "removed by sender".
   * Does nothing when the document has no body.
   */
  private static removeImagesStrippedByAnotherClient(doc: Document) {
    if (!doc.body) {
      return;
    }

    const result = doc.evaluate(
      "//img[contains(@alt,'removed by sender')]",
      doc.body,
      null,
      XPathResult.ANY_TYPE,
      null
    );
    const nodes: HTMLElement[] = [];

    // collect all the results and then remove them all
    // to avoid modifying the dom while using the xpath selector
    let node = (result.iterateNext() as HTMLElement | null);
    while (node) {
      nodes.push(node);
      node = result.iterateNext() as HTMLElement | null;
    }
    nodes.forEach((n: HTMLElement) => n.remove());
  }

  /**
   * Repeatedly strips the trailing footer found by `findTrailingFooter`
   * together with the whitespace it leaves behind, until no footer is found.
   * Gives up after a bounded number of passes so a footer that keeps
   * matching cannot loop forever.
   */
  private static removeTrailingFootersAndWhitespace(doc: Document) {
    let els: HTMLElement[] | null;
    let iters = 0;
    while ((els = this.findTrailingFooter(doc))) {
      iters++;
      els.forEach(el => el.remove());
      this.removeUnnecessaryWhitespace(doc);
      if (iters > 20) {
        return;
      }
    }
  }

  /**
   * Looks for a footer at the very end of the document.
   *
   * @returns The nodes making up the footer, or `null` if none was found.
   */
  private static findTrailingFooter(doc: Document) {
    // Traverse from the body down the tree of "last" nodes looking for a
    // Confidentiality Notice, "To unsubscribe from this group", etc.
    // We strip these nodes because otherwise the quoted text logic
    // thinks that they are inline replies to quoted text.
    const footerRegexps = [
      /^Confidentiality Notice/i,
      /strictly confidential/i,
      /This email message is/i,
      /You received this message because/i,
    ];

    let head: HTMLElement = doc.body;
    while (head) {
      const tc = head?.textContent?.trim() || '';
      if (head.nodeType === Node.TEXT_NODE) {
        if (footerRegexps.some(r => r.test(tc))) {
          return [head];
        }
      }

      // chop off Google groups unsubscribe instructions which are appended
      // to the end but annoyingly not in a container.
      if (
        tc === '.' &&
        head.previousSibling &&
        head.previousSibling.previousSibling &&
        head.previousSibling.previousSibling.textContent?.trim().startsWith('To unsubscribe')
      ) {
        return [head, head.previousSibling, head.previousSibling.previousSibling] as HTMLElement[];
      }

      // chop off gmail_signature if the user has it configured to go at the absolute
      // bottom of the email
      if (head.nodeName === 'DIV' && head.classList.contains('gmail_signature')) {
        return [head];
      }

      // Empty leaf: step sideways to the previous sibling. Anything else:
      // descend into its last child.
      if (head.childNodes.length === 0 && tc === '') {
        head = head.previousSibling as HTMLElement;
      } else {
        head = head.lastChild as HTMLElement;
      }
    }
    return null;
  }

  /**
   * Trims redundant whitespace from the end of the document: duplicate
   * trailing `<br>` tags directly under the body, and empty(ish) text nodes
   * and containers at the tail of each "last child" chain.
   * Does nothing when the document has no body.
   */
  private static removeUnnecessaryWhitespace(doc: Document) {
    if (!doc.body) {
      return;
    }

    // Find back-to-back <br><br> at the top level and de-duplicate them. Note that
    // some emails contain TEXT<br>TEXT<br>TEXT, so the only ELEMENT children may be the <brs>
    const nodes = doc.body.childNodes;
    const extraTailBrTags = [];
    for (let i = nodes.length - 1; i >= 0; i--) {
      const curr = nodes[i];
      // We iterate from the end, so the neighbour being compared is the node
      // *before* `curr` in document order.
      const prev = nodes[i - 1];
      if (curr && curr.nodeName === 'BR' && prev && prev.nodeName === 'BR') {
        extraTailBrTags.push(curr);
      } else {
        break;
      }
    }
    for (const el of extraTailBrTags) {
      el.remove();
    }

    // Traverse down the tree of "last child" nodes to get the last child of the last child.
    // The deepest node at the end of the document.
    let lastOfLast = doc.body;
    while (lastOfLast.lastElementChild) {
      lastOfLast = lastOfLast.lastElementChild as HTMLElement;
    }

    // Traverse back up the tree - at each level, attempt to remove
    // whitespace from the last child and then remove the child itself
    // if it's completely empty. Repeat until a child has meaningful content,
    // then move up the tree.
    //
    // Containers with empty space at the end occur pretty often when we
    // remove the quoted text and it had preceding spaces.
    const removeTrailingWhitespaceChildren = (el: HTMLElement) => {
      while (el.lastChild) {
        const child = el.lastChild as HTMLElement;
        if (child.nodeType === Node.TEXT_NODE) {
          if (isEmptyishTextContent(child)) {
            child.remove();
            continue;
          }
        }
        if (['BR', 'P', 'DIV', 'SPAN', 'HR'].includes(child.nodeName)) {
          removeTrailingWhitespaceChildren(child);
          if (child.childElementCount === 0 && isEmptyishTextContent(child)) {
            child.remove();
            continue;
          }
        }
        break;
      }
    };

    while (lastOfLast.parentElement) {
      lastOfLast = lastOfLast.parentElement;
      removeTrailingWhitespaceChildren(lastOfLast);
    }
  }

  /**
   * Runs every client-specific quote detector and returns the quote elements
   * that are safe to remove, i.e. those not followed by unquoted content.
   */
  private static findQuoteElements(doc: Document) {
    const parsers = [
      this.findGmailQuotes,
      this.findYahooQuotes,
      this.findMicrosoftQuotes,
      this.findAppleMailQuotes,
      this.findBlockquoteQuotes,
      this.findQuotesAfterMessageHeaderBlock,
      this.findQuotesAfter__OriginalMessage__,
    ];

    let quoteElements: HTMLElement[] = [];
    for (const parser of parsers) {
      quoteElements = quoteElements.concat(parser.call(this, doc) || []);
    }

    // Find top-level nodes that look like a signature - some clients append
    // a signature block /beneath/ the quoted text and we need to count it as
    // quoted text as well — otherwise it gets considered an inline reply block.
    const unwrappedSignatureNodes = unwrappedSignatureDetector(doc, quoteElements);
    quoteElements = quoteElements.concat(unwrappedSignatureNodes);

    // Keep quotes that are followed by non-quote blocks (eg: inline reply text)
    quoteElements = quoteElements.filter(
      el => !this.isElementFollowedByUnquotedElement(el, quoteElements)
    );

    return quoteElements;
  }

  /** Finds the container Apple Mail uses to wrap the referenced message. */
  private static findAppleMailQuotes(doc: Document) {
    // Apple's mail app adds IDs to a div noting the end of a specific email and the start of a chain
    return Array.from(doc.querySelectorAll('#mail-editor-reference-message-container')) as HTMLElement[];
  }

  /** Finds every element carrying Gmail's `gmail_quote` class. */
  private static findGmailQuotes(doc: Document) {
    // Gmail creates both div.gmail_quote and blockquote.gmail_quote. The div
    // version marks text but does not cause indentation, but both should be
    // considered quoted text.
    return Array.from(doc.querySelectorAll('.gmail_quote')) as HTMLElement[];
  }

  /** Finds every element whose class attribute contains `yahoo_quoted`. */
  private static findYahooQuotes(doc: Document) {
    // Both Yahoo and AOL wrap their quotes in divs with classes that contain
    // the text yahoo_quoted
    return Array.from(doc.querySelectorAll('[class*="yahoo_quoted"]')) as HTMLElement[];
  }

  /**
   * Finds the bordered separator MS Office inserts above a quoted message and
   * returns it together with everything after it in the document.
   */
  private static findMicrosoftQuotes(doc: Document) {
    // MS office adds an element with a specific border
    const msQuoteBorder = Array.from(doc.querySelectorAll('[style*="border:none;border-top:solid #E1E1E1 1.0pt;padding:3.0pt 0cm 0cm 0cm"]')) as HTMLElement[];
    return flatten(msQuoteBorder.map(this.collectAllNodesBelow));
  }


  /** Finds every `<blockquote>` element. */
  private static findBlockquoteQuotes(doc: Document) {
    return Array.from(doc.querySelectorAll('blockquote'));
  }

  /**
   * Finds a forwarded/replied message header block ("Sent:/Date:", "To:",
   * "Subject:") and treats it and everything after it as quoted text.
   *
   * @returns The quoted elements, or an empty array if no complete header
   *   block was found.
   */
  private static findQuotesAfterMessageHeaderBlock(doc: Document) {
    // This detector looks for a element in the DOM tree containing
    // three children: <b>Sent:</b> or <b>Date:</b> and <b>To:</b> and
    // <b>Subject:</b>. It then returns every node after that as quoted text.

    // Find a DOM node exactly matching <b>Sent:</b>
    const dateXPath = `
      //b[. = 'Sent:'] |
      //b[. = 'Date:'] |
      //b[. = 'Sent: '] |
      //b[. = 'Date: '] |
      //span[. = 'Sent: '] |
      //span[. = 'Date: '] |
      //span[. = 'Sent:'] |
      //span[. = 'Date:']`;
    const dateMarker = doc.evaluate(dateXPath, doc.body, null, FIRST_ORDERED_NODE_TYPE, null)
      .singleNodeValue;

    if (!dateMarker) {
      return [];
    }

    // check to see if the parent container also contains the other two
    const headerContainer = dateMarker.parentElement;
    let matches = 0;
    for (const node of Array.from(headerContainer?.children || [])) {
      const tc = (node.textContent ?? '').trim();
      if (tc === 'To:' || tc === 'Subject:') {
        matches++;
      }
    }
    if (matches !== 2) {
      return [];
    }

    // got a hit! let's cut some text.
    const quotedTextNodes = this.collectAllNodesBelow(headerContainer);

    // Special case to add "From:" because it's often detached from the rest of the
    // header fields. We just add it where ever it's located.
    const fromXPath = "//b[. = 'From:'] | //span[. = 'From:']| //span[. = 'From: ']";
    let from = doc.evaluate(fromXPath, doc.body, null, FIRST_ORDERED_NODE_TYPE, null)
      .singleNodeValue as HTMLElement | null;

    if (from) {
      if (from.nodeName === 'SPAN') {
        from = from.parentElement;
      }
      if (from) {
        quotedTextNodes.push(from);
      }
    }

    return quotedTextNodes;
  }

  /**
   * Returns `headerContainer` plus every element that comes after it in the
   * document (later siblings, and later siblings of each ancestor).
   * Returns an empty array when given `null`.
   */
  private static collectAllNodesBelow = (headerContainer: HTMLElement | null) => {
    // The headers container and everything past it in the document is quoted text.
    // This traverses the DOM, walking up the tree and adding all siblings below
    // our current path to the array.
    let head: HTMLElement | null = headerContainer;
    const results: HTMLElement[] = [];
    while (head) {
      results.push(head);
      while (head && !head.nextElementSibling) {
        head = head.parentElement;
      }
      if (head) {
        head = head.nextElementSibling as HTMLElement | null;
      }
    }
    return results;
  };


  /**
   * Reports whether anything meaningful that is NOT quoted text appears after
   * `el` in the document — a non-empty text node or an image larger than a
   * tracking pixel. Such content is presumed to be an inline reply, so the
   * quote before it must be kept.
   *
   * @param el The quote element to check.
   * @param quoteElements Every element identified as quoted text; their
   *   subtrees are skipped.
   */
  private static isElementFollowedByUnquotedElement(el: HTMLElement, quoteElements: HTMLElement[]) {
    const seen: HTMLElement[] = [];
    let head = el;

    while (head) {
      // advance to the next sibling, or the parent's next sibling
      while (head && !head.nextSibling) {
        head = head.parentNode as HTMLElement;
      }
      if (!head) {
        break;
      }
      head = head.nextSibling as HTMLElement;

      // search this branch of the tree for any text nodes / images that
      // are not contained within a matched quoted text block. We mark
      // the subtree as "seen" because we traverse upwards, and would
      // re-evaluate the subtree on each iteration otherwise.
      const pile = [head];
      let node: HTMLElement | undefined;

      while ((node = pile.pop())) {
        if (seen.includes(node)) {
          continue;
        }
        if (quoteElements.includes(node)) {
          continue;
        }
        if (node.childNodes) {
          // Must mutate `pile` in place: `concat` returns a new array, so
          // using it here would silently skip every descendant and miss
          // inline replies nested inside wrapper elements.
          pile.push(...(Array.from(node.childNodes) as HTMLElement[]));
        }
        if (node.nodeName === 'IMG' && !looksLikeTrackingPixel(node)) {
          return true;
        }
        if (node.nodeType === Node.TEXT_NODE && !isEmptyishTextContent(node)) {
          return true;
        }
      }
      seen.push(head);
    }

    return false;
  }


  /**
   * Finds an "Original Message" separator line and treats it and everything
   * after it as quoted text.
   *
   * @returns The quoted elements, or an empty array if no separator exists.
   */
  private static findQuotesAfter__OriginalMessage__(doc: Document) {
    // these are pulled from specific messages seen in the wild. I think that doing this
    // via Xpath is still more performant than writing code to traverse + examine?
    const originalMessageMarker = doc.evaluate(
      `//div[. = '-------- Original message --------'] |
       //div[. = '------ Original Message ------'] |
       //div[starts-with(., '-----Original Message-----')] |
       //i[. = '-------Original Message-------'] |
       //div[. = '---Original---']`,

      doc.body,
      null,
      FIRST_ORDERED_NODE_TYPE,
      null
    ).singleNodeValue as HTMLElement | null;

    if (!originalMessageMarker) {
      return [];
    }

    return this.collectAllNodesBelow(originalMessageMarker);
  }

  /**
   * Serializes a document back to an HTML string.
   *
   * Side effect: every `<a>` in `doc` gets `target="_blank"`.
   *
   * @param doc The document to serialize; replaced by an empty parsed
   *   document if it has no body.
   * @param initialHTML The HTML the document was originally parsed from.
   *   When it contains a `<head>` or `<body>` tag, the inner HTML of the
   *   root element is returned; otherwise only the body's inner HTML.
   */
  private static outputHTMLFor(doc: Document, { initialHTML }: { initialHTML?: string } = {}) {
    if (!doc.body) {
      const parsed = this.parseHTML('');
      if (parsed) {
        doc = parsed;
      }
    }
    doc.querySelectorAll('a').forEach(anchor => {
      anchor.target = '_blank';
    });
    if (initialHTML && (/<\s?head\s?>/i.test(initialHTML) || /<\s?body[\s>]/i.test(initialHTML))) {
      return doc.children[0].innerHTML;
    }
    return doc.body.innerHTML;
  }

}