import _ from "lodash";
import { normalize } from '../../../../components/interop/utils';

/**
 * Tells whether two measurements differ by less than 5% of a reference scale.
 *
 * @param {number} a - First measurement.
 * @param {number} b - Second measurement.
 * @param {number} scale - Reference magnitude the absolute difference is divided by.
 * @returns {boolean} `true` when `|a - b| / scale` is strictly below 0.05.
 */
const areClose = (a, b, scale) => {
  const tolerance = 0.05;
  const relativeGap = Math.abs(a - b) / scale;
  return relativeGap < tolerance;
};

/**
 * Builds a display style (color, plus an optional font weight / family) for every font
 * described in `styleStats`.
 *
 * Fonts are ranked by how many characters use them. Colors are handed out by rank, the
 * font weight / family is picked from the average character width, and the second and
 * third ranked fonts are greyed out when they are noticeably smaller than the top one.
 *
 * @param {Object<string, {charCount: number, avgCharWidth: number, avgFontSize: number}>} styleStats
 *   Per-font statistics keyed by font name.
 * @returns {Object<string, {color: string, fontWeight?: string, fontFamily?: string}>}
 *   Styles keyed by the same font names; an empty object when there are no statistics.
 */
function generateGlobalStyles(styleStats) {
  if (!styleStats || Object.keys(styleStats).length === 0) {
    return {};
  }

  // Most-used font first. Sorting ascending (stable) and then reversing keeps fonts with
  // equal character counts in the same relative order a descending `sortBy().reverse()` gives.
  const topStyles = Object.entries(styleStats)
    .sort(([, a], [, b]) => a.charCount - b.charCount)
    .reverse();

  const colors = ['#333', '#0000a0', '#006d00', '#7e36aa', '#167f90'];
  const fallbackColor = '#af4795';
  const globalStyles = {};

  const totalChars = topStyles.reduce((sum, [, stats]) => sum + stats.charCount, 0);

  // When no font clearly dominates the document, font names probably do not identify
  // distinct styles, so every font with a meaningful share is rendered as plain black.
  const dominantShare = topStyles[0][1].charCount / totalChars;
  const noGlobalStyle = dominantShare < 0.3;

  topStyles.forEach(([styleKey, { avgCharWidth, charCount }], i) => {
    let color = fallbackColor;
    if (noGlobalStyle) {
      // Rare fonts cycle through the accent colors (every palette entry except the first).
      color = charCount / totalChars > 0.01 ? 'black' : colors[(i % (colors.length - 1)) + 1];
    } else if (colors[i]) {
      color = colors[i];
    }

    globalStyles[styleKey] = { color };

    // Wide glyphs are treated as bold (and very wide ones as a display face),
    // narrow glyphs as a condensed face.
    if (avgCharWidth > 0.5) {
      globalStyles[styleKey].fontWeight = '700';

      if (avgCharWidth > 0.535) {
        globalStyles[styleKey].fontFamily = '"Montserrat"';
      }
    } else if (avgCharWidth < 0.42) {
      globalStyles[styleKey].fontFamily = '"Roboto Condensed"';
    }
  });

  // NOTE(review): only three entries are read below, yet four fonts are required before the
  // grey-out applies — confirm whether `>= 3` was intended.
  if (topStyles.length > 3) {
    const [[, normalText], [s2, top2], [s3, top3]] = topStyles;

    // Secondary fonts more than 3% smaller than the main text are shown in gray.
    // (The second font used to be compared against `avgFontSize ** 0.97` by mistake.)
    const smallTextLimit = normalText.avgFontSize * 0.97;

    if (top2.avgFontSize < smallTextLimit) {
      globalStyles[s2].color = 'gray';
    }
    if (top3.avgFontSize < smallTextLimit) {
      globalStyles[s3].color = 'gray';
    }
  }

  return globalStyles;
}

/**
 * Shifts the first UTF-16 code unit of `char` by `n` positions.
 *
 * @param {string} char - String whose first code unit is shifted.
 * @param {number} n - Signed amount added to the code unit.
 * @returns {string} One-code-unit string for the shifted value.
 */
const offsetChar = (char, n) => {
  const shiftedCode = char.charCodeAt(0) + n;
  return String.fromCharCode(shiftedCode);
};

/**
 * Post-processes decoded pages in place.
 *
 * 1. Detects pages whose characters were all stored shifted by a constant char-code offset
 *    (neither of the two most frequent characters is a space) and shifts every item's
 *    `textContent` back.
 * 2. Sets `page.index`, each item's `normalizedText` (produced by the imported `normalize`
 *    helper) and `page.allText`, the concatenation of those fragments with whitespace runs
 *    collapsed to a single space.
 *
 * @param {Array<{pageItems: Array<{textContent: string, style: Object}>}>} pages
 *   Pages to process; they are mutated, nothing is returned.
 */
function postProcessText(pages) {
  const spaceCode = ' '.charCodeAt(0);
  // Offsets that already yielded readable text on a previous page; retried on later pages.
  const usedOffsets = new Set();

  // Pass 1: undo the "every char is offset" encoding some PDFs use.
  _.each(pages, (page, pageIndex) => {
    const allText = _.map(page.pageItems, 'textContent').join('');

    // Too little text to rank characters reliably.
    if (allText.length <= 30) {
      return;
    }

    // Occurrences per UTF-16 code unit.
    const charsRanking = {};
    allText.split('').forEach((c) => {
      charsRanking[c] = (charsRanking[c] || 0) + 1;
    });

    const topChars = _.sortBy(_.toPairs(charsRanking), '1').reverse();
    if (topChars.length < 2) {
      return;
    }

    const mostCommon = topChars[0][0];
    const secondMostCommon = topChars[1][0];
    if (mostCommon === ' ' || secondMostCommon === ' ') {
      // A space among the two most frequent chars: the encoding is considered fine.
      return;
    }

    // The (up to) ten most frequent characters of the page.
    const frequentChars = _.map(topChars.slice(0, 10), '0');

    // Candidate offsets: whatever maps each frequent char onto a space, then the offsets
    // that worked on earlier pages.
    const candidates = [
      ...frequentChars.map((c) => spaceCode - c.charCodeAt(0)),
      ...usedOffsets,
    ];

    let offset = 0;
    let match = false;
    for (const candidate of candidates) {
      offset = candidate;
      // A candidate is accepted when at most two of the shifted frequent chars fall
      // outside word chars, whitespace, '_', '-' and '.'.
      const unreadable = frequentChars
        .map((c) => offsetChar(c, candidate))
        .filter((c) => !c.match(/[\w\s_\-.]/));
      if (unreadable.length <= 2) {
        match = true;
        usedOffsets.add(candidate);
        break;
      }
    }

    if (!match) {
      const shiftedSample = frequentChars.map((c) => offsetChar(c, offset));
      console.warn(`BROKEN encoding (pag ${pageIndex}, "${shiftedSample}", offsset: ${offset})`, `${allText.slice(0, 10)}...`);
    }

    // NOTE(review): when nothing matched, `offset` is simply the last candidate tried and is
    // still applied to the page text — confirm this fallback is intended.
    _.each(page.pageItems, (item) => {
      item.textContent = _.map(item.textContent, (c) => offsetChar(c, offset)).join('');
    });
  });

  // Pass 2: build the normalized, searchable text of every page.
  _.each(pages, (page, pageIndex) => {
    page.index = pageIndex;

    const fragments = _.map(page.pageItems, (item, itemIndex) => {
      let normalizedText = normalize(item.textContent || "", true, false);

      // Some PDFs break a line into several fragments, even in the middle of a word. A space
      // is appended only when the next fragment does NOT start right where this one ends
      // with (almost) the same height and top; otherwise the two are glued together.
      const nextItem = page.pageItems[itemIndex + 1];
      if (nextItem) {
        const { left, top, width, height } = item.style;
        const next = nextItem.style;

        // TODO: This does not work for vertical text
        const continuesSameLine =
          areClose(width + left, next.left, height) &&
          areClose(height, next.height, height) &&
          areClose(top, next.top, height);

        if (!continuesSameLine) {
          normalizedText += ' ';
        }
      }

      item.normalizedText = normalizedText;
      return normalizedText;
    });

    page.allText = fragments.join("").replace(/\s+/gi, ' ');
  });
}

/**
 * Expands a compactly encoded list of PDF pages and derives global font styles from it.
 *
 * Each input page is a `[viewport, styles, pageItems]` tuple and each page item is a
 * `[textContent, [fontName, fontSize, left, top, width, height, transform]]` tuple.
 * A value of `-1` for the viewport, the styles or any style field means "same as the
 * previous one" and is replaced by the last concrete value seen.
 *
 * @param {Array} pages - Encoded pages.
 * @returns {{pages: Array<{viewport: *, styles: *, pageItems: Array}>, globalStyles: Object}|*}
 *   The decoded pages (post-processed by `postProcessText`) together with the styles from
 *   `generateGlobalStyles`; when `pages` is falsy or empty it is returned unchanged.
 */
export function decodePDF(pages) {
  if (!pages || !pages.length) {
    return pages;
  }

  const styleStats = {};
  const decodedPages = [];

  // Last concrete values seen, used to resolve the `-1` markers.
  const lastStyle = {};
  let lastViewport;
  let lastStyles;

  // Accumulates per-font totals: characters, items, font-size-relative width and font size.
  const updateStyleStats = (textContent, { fontName, fontSize, width }) => {
    const stat =
      styleStats[fontName] ||
      (styleStats[fontName] = { charCount: 0, widthSum: 0, fontSizeSum: 0, count: 0 });

    stat.charCount += textContent.length;
    stat.count += 1;
    stat.widthSum += width / fontSize;
    stat.fontSizeSum += fontSize;
  };

  for (const [viewportC, stylesC, pageItemsC] of pages) {
    if (viewportC !== -1) {
      lastViewport = viewportC;
    }
    if (stylesC !== -1) {
      lastStyles = stylesC;
    }

    const pageItems = [];
    for (const [textContent, styleC] of pageItemsC) {
      const [fontName, fontSize, left, top, width, height, transform] = styleC;
      const encoded = { fontName, fontSize, left, top, width, height, transform };

      // Resolve every field: reuse the remembered value for `-1`, otherwise remember the new one.
      const style = {};
      for (const prop of Object.keys(encoded)) {
        const val = encoded[prop];
        if (val === -1) {
          style[prop] = lastStyle[prop];
        } else {
          lastStyle[prop] = val;
          style[prop] = val;
        }
      }

      updateStyleStats(textContent, style);
      pageItems.push({ textContent, style });
    }

    decodedPages.push({ viewport: lastViewport, styles: lastStyles, pageItems });
  }

  // Turn the running sums into averages.
  for (const stat of Object.values(styleStats)) {
    stat.avgCharWidth = stat.widthSum / stat.charCount;
    stat.avgFontSize = stat.fontSizeSum / stat.count;
  }

  const globalStyles = generateGlobalStyles(styleStats);

  postProcessText(decodedPages);

  return { pages: decodedPages, globalStyles };
}
