import { BoundingBox, Coordinates, GoogleOCRData, Vertex } from '@src/types/ocr'
import { FieldNode, FieldType, Maybe } from '@src/graphql/types'
import { LINE_ITEM_ID_KEY_SEPARATOR, LINE_THRESHOLD } from '@src/utils/app_constants'
import { format, parse } from 'date-fns'
import { parseDateString } from '@src/utils/date'
import { isFallback } from './enum'

// This should be in sync with cauldron/cauldron/fields/field_type.py

// Letter -> digit substitutions shared by the integer, decimal, date,
// date-time and price field types.
const digitLookalikeReplacements: Record<string, string> = {
  S: '5',
  B: '8',
  O: '0',
  o: '0',
  I: '1',
  i: '1',
  l: '1',
}

// Per-field-type table of character substitutions applied to OCR text.
// Each entry gets its own copy of the shared table so no two field types
// alias the same object.
const fieldTypeReplacementMap: Record<FieldType, Record<string, string>> = {
  [FieldType.Integer]: { ...digitLookalikeReplacements },
  [FieldType.Decimal]: { ...digitLookalikeReplacements },
  [FieldType.Date]: { ...digitLookalikeReplacements },
  [FieldType.DateTime]: { ...digitLookalikeReplacements },
  [FieldType.Price]: { ...digitLookalikeReplacements },
  // Reference numbers only have the letter O (either case) mapped to zero.
  [FieldType.ReferenceNumber]: {
    O: '0',
    o: '0',
  },
  // Database fields are left untouched.
  [FieldType.Database]: {},
}

/**
 * Applies the character substitutions registered for `fieldType` in
 * `fieldTypeReplacementMap` to `text`.
 *
 * Every occurrence is replaced one-for-one, so the length of the text is
 * preserved (e.g. "1OO" becomes "100"). Collapsing a run of look-alike
 * characters into a single digit would silently drop digits.
 * NOTE(review): the map is documented as mirroring a backend table; confirm
 * the backend also substitutes one-for-one.
 *
 * @param text - raw text to clean
 * @param fieldType - type of the target field; a falsy value disables cleaning
 * @returns the text with all substitutions applied, or `text` unchanged when
 *   there is no field type or no substitution table for it
 */
const cleanOCRErrors = (text: string, fieldType: Maybe<FieldType>): string => {
  if (!fieldType) {
    return text
  }
  // Guard against a field type that has no entry at runtime instead of
  // letting Object.entries throw on undefined.
  const replacements: Record<string, string> | undefined = fieldTypeReplacementMap[fieldType]
  if (!replacements) {
    return text
  }
  let cleanedText = text
  for (const [from, to] of Object.entries(replacements)) {
    // split/join is a literal replace-all: no regex escaping concerns and no
    // collapsing of consecutive matches.
    cleanedText = cleanedText.split(from).join(to)
  }
  return cleanedText
}

/*
 * Splits a textual tuple such as "(x, y)" into its comma-separated parts.
 * An opening parenthesis at the very start, a closing parenthesis at the
 * very end, and all whitespace are stripped before splitting.
 * Returns: the parts as strings, e.g. ["x", "y"]
 */
export const parseTuple = (tuple: string): string[] => {
  const stripped = tuple.replace(/^\(|\s|\)$/g, '')
  const parts = stripped.split(',')
  return parts
}

/*
 * Squared Euclidean distance between two vertices (no square root taken).
 * Returns: (p1.x - p2.x)^2 + (p1.y - p2.y)^2
 */
export const distSquared = (p1: Vertex, p2: Vertex): number => {
  const deltaX = p1.x - p2.x
  const deltaY = p1.y - p2.y
  return deltaX * deltaX + deltaY * deltaY
}

/**
 * Collects the bounding box of every word on the first OCR page, with each
 * vertex's x divided by the page width and y divided by the page height.
 *
 * Words are skipped when their bounding box has fewer than four vertices or
 * when any vertex is missing a coordinate, so the result never contains NaN
 * coordinates.
 * NOTE(review): if the upstream OCR serializer omits zero-valued coordinates,
 * such vertices might be better treated as 0 than skipped. Confirm against
 * the payload producer.
 *
 * @param googleOcrData - OCR payload; only `full_text_annotation.pages[0]` is read
 * @returns one scaled bounding box per usable word, in payload order; an
 *   empty array when the payload has no pages
 */
export const getGoogleOcrWordBoundingBoxes = (googleOcrData: GoogleOCRData): BoundingBox[] => {
  const page = googleOcrData.full_text_annotation.pages?.[0]
  if (!page) {
    return []
  }
  const { width, height, blocks } = page
  // Builds a fresh object so the OCR payload itself is never edited in place.
  const scale = (vertex: Vertex) => ({ x: vertex.x / width, y: vertex.y / height })
  const boundingBoxes: BoundingBox[] = []
  for (const block of blocks || []) {
    for (const paragraph of block.paragraphs || []) {
      for (const word of paragraph.words || []) {
        const { vertices } = word.bounding_box
        if (vertices.length < 4) {
          continue
        }
        // Must skip the whole word here: a `continue` placed inside a loop
        // over the vertices would only advance that inner loop.
        if (vertices.some((vertex) => vertex.x == null || vertex.y == null)) {
          continue
        }
        // Order: top-left, top-right, bottom-right, bottom-left.
        boundingBoxes.push({
          vertices: [scale(vertices[0]), scale(vertices[1]), scale(vertices[2]), scale(vertices[3])],
        })
      }
    }
  }
  return boundingBoxes
}

// A single OCR symbol paired with the midpoint of its bounding box.
type LetterAndCoords = {
  // text of the OCR symbol
  letter: string
  // x is the mean of the top-left and top-right x, y the mean of the top-left
  // and bottom-left y, each taken after dividing by the page width / height
  midpoint: Vertex
}

// TODO: sort on backend and here for some angle edge cases
/**
 * Flattens the first OCR page into a list of words, each word being the list
 * of its symbols with the midpoint of the symbol's bounding box (coordinates
 * divided by the page width / height).
 *
 * No sorting is performed here: words and letters keep the order in which
 * they appear in the payload (block -> paragraph -> word -> symbol).
 * Symbols whose bounding box has fewer than four vertices, or has a vertex
 * with a missing coordinate, are dropped. A word whose symbols were all
 * dropped is still emitted as an empty list.
 *
 * @param googleOcrData - OCR payload; only `full_text_annotation.pages[0]` is read
 * @returns one entry per word; an empty array when the payload has no pages
 */
const getSortedGoogleOCRDataLetters = (googleOcrData: GoogleOCRData): LetterAndCoords[][] => {
  const page = googleOcrData.full_text_annotation.pages?.[0]
  if (!page) {
    return []
  }
  const { width, height, blocks } = page
  const words: LetterAndCoords[][] = []
  for (const block of blocks || []) {
    for (const paragraph of block.paragraphs || []) {
      for (const word of paragraph.words || []) {
        const letters: LetterAndCoords[] = []
        for (const wordSymbol of word.symbols || []) {
          const { vertices } = wordSymbol.bounding_box
          // The bottom-left corner (index 3) is read below, so all four
          // vertices are required.
          if (vertices.length < 4) {
            continue
          }
          // Must skip the whole symbol here: a `continue` placed inside a
          // loop over the vertices would only advance that inner loop.
          if (vertices.some((vertex) => vertex.x == null || vertex.y == null)) {
            continue
          }
          const [topLeft, topRight, , bottomLeft] = vertices
          // New objects are built from scaled values; the OCR payload itself
          // is never edited in place.
          letters.push({
            letter: wordSymbol.text,
            midpoint: {
              x: (topLeft.x / width + topRight.x / width) / 2,
              y: (topLeft.y / height + bottomLeft.y / height) / 2,
            },
          })
        }
        words.push(letters)
      }
    }
  }
  return words
}

/*
 * Extracts the text of every OCR letter whose midpoint lies inside the
 * axis-aligned rectangle spanned by `start` and `end` (in either order,
 * bounds inclusive).
 *
 * Letters are grouped by OCR word; each word that contributed at least one
 * letter is followed by a single space. A newline is inserted before a word
 * when one of its selected letters sits more than LINE_THRESHOLD below the
 * previously selected letter. Words with no selected letter contribute
 * nothing, so they do not leave runs of spaces in the result.
 *
 * Return: the extracted text, trimmed; '' when nothing is selected or the
 * OCR data contains no words
 */
export const extractWordFromCoordinates: (
  googleOcrData: GoogleOCRData,
  start: Coordinates,
  end: Coordinates,
) => string = (googleOcrData, start, end) => {
  const minX = Math.min(start.x, end.x)
  const maxX = Math.max(start.x, end.x)
  const minY = Math.min(start.y, end.y)
  const maxY = Math.max(start.y, end.y)

  let extracted = ''
  // y of the most recently selected letter. Starting undefined (rather than
  // at the first letter of the page) avoids indexing into a possibly empty
  // word list; a line break before the very first selected letter would be
  // trimmed away anyway.
  let lastSelectedY: number | undefined
  for (const wordLetters of getSortedGoogleOCRDataLetters(googleOcrData)) {
    let word = ''
    for (const { letter, midpoint } of wordLetters) {
      const isSelected =
        midpoint.x >= minX && midpoint.x <= maxX && midpoint.y >= minY && midpoint.y <= maxY
      if (!isSelected) {
        continue
      }
      if (lastSelectedY !== undefined && midpoint.y - lastSelectedY > LINE_THRESHOLD) {
        // Appended to the output before the current word is, so the break
        // lands in front of the word even when triggered mid-word.
        extracted += '\n'
      }
      word += letter
      lastSelectedY = midpoint.y
    }
    if (word) {
      extracted += `${word} `
    }
  }
  return extracted.trim()
}

/*
 * Strips every character listed in `invalidCharsRegex` from `text`.
 * `invalidCharsRegex` is used as the contents of a regex character class, so
 * it is a set of characters/ranges rather than a full pattern.
 * Returns: the stripped text, or `text` unchanged when `invalidCharsRegex`
 * is empty or null
 */
const cleanInvalidCharRegex = (text: string, invalidCharsRegex: Maybe<string>): string => {
  if (!invalidCharsRegex) {
    return text
  }
  const invalidCharRuns = new RegExp(`[${invalidCharsRegex}]+`, 'g')
  return text.replace(invalidCharRuns, '')
}

// Result of cleaning text copied from an image.
export type CleanTextFromImageType = {
  // the cleaned text, or the input text when date formatting failed
  text: string
  // date-format error message for the user; '' when date formatting
  // succeeded, null when no date formatting was attempted
  err: string | null
}

/**
 * Parses `text` as a date and renders it with `dateFormatString`.
 *
 * Parsing is best-effort: the text is tried as-is (hyphens turned into
 * slashes), then with all whitespace removed, then against a fixed list of
 * explicit date-fns patterns. The first attempt that yields a valid date wins.
 *
 * @param text - raw date text
 * @param dateFormatString - desired output format; lower-cased and adapted to
 *   date-fns tokens before use, parentheses removed
 * @returns `{ text: formatted, err: '' }` on success, or the original `text`
 *   with a user-facing error message when the text cannot be parsed or
 *   formatted
 */
export const formatFieldToDateFormatString = (
  text: string,
  dateFormatString: string,
): CleanTextFromImageType => {
  // NOTE: javascript dates can be REALLY WEIRD.
  // You would think that new Date(' 2020-04-31') would be equal to new Date('2020-04-31'), right?
  // Wrong!! The first one parses to May 1st, 2020, and the second parses to Apr 30, 2020!!
  // This is just one of many unexpected behaviors.
  // There may be nothing we can do about this, but it is documented here in case
  // it surfaces as a bug later.
  const dateErrString =
    'The extracted data has an invalid date format. Please manually input it into the desired format instead'
  // An invalid Date has a NaN timestamp. Checking for NaN (instead of
  // falsiness) keeps the valid epoch timestamp 0 from being treated as invalid.
  const isInvalidDate = (date: Date | number): boolean => Number.isNaN(date.valueOf())

  // Replace hyphens with slashes because the JavaScript date module works
  // weirdly with hyphens (timezone shift). A hyphen directly followed by
  // digits and a colon is kept.
  const textWithSlashes = text.replace(/-(?!(\d+:))/g, '/')
  let dateFromText = parseDateString(textWithSlashes)
  // Best-effort matching: a string like "20 - nov - 20" parses as invalid
  // while "20-nov-20" does not, so retry without whitespace and then with
  // explicit, non-standard formats.
  if (isInvalidDate(dateFromText)) {
    const cleanedDate = textWithSlashes.replace(/\s/g, '')
    dateFromText = parseDateString(cleanedDate)
    const referenceDate = new Date()
    // Tried in order. The dd/MM/yyyy variants need ops verification in case
    // of discrepancies with MM dd yyyy.
    const fallbackFormats = ['yyyy/MM/dd-HH:mm:ss', 'dd/MM/yyyy', 'dd/MM/yyyy-HH:mm:ss']
    for (const fallbackFormat of fallbackFormats) {
      if (!isInvalidDate(dateFromText)) {
        break
      }
      dateFromText = parse(cleanedDate, fallbackFormat, referenceDate)
    }
  }
  if (isInvalidDate(dateFromText)) {
    return { text, err: dateErrString }
  }

  // see https://github.com/date-fns/date-fns/blob/master/docs/unicodeTokens.md
  // for why we convert to M (date) and H (time).
  // It is assumed that "local week of the year" is never meant with Y.
  // After lower-casing, the first run of "m" that is followed by "-" or "/"
  // within the same whitespace-free token is upper-cased, and likewise the
  // first run of "h" that is followed by ":".
  const convertedFormatString = dateFormatString
    .toLowerCase()
    .replace(/(?=\S*[-/])([m]+)/, (match) => match.toUpperCase())
    .replace(/(?=\S*[:])([h]+)/, (match) => match.toUpperCase())
    .replace(/[()]/g, '')
  try {
    return { text: format(dateFromText, convertedFormatString), err: '' }
  } catch {
    // Formatting could still error out (e.g. on an unsupported format
    // string), so better to not crash the entire page.
    return { text, err: dateErrString }
  }
}

/**
 * Cleans text copied from an image for the field identified by
 * `activeFieldKey`: applies the field type's OCR substitutions, then the
 * field's date format (if it has one), then strips the field's invalid
 * characters.
 *
 * A key containing LINE_ITEM_ID_KEY_SEPARATOR is treated as a repeatable
 * field. Its node is resolved from `repeatableFieldKeyMap` by the full key
 * first and, failing that, by the second separator-delimited segment of the
 * key. The same resolved node drives all three cleaning steps, so a key that
 * only matches one of the two forms no longer skips the OCR/date steps or
 * throws on the invalid-character lookup.
 * NOTE(review): confirm which key form `repeatableFieldKeyMap` actually uses.
 *
 * @param activeFieldKey - key of the field receiving the text
 * @param repeatableFieldKeyMap - field nodes of repeatable (line item) fields
 * @param nonRepeatableFieldKeyMap - field nodes of all other fields
 * @param copiedTextFromImage - raw text to clean
 * @returns the cleaned text, plus the date-formatting error if any (`null`
 *   when no date formatting was attempted). When no field node is found the
 *   text is returned unchanged.
 */
export const cleanTextFromImage = (
  activeFieldKey: string,
  repeatableFieldKeyMap: Record<string, FieldNode>,
  nonRepeatableFieldKeyMap: Record<string, FieldNode>,
  copiedTextFromImage: string,
): CleanTextFromImageType => {
  const activeFieldIsRepeatable = activeFieldKey.includes(LINE_ITEM_ID_KEY_SEPARATOR)
  let fieldNode: FieldNode | undefined
  if (activeFieldIsRepeatable) {
    const fieldKeySegment = activeFieldKey.split(LINE_ITEM_ID_KEY_SEPARATOR)[1]
    fieldNode = repeatableFieldKeyMap[activeFieldKey] ?? repeatableFieldKeyMap[fieldKeySegment]
  } else {
    fieldNode = nonRepeatableFieldKeyMap[activeFieldKey]
  }

  let cleanedTextFromImage = cleanOCRErrors(
    copiedTextFromImage,
    fieldNode?.fieldType && !isFallback(fieldNode.fieldType) ? fieldNode.fieldType.value : null,
  )

  let err: string | null = null
  const dateFormatString = fieldNode?.dateFormatString
  if (dateFormatString) {
    const formatted = formatFieldToDateFormatString(cleanedTextFromImage, dateFormatString)
    cleanedTextFromImage = formatted.text
    err = formatted.err
  }

  // Optional chaining: a key missing from the maps must not crash here when
  // the steps above already tolerate it.
  const invalidCharsRegex = fieldNode?.invalidCharsRegex
  if (invalidCharsRegex) {
    cleanedTextFromImage = cleanedTextFromImage.replace(
      new RegExp(`[${invalidCharsRegex}]+`, 'g'),
      '',
    )
  }

  return {
    text: cleanedTextFromImage,
    err,
  }
}

/**
 * Runs the full cleaning pipeline on `text` for a known field: OCR
 * substitutions for `fieldType`, removal of `invalidCharsRegex` characters,
 * and finally date formatting when `fieldNode` carries a date format string.
 *
 * Any error reported by the date formatting step is discarded; only the
 * resulting text is returned.
 *
 * @param text - raw text to clean
 * @param invalidCharsRegex - characters to strip (character-class contents), if any
 * @param fieldType - field type selecting the OCR substitutions, if any
 * @param fieldNode - field definition supplying `dateFormatString`, if any
 * @returns the cleaned text
 */
export const applyCleanForField = (
  text: string,
  invalidCharsRegex: Maybe<string>,
  fieldType: Maybe<FieldType>,
  fieldNode: Maybe<FieldNode>,
): string => {
  const withoutOcrErrors = cleanOCRErrors(text, fieldType)
  const withoutInvalidChars = cleanInvalidCharRegex(withoutOcrErrors, invalidCharsRegex)
  const dateFormatString = fieldNode?.dateFormatString
  if (!dateFormatString) {
    return withoutInvalidChars
  }
  return formatFieldToDateFormatString(withoutInvalidChars, dateFormatString).text
}
