import domify from 'domify';

/**
 * Walk the tree rooted at `el` bottom-up, offering every node to `fn`.
 *
 * Children are processed before their parent, from the last child to the
 * first. A node is kept unless `fn` returns exactly `false`.
 *
 * @param {Node} el - Root of the (sub)tree to process.
 * @param {function(Node): *} fn - Called once per node; return `false` to
 *   reject the node.
 * @returns {Node|undefined} `el` when it is kept; `undefined` when a rejected
 *   `el` was unwrapped into its parent; otherwise (rejected and parentless)
 *   whatever `domArray` makes of its remaining children.
 */
function sanitize(el, fn) {
  // Count down over the live child list: unwrapping a child only changes
  // positions at or after the current index, never the ones still to visit.
  let idx = el.childNodes.length
  while (idx-- > 0) {
    sanitize(el.childNodes[idx], fn)
  }

  const keep = fn(el) !== false
  if (keep) {
    return el
  }

  // Rejected root: there is no parent to splice into, so hand back the
  // children on their own.
  if (!el.parentNode) {
    return domArray(el.childNodes)
  }

  // Rejected inner node: drop the element but leave its children in place.
  removeDomLayer(el)
  return undefined
}

/**
 * Unwrap `el`: move all of its children into its parent, at the position `el`
 * occupied, then remove `el` itself.
 *
 * Children keep their original order. Moving `firstChild` (rather than
 * `lastChild`) directly before `el` on each step is what preserves it.
 *
 * @param {Node} el - Node to unwrap; must have a parent.
 */
function removeDomLayer(el) {
  const parent = el.parentNode
  while (el.firstChild) {
    parent.insertBefore(el.firstChild, el)
  }
  parent.removeChild(el)
}

/**
 * Collapse a node list into a single value.
 *
 * The multi-node branch keeps appending `arr[0]` until `arr` is empty, so
 * `arr` must shrink as its nodes are moved (as a live `childNodes` list does).
 *
 * @param {NodeList} arr - Nodes to collapse.
 * @returns {Node|DocumentFragment|undefined} `undefined` for an empty list,
 *   the node itself for a single-node list, otherwise a new document fragment
 *   holding all of the nodes.
 */
function domArray(arr) {
  const count = arr.length
  if (count === 0) {
    return undefined
  }
  if (count === 1) {
    return arr[0]
  }

  const fragment = document.createDocumentFragment()
  while (arr.length > 0) {
    fragment.appendChild(arr[0])
  }
  return fragment
}

// Kinds of attribute value sanitizeAttr knows how to validate. Any other
// `type` (callers pass 0) copies the value through without validation.
const ATTR_DIM = 1   // plain digits, or a percentage
const ATTR_SRC = 2   // URL of an embedded resource
const ATTR_HREF = 3  // URL of a link target

// Schemes a resolved URL may use, per attribute kind. These mirror the schemes
// named in the prefix patterns inside sanitizeAttr.
const SRC_PROTOCOLS = ['http:', 'https:', 'hyper:']
const HREF_PROTOCOLS = ['http:', 'https:', 'ftp:', 'mailto:', 'hyper:']

/**
 * Resolve `val` against `base` and return it as an absolute URL string, or
 * `null` when it cannot be parsed or its scheme is not in `protocols`.
 *
 * Checking the parsed protocol (instead of pattern-matching the raw string)
 * is what rejects scheme-only URLs such as `javascript:` or `data:`, which
 * contain no `//` and would otherwise be mistaken for relative paths.
 */
function resolveAllowedUrl(val, base, protocols) {
  let resolved
  try {
    resolved = new URL(val, base)
  } catch (err) {
    // Unparseable, or relative with no usable base: reject the value rather
    // than let the exception abort the whole sanitizing pass.
    return null
  }
  return protocols.includes(resolved.protocol) ? resolved.toString() : null
}

/**
 * Validate attribute `name` of `ele` and, if acceptable, record the cleaned
 * value on `attr`.
 *
 * - ATTR_DIM: digits only, or a whole percentage between 10% and 100%.
 * - ATTR_SRC: values starting with `//`, `http://`, `https://` or `hyper://`
 *   are kept as written; values containing any other `scheme://` are
 *   rejected; everything else is resolved against `url` and must end up with
 *   an allowed scheme.
 * - ATTR_HREF: same as ATTR_SRC with `ftp` and `mailto` also allowed; an
 *   accepted link additionally gets `attr.target = '_blank'`.
 * - any other type: the value is copied unchanged.
 *
 * @param {Element} ele - Element whose attribute is inspected (not modified).
 * @param {string} name - Attribute name.
 * @param {number} type - One of the ATTR_* kinds, or 0 for no validation.
 * @param {Object} attr - Output map; receives `attr[name]` on success.
 * @param {string} [url] - Base URL used to resolve relative URL values.
 * @returns {boolean} `true` if the attribute was accepted and recorded,
 *   `false` if it is missing, empty, or failed validation.
 */
function sanitizeAttr(ele, name, type, attr, url) {
  const a = ele.attributes[name]
  if (!a || !a.value) {
    return false
  }

  let val = a.value
  let m
  switch (type) {
    case ATTR_DIM:
      // m[1] is a bare number, m[2] the numeric part of a percentage.
      m = val.match(/^(\d+)$|^(\d+)%$/)
      if (!m) {
        return false
      }
      if (m[2]) {
        const n = Number(m[2])
        if (n < 10 || n > 100) {
          return false
        }
      }
      break

    case ATTR_SRC:
      // m[3] is set when some scheme other than the allowed ones precedes
      // a `//`.
      m = val.match(/^((https?|hyper):)?\/\/|(\w+:\/\/)/)
      if (m) {
        if (m[3]) {
          return false
        }
      } else {
        val = resolveAllowedUrl(val, url, SRC_PROTOCOLS)
        if (val === null) {
          return false
        }
      }
      break

    case ATTR_HREF:
      m = val.match(/^((https?|ftp|mailto|hyper):)?\/\/|(\w+:\/\/)/)
      if (m) {
        if (m[3]) {
          return false
        }
      } else {
        val = resolveAllowedUrl(val, url, HREF_PROTOCOLS)
        if (val === null) {
          return false
        }
      }
      // Only set once the link is known to be acceptable.
      attr.target = '_blank'
      break
  }

  attr[name] = val
  return true
}

/**
 * Parse `html` and strip it down to a small allowlist of elements and
 * attributes.
 *
 * Elements not on the allowlist are unwrapped (their children are kept),
 * STYLE and SCRIPT are dropped together with their contents, empty text and
 * container elements are removed, and every surviving element has all of its
 * attributes replaced by the ones that passed `sanitizeAttr`.
 *
 * @param {string} html - Markup to sanitize.
 * @param {string} [url] - Base URL used to resolve relative `src`/`href`.
 * @returns {Node|string|undefined} The sanitized node or fragment; the
 *   original `html` string when it parses to a root of nodeType 2-4 (e.g. a
 *   bare text node); `''` when the root is of nodeType 5-10 (e.g. a comment);
 *   `undefined` when nothing survives.
 */
function sanitizeMod(html, url) {
  let dirty = domify(html)
  if (dirty.nodeType >= 2 && dirty.nodeType <= 4) {
    return html
  } else if (dirty.nodeType >= 5 && dirty.nodeType <= 10) {
    return ''
  }

  return sanitize(dirty, ele => {
    // Attributes that passed validation; they replace the element's own.
    let attr = {}

    // Attribute / text / CDATA nodes and fragments are kept as they are;
    // entity, processing-instruction, comment, document and doctype nodes
    // are dropped.
    if ((ele.nodeType >= 2 && ele.nodeType <= 4) || ele.nodeType === 11) {
      return true
    } else if (ele.nodeType >= 5 && ele.nodeType <= 10) {
      return false
    }

    switch (ele.tagName) {
      case 'STYLE': case 'SCRIPT':
        // Detach the element here so that its contents go with it instead of
        // being unwrapped into the parent.
        if (ele.parentNode) {
          ele.parentNode.removeChild(ele)
        } else {
          // The root has no parent to detach from. Empty it so its source
          // text is not handed back as the result.
          while (ele.firstChild) {
            ele.removeChild(ele.firstChild)
          }
        }
        return false
      case 'AUDIO':
      case 'VIDEO':
        sanitizeAttr(ele, 'width', ATTR_DIM, attr)
        sanitizeAttr(ele, 'height', ATTR_DIM, attr)
        attr.controls = 'true'
        break
      case 'SOURCE':
        sanitizeAttr(ele, 'type', 0, attr)
        if (!sanitizeAttr(ele, 'src', ATTR_SRC, attr, url)) {
          return false
        }
        break
      case 'TRACK':
        sanitizeAttr(ele, 'kind', 0, attr)
        sanitizeAttr(ele, 'srclang', 0, attr)
        sanitizeAttr(ele, 'label', 0, attr)
        if (!sanitizeAttr(ele, 'src', ATTR_SRC, attr, url)) {
          return false
        }
        break
      case 'IFRAME':
      case 'IMG':
        sanitizeAttr(ele, 'width', ATTR_DIM, attr)
        sanitizeAttr(ele, 'height', ATTR_DIM, attr)
        if (!sanitizeAttr(ele, 'src', ATTR_SRC, attr, url)) {
          return false
        }
        break
      case 'A':
        sanitizeAttr(ele, 'alt', 0, attr)
        if (!sanitizeAttr(ele, 'href', ATTR_HREF, attr, url)) {
          return false
        }
        // falls through: links also keep `title` and get the empty-text check
      case 'DFN': case 'ABBR':
        sanitizeAttr(ele, 'title', 0, attr)
        // falls through to the empty-text check

      //
      // With text markup, eliminate tags that are basically empty.
      //
      case 'BLOCKQUOTE': case 'P': case 'NL': case 'LABEL':
      case 'CODE': case 'CAPTION': case 'CITE': case 'LI': case 'ADDRESS':
      case 'TH': case 'TD': case 'PRE': case 'DT': case 'DD':
      case 'H1': case 'H2': case 'H3': case 'H4': case 'H5': case 'H6':
      case 'FIGCAPTION': case 'SUMMARY':
        if (!ele.textContent.trim() && !ele.children.length) {
          return false
        }
        break

      //
      // With container elements, remove them if they have no children
      //
      case 'DL': case 'DI': case 'UL': case 'OL': case 'DEL': case 'INS':
      case 'B': case 'I': case 'STRONG': case 'EM': case 'STRIKE':
      case 'S': case 'SMALL': case 'SUB': case 'SUP': case 'U':
      case 'TABLE': case 'THEAD': case 'TBODY': case 'TFOOT': case 'TR':
      case 'DETAILS': case 'FIGURE': case 'ARTICLE': case 'ASIDE':
        if (ele.childNodes.length === 0) {
          return false
        }
        // falls through

      case 'HR':
        break
      case 'BR': case 'WBR': {
        // Drop line breaks at the very start or end of their parent. A break
        // that is itself the root (no parent) is both, so it goes too.
        const parent = ele.parentNode
        if (!parent || parent.firstChild == ele || parent.lastChild == ele) {
          return false
        }
        break
      }
      default:
        // Anything not listed above is unwrapped.
        return false
    }

    // Strip every original attribute, then re-apply only the validated ones.
    while (ele.attributes.length > 0) {
      ele.removeAttribute(ele.attributes[0].name)
    }

    for (let k in attr) {
      ele.setAttribute(k, attr[k])
    }

    return true
  })
}

/**
 * Sanitize `html` and describe the result as a plain object.
 *
 * @param {string} html - Markup to sanitize.
 * @param {string} [url] - Base URL used to resolve relative `src`/`href`.
 * @returns {{tag: "img", src: string}|{tag: "html", text: string}} An image
 *   descriptor when the whole result is a single IMG element, otherwise the
 *   serialized markup (an empty string when nothing survived).
 */
sanitizeMod.html = function(html, url) {
  const san = sanitizeMod(html, url)

  // Nothing survived sanitizing.
  if (san == null) {
    return {tag: "html", text: ''}
  }

  // sanitizeMod hands back a string (the input itself, or '') when the input
  // did not parse to an element or fragment; there is no node to serialize.
  if (typeof san === 'string') {
    return {tag: "html", text: san}
  }

  if (san.tagName === 'IMG') {
    return {tag: "img", src: san.src}
  }

  // Serialize through a throwaway container, since the result may be a
  // fragment or a non-element node with no markup of its own to read.
  const holder = document.createElement('div')
  holder.appendChild(san)
  return {tag: "html", text: holder.innerHTML}
}

export default sanitizeMod
