From 5d596fbb521f5a8e16012932e04f8cbbdb949569 Mon Sep 17 00:00:00 2001
From: Yukai Huang <yukaihuangtw@gmail.com>
Date: Sat, 30 Nov 2019 22:10:18 +0800
Subject: [PATCH] WIP: impl spellcheck worker

---
 .../editor/spellcheck/spellcheck.worker.js    | 91 +++++++++++++++++++
 .../js/lib/editor/spellcheck/spellchecker.js  | 57 ++++++++++++
 public/js/lib/editor/spellcheck/tokenizer.js  | 70 ++++++++++++++
 3 files changed, 218 insertions(+)
 create mode 100644 public/js/lib/editor/spellcheck/spellcheck.worker.js
 create mode 100644 public/js/lib/editor/spellcheck/spellchecker.js
 create mode 100644 public/js/lib/editor/spellcheck/tokenizer.js

diff --git a/public/js/lib/editor/spellcheck/spellcheck.worker.js b/public/js/lib/editor/spellcheck/spellcheck.worker.js
new file mode 100644
index 00000000..6e84eebd
--- /dev/null
+++ b/public/js/lib/editor/spellcheck/spellcheck.worker.js
@@ -0,0 +1,91 @@
+import Typo from 'typo-js'
+import { tokenizer } from './tokenizer'
+
+let dictionaryDownloadUrls = {} // lang → { aff, dic } download URLs; replaced wholesale by initializeDictionaryUrls
+const typoMap = new Map() // lang → Typo instance cache, filled by findOrCreateTypoInstance
+let typo // Typo instance of the active language; stays undefined until setSpellChckerLang resolves
+
+// GETs `url` and resolves to the response text; rejects on anything else.
+function request (url) {
+  return new Promise((resolve, reject) => {
+    const req = new XMLHttpRequest()
+    const fail = () => reject(new Error(`GET ${url} failed (status ${req.status})`))
+    req.open('GET', url, true)
+    // A non-200 response or a network error must reject, or awaiting callers hang.
+    req.onload = () => (req.status === 200 ? resolve(req.responseText) : fail())
+    req.onerror = fail
+    req.send(null)
+  })
+}
+
+// Sequential async map: awaits fn(item) for one item before starting the next,
+// and resolves to the results in iteration order.
+async function runSeriesP (iterables, fn) {
+  const collected = []
+  for (const item of iterables) collected.push(await fn(item))
+  return collected
+}
+
+// Resolves to fn's results over `iterables`, run one at a time and in order.
+// runSeriesP is async, so its promise is returned as-is (no extra wrapper).
+function mapSeriesP (iterables, fn) {
+  return runSeriesP(iterables, fn)
+}
+
+function createTypo (lang, affData, dicData) { // affData/dicData: text of the language's .aff and .dic files
+  return new Typo(lang, affData, dicData, { platform: 'any' }) // NOTE(review): meaning of platform 'any' not visible here — confirm against typo-js
+}
+
+/**
+ * Returns the Typo instance for `lang`, downloading its dictionary files and
+ * caching the instance on first use.
+ *
+ * @param {string} lang key of `dictionaryDownloadUrls`
+ * @returns {Promise<Typo>}
+ * @throws {Error} when no download URLs are registered for `lang`
+ */
+async function findOrCreateTypoInstance (lang) {
+  const cached = typoMap.get(lang)
+  if (cached) return cached
+
+  const urls = dictionaryDownloadUrls[lang]
+  if (!urls) throw new Error(`No dictionary URLs registered for language "${lang}"`)
+
+  const [affData, dicData] = await mapSeriesP([urls.aff, urls.dic], request)
+  const instance = createTypo(lang, affData, dicData)
+  typoMap.set(lang, instance)
+  return instance
+}
+
+/* Worker exposed methods */
+
+export function initializeDictionaryUrls (urls) { // urls: object keyed by language, each entry holding `aff` and `dic` URLs
+  dictionaryDownloadUrls = urls // replaces the whole table; already-built Typo instances in typoMap are kept
+}
+
+/**
+ * @param {string} lang language whose Typo instance becomes the module-level `typo` read by `check`
+ */
+export async function setSpellChckerLang (lang) { // NOTE(review): "Chcker" looks like a typo of "Checker"; renaming would change the exported name
+  typo = await findOrCreateTypoInstance(lang)
+}
+
+/**
+ * Spell-checks `text` against the dictionary of the currently selected language.
+ *
+ * @param {string} text
+ * @returns {Array} the misspelled tokens (word, ch, lineNumber) with
+ *   `severity: 'error'` added; empty while no dictionary has been loaded
+ */
+export function check (text) {
+  const tokens = tokenizer(text)
+  // Without a loaded dictionary nothing can be judged, so nothing is reported.
+  if (!typo) {
+    return []
+  }
+  // `typo.check` takes the word itself, not the whole token object.
+  return tokens
+    .filter(token => !typo.check(token.word))
+    .map(token => ({ ...token, severity: 'error' }))
+}
+
diff --git a/public/js/lib/editor/spellcheck/spellchecker.js b/public/js/lib/editor/spellcheck/spellchecker.js
new file mode 100644
index 00000000..d92f9682
--- /dev/null
+++ b/public/js/lib/editor/spellcheck/spellchecker.js
@@ -0,0 +1,57 @@
+import { serverurl } from '../../config'
+import worker from './spellcheck.worker'
+
+// Instance produced by the default export of './spellcheck.worker'.
+const spellcheckWorker = worker()
+
+// Pinned snapshot of wooorm/dictionaries, served through rawcdn.githack.com.
+const wooormBase = 'https://rawcdn.githack.com/wooorm/dictionaries/143091715eebbbdfa0e8936e117f9182514eebe6/dictionaries'
+// Builds the { aff, dic } URL pair for one dictionary folder of that snapshot.
+const wooormDictionary = folder => ({
+  aff: `${wooormBase}/${folder}/index.aff`,
+  dic: `${wooormBase}/${folder}/index.dic`
+})
+// Language code → download URLs of its .aff and .dic dictionary files.
+const dictionaryDownloadUrls = {
+  en_US: {
+    aff: `${serverurl}/vendor/codemirror-spell-checker/en_US.aff`,
+    dic: `${serverurl}/vendor/codemirror-spell-checker/en_US.dic`
+  },
+  de: wooormDictionary('de'),
+  de_AT: wooormDictionary('de-AT'),
+  de_CH: wooormDictionary('de-CH')
+}
+
+export const supportLanguages = Object.keys(dictionaryDownloadUrls)
+
+// Registers the spell checker as CodeMirror's 'lint' helper for markdown.
+// The leading semicolon is required: this file omits statement terminators, so
+// a bare "(" here would be parsed as a call on the `Object.keys(...)` result of
+// the preceding export and throw a TypeError as soon as the module loads.
+;(function (mod) {
+  mod(CodeMirror)
+})(function (CodeMirror) {
+  /**
+   * Asks the spell-check worker for misspelled tokens and converts them into
+   * lint annotations.
+   *
+   * NOTE(review): assumes the worker proxy returns a promise from `check` and
+   * that the bundled lint addon accepts a promise from its helper — confirm.
+   *
+   * @param {string} text editor content to check
+   * @returns {Promise<Array>} one annotation per misspelled word
+   */
+  async function validator (text) {
+    const misspelled = await spellcheckWorker.check(text)
+    return misspelled.map(({ word, ch, lineNumber, severity }) => ({
+      // Plain-text `message` rather than `messageHTML`: `word` is user input.
+      message: `Unknown word: ${word}`,
+      severity,
+      // `ch` is the column where the word starts on line `lineNumber`.
+      from: CodeMirror.Pos(lineNumber, ch),
+      to: CodeMirror.Pos(lineNumber, ch + word.length)
+    }))
+  }
+
+  CodeMirror.registerHelper('lint', 'markdown', validator)
+})
diff --git a/public/js/lib/editor/spellcheck/tokenizer.js b/public/js/lib/editor/spellcheck/tokenizer.js
new file mode 100644
index 00000000..0cb026d4
--- /dev/null
+++ b/public/js/lib/editor/spellcheck/tokenizer.js
@@ -0,0 +1,70 @@
+/**
+ * Forward-only character cursor over one string.
+ * `index` is the position of the last character consumed by `next()`; it
+ * starts at -1, i.e. before the first character.
+ */
+class Stream {
+  constructor (text) {
+    const isString = typeof text === 'string'
+    if (!isString) {
+      throw TypeError('text should be string')
+    }
+
+    this.text = text
+    this.index = -1
+    this.length = text.length
+  }
+
+  // Returns the character after the cursor without moving, or null at the end.
+  peek () {
+    const ahead = this.index + 1
+    return ahead < this.length ? this.text[ahead] : null
+  }
+
+  // Moves the cursor one step and returns the character there, or null once
+  // past the end (the index still advances in that case).
+  next () {
+    this.index += 1
+    const exhausted = this.index >= this.length
+    return exhausted ? null : this.text[this.index]
+  }
+}
+
+/** @typedef {{ word: string, ch: number, lineNumber: number }} Token */
+/**
+ * Splits text into word tokens, line by line. A word is a maximal run of
+ * characters not in the delimiter list below; `ch` is the 0-based column
+ * where it starts and `lineNumber` the 0-based index of its line.
+ *
+ * @param {string} text
+ * @returns {Token[]}
+ */
+export function tokenizer (text) {
+  const delimiters = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~ '
+  const tokens = []
+
+  text.split('\n').forEach((line, lineNumber) => {
+    const stream = new Stream(line)
+    let column = 0
+    let word = ''
+
+    // Emits the pending word (which ends just before `column`) and resets it.
+    const flush = () => {
+      if (word.length > 0) tokens.push({ word, ch: column - word.length, lineNumber })
+      word = ''
+    }
+
+    for (let char = stream.next(); char != null; char = stream.next()) {
+      if (delimiters.includes(char)) {
+        flush()
+      } else {
+        word += char
+      }
+      column += 1
+    }
+    // A line that ends in a word has no closing delimiter, so emit it here.
+    flush()
+  })
+
+  return tokens
+}