WIP: impl spellcheck worker

2025-01-12 01:44:15 +00:00 · 2019-11-30 22:10:18 +08:00 · 2019-11-30 22:10:18 +08:00 · 5d596fbb52
commit 5d596fbb52
parent 69bed30662
3 changed files with 218 additions and 0 deletions
--- a/public/js/lib/editor/spellcheck/spellcheck.worker.js
+++ b/public/js/lib/editor/spellcheck/spellcheck.worker.js
@ -0,0 +1,91 @@
 import Typo from 'typo-js'
 import { tokenizer } from './tokenizer'
 let dictionaryDownloadUrls = {}
 const typoMap = new Map()
 let typo
 /**
 * Download a text resource with an asynchronous GET request.
 *
 * @param {string} url address of the resource to fetch
 * @returns {Promise<string>} resolves with the response text on HTTP 200;
 *   rejects with an Error on any other status or on a network failure
 */
 function request (url) {
  return new Promise((resolve, reject) => {
    const req = new XMLHttpRequest()
    req.open('GET', url, true)
    req.onload = () => {
      if (req.readyState === 4 && req.status === 200) {
        resolve(req.responseText)
      } else {
        // settle the promise, otherwise callers would wait forever
        reject(new Error(`Request to ${url} failed with status ${req.status}`))
      }
    }
    req.onerror = () => {
      reject(new Error(`Request to ${url} failed: network error`))
    }
    req.send(null)
  })
 }
 /**
 * Call `fn` on every item in order, waiting for each call to settle
 * before starting the next one.
 *
 * @param {Iterable} iterables items to process
 * @param {Function} fn callback applied to each item; may return a promise
 * @returns {Promise<Array>} results in the same order as the input
 */
 async function runSeriesP (iterables, fn) {
  const collected = []
  for (const item of iterables) {
    const outcome = await fn(item)
    collected.push(outcome)
  }
  return collected
 }
 /**
 * Map `fn` over `iterables` one item at a time (see runSeriesP).
 *
 * @param {Iterable} iterables items to process
 * @param {Function} fn callback applied to each item; may return a promise
 * @returns {Promise<Array>} results in input order
 */
 function mapSeriesP (iterables, fn) {
  // runSeriesP is async and already returns a promise, so wrapping it in
  // another `new Promise` adds nothing
  return runSeriesP(iterables, fn)
 }
 /**
 * Build a Typo instance from dictionary data that is already in memory.
 *
 * @param {string} lang dictionary identifier passed to Typo
 * @param {string} affData contents of the .aff file
 * @param {string} dicData contents of the .dic file
 */
 function createTypo (lang, affData, dicData) {
  const settings = { platform: 'any' }
  return new Typo(lang, affData, dicData, settings)
 }
 /**
 * Return the Typo instance for a language, downloading its dictionary
 * files and caching the instance in `typoMap` on first use.
 *
 * @param {string} lang key into `dictionaryDownloadUrls`
 * @returns {Promise<Typo>} the cached or newly created instance
 * @throws {Error} when no dictionary URLs are registered for `lang`
 */
 async function findOrCreateTypoInstance (lang) {
  // reuse an instance created by an earlier call
  const cached = typoMap.get(lang)
  if (cached) {
    return cached
  }
  const urls = dictionaryDownloadUrls[lang]
  if (!urls) {
    // fail with a clear message instead of "cannot read properties of undefined"
    throw new Error(`No spellcheck dictionary registered for language "${lang}"`)
  }
  const [affData, dicData] = await mapSeriesP([urls.aff, urls.dic], request)
  const created = createTypo(lang, affData, dicData)
  typoMap.set(lang, created)
  return created
 }
 /* Worker exposed methods */
 /**
 * Replace the table of dictionary download URLs used by this module.
 *
 * @param {Object} urls map of language key to an object with `aff` and `dic` URLs
 */
 export function initializeDictionaryUrls (urls) {
  // the object is stored by reference, not copied
  dictionaryDownloadUrls = urls
 }
 /**
 * Select the dictionary used by `check`, loading it first if needed.
 *
 * @param {string} lang language key understood by findOrCreateTypoInstance
 * @returns {Promise<void>} settles once the instance has been stored in `typo`
 */
 export async function setSpellChckerLang (lang) {
  const instance = await findOrCreateTypoInstance(lang)
  typo = instance
 }
 /**
 * Spell-check a piece of text with the currently selected dictionary.
 *
 * @param {string} text text to tokenize and check
 * @returns {Array} the tokens rejected by the dictionary, each copied with
 *   `severity: 'error'` added; empty when no dictionary has been selected
 */
 export function check (text) {
  const tokens = tokenizer(text)
  if (!typo) {
    // no dictionary selected yet, so nothing can be flagged
    return []
  }
  return tokens
    // look up the token's own word (the original referenced an undefined `word`)
    .filter(token => !typo.check(token.word))
    .map(token => ({ ...token, severity: 'error' }))
 }
--- a/public/js/lib/editor/spellcheck/spellchecker.js
+++ b/public/js/lib/editor/spellcheck/spellchecker.js
@ -0,0 +1,57 @@
 import { serverurl } from '../../config'
 import worker from './spellcheck.worker'
 // Instance produced by the factory imported from './spellcheck.worker'.
 const spellcheckWorker = worker()
 // en_US is served from this server's own vendor directory.
 const enUSDictionary = {
  aff: `${serverurl}/vendor/codemirror-spell-checker/en_US.aff`,
  dic: `${serverurl}/vendor/codemirror-spell-checker/en_US.dic`
 }
 // The German variants point at a pinned revision of wooorm/dictionaries.
 const deDictionary = {
  aff: 'https://rawcdn.githack.com/wooorm/dictionaries/143091715eebbbdfa0e8936e117f9182514eebe6/dictionaries/de/index.aff',
  dic: 'https://rawcdn.githack.com/wooorm/dictionaries/143091715eebbbdfa0e8936e117f9182514eebe6/dictionaries/de/index.dic'
 }
 const deATDictionary = {
  aff: 'https://rawcdn.githack.com/wooorm/dictionaries/143091715eebbbdfa0e8936e117f9182514eebe6/dictionaries/de-AT/index.aff',
  dic: 'https://rawcdn.githack.com/wooorm/dictionaries/143091715eebbbdfa0e8936e117f9182514eebe6/dictionaries/de-AT/index.dic'
 }
 const deCHDictionary = {
  aff: 'https://rawcdn.githack.com/wooorm/dictionaries/143091715eebbbdfa0e8936e117f9182514eebe6/dictionaries/de-CH/index.aff',
  dic: 'https://rawcdn.githack.com/wooorm/dictionaries/143091715eebbbdfa0e8936e117f9182514eebe6/dictionaries/de-CH/index.dic'
 }
 // Language key -> URLs of the Hunspell .aff/.dic pair for that language.
 const dictionaryDownloadUrls = {
  en_US: enUSDictionary,
  de: deDictionary,
  de_AT: deATDictionary,
  de_CH: deCHDictionary
 }
 // Language keys for which a dictionary is configured above.
 export const supportLanguages = Object.keys(dictionaryDownloadUrls)
 // The leading semicolon is required. The previous statement ends with `)`
 // and this one starts with `(`, so without it the parser reads both lines
 // as one call chain and tries to call the result of the statement above.
 ;(function (mod) {
  // CodeMirror is read from the enclosing (global) scope
  mod(CodeMirror)
 })(function (CodeMirror) {
  // WIP placeholder: the worker is referenced here but not used yet
  spellcheckWorker
  /**
  * Lint helper registered for markdown: converts lint results into
  * CodeMirror annotations.
  *
  * NOTE(review): `lint` is neither defined nor imported in the visible part
  * of this file, so calling this would throw a ReferenceError. It is
  * presumably meant to be replaced by the spellcheck worker. TODO confirm.
  *
  * @param {string} text editor content to validate
  * @returns {Array} annotations with `messageHTML`, `severity`, `from`, `to`
  */
  function validator (text) {
    return lint(text).map(error => {
      const {
        ruleNames,
        ruleDescription,
        lineNumber: ln,
        errorRange
      } = error
      // each reported position is shifted down by one before use
      const lineNumber = ln - 1
      // defaults used when the result carries no errorRange
      let start = 0; let end = -1
      if (errorRange) {
        [start, end] = errorRange.map(r => r - 1)
      }
      return {
        messageHTML: `${ruleNames.join('/')}: ${ruleDescription}`,
        severity: 'error',
        from: CodeMirror.Pos(lineNumber, start),
        to: CodeMirror.Pos(lineNumber, end)
      }
    })
  }
  CodeMirror.registerHelper('lint', 'markdown', validator)
 })
--- a/public/js/lib/editor/spellcheck/tokenizer.js
+++ b/public/js/lib/editor/spellcheck/tokenizer.js
@ -0,0 +1,70 @@
 /**
 * Forward-only character reader over a string.
 *
 * `index` starts at -1, i.e. before the first character.
 */
 class Stream {
  /**
  * @param {string} text string to read from
  * @throws {TypeError} when `text` is not a string
  */
  constructor (text) {
    const isString = typeof text === 'string'
    if (!isString) {
      throw TypeError('text should be string')
    }
    this.text = text
    this.index = -1
    this.length = text.length
  }

  /**
  * Look at the character after the current position without advancing.
  * @returns {string|null} that character, or null past the end
  */
  peek () {
    const ahead = this.index + 1
    return ahead < this.length ? this.text[ahead] : null
  }

  /**
  * Advance by one position and return the character there.
  * @returns {string|null} that character, or null past the end
  */
  next () {
    this.index += 1
    return this.index < this.length ? this.text[this.index] : null
  }
 }
 /** @typedef {{ word: string, ch: number, lineNumber: number }} Token */
 /**
 * Split text into word tokens, line by line.
 *
 * A word is a maximal run of characters that are not delimiters. `ch` is
 * the zero-based column where the word starts and `lineNumber` is the
 * zero-based index of its line.
 *
 * @param {string} text
 * @returns {Token[]}
 */
 export function tokenizer (text) {
  // Characters that end a word. The apostrophe is not listed, so a
  // contraction such as "don't" stays a single token. Tab and carriage
  // return are included so tab-separated words and CRLF line endings do
  // not leak whitespace into a word.
  const delimiters = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~ \t\r'
  const tokens = []
  text.split('\n').forEach((line, lineIndex) => {
    const stream = new Stream(line)
    let column = 0
    let word = ''
    // emit the word collected so far; `column` is one past its last character
    const flush = () => {
      if (word.length > 0) {
        tokens.push({
          word,
          ch: column - word.length,
          lineNumber: lineIndex
        })
        word = ''
      }
    }
    let ch
    while ((ch = stream.next()) != null) {
      if (delimiters.includes(ch)) {
        flush()
      } else {
        word += ch
      }
      column += 1
    }
    // a line that does not end with a delimiter still holds a pending word
    flush()
  })
  return tokens
 }