From 5d596fbb521f5a8e16012932e04f8cbbdb949569 Mon Sep 17 00:00:00 2001
From: Yukai Huang
Date: Sat, 30 Nov 2019 22:10:18 +0800
Subject: [PATCH] WIP: impl spellchek worker

---
Review note: line structure restored and the added code amended during
review; the blob hashes on the "index" lines below still belong to the
original revision and were not regenerated.

 .../editor/spellcheck/spellcheck.worker.js    | 142 +++++++++++++++++++
 .../js/lib/editor/spellcheck/spellchecker.js  |  63 ++++++++
 public/js/lib/editor/spellcheck/tokenizer.js  | 107 ++++++++++++++
 3 files changed, 312 insertions(+)
 create mode 100644 public/js/lib/editor/spellcheck/spellcheck.worker.js
 create mode 100644 public/js/lib/editor/spellcheck/spellchecker.js
 create mode 100644 public/js/lib/editor/spellcheck/tokenizer.js

diff --git a/public/js/lib/editor/spellcheck/spellcheck.worker.js b/public/js/lib/editor/spellcheck/spellcheck.worker.js
new file mode 100644
index 00000000..6e84eebd
--- /dev/null
+++ b/public/js/lib/editor/spellcheck/spellcheck.worker.js
@@ -0,0 +1,142 @@
+import Typo from 'typo-js'
+import { tokenizer } from './tokenizer'
+
+// Language code -> { aff, dic } dictionary download urls.
+let dictionaryDownloadUrls = {}
+// Typo instances that were already created, keyed by language code.
+const typoMap = new Map()
+// Typo instance of the currently selected language (unset until loaded).
+let typo
+
+/**
+ * Download a text resource with a GET request.
+ *
+ * @param {string} url
+ * @returns {Promise<string>} resolves with the response text on HTTP 200,
+ *   rejects on any other status or on a network error
+ */
+function request (url) {
+  return new Promise((resolve, reject) => {
+    const req = new XMLHttpRequest()
+    req.open('GET', url, true)
+    req.onload = () => {
+      if (req.readyState === 4 && req.status === 200) {
+        resolve(req.responseText)
+      } else {
+        // reject instead of leaving the promise pending forever
+        reject(new Error(`Failed to load ${url} (status ${req.status})`))
+      }
+    }
+    req.onerror = () => {
+      reject(new Error(`Failed to load ${url} (network error)`))
+    }
+    req.send(null)
+  })
+}
+
+/**
+ * Call `fn` with each item in turn, waiting for one call to finish before
+ * starting the next.
+ *
+ * @param {Iterable<*>} iterables
+ * @param {function(*): *} fn may return a promise
+ * @returns {Promise<Array<*>>} the results, in input order
+ */
+async function runSeriesP (iterables, fn) {
+  const results = []
+  for (const iterable of iterables) {
+    results.push(await fn(iterable))
+  }
+  return results
+}
+
+/**
+ * Map `fn` over `iterables` sequentially; see runSeriesP.
+ *
+ * @param {Iterable<*>} iterables
+ * @param {function(*): *} fn may return a promise
+ * @returns {Promise<Array<*>>}
+ */
+function mapSeriesP (iterables, fn) {
+  // runSeriesP is async and already returns a promise, no wrapper needed
+  return runSeriesP(iterables, fn)
+}
+
+/**
+ * Create a Typo instance from dictionary data that is already in memory.
+ *
+ * @param {string} lang
+ * @param {string} affData content of the .aff file
+ * @param {string} dicData content of the .dic file
+ */
+function createTypo (lang, affData, dicData) {
+  return new Typo(lang, affData, dicData, { platform: 'any' })
+}
+
+/**
+ * Return the Typo instance for `lang`, downloading the dictionary files and
+ * creating the instance on first use.
+ *
+ * @param {string} lang
+ * @returns {Promise<Typo>}
+ * @throws {TypeError} when no download urls are registered for `lang`
+ */
+async function findOrCreateTypoInstance (lang) {
+  // find existing typo instance
+  const cached = typoMap.get(lang)
+  if (cached) {
+    return cached
+  }
+
+  const urls = dictionaryDownloadUrls[lang]
+  if (!urls) {
+    throw new TypeError(`No dictionary urls registered for language: ${lang}`)
+  }
+
+  const [affData, dicData] = await mapSeriesP([urls.aff, urls.dic], request)
+
+  const created = createTypo(lang, affData, dicData)
+  typoMap.set(lang, created)
+
+  return created
+}
+
+/* Worker exposed methods */
+
+/**
+ * Replace the table of dictionary download urls.
+ *
+ * @param {Object<string, { aff: string, dic: string }>} urls
+ */
+export function initializeDictionaryUrls (urls) {
+  dictionaryDownloadUrls = urls
+}
+
+/**
+ * Select the language used by `check`, loading its dictionary when needed.
+ *
+ * @param {string} lang
+ */
+export async function setSpellChckerLang (lang) {
+  typo = await findOrCreateTypoInstance(lang)
+}
+
+/**
+ * Spell-check `text` against the currently selected dictionary.
+ *
+ * @param {string} text
+ * @returns {Array<Object>} the tokens whose word fails the check, each with
+ *   `severity: 'error'` added; empty when no dictionary is loaded yet
+ */
+export function check (text) {
+  const tokens = tokenizer(text)
+
+  // nothing can be flagged before a dictionary is loaded
+  if (!typo) {
+    return []
+  }
+
+  return tokens
+    .filter(token => !typo.check(token.word))
+    .map(token => ({ ...token, severity: 'error' }))
+}
diff --git a/public/js/lib/editor/spellcheck/spellchecker.js b/public/js/lib/editor/spellcheck/spellchecker.js
new file mode 100644
index 00000000..d92f9682
--- /dev/null
+++ b/public/js/lib/editor/spellcheck/spellchecker.js
@@ -0,0 +1,63 @@
+import { serverurl } from '../../config'
+import worker from './spellcheck.worker'
+
+const spellcheckWorker = worker()
+
+// Where the .aff/.dic dictionary files of each language are downloaded from.
+const dictionaryDownloadUrls = {
+  en_US: {
+    aff: `${serverurl}/vendor/codemirror-spell-checker/en_US.aff`,
+    dic: `${serverurl}/vendor/codemirror-spell-checker/en_US.dic`
+  },
+  de: {
+    aff: 'https://rawcdn.githack.com/wooorm/dictionaries/143091715eebbbdfa0e8936e117f9182514eebe6/dictionaries/de/index.aff',
+    dic: 'https://rawcdn.githack.com/wooorm/dictionaries/143091715eebbbdfa0e8936e117f9182514eebe6/dictionaries/de/index.dic'
+  },
+  de_AT: {
+    aff: 'https://rawcdn.githack.com/wooorm/dictionaries/143091715eebbbdfa0e8936e117f9182514eebe6/dictionaries/de-AT/index.aff',
+    dic: 'https://rawcdn.githack.com/wooorm/dictionaries/143091715eebbbdfa0e8936e117f9182514eebe6/dictionaries/de-AT/index.dic'
+  },
+  de_CH: {
+    aff: 'https://rawcdn.githack.com/wooorm/dictionaries/143091715eebbbdfa0e8936e117f9182514eebe6/dictionaries/de-CH/index.aff',
+    dic: 'https://rawcdn.githack.com/wooorm/dictionaries/143091715eebbbdfa0e8936e117f9182514eebe6/dictionaries/de-CH/index.dic'
+  }
+}
+
+export const supportLanguages = Object.keys(dictionaryDownloadUrls)
+
+// The leading semicolon is required: without it the parenthesis below is
+// parsed as a call of the Object.keys(...) result above (no ASI happens).
+;(function (mod) {
+  mod(CodeMirror)
+})(function (CodeMirror) {
+  // NOTE(review): the worker is not used yet, this statement has no effect.
+  spellcheckWorker
+
+  // NOTE(review): `lint` is not defined or imported in this file; unless a
+  // global of that name exists this throws a ReferenceError - TODO wire up.
+  function validator (text) {
+    return lint(text).map(error => {
+      const {
+        ruleNames,
+        ruleDescription,
+        lineNumber: ln,
+        errorRange
+      } = error
+      const lineNumber = ln - 1
+
+      let start = 0; let end = -1
+      if (errorRange) {
+        [start, end] = errorRange.map(r => r - 1)
+      }
+
+      return {
+        messageHTML: `${ruleNames.join('/')}: ${ruleDescription}`,
+        severity: 'error',
+        from: CodeMirror.Pos(lineNumber, start),
+        to: CodeMirror.Pos(lineNumber, end)
+      }
+    })
+  }
+
+  CodeMirror.registerHelper('lint', 'markdown', validator)
+})
diff --git a/public/js/lib/editor/spellcheck/tokenizer.js b/public/js/lib/editor/spellcheck/tokenizer.js
new file mode 100644
index 00000000..0cb026d4
--- /dev/null
+++ b/public/js/lib/editor/spellcheck/tokenizer.js
@@ -0,0 +1,107 @@
+/**
+ * Minimal forward-only character reader over a single string.
+ */
+class Stream {
+  /**
+   * @param {string} text
+   * @throws {TypeError} when `text` is not a string
+   */
+  constructor (text) {
+    if (typeof text !== 'string') {
+      throw new TypeError('text should be string')
+    }
+
+    this.text = text
+    // index of the last character returned by next(), -1 before the first
+    this.index = -1
+    this.length = text.length
+  }
+
+  /**
+   * @returns {?string} the upcoming character without consuming it, or null
+   *   at the end of the text
+   */
+  peek () {
+    const peekIndex = this.index + 1
+
+    if (peekIndex >= this.length) {
+      return null
+    } else {
+      return this.text[peekIndex]
+    }
+  }
+
+  /**
+   * @returns {?string} the next character (consuming it), or null at the
+   *   end of the text
+   */
+  next () {
+    this.index += 1
+
+    if (this.index >= this.length) {
+      return null
+    } else {
+      return this.text[this.index]
+    }
+  }
+}
+
+// Punctuation that separates words. The apostrophe is not listed, so words
+// like "don't" stay in one piece.
+const WORD_SEPARATORS = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~ '
+
+/**
+ * @param {string} ch a single character
+ * @returns {boolean} true when `ch` ends the current word
+ */
+function isSeparator (ch) {
+  // \s also covers tabs and the \r left over from CRLF line endings
+  return WORD_SEPARATORS.includes(ch) || /\s/.test(ch)
+}
+
+/** @typedef {{ word: string, ch: number, lineNumber: number }} Token */
+/**
+ * Split `text` into words and record where each word starts.
+ *
+ * @param {string} text
+ * @returns {Token[]} one token per word; `ch` is the zero-based column of
+ *   its first character and `lineNumber` the zero-based line index
+ */
+export function tokenizer (text) {
+  const tokens = []
+
+  text.split('\n').forEach((line, lineIndex) => {
+    const stream = new Stream(line)
+    let ch
+    let column = 0
+    let word = ''
+
+    // emit the word collected so far (if any); it ends just before `column`
+    const flush = () => {
+      if (word.length > 0) {
+        tokens.push({
+          word,
+          ch: column - word.length,
+          lineNumber: lineIndex
+        })
+      }
+      word = ''
+    }
+
+    while ((ch = stream.peek()) != null) {
+      if (isSeparator(ch)) {
+        flush()
+      } else {
+        word += ch
+      }
+
+      stream.next()
+      column += 1
+    }
+
+    // a word that runs up to the end of the line has no separator after it
+    flush()
+  })
+
+  return tokens
+}