WIP: implement spellcheck worker

This commit is contained in:
Yukai Huang 2019-11-30 22:10:18 +08:00
parent 69bed30662
commit 5d596fbb52
No known key found for this signature in database
GPG Key ID: D4D3B2F0E99D4914
3 changed files with 218 additions and 0 deletions

View File

@ -0,0 +1,91 @@
import Typo from 'typo-js'
import { tokenizer } from './tokenizer'
// Language code -> { aff, dic } download URLs. Replaced wholesale by
// initializeDictionaryUrls() and read by findOrCreateTypoInstance().
let dictionaryDownloadUrls = {}
// Cache of already-built Typo instances, keyed by language code.
const typoMap = new Map()
// Spell checker used by check(); stays undefined until setSpellChckerLang()
// has resolved at least once.
let typo
/**
 * Download a text resource with an asynchronous GET request.
 *
 * @param {string} url - Address of the resource to fetch.
 * @returns {Promise<string>} Resolves with the response body when the server
 * answers with HTTP 200. Rejects with an Error for any other status or when
 * the request fails at the network level.
 */
function request (url) {
  return new Promise((resolve, reject) => {
    const req = new XMLHttpRequest()
    req.open('GET', url, true)
    req.onload = () => {
      if (req.readyState === 4 && req.status === 200) {
        resolve(req.responseText)
      } else {
        // Without this branch a 404/500 would leave the promise pending
        // forever and every caller awaiting it would hang.
        reject(new Error(`Request to ${url} failed with status ${req.status}`))
      }
    }
    // `load` never fires on network-level failures, so they are reported here.
    req.onerror = () => {
      reject(new Error(`Network error while requesting ${url}`))
    }
    req.send(null)
  })
}
/**
 * Apply `fn` to every item one after another, waiting for each call to settle
 * before starting the next.
 *
 * @param {Iterable<*>} iterables - Items to process, in order.
 * @param {function(*): *} fn - Called once per item; may return a promise.
 * @returns {Promise<Array<*>>} The settled values of `fn`, in input order.
 */
async function runSeriesP (iterables, fn) {
  const collected = []
  for (const item of iterables) {
    // Awaiting inside the loop is deliberate: it serialises the calls.
    const outcome = await fn(item)
    collected.push(outcome)
  }
  return collected
}
/**
 * Map `fn` over `iterables` sequentially.
 *
 * Thin alias for runSeriesP: the promise it produces is handed back directly.
 *
 * @param {Iterable<*>} iterables - Items to process, in order.
 * @param {function(*): *} fn - Called once per item; may return a promise.
 * @returns {Promise<Array<*>>} The settled values of `fn`, in input order.
 */
function mapSeriesP (iterables, fn) {
  return runSeriesP(iterables, fn)
}
/**
 * Build a Typo spell checker from dictionary data that is already in memory.
 *
 * @param {string} lang - Language code handed to Typo as the dictionary name.
 * @param {string} affData - Contents of the .aff file.
 * @param {string} dicData - Contents of the .dic file.
 * @returns {Typo} The new Typo instance.
 */
function createTypo (lang, affData, dicData) {
  const settings = { platform: 'any' }
  const instance = new Typo(lang, affData, dicData, settings)
  return instance
}
/**
 * Return the Typo spell checker for a language, building and caching it on
 * first use.
 *
 * On a cache miss the .aff and .dic files registered for `lang` are downloaded
 * (one after the other) and turned into a Typo instance, which is stored in
 * `typoMap` so later calls return the same object without any network access.
 *
 * @param {string} lang - Key into `dictionaryDownloadUrls`.
 * @returns {Promise<Typo>} The cached or freshly created instance.
 * @throws {Error} (as a rejection) when no dictionary URLs are registered for
 * `lang`, or when a download fails.
 */
async function findOrCreateTypoInstance (lang) {
  const cached = typoMap.get(lang)
  if (cached) {
    return cached
  }

  // Fail with a readable message instead of a TypeError on `undefined.aff`.
  const urls = (dictionaryDownloadUrls || {})[lang]
  if (!urls || !urls.aff || !urls.dic) {
    throw new Error(`No spell check dictionary registered for language "${lang}"`)
  }

  const [affData, dicData] = await mapSeriesP([urls.aff, urls.dic], request)
  // Named `instance` so it does not shadow the module-level `typo`.
  const instance = createTypo(lang, affData, dicData)
  typoMap.set(lang, instance)
  return instance
}
/* Worker exposed methods */
/**
 * Install the table of dictionary download locations used when a language is
 * loaded. The previous table is discarded, not merged.
 *
 * @param {Object<string, {aff: string, dic: string}>} urls - Language code
 * mapped to the URLs of its .aff and .dic files.
 */
export function initializeDictionaryUrls (urls) {
  dictionaryDownloadUrls = urls
}
/**
 * Select the language that subsequent check() calls validate against.
 *
 * The active checker is only swapped once the instance for `lang` is
 * available; if obtaining it fails, the previous checker stays in place and
 * the returned promise rejects.
 *
 * @param {string} lang - Language code to activate.
 * @returns {Promise<void>}
 */
export async function setSpellChckerLang (lang) {
  const instance = await findOrCreateTypoInstance(lang)
  typo = instance
}
/**
 * Spell-check a text and report every token the active dictionary rejects.
 *
 * @param {string} text - Text to check.
 * @returns {Array<Object>} One entry per misspelled token: the token's own
 * fields as produced by tokenizer() plus `severity: 'error'`. Empty when no
 * language has been activated yet.
 */
export function check (text) {
  // No dictionary loaded yet, so nothing can be flagged.
  if (!typo) {
    return []
  }
  return tokenizer(text)
    // The word to look up lives on the token itself.
    .filter(token => !typo.check(token.word))
    .map(token => ({ ...token, severity: 'error' }))
}

View File

@ -0,0 +1,57 @@
import { serverurl } from '../../config'
import worker from './spellcheck.worker'
// Worker instance created from the imported spellcheck.worker factory.
const spellcheckWorker = worker()

// Common prefix of the externally hosted dictionaries.
// NOTE(review): the long hex path segment looks like a pinned commit of the
// wooorm/dictionaries repository; confirm before bumping it.
const wooormDictionariesBase = 'https://rawcdn.githack.com/wooorm/dictionaries/143091715eebbbdfa0e8936e117f9182514eebe6/dictionaries'

/**
 * Build the { aff, dic } URL pair for one directory of the external
 * dictionary collection.
 *
 * @param {string} directory - Directory name inside the collection, e.g. 'de-AT'.
 * @returns {{aff: string, dic: string}}
 */
function wooormDictionaryUrls (directory) {
  return {
    aff: `${wooormDictionariesBase}/${directory}/index.aff`,
    dic: `${wooormDictionariesBase}/${directory}/index.dic`
  }
}

// Language code -> where to download its dictionary files. en_US is served
// from this application's own server; the German variants come from the CDN.
const dictionaryDownloadUrls = {
  en_US: {
    aff: `${serverurl}/vendor/codemirror-spell-checker/en_US.aff`,
    dic: `${serverurl}/vendor/codemirror-spell-checker/en_US.dic`
  },
  de: wooormDictionaryUrls('de'),
  de_AT: wooormDictionaryUrls('de-AT'),
  de_CH: wooormDictionaryUrls('de-CH')
}

// Language codes for which a dictionary is available.
export const supportLanguages = Object.keys(dictionaryDownloadUrls)
// Leading semicolon: this file omits statement terminators, so without it the
// opening parenthesis below is parsed as a call on the expression that ends
// the previous statement instead of starting a new one.
;(function (mod) {
  mod(CodeMirror)
})(function (CodeMirror) {
  // TODO(WIP): the spellcheck worker is referenced here but not used by the
  // validator yet.
  spellcheckWorker

  /**
   * Convert the results of `lint(text)` into CodeMirror lint annotations.
   *
   * NOTE(review): `lint` is neither defined nor imported in this file, so
   * calling this validator throws a ReferenceError unless a global `lint`
   * exists. It presumably has to be replaced by a call into the spellcheck
   * worker; confirm the intended wiring.
   *
   * @param {string} text - Editor content to validate.
   * @returns {Array<{messageHTML: string, severity: string, from: *, to: *}>}
   */
  function validator (text) {
    return lint(text).map(error => {
      const {
        ruleNames,
        ruleDescription,
        lineNumber: ln,
        errorRange
      } = error
      // Reported line numbers and ranges are shifted down by one before being
      // handed to CodeMirror.Pos.
      const lineNumber = ln - 1
      let start = 0
      let end = -1
      if (errorRange) {
        [start, end] = errorRange.map(r => r - 1)
      }
      return {
        messageHTML: `${ruleNames.join('/')}: ${ruleDescription}`,
        severity: 'error',
        from: CodeMirror.Pos(lineNumber, start),
        to: CodeMirror.Pos(lineNumber, end)
      }
    })
  }
  CodeMirror.registerHelper('lint', 'markdown', validator)
})

View File

@ -0,0 +1,70 @@
/**
 * Minimal forward-only character reader over a single string.
 *
 * `index` is the position of the character most recently returned by next();
 * it starts at -1, meaning nothing has been consumed yet.
 */
class Stream {
  /**
   * @param {string} text - Text to read from.
   * @throws {TypeError} When `text` is not a string.
   */
  constructor (text) {
    if (typeof text !== 'string') {
      throw new TypeError('text should be string')
    }
    this.text = text
    this.index = -1
    this.length = text.length
  }

  /**
   * Look at the upcoming character without consuming it.
   *
   * @returns {?string} The next character, or null at end of text.
   */
  peek () {
    const ahead = this.index + 1
    return ahead < this.length ? this.text[ahead] : null
  }

  /**
   * Consume and return the upcoming character. The index advances even when
   * the end of the text has already been reached.
   *
   * @returns {?string} The consumed character, or null at end of text.
   */
  next () {
    this.index += 1
    return this.index < this.length ? this.text[this.index] : null
  }
}
/** @typedef {{ word: string, ch: number, lineNumber: number }} Token */
/**
 * Split text into word tokens and record where each word starts.
 *
 * The text is processed line by line (split on '\n'). A word is a maximal run
 * of characters that are neither whitespace nor one of the punctuation
 * delimiters; a word that runs up to the end of its line is emitted as well.
 *
 * @param {string} text
 * @returns {Token[]} Tokens in reading order; `ch` is the zero-based column of
 * the word's first character and `lineNumber` the zero-based line index.
 */
export function tokenizer (text) {
  // Punctuation that ends a word. The apostrophe is not listed, so it stays
  // part of a word (e.g. "don't").
  const delimiters = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~ '
  // Any whitespace (tab, CR from CRLF input, ...) also ends a word.
  const isDelimiter = ch => delimiters.includes(ch) || /\s/.test(ch)
  const tokens = []

  text.split('\n').forEach((line, lineIndex) => {
    const stream = new Stream(line)
    let column = 0
    let word = ''

    // Emit the word collected so far; `column` points just past its last char.
    const flush = () => {
      if (word.length > 0) {
        tokens.push({
          word,
          ch: column - word.length,
          lineNumber: lineIndex
        })
        word = ''
      }
    }

    let ch
    while ((ch = stream.next()) != null) {
      if (isDelimiter(ch)) {
        flush()
      } else {
        word += ch
      }
      column += 1
    }
    // A line rarely ends with a delimiter: flush the trailing word too.
    flush()
  })

  return tokens
}