add regex for all wide chars.
This commit is contained in:
parent
758eef133c
commit
ccca1092e7
|
@ -549,6 +549,16 @@ exports.wideChars = new RegExp('(['
|
|||
+ '\\uffe0-\\uffe6'
|
||||
+ '])', 'g');
|
||||
|
||||
exports.allWide = new RegExp('('
|
||||
// 0x20000 - 0x2fffd:
|
||||
+ '[\\ud840-\\ud87f][\\udc00-\\udffd]'
|
||||
+ '|'
|
||||
// 0x30000 - 0x3fffd:
|
||||
+ '[\\ud880-\\ud8bf][\\udc00-\\udffd]'
|
||||
+ '|'
|
||||
+ exports.wideChars.source.slice(1, -1)
|
||||
+ ')', 'g');
|
||||
|
||||
// Regex to detect a surrogate pair.
|
||||
// Matches one high surrogate (\ud800-\udbff) immediately followed by one low
// surrogate (\udc00-\udfff). NOTE: because of the `g` flag, .test()/.exec()
// on this shared object advance `lastIndex` between calls.
exports.surrogate = /[\ud800-\udbff][\udc00-\udfff]/g;
|
||||
|
||||
|
|
|
@ -2427,7 +2427,7 @@ Element.prototype.parseContent = function(noTags) {
|
|||
content = content.replace(unicode.wideChars, '$1_');
|
||||
} else {
|
||||
// no double-width: replace them with question-marks.
|
||||
content = content.replace(unicode.wideChars, '??');
|
||||
content = content.replace(unicode.allWide, '??');
|
||||
// delete combining characters since they're 0-width anyway.
|
||||
// NOTE: We could drop this, the non-surrogates would get changed to ? by
|
||||
// the unicode filter, and surrogates changed to ? by the surrogate
|
||||
|
|
Loading…
Reference in New Issue