handle unicode combining characters properly.
This commit is contained in:
parent
dfb87e2037
commit
d520c94c3b
12
README.md
12
README.md
|
@ -276,14 +276,14 @@ The screen on which every other node renders.
|
|||
│ box1 │ box2 │
|
||||
└─────────┴─────────┘
|
||||
```
|
||||
- __fullUnicode__ - allow for rendering of East Asian double-width characters.
|
||||
also enable proper rendering of utf-16 surrogate pairs. this allows you to
|
||||
- __fullUnicode__ - allow for rendering of East Asian double-width characters,
|
||||
utf-16 surrogate pairs, and unicode combining characters. this allows you to
|
||||
display text above the basic multilingual plane. this is behind an option
|
||||
because it may affect performance slightly negatively. without this option
|
||||
enabled, all double-width and surrogate pair characters will be replaced by
|
||||
`??` and `?` respectively. (NOTE: libvte (e.g. gnome-terminal) cannot display
|
||||
characters that are both surrogate pairs _and_ double-width properly. there
|
||||
is no way for blessed to fix this unfortunately).
|
||||
enabled, all double-width, surrogate pair, and combining characters will be
|
||||
replaced by `??`, `?`, and an empty string respectively. (NOTE: libvte (e.g. gnome-terminal)
|
||||
cannot display characters that are both surrogate pairs _and_ double-width
|
||||
properly. there is no way for blessed to fix this unfortunately).
|
||||
|
||||
##### Properties:
|
||||
|
||||
|
|
|
@ -12,7 +12,8 @@
|
|||
var program = require('./program')
|
||||
, tput = require('./tput')
|
||||
, widget = require('./widget')
|
||||
, colors = require('./colors');
|
||||
, colors = require('./colors')
|
||||
, unicode = require('./unicode');
|
||||
|
||||
/**
|
||||
* Blessed
|
||||
|
@ -24,8 +25,9 @@ function blessed() {
|
|||
|
||||
blessed.program = blessed.Program = program;
|
||||
blessed.tput = blessed.Tput = tput;
|
||||
blessed.colors = colors;
|
||||
blessed.widget = widget;
|
||||
blessed.colors = colors;
|
||||
blessed.unicode = unicode;
|
||||
|
||||
Object.keys(blessed.widget).forEach(function(name) {
|
||||
blessed[name] = blessed.widget[name];
|
||||
|
|
256
lib/unicode.js
256
lib/unicode.js
|
@ -79,6 +79,33 @@
|
|||
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
// String.fromCodePoint
|
||||
//
|
||||
// Copyright Mathias Bynens <https://mathiasbynens.be/>
|
||||
// https://github.com/mathiasbynens/String.fromCodePoint
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining
|
||||
// a copy of this software and associated documentation files (the
|
||||
// "Software"), to deal in the Software without restriction, including
|
||||
// without limitation the rights to use, copy, modify, merge, publish,
|
||||
// distribute, sublicense, and/or sell copies of the Software, and to
|
||||
// permit persons to whom the Software is furnished to do so, subject to
|
||||
// the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be
|
||||
// included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
// Local alias for String.fromCharCode; exports.fromCodePoint applies it to
// batches of UTF-16 code units.
var stringFromCharCode = String.fromCharCode;
|
||||
// Local alias for Math.floor; exports.fromCodePoint uses it to reject
// non-integer code points.
var floor = Math.floor;
|
||||
|
||||
exports.charWidth = function(str, i) {
|
||||
var point = typeof str !== 'number'
|
||||
? exports.codePointAt(str, i || 0)
|
||||
|
@ -355,7 +382,7 @@ exports.isSurrogate = function(str, i) {
|
|||
return point > 0x00ffff;
|
||||
};
|
||||
|
||||
exports.combining = [
|
||||
exports.combiningTable = [
|
||||
[ 0x0300, 0x036F ], [ 0x0483, 0x0486 ], [ 0x0488, 0x0489 ],
|
||||
[ 0x0591, 0x05BD ], [ 0x05BF, 0x05BF ], [ 0x05C1, 0x05C2 ],
|
||||
[ 0x05C4, 0x05C5 ], [ 0x05C7, 0x05C7 ], [ 0x0600, 0x0603 ],
|
||||
|
@ -404,7 +431,9 @@ exports.combining = [
|
|||
[ 0x1D173, 0x1D182 ], [ 0x1D185, 0x1D18B ], [ 0x1D1AA, 0x1D1AD ],
|
||||
[ 0x1D242, 0x1D244 ], [ 0xE0001, 0xE0001 ], [ 0xE0020, 0xE007F ],
|
||||
[ 0xE0100, 0xE01EF ]
|
||||
].reduce(function(out, row) {
|
||||
];
|
||||
|
||||
exports.combining = exports.combiningTable.reduce(function(out, row) {
|
||||
for (var i = row[0]; i <= row[1]; i++) {
|
||||
out[i] = true;
|
||||
}
|
||||
|
@ -465,6 +494,44 @@ exports.codePointAt = function(str, position) {
|
|||
return first;
|
||||
};
|
||||
|
||||
exports.fromCodePoint = function() {
|
||||
var MAX_SIZE = 0x4000;
|
||||
var codeUnits = [];
|
||||
var highSurrogate;
|
||||
var lowSurrogate;
|
||||
var index = -1;
|
||||
var length = arguments.length;
|
||||
if (!length) {
|
||||
return '';
|
||||
}
|
||||
var result = '';
|
||||
while (++index < length) {
|
||||
var codePoint = Number(arguments[index]);
|
||||
if (
|
||||
!isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity`
|
||||
codePoint < 0 || // not a valid Unicode code point
|
||||
codePoint > 0x10FFFF || // not a valid Unicode code point
|
||||
floor(codePoint) != codePoint // not an integer
|
||||
) {
|
||||
throw RangeError('Invalid code point: ' + codePoint);
|
||||
}
|
||||
if (codePoint <= 0xFFFF) { // BMP code point
|
||||
codeUnits.push(codePoint);
|
||||
} else { // Astral code point; split in surrogate halves
|
||||
// http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
|
||||
codePoint -= 0x10000;
|
||||
highSurrogate = (codePoint >> 10) + 0xD800;
|
||||
lowSurrogate = (codePoint % 0x400) + 0xDC00;
|
||||
codeUnits.push(highSurrogate, lowSurrogate);
|
||||
}
|
||||
if (index + 1 == length || codeUnits.length > MAX_SIZE) {
|
||||
result += stringFromCharCode.apply(null, codeUnits);
|
||||
codeUnits.length = 0;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
};
|
||||
|
||||
// Double width characters that are _not_ surrogate pairs.
|
||||
// NOTE: 0x20000 - 0x2fffd and 0x30000 - 0x3fffd are not necessary for this
|
||||
// regex anyway. This regex is used to put a blank char after wide chars to
|
||||
|
@ -484,3 +551,188 @@ exports.wideChars = new RegExp('(['
|
|||
|
||||
// Regex to detect a surrogate pair.
|
||||
exports.surrogate = /[\ud800-\udbff][\udc00-\udfff]/g;
|
||||
|
||||
// Regex to find combining characters.
|
||||
exports.combiningRegex = exports.combiningTable.reduce(function(out, row) {
|
||||
var low, high, range;
|
||||
if (row[0] > 0x00ffff) {
|
||||
low = exports.fromCodePoint(row[0]);
|
||||
low = [
|
||||
hexify(low.charCodeAt(0)),
|
||||
hexify(low.charCodeAt(1))
|
||||
];
|
||||
high = exports.fromCodePoint(row[1]);
|
||||
high = [
|
||||
hexify(high.charCodeAt(0)),
|
||||
hexify(high.charCodeAt(1))
|
||||
];
|
||||
range = '[\\u' + low[0] + '-' + '\\u' + high[0] + ']'
|
||||
+ '[\\u' + low[1] + '-' + '\\u' + high[1] + ']';
|
||||
if (!~out.indexOf('|')) out += ']';
|
||||
out += '|' + range;
|
||||
} else {
|
||||
low = hexify(row[0]);
|
||||
high = hexify(row[1]);
|
||||
low = '\\u' + low;
|
||||
high = '\\u' + high;
|
||||
out += low + '-' + high;
|
||||
}
|
||||
return out;
|
||||
}, '[');
|
||||
|
||||
exports.combiningRegex = new RegExp(exports.combiningRegex, 'g');
|
||||
|
||||
/**
 * Format a number as lowercase hexadecimal, left-padded with zeros to at
 * least four digits (longer values are returned unpadded).
 *
 * @param {number} n Value to format.
 * @returns {string} Hex digits, at least four characters long.
 */
function hexify(n) {
  var hex = n.toString(16);
  var missing = 4 - hex.length;
  if (missing <= 0) {
    return hex;
  }
  // `new Array(k + 1).join('0')` produces a run of k zeros.
  return new Array(missing + 1).join('0') + hex;
}
|
||||
|
||||
/*
|
||||
exports.combiningRegex = new RegExp(
|
||||
'['
|
||||
+ '\\u0300-\\u036f'
|
||||
+ '\\u0483-\\u0486'
|
||||
+ '\\u0488-\\u0489'
|
||||
+ '\\u0591-\\u05bd'
|
||||
+ '\\u05bf-\\u05bf'
|
||||
+ '\\u05c1-\\u05c2'
|
||||
+ '\\u05c4-\\u05c5'
|
||||
+ '\\u05c7-\\u05c7'
|
||||
+ '\\u0600-\\u0603'
|
||||
+ '\\u0610-\\u0615'
|
||||
+ '\\u064b-\\u065e'
|
||||
+ '\\u0670-\\u0670'
|
||||
+ '\\u06d6-\\u06e4'
|
||||
+ '\\u06e7-\\u06e8'
|
||||
+ '\\u06ea-\\u06ed'
|
||||
+ '\\u070f-\\u070f'
|
||||
+ '\\u0711-\\u0711'
|
||||
+ '\\u0730-\\u074a'
|
||||
+ '\\u07a6-\\u07b0'
|
||||
+ '\\u07eb-\\u07f3'
|
||||
+ '\\u0901-\\u0902'
|
||||
+ '\\u093c-\\u093c'
|
||||
+ '\\u0941-\\u0948'
|
||||
+ '\\u094d-\\u094d'
|
||||
+ '\\u0951-\\u0954'
|
||||
+ '\\u0962-\\u0963'
|
||||
+ '\\u0981-\\u0981'
|
||||
+ '\\u09bc-\\u09bc'
|
||||
+ '\\u09c1-\\u09c4'
|
||||
+ '\\u09cd-\\u09cd'
|
||||
+ '\\u09e2-\\u09e3'
|
||||
+ '\\u0a01-\\u0a02'
|
||||
+ '\\u0a3c-\\u0a3c'
|
||||
+ '\\u0a41-\\u0a42'
|
||||
+ '\\u0a47-\\u0a48'
|
||||
+ '\\u0a4b-\\u0a4d'
|
||||
+ '\\u0a70-\\u0a71'
|
||||
+ '\\u0a81-\\u0a82'
|
||||
+ '\\u0abc-\\u0abc'
|
||||
+ '\\u0ac1-\\u0ac5'
|
||||
+ '\\u0ac7-\\u0ac8'
|
||||
+ '\\u0acd-\\u0acd'
|
||||
+ '\\u0ae2-\\u0ae3'
|
||||
+ '\\u0b01-\\u0b01'
|
||||
+ '\\u0b3c-\\u0b3c'
|
||||
+ '\\u0b3f-\\u0b3f'
|
||||
+ '\\u0b41-\\u0b43'
|
||||
+ '\\u0b4d-\\u0b4d'
|
||||
+ '\\u0b56-\\u0b56'
|
||||
+ '\\u0b82-\\u0b82'
|
||||
+ '\\u0bc0-\\u0bc0'
|
||||
+ '\\u0bcd-\\u0bcd'
|
||||
+ '\\u0c3e-\\u0c40'
|
||||
+ '\\u0c46-\\u0c48'
|
||||
+ '\\u0c4a-\\u0c4d'
|
||||
+ '\\u0c55-\\u0c56'
|
||||
+ '\\u0cbc-\\u0cbc'
|
||||
+ '\\u0cbf-\\u0cbf'
|
||||
+ '\\u0cc6-\\u0cc6'
|
||||
+ '\\u0ccc-\\u0ccd'
|
||||
+ '\\u0ce2-\\u0ce3'
|
||||
+ '\\u0d41-\\u0d43'
|
||||
+ '\\u0d4d-\\u0d4d'
|
||||
+ '\\u0dca-\\u0dca'
|
||||
+ '\\u0dd2-\\u0dd4'
|
||||
+ '\\u0dd6-\\u0dd6'
|
||||
+ '\\u0e31-\\u0e31'
|
||||
+ '\\u0e34-\\u0e3a'
|
||||
+ '\\u0e47-\\u0e4e'
|
||||
+ '\\u0eb1-\\u0eb1'
|
||||
+ '\\u0eb4-\\u0eb9'
|
||||
+ '\\u0ebb-\\u0ebc'
|
||||
+ '\\u0ec8-\\u0ecd'
|
||||
+ '\\u0f18-\\u0f19'
|
||||
+ '\\u0f35-\\u0f35'
|
||||
+ '\\u0f37-\\u0f37'
|
||||
+ '\\u0f39-\\u0f39'
|
||||
+ '\\u0f71-\\u0f7e'
|
||||
+ '\\u0f80-\\u0f84'
|
||||
+ '\\u0f86-\\u0f87'
|
||||
+ '\\u0f90-\\u0f97'
|
||||
+ '\\u0f99-\\u0fbc'
|
||||
+ '\\u0fc6-\\u0fc6'
|
||||
+ '\\u102d-\\u1030'
|
||||
+ '\\u1032-\\u1032'
|
||||
+ '\\u1036-\\u1037'
|
||||
+ '\\u1039-\\u1039'
|
||||
+ '\\u1058-\\u1059'
|
||||
+ '\\u1160-\\u11ff'
|
||||
+ '\\u135f-\\u135f'
|
||||
+ '\\u1712-\\u1714'
|
||||
+ '\\u1732-\\u1734'
|
||||
+ '\\u1752-\\u1753'
|
||||
+ '\\u1772-\\u1773'
|
||||
+ '\\u17b4-\\u17b5'
|
||||
+ '\\u17b7-\\u17bd'
|
||||
+ '\\u17c6-\\u17c6'
|
||||
+ '\\u17c9-\\u17d3'
|
||||
+ '\\u17dd-\\u17dd'
|
||||
+ '\\u180b-\\u180d'
|
||||
+ '\\u18a9-\\u18a9'
|
||||
+ '\\u1920-\\u1922'
|
||||
+ '\\u1927-\\u1928'
|
||||
+ '\\u1932-\\u1932'
|
||||
+ '\\u1939-\\u193b'
|
||||
+ '\\u1a17-\\u1a18'
|
||||
+ '\\u1b00-\\u1b03'
|
||||
+ '\\u1b34-\\u1b34'
|
||||
+ '\\u1b36-\\u1b3a'
|
||||
+ '\\u1b3c-\\u1b3c'
|
||||
+ '\\u1b42-\\u1b42'
|
||||
+ '\\u1b6b-\\u1b73'
|
||||
+ '\\u1dc0-\\u1dca'
|
||||
+ '\\u1dfe-\\u1dff'
|
||||
+ '\\u200b-\\u200f'
|
||||
+ '\\u202a-\\u202e'
|
||||
+ '\\u2060-\\u2063'
|
||||
+ '\\u206a-\\u206f'
|
||||
+ '\\u20d0-\\u20ef'
|
||||
+ '\\u302a-\\u302f'
|
||||
+ '\\u3099-\\u309a'
|
||||
+ '\\ua806-\\ua806'
|
||||
+ '\\ua80b-\\ua80b'
|
||||
+ '\\ua825-\\ua826'
|
||||
+ '\\ufb1e-\\ufb1e'
|
||||
+ '\\ufe00-\\ufe0f'
|
||||
+ '\\ufe20-\\ufe23'
|
||||
+ '\\ufeff-\\ufeff'
|
||||
+ '\\ufff9-\\ufffb'
|
||||
+ ']'
|
||||
+ '|[\\ud802-\\ud802][\\ude01-\\ude03]'
|
||||
+ '|[\\ud802-\\ud802][\\ude05-\\ude06]'
|
||||
+ '|[\\ud802-\\ud802][\\ude0c-\\ude0f]'
|
||||
+ '|[\\ud802-\\ud802][\\ude38-\\ude3a]'
|
||||
+ '|[\\ud802-\\ud802][\\ude3f-\\ude3f]'
|
||||
+ '|[\\ud834-\\ud834][\\udd67-\\udd69]'
|
||||
+ '|[\\ud834-\\ud834][\\udd73-\\udd82]'
|
||||
+ '|[\\ud834-\\ud834][\\udd85-\\udd8b]'
|
||||
+ '|[\\ud834-\\ud834][\\uddaa-\\uddad]'
|
||||
+ '|[\\ud834-\\ud834][\\ude42-\\ude44]'
|
||||
+ '|[\\udb40-\\udb40][\\udc01-\\udc01]'
|
||||
+ '|[\\udb40-\\udb40][\\udc20-\\udc7f]'
|
||||
+ '|[\\udb40-\\udb40][\\udd00-\\uddef]'
|
||||
, 'g');
|
||||
*/
|
||||
|
|
|
@ -1214,8 +1214,6 @@ Screen.prototype.draw = function(start, end) {
|
|||
} else {
|
||||
o[++x][1] = ' ';
|
||||
}
|
||||
} else if (cwid === 0) {
|
||||
ch = ' ';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2332,8 +2330,14 @@ Element.prototype.parseContent = function(noTags) {
|
|||
// blank character after it so it doesn't eat the real next char.
|
||||
content = content.replace(unicode.wideChars, '$1 ');
|
||||
} else {
|
||||
// no double-width or surrogate pairs: replace them with question-marks.
|
||||
// no double-width: replace them with question-marks.
|
||||
content = content.replace(unicode.wideChars, '??');
|
||||
// delete combining characters since they're 0-width anyway.
|
||||
// NOTE: We could drop this, the non-surrogates would get changed to ? by
|
||||
// the unicode filter, and surrogates changed to ? by the surrogate
|
||||
// regex. however, the user might expect them to be 0-width.
|
||||
content = content.replace(unicode.combiningRegex, '');
|
||||
// no surrogate pairs: replace them with question-marks.
|
||||
content = content.replace(unicode.surrogate, '?');
|
||||
}
|
||||
|
||||
|
@ -2673,9 +2677,7 @@ main:
|
|||
if (surrogates && surrogates.length) {
|
||||
for (var j = 0; j < surrogates.length; j++) {
|
||||
var cwid = unicode.charWidth(surrogates[j], 0);
|
||||
if (cwid === 0) {
|
||||
out[i] += ' ';
|
||||
} else if (cwid === 1) {
|
||||
if (cwid === 1) {
|
||||
out[i] += ' ';
|
||||
}
|
||||
}
|
||||
|
@ -3997,9 +3999,25 @@ Element.prototype.render = function() {
|
|||
continue;
|
||||
}
|
||||
|
||||
// Handle surrogate pairs:
|
||||
// Make sure we put surrogate pair chars in one cell.
|
||||
if (this.screen.fullUnicode && content[ci - 1]) {
|
||||
// Handle combining chars:
|
||||
// Make sure they get in the same cell and are counted as 0.
|
||||
var point = unicode.codePointAt(content, ci - 1);
|
||||
if (unicode.combining[point]) {
|
||||
if (point > 0x00ffff) {
|
||||
ch = content[ci - 1] + content[ci];
|
||||
ci++;
|
||||
}
|
||||
if (x - 1 >= xi) {
|
||||
lines[y][x - 1][1] += ch;
|
||||
} else if (y - 1 >= yi) {
|
||||
lines[y - 1][xl - 1][1] += ch;
|
||||
}
|
||||
x--;
|
||||
continue;
|
||||
}
|
||||
// Handle surrogate pairs:
|
||||
// Make sure we put surrogate pair chars in one cell.
|
||||
var code = content[ci - 1].charCodeAt(0);
|
||||
// if (unicode.codePointAt(content, ci - 1) > 0x00ffff) {
|
||||
// if (unicode.isSurrogate(content, ci - 1) {
|
||||
|
@ -4014,6 +4032,19 @@ Element.prototype.render = function() {
|
|||
}
|
||||
}
|
||||
|
||||
// Alternative to the regex for avoiding combining chars when fullUnicode=false
|
||||
// NOTE: Wouldn't matter because the surrogate regex would already remove it.
|
||||
// if (!this.screen.fullUnicode) {
|
||||
// var point = unicode.codePointAt(content, ci - 1);
|
||||
// if (unicode.combining[point]) {
|
||||
// if (point > 0x00ffff) {
|
||||
// ci++;
|
||||
// }
|
||||
// x--;
|
||||
// continue;
|
||||
// }
|
||||
// }
|
||||
|
||||
if (this.style.transparent) {
|
||||
lines[y][x][0] = blend(attr, lines[y][x][0]);
|
||||
if (content[ci]) lines[y][x][1] = ch;
|
||||
|
|
|
@ -31,10 +31,16 @@ var SURROGATE_SINGLE = String.fromCodePoint
|
|||
? String.fromCodePoint(0x1D306)
|
||||
: String.fromCharCode(0xD834, 0xDF06);
|
||||
|
||||
var COMBINE = String.fromCodePoint
|
||||
? String.fromCodePoint(0x0300)
|
||||
: String.fromCharCode(0x0300);
|
||||
|
||||
var COMBINE = blessed.unicode.fromCodePoint(0x10A01);
|
||||
|
||||
// At cols=44, the bug that is avoided by this occurs:
|
||||
// || angles[line[x + 1][1]]) {
|
||||
|
||||
var lorem = 'Non eram nescius Brute cum quae summis ingeniis exquisitaque'
|
||||
var lorem = 'Non eram nes' + COMBINE + 'cius Brute cum quae summis ingeniis exquisitaque'
|
||||
+ ' doctrina philosophi Graeco sermone tractavissent ea Latinis litteris mandaremus'
|
||||
+ ' fore ut hic noster labor in varias reprehensiones incurreret nam quibusdam et'
|
||||
+ ' iis quidem non admodum indoctis totum hoc displicet philosophari quidam autem'
|
||||
|
|
Loading…
Reference in New Issue