mirror of
https://github.com/embarklabs/neo-blessed.git
synced 2025-02-22 15:48:07 +00:00
add support for surrogate pairs. fixes #123. see #4 and slap-editor/slap#107.
This commit is contained in:
parent
1b1775a4c2
commit
ea4e142757
11
README.md
11
README.md
@ -264,11 +264,12 @@ The screen on which every other node renders.
|
||||
matter whether the keys are locked.
|
||||
- __dockBorders__ - automatically "dock" borders with other elements instead of
|
||||
overlapping, depending on position (__experimental__). for example:
|
||||
- __doubleWidth__ - allow for rendering of East Asian double-width characters.
|
||||
this is behind an option because it may affect performance negatively.
|
||||
- __doubleWidthPerfect__ - handle high code point double-width characters,
|
||||
without this option, high code point double width characters just show up as
|
||||
`?`. that being said, this option will slow content parsing a fair amount.
|
||||
- __fullUnicode__ - allow for rendering of East Asian double-width characters.
|
||||
also enable proper rendering of UTF-16 surrogate pairs. this allows you to
|
||||
display text above the basic multilingual plane. this is behind an option
|
||||
because it may slightly degrade performance. without this option
|
||||
enabled, all double-width and surrogate pair characters will be replaced by
|
||||
`??` and `?` respectively.
|
||||
|
||||
These border-overlapped elements:
|
||||
|
||||
|
163
lib/widget.js
163
lib/widget.js
@ -290,6 +290,9 @@ function Screen(options) {
|
||||
|
||||
this.ignoreLocked = options.ignoreLocked || [];
|
||||
|
||||
this._unicode = this.tput.unicode || this.tput.numbers.U8 === 1;
|
||||
this.fullUnicode = this.options.fullUnicode && this._unicode;
|
||||
|
||||
this.dattr = ((0 << 18) | (0x1ff << 9)) | 0x1ff;
|
||||
|
||||
this.renders = 0;
|
||||
@ -979,7 +982,8 @@ Screen.prototype.draw = function(start, end) {
|
||||
, fg
|
||||
, bg
|
||||
, flags
|
||||
, cwid;
|
||||
, cwid
|
||||
, point;
|
||||
|
||||
var main = ''
|
||||
, pre
|
||||
@ -1196,25 +1200,42 @@ Screen.prototype.draw = function(start, end) {
|
||||
|
||||
// If we find a double-width char, eat the next character which should be
|
||||
// a space due to parseContent's behavior.
|
||||
if (this.options.doubleWidth && (this.tput.unicode || this.tput.numbers.U8 === 1)) {
|
||||
cwid = east_asian_width.char_width(line[x][1].codePointAt(0));
|
||||
if (cwid === 2) {
|
||||
// Might also need:
|
||||
// `|| line[x + 1][0] !== line[x][0]` for borderless boxes?
|
||||
if (x === line.length - 1 || angles[line[x + 1][1]]) {
|
||||
if (this.fullUnicode) {
|
||||
// If this is a surrogate pair double-width char, we can ignore it
|
||||
// because parseContent already counted it as length=2.
|
||||
point = line[x][1].codePointAt(0);
|
||||
// if (line[x][1].length === 1) {
|
||||
if (point <= 0xffff) {
|
||||
cwid = east_asian_width.char_width(point);
|
||||
if (cwid === 2) {
|
||||
// Might also need:
|
||||
// `|| line[x + 1][0] !== line[x][0]` for borderless boxes?
|
||||
if (x === line.length - 1 || angles[line[x + 1][1]]) {
|
||||
ch = ' ';
|
||||
o[x][1] = ' ';
|
||||
} else {
|
||||
o[++x][1] = ' ';
|
||||
}
|
||||
} else if (cwid === 0) {
|
||||
ch = ' ';
|
||||
o[x][1] = ' ';
|
||||
} else {
|
||||
o[++x][1] = ' ';
|
||||
}
|
||||
} else if (cwid === 0) {
|
||||
// No real way to do this right now:
|
||||
// ch = '';
|
||||
ch = ' ';
|
||||
o[x][1] = ' ';
|
||||
}
|
||||
}
|
||||
|
||||
// Find surrogate pairs that have been split:
|
||||
// Pad after each character instead of end of line.
|
||||
// XXX Doesn't work well.
|
||||
// if (this.fullUnicode) {
|
||||
// var code = line[x][1].charCodeAt(0);
|
||||
// if (code >= 0xd800 && code <= 0xdbff) {
|
||||
// var code2 = line[x + 1][1].charCodeAt(0);
|
||||
// if (code2 >= 0xdc00 && code2 <= 0xdfff) {
|
||||
// ch = line[x][1] + line[x + 1][1];
|
||||
// line[x + 1][1] = ' ';
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// Attempt to use ACS for supported characters.
|
||||
// This is not ideal, but it's how ncurses works.
|
||||
// There are a lot of terminals that support ACS
|
||||
@ -1228,7 +1249,9 @@ Screen.prototype.draw = function(start, end) {
|
||||
// supports UTF8, but I imagine it's unlikely.
|
||||
// Maybe remove !this.tput.unicode check, however,
|
||||
// this seems to be the way ncurses does it.
|
||||
if (this.tput.strings.enter_alt_charset_mode && !this.tput.brokenACS) {
|
||||
if (this.tput.strings.enter_alt_charset_mode && !this.tput.brokenACS
|
||||
// Necessary for handling unicode when not supported:
|
||||
&& (this.tput.acscr[ch] || acs)) {
|
||||
// Fun fact: even if this.tput.brokenACS wasn't checked here,
|
||||
// the linux console would still work fine because the acs
|
||||
// table would fail the check of: this.tput.acscr[ch]
|
||||
@ -2321,42 +2344,39 @@ Element.prototype.parseContent = function(noTags) {
|
||||
.replace(/\r\n|\r/g, '\n')
|
||||
.replace(/\t/g, this.screen.tabc);
|
||||
|
||||
if (this.screen.options.doubleWidth
|
||||
&& (this.screen.tput.unicode
|
||||
|| this.screen.tput.numbers.U8 === 1)) {
|
||||
// double-width chars will eat the next char after render - create a
|
||||
// blank character after it so it doesn't eat the real next char
|
||||
if (this.screen.fullUnicode) {
|
||||
// double-width chars will eat the next char after render. create a
|
||||
// blank character after it so it doesn't eat the real next char.
|
||||
content = content.replace(wideChars, '$1 ');
|
||||
} else {
|
||||
// no double-width. replace double-width chars with question-marks.
|
||||
// NOTE: could use two chars: '? ' depending on what is intended.
|
||||
// if we did, we could remove the unicode checks above in this if
|
||||
// statement.
|
||||
content = content.replace(wideChars, '?');
|
||||
// no double-width or surrogate pairs: replace them with question-marks.
|
||||
content = content.replace(wideChars, '??');
|
||||
content = content.replace(/[\ud800-\udbff][\udc00-\udfff]/g, '?');
|
||||
}
|
||||
|
||||
if (this.screen.options.doubleWidthPerfect) {
|
||||
var _content = content;
|
||||
content = '';
|
||||
for (var i = 0; i < _content.length; i++) {
|
||||
var point = _content.codePointAt(i);
|
||||
if ((point >= 0x20000 && point <= 0x2fffd)
|
||||
|| (point >= 0x30000 && point <= 0x3fffd)) {
|
||||
if (this.screen.options.doubleWidth
|
||||
&& (this.screen.tput.unicode
|
||||
|| this.screen.tput.numbers.U8 === 1)) {
|
||||
content += _content[i] + ' ';
|
||||
} else {
|
||||
// NOTE: could use two chars: '? ' depending on what is intended.
|
||||
// if we did, we could remove the unicode checks above in this if
|
||||
// statement.
|
||||
content += '?';
|
||||
}
|
||||
} else {
|
||||
content += _content[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
// XXX Because 0x20000 - 0x2fffd and 0x30000 - 0x3fffd are surrogate pairs,
|
||||
// the length is computed as `2` in javascript, which coincidentally helps
|
||||
// since they are double width. This may not be necessary.
|
||||
// if (this.screen.options.fullUnicode) {
|
||||
// var _content = content;
|
||||
// content = '';
|
||||
// for (var i = 0; i < _content.length; i++) {
|
||||
// var point = _content.codePointAt(i);
|
||||
// if ((point >= 0x20000 && point <= 0x2fffd)
|
||||
// || (point >= 0x30000 && point <= 0x3fffd)) {
|
||||
// if (this.screen._unicode) {
|
||||
// content += _content[i] + ' ';
|
||||
// } else {
|
||||
// // NOTE: could use two chars: '? ' depending on what is intended.
|
||||
// // if we did, we could remove the unicode checks above in this if
|
||||
// // statement.
|
||||
// content += '?';
|
||||
// }
|
||||
// } else {
|
||||
// content += _content[i];
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
if (!noTags) {
|
||||
content = this._parseTags(content);
|
||||
@ -2677,6 +2697,30 @@ main:
|
||||
: current;
|
||||
}, 0);
|
||||
|
||||
// Find all surrogate pairs and compensate for the lack of width
|
||||
// on the line by padding with trailing spaces:
|
||||
if (this.screen.fullUnicode) {
|
||||
for (var i = 0; i < out.length; i++) {
|
||||
// NOTE: Happens at 54 cols with all chars enabled in test.
|
||||
// Check whether a surrogate pair got split across the end of one line and the start of the next.
|
||||
if (/[\ud800-\udbff]$/.exec(out[i])
|
||||
&& /^[\udc00-\udfff]/.exec(out[i + 1])) {
|
||||
out[i] = out[i] + out[i + 1][0];
|
||||
out[i + 1] = out[i + 1].substring(1) + ' ';
|
||||
}
|
||||
// Pad the end of the lines if the surrogate is not a double-width char.
|
||||
// var surrogates = out[i].length - punycode.ucs2.decode(out[i]).length;
|
||||
var surrogates = out[i].match(/[\ud800-\udbff][\udc00-\udfff]/g);
|
||||
if (surrogates && surrogates.length) {
|
||||
for (var j = 0; j < surrogates.length; j++) {
|
||||
if (east_asian_width.char_width(surrogates[j].codePointAt(0)) === 1) {
|
||||
out[i] += ' ';
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return out;
|
||||
};
|
||||
|
||||
@ -3991,6 +4035,22 @@ Element.prototype.render = function() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Handle surrogate pairs:
|
||||
// Make sure we put surrogate pair chars in one cell.
|
||||
if (this.screen.fullUnicode) {
|
||||
var code = content[ci - 1].charCodeAt(0);
|
||||
// if (content.codePointAt(ci - 1) > 0xffff) {
|
||||
if (code >= 0xd800 && code <= 0xdbff) {
|
||||
var code2 = (content[ci] || '').charCodeAt(0);
|
||||
if (code2 >= 0xdc00 && code2 <= 0xdfff) {
|
||||
ch = content[ci - 1] + content[ci];
|
||||
ci++;
|
||||
} else {
|
||||
ch = bch;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (this.style.transparent) {
|
||||
lines[y][x][0] = blend(attr, lines[y][x][0]);
|
||||
if (content[ci]) lines[y][x][1] = ch;
|
||||
@ -8614,6 +8674,10 @@ function hsort(obj) {
|
||||
});
|
||||
}
|
||||
|
||||
// NOTE: 0x20000 - 0x2fffd and 0x30000 - 0x3fffd are not necessary for this
|
||||
// regex anyway. This regex is used to put a blank char after wide chars to
|
||||
// be eaten, however, if this is a surrogate pair, parseContent already adds
|
||||
// the extra one char because its length equals 2 instead of 1.
|
||||
var wideChars = new RegExp('('
|
||||
// 0x20000 - 0x2fffd:
|
||||
// + '[\\ud840-\\ud87f][\\udc00-\\udffd]'
|
||||
@ -8631,9 +8695,6 @@ var wideChars = new RegExp('('
|
||||
+ '\\ufe30-\\ufe6f' /* CJK Compatibility Forms */
|
||||
+ '\\uff00-\\uff60' /* Fullwidth Forms */
|
||||
+ '\\uffe0-\\uffe6'
|
||||
// XXX Cannot implement these in a regex. Not perfect, but the layout will
|
||||
// still not be negatively affected by double-width chars in this range,
|
||||
// however, the next char on the screen will be eaten.
|
||||
// + '\\u20000-\\u2fffd'
|
||||
// + '\\u30000-\\u3fffd'
|
||||
+ ']'
|
||||
|
@ -5,12 +5,30 @@ screen = blessed.screen({
|
||||
dump: __dirname + '/logs/eaw.log',
|
||||
smartCSR: true,
|
||||
dockBorders: true,
|
||||
doubleWidth: true,
|
||||
doubleWidthPerfect: true
|
||||
fullUnicode: true
|
||||
});
|
||||
|
||||
var DW = '杜';
|
||||
var DW2 = String.fromCodePoint ? String.fromCodePoint(0x30000) : 'a';
|
||||
// screen.options.fullUnicode = false;
|
||||
// screen.fullUnicode = false;
|
||||
// screen._unicode = false;
|
||||
// screen.tput.unicode = false;
|
||||
// screen.tput.numbers.U8 = -1;
|
||||
// screen.tput.strings.enter_alt_charset_mode = false;
|
||||
|
||||
// var DOUBLE = '杜';
|
||||
var DOUBLE = String.fromCodePoint
|
||||
? String.fromCodePoint(0x675c)
|
||||
: String.fromCharCode(0x675c);
|
||||
|
||||
// var SURROGATE_DOUBLE = '𰀀';
|
||||
var SURROGATE_DOUBLE = String.fromCodePoint
|
||||
? String.fromCodePoint(0x30000)
|
||||
: String.fromCharCode(0xD880, 0xDC00);
|
||||
|
||||
// var SURROGATE_SINGLE = '𝌆';
|
||||
var SURROGATE_SINGLE = String.fromCodePoint
|
||||
? String.fromCodePoint(0x1D306)
|
||||
: String.fromCharCode(0xD834, 0xDF06);
|
||||
|
||||
// At cols=44, the bug that is avoided by this occurs:
|
||||
// || angles[line[x + 1][1]]) {
|
||||
@ -60,8 +78,9 @@ var lorem = 'Non eram nescius Brute cum quae summis ingeniis exquisitaque'
|
||||
+ ' isdem de rebus alia ratione compositis quid est cur nostri a nostris non'
|
||||
+ ' legantur';
|
||||
|
||||
lorem = lorem.replace(/e/gi, DW);
|
||||
lorem = lorem.replace(/a/gi, DW2);
|
||||
lorem = lorem.replace(/e/gi, DOUBLE);
|
||||
lorem = lorem.replace(/a/gi, SURROGATE_DOUBLE);
|
||||
lorem = lorem.replace(/o/gi, SURROGATE_SINGLE);
|
||||
|
||||
var main = blessed.box({
|
||||
parent: screen,
|
||||
|
Loading…
x
Reference in New Issue
Block a user