diff --git a/README.md b/README.md index 26181f6..ebb96f0 100644 --- a/README.md +++ b/README.md @@ -276,14 +276,14 @@ The screen on which every other node renders. │ box1 │ box2 │ └─────────┴─────────┘ ``` -- __fullUnicode__ - allow for rendering of East Asian double-width characters. - also enable proper rendering of utf-16 surrogate pairs. this allows you to +- __fullUnicode__ - allow for rendering of East Asian double-width characters, + utf-16 surrogate pairs, and unicode combining characters. this allows you to display text above the basic multilingual plane. this is behind an option because it may affect performance slightly negatively. without this option - enabled, all double-width and surrogate pair characters will be replaced by - `??` and `?` respectively. (NOTE: libvte (e.g. gnome-terminal) cannot display - characters that are both surrogate pairs _and_ double-width properly. there - is no way for blessed to fix this unfortunately). + enabled, all double-width, surrogate pair, and combining characters will be + replaced by `??`, `?`, and an empty string (i.e. removed) respectively. (NOTE: libvte (e.g. gnome-terminal) + cannot display characters that are both surrogate pairs _and_ double-width + properly. there is no way for blessed to fix this unfortunately). 
##### Properties: diff --git a/lib/blessed.js b/lib/blessed.js index c6af364..05f748b 100644 --- a/lib/blessed.js +++ b/lib/blessed.js @@ -12,7 +12,8 @@ var program = require('./program') , tput = require('./tput') , widget = require('./widget') - , colors = require('./colors'); + , colors = require('./colors') + , unicode = require('./unicode'); /** * Blessed @@ -24,8 +25,9 @@ function blessed() { blessed.program = blessed.Program = program; blessed.tput = blessed.Tput = tput; -blessed.colors = colors; blessed.widget = widget; +blessed.colors = colors; +blessed.unicode = unicode; Object.keys(blessed.widget).forEach(function(name) { blessed[name] = blessed.widget[name]; diff --git a/lib/unicode.js b/lib/unicode.js index 3707c01..77d2fa6 100644 --- a/lib/unicode.js +++ b/lib/unicode.js @@ -79,6 +79,33 @@ // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// String.fromCodePoint +// +// Copyright Mathias Bynens +// https://github.com/mathiasbynens/String.fromCodePoint +// +// Permission is hereby granted, free of charge, to any person obtaining +// a copy of this software and associated documentation files (the +// "Software"), to deal in the Software without restriction, including +// without limitation the rights to use, copy, modify, merge, publish, +// distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to +// the following conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +var stringFromCharCode = String.fromCharCode; +var floor = Math.floor; + exports.charWidth = function(str, i) { var point = typeof str !== 'number' ? exports.codePointAt(str, i || 0) @@ -355,7 +382,7 @@ exports.isSurrogate = function(str, i) { return point > 0x00ffff; }; -exports.combining = [ +exports.combiningTable = [ [ 0x0300, 0x036F ], [ 0x0483, 0x0486 ], [ 0x0488, 0x0489 ], [ 0x0591, 0x05BD ], [ 0x05BF, 0x05BF ], [ 0x05C1, 0x05C2 ], [ 0x05C4, 0x05C5 ], [ 0x05C7, 0x05C7 ], [ 0x0600, 0x0603 ], @@ -404,7 +431,9 @@ exports.combining = [ [ 0x1D173, 0x1D182 ], [ 0x1D185, 0x1D18B ], [ 0x1D1AA, 0x1D1AD ], [ 0x1D242, 0x1D244 ], [ 0xE0001, 0xE0001 ], [ 0xE0020, 0xE007F ], [ 0xE0100, 0xE01EF ] -].reduce(function(out, row) { +]; + +exports.combining = exports.combiningTable.reduce(function(out, row) { for (var i = row[0]; i <= row[1]; i++) { out[i] = true; } @@ -465,6 +494,44 @@ exports.codePointAt = function(str, position) { return first; }; +exports.fromCodePoint = function() { + var MAX_SIZE = 0x4000; + var codeUnits = []; + var highSurrogate; + var lowSurrogate; + var index = -1; + var length = arguments.length; + if (!length) { + return ''; + } + var result = ''; + while (++index < length) { + var codePoint = Number(arguments[index]); + if ( + !isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity` + codePoint < 0 || // not a valid Unicode code point + codePoint > 0x10FFFF || // not a valid Unicode code point + floor(codePoint) != codePoint // not an integer + ) { + throw RangeError('Invalid code point: ' + codePoint); + } + if (codePoint <= 0xFFFF) { // BMP code point + codeUnits.push(codePoint); + } else { // Astral code point; split in surrogate halves + // 
http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae + codePoint -= 0x10000; + highSurrogate = (codePoint >> 10) + 0xD800; + lowSurrogate = (codePoint % 0x400) + 0xDC00; + codeUnits.push(highSurrogate, lowSurrogate); + } + if (index + 1 == length || codeUnits.length > MAX_SIZE) { + result += stringFromCharCode.apply(null, codeUnits); + codeUnits.length = 0; + } + } + return result; +}; + // Double width characters that are _not_ surrogate pairs. // NOTE: 0x20000 - 0x2fffd and 0x30000 - 0x3fffd are not necessary for this // regex anyway. This regex is used to put a blank char after wide chars to @@ -484,3 +551,188 @@ exports.wideChars = new RegExp('([' // Regex to detect a surrogate pair. exports.surrogate = /[\ud800-\udbff][\udc00-\udfff]/g; + +// Regex to find combining characters. +exports.combiningRegex = exports.combiningTable.reduce(function(out, row) { + var low, high, range; + if (row[0] > 0x00ffff) { + low = exports.fromCodePoint(row[0]); + low = [ + hexify(low.charCodeAt(0)), + hexify(low.charCodeAt(1)) + ]; + high = exports.fromCodePoint(row[1]); + high = [ + hexify(high.charCodeAt(0)), + hexify(high.charCodeAt(1)) + ]; + range = '[\\u' + low[0] + '-' + '\\u' + high[0] + ']' + + '[\\u' + low[1] + '-' + '\\u' + high[1] + ']'; + if (!~out.indexOf('|')) out += ']'; + out += '|' + range; + } else { + low = hexify(row[0]); + high = hexify(row[1]); + low = '\\u' + low; + high = '\\u' + high; + out += low + '-' + high; + } + return out; +}, '['); + +exports.combiningRegex = new RegExp(exports.combiningRegex, 'g'); + +function hexify(n) { + n = n.toString(16); + while (n.length < 4) n = '0' + n; + return n; +} + +/* +exports.combiningRegex = new RegExp( + '[' + + '\\u0300-\\u036f' + + '\\u0483-\\u0486' + + '\\u0488-\\u0489' + + '\\u0591-\\u05bd' + + '\\u05bf-\\u05bf' + + '\\u05c1-\\u05c2' + + '\\u05c4-\\u05c5' + + '\\u05c7-\\u05c7' + + '\\u0600-\\u0603' + + '\\u0610-\\u0615' + + '\\u064b-\\u065e' + + '\\u0670-\\u0670' + + '\\u06d6-\\u06e4' + + 
'\\u06e7-\\u06e8' + + '\\u06ea-\\u06ed' + + '\\u070f-\\u070f' + + '\\u0711-\\u0711' + + '\\u0730-\\u074a' + + '\\u07a6-\\u07b0' + + '\\u07eb-\\u07f3' + + '\\u0901-\\u0902' + + '\\u093c-\\u093c' + + '\\u0941-\\u0948' + + '\\u094d-\\u094d' + + '\\u0951-\\u0954' + + '\\u0962-\\u0963' + + '\\u0981-\\u0981' + + '\\u09bc-\\u09bc' + + '\\u09c1-\\u09c4' + + '\\u09cd-\\u09cd' + + '\\u09e2-\\u09e3' + + '\\u0a01-\\u0a02' + + '\\u0a3c-\\u0a3c' + + '\\u0a41-\\u0a42' + + '\\u0a47-\\u0a48' + + '\\u0a4b-\\u0a4d' + + '\\u0a70-\\u0a71' + + '\\u0a81-\\u0a82' + + '\\u0abc-\\u0abc' + + '\\u0ac1-\\u0ac5' + + '\\u0ac7-\\u0ac8' + + '\\u0acd-\\u0acd' + + '\\u0ae2-\\u0ae3' + + '\\u0b01-\\u0b01' + + '\\u0b3c-\\u0b3c' + + '\\u0b3f-\\u0b3f' + + '\\u0b41-\\u0b43' + + '\\u0b4d-\\u0b4d' + + '\\u0b56-\\u0b56' + + '\\u0b82-\\u0b82' + + '\\u0bc0-\\u0bc0' + + '\\u0bcd-\\u0bcd' + + '\\u0c3e-\\u0c40' + + '\\u0c46-\\u0c48' + + '\\u0c4a-\\u0c4d' + + '\\u0c55-\\u0c56' + + '\\u0cbc-\\u0cbc' + + '\\u0cbf-\\u0cbf' + + '\\u0cc6-\\u0cc6' + + '\\u0ccc-\\u0ccd' + + '\\u0ce2-\\u0ce3' + + '\\u0d41-\\u0d43' + + '\\u0d4d-\\u0d4d' + + '\\u0dca-\\u0dca' + + '\\u0dd2-\\u0dd4' + + '\\u0dd6-\\u0dd6' + + '\\u0e31-\\u0e31' + + '\\u0e34-\\u0e3a' + + '\\u0e47-\\u0e4e' + + '\\u0eb1-\\u0eb1' + + '\\u0eb4-\\u0eb9' + + '\\u0ebb-\\u0ebc' + + '\\u0ec8-\\u0ecd' + + '\\u0f18-\\u0f19' + + '\\u0f35-\\u0f35' + + '\\u0f37-\\u0f37' + + '\\u0f39-\\u0f39' + + '\\u0f71-\\u0f7e' + + '\\u0f80-\\u0f84' + + '\\u0f86-\\u0f87' + + '\\u0f90-\\u0f97' + + '\\u0f99-\\u0fbc' + + '\\u0fc6-\\u0fc6' + + '\\u102d-\\u1030' + + '\\u1032-\\u1032' + + '\\u1036-\\u1037' + + '\\u1039-\\u1039' + + '\\u1058-\\u1059' + + '\\u1160-\\u11ff' + + '\\u135f-\\u135f' + + '\\u1712-\\u1714' + + '\\u1732-\\u1734' + + '\\u1752-\\u1753' + + '\\u1772-\\u1773' + + '\\u17b4-\\u17b5' + + '\\u17b7-\\u17bd' + + '\\u17c6-\\u17c6' + + '\\u17c9-\\u17d3' + + '\\u17dd-\\u17dd' + + '\\u180b-\\u180d' + + '\\u18a9-\\u18a9' + + '\\u1920-\\u1922' + + '\\u1927-\\u1928' + + '\\u1932-\\u1932' + 
+ '\\u1939-\\u193b' + + '\\u1a17-\\u1a18' + + '\\u1b00-\\u1b03' + + '\\u1b34-\\u1b34' + + '\\u1b36-\\u1b3a' + + '\\u1b3c-\\u1b3c' + + '\\u1b42-\\u1b42' + + '\\u1b6b-\\u1b73' + + '\\u1dc0-\\u1dca' + + '\\u1dfe-\\u1dff' + + '\\u200b-\\u200f' + + '\\u202a-\\u202e' + + '\\u2060-\\u2063' + + '\\u206a-\\u206f' + + '\\u20d0-\\u20ef' + + '\\u302a-\\u302f' + + '\\u3099-\\u309a' + + '\\ua806-\\ua806' + + '\\ua80b-\\ua80b' + + '\\ua825-\\ua826' + + '\\ufb1e-\\ufb1e' + + '\\ufe00-\\ufe0f' + + '\\ufe20-\\ufe23' + + '\\ufeff-\\ufeff' + + '\\ufff9-\\ufffb' + + ']' + + '|[\\ud802-\\ud802][\\ude01-\\ude03]' + + '|[\\ud802-\\ud802][\\ude05-\\ude06]' + + '|[\\ud802-\\ud802][\\ude0c-\\ude0f]' + + '|[\\ud802-\\ud802][\\ude38-\\ude3a]' + + '|[\\ud802-\\ud802][\\ude3f-\\ude3f]' + + '|[\\ud834-\\ud834][\\udd67-\\udd69]' + + '|[\\ud834-\\ud834][\\udd73-\\udd82]' + + '|[\\ud834-\\ud834][\\udd85-\\udd8b]' + + '|[\\ud834-\\ud834][\\uddaa-\\uddad]' + + '|[\\ud834-\\ud834][\\ude42-\\ude44]' + + '|[\\udb40-\\udb40][\\udc01-\\udc01]' + + '|[\\udb40-\\udb40][\\udc20-\\udc7f]' + + '|[\\udb40-\\udb40][\\udd00-\\uddef]' +, 'g'); +*/ diff --git a/lib/widget.js b/lib/widget.js index 6be6bf3..0cf1692 100644 --- a/lib/widget.js +++ b/lib/widget.js @@ -1214,8 +1214,6 @@ Screen.prototype.draw = function(start, end) { } else { o[++x][1] = ' '; } - } else if (cwid === 0) { - ch = ' '; } } } @@ -2332,8 +2330,14 @@ Element.prototype.parseContent = function(noTags) { // blank character after it so it doesn't eat the real next char. content = content.replace(unicode.wideChars, '$1 '); } else { - // no double-width or surrogate pairs: replace them with question-marks. + // no double-width: replace them with question-marks. content = content.replace(unicode.wideChars, '??'); + // delete combining characters since they're 0-width anyway. + // NOTE: We could drop this, the non-surrogates would get changed to ? by + // the unicode filter, and surrogates changed to ? by the surrogate + // regex. 
however, the user might expect them to be 0-width. + content = content.replace(unicode.combiningRegex, ''); + // no surrogate pairs: replace them with question-marks. content = content.replace(unicode.surrogate, '?'); } @@ -2673,9 +2677,7 @@ main: if (surrogates && surrogates.length) { for (var j = 0; j < surrogates.length; j++) { var cwid = unicode.charWidth(surrogates[j], 0); - if (cwid === 0) { - out[i] += ' '; - } else if (cwid === 1) { + if (cwid === 1) { out[i] += ' '; } } @@ -3997,9 +3999,25 @@ Element.prototype.render = function() { continue; } - // Handle surrogate pairs: - // Make sure we put surrogate pair chars in one cell. if (this.screen.fullUnicode && content[ci - 1]) { + // Handle combining chars: + // Make sure they get in the same cell and are counted as 0. + var point = unicode.codePointAt(content, ci - 1); + if (unicode.combining[point]) { + if (point > 0x00ffff) { + ch = content[ci - 1] + content[ci]; + ci++; + } + if (x - 1 >= xi) { + lines[y][x - 1][1] += ch; + } else if (y - 1 >= yi) { + lines[y - 1][xl - 1][1] += ch; + } + x--; + continue; + } + // Handle surrogate pairs: + // Make sure we put surrogate pair chars in one cell. var code = content[ci - 1].charCodeAt(0); // if (unicode.codePointAt(content, ci - 1) > 0x00ffff) { // if (unicode.isSurrogate(content, ci - 1) { @@ -4014,6 +4032,19 @@ Element.prototype.render = function() { } } + // Alternative to regex to avoiding combining chars when fullUnicode=false + // NOTE: Wouldn't matter because the surrogate regex would already remove it. 
+ // if (!this.screen.fullUnicode) { + // var point = unicode.codePointAt(content, ci - 1); + // if (unicode.combining[point]) { + // if (point > 0x00ffff) { + // ci++; + // } + // x--; + // continue; + // } + // } + if (this.style.transparent) { lines[y][x][0] = blend(attr, lines[y][x][0]); if (content[ci]) lines[y][x][1] = ch; diff --git a/test/widget-eaw.js b/test/widget-eaw.js index fea0f0e..b55ac05 100644 --- a/test/widget-eaw.js +++ b/test/widget-eaw.js @@ -31,10 +31,16 @@ var SURROGATE_SINGLE = String.fromCodePoint ? String.fromCodePoint(0x1D306) : String.fromCharCode(0xD834, 0xDF06); +var COMBINE = String.fromCodePoint + ? String.fromCodePoint(0x0300) + : String.fromCharCode(0x0300); + +var COMBINE = blessed.unicode.fromCodePoint(0x10A01); + // At cols=44, the bug that is avoided by this occurs: // || angles[line[x + 1][1]]) { -var lorem = 'Non eram nescius Brute cum quae summis ingeniis exquisitaque' +var lorem = 'Non eram nes' + COMBINE + 'cius Brute cum quae summis ingeniis exquisitaque' + ' doctrina philosophi Graeco sermone tractavissent ea Latinis litteris mandaremus' + ' fore ut hic noster labor in varias reprehensiones incurreret nam quibusdam et' + ' iis quidem non admodum indoctis totum hoc displicet philosophari quidam autem'