Skip to content

Commit dfe9015

Browse files
committed
Convert uniXXXX glyph names to proper ones when building the charCodeToGlyphId map for TrueType fonts (bug 1132849, issue 6893, issue 6894)
This patch adds a `getUnicodeForGlyph` helper function, which is used to recover Unicode values for non-standard glyph names. Some PDF generators, e.g. Scribus PDF, use improper `uniXXXX` glyph names, which break the glyph mapping. We can avoid this by converting them to "standard" glyph names instead. Fixes https://bugzilla.mozilla.org/show_bug.cgi?id=1132849. Fixes 6893. Fixes 6894.
1 parent 1475984 commit dfe9015

File tree

6 files changed

+96
-10
lines changed

6 files changed

+96
-10
lines changed

src/core/fonts.js

Lines changed: 49 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ var getSupplementalGlyphMapForArialBlack =
8080
coreStandardFonts.getSupplementalGlyphMapForArialBlack;
8181
var getUnicodeRangeFor = coreUnicode.getUnicodeRangeFor;
8282
var mapSpecialUnicodeValues = coreUnicode.mapSpecialUnicodeValues;
83+
var getUnicodeForGlyph = coreUnicode.getUnicodeForGlyph;
8384

8485
// Unicode Private Use Area
8586
var PRIVATE_USE_OFFSET_START = 0xE000;
@@ -465,7 +466,7 @@ var ProblematicCharRanges = new Int32Array([
465466
*/
466467
var Font = (function FontClosure() {
467468
function Font(name, file, properties) {
468-
var charCode, glyphName, fontChar;
469+
var charCode, glyphName, unicode, fontChar;
469470

470471
this.name = name;
471472
this.loadedName = properties.loadedName;
@@ -609,21 +610,25 @@ var Font = (function FontClosure() {
609610
this.toFontChar[charCode] = fontChar;
610611
}
611612
} else if (isStandardFont) {
612-
this.toFontChar = [];
613613
glyphsUnicodeMap = getGlyphsUnicode();
614614
for (charCode in properties.defaultEncoding) {
615615
glyphName = (properties.differences[charCode] ||
616616
properties.defaultEncoding[charCode]);
617-
this.toFontChar[charCode] = glyphsUnicodeMap[glyphName];
617+
unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
618+
if (unicode !== -1) {
619+
this.toFontChar[charCode] = unicode;
620+
}
618621
}
619622
} else {
620-
var unicodeCharCode, notCidFont = (type.indexOf('CIDFontType') === -1);
621623
glyphsUnicodeMap = getGlyphsUnicode();
622624
this.toUnicode.forEach(function(charCode, unicodeCharCode) {
623-
if (notCidFont) {
625+
if (!this.composite) {
624626
glyphName = (properties.differences[charCode] ||
625627
properties.defaultEncoding[charCode]);
626-
unicodeCharCode = (glyphsUnicodeMap[glyphName] || unicodeCharCode);
628+
unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
629+
if (unicode !== -1) {
630+
unicodeCharCode = unicode;
631+
}
627632
}
628633
this.toFontChar[charCode] = unicodeCharCode;
629634
}.bind(this));
@@ -722,7 +727,7 @@ var Font = (function FontClosure() {
722727
/**
 * Combines two values into a single number as `(b0 << 8) + b1`.
 * NOTE(review): presumably `b0` is the high byte and `b1` the low byte of a
 * big-endian 16-bit quantity -- confirm against the callers.
 *
 * @param {number} b0 - Value shifted left by eight bits.
 * @param {number} b1 - Value added to the shifted `b0`.
 * @returns {number} The combined value.
 */
function int16(b0, b1) {
  var shifted = b0 << 8;
  return shifted + b1;
}
725-
730+
726731
function signedInt16(b0, b1) {
727732
var value = (b0 << 8) + b1;
728733
return value & (1 << 15) ? value - 0x10000 : value;
@@ -2283,6 +2288,26 @@ var Font = (function FontClosure() {
22832288
return false;
22842289
}
22852290

2291+
// Some bad PDF generators, e.g. Scribus PDF, include glyph names
2292+
// in a 'uniXXXX' format -- attempting to recover proper ones.
2293+
/**
 * Resolves a glyph name to one that is a key of `glyphsUnicodeMap`.
 *
 * @param {string} name - The glyph name to resolve.
 * @param {Object} glyphsUnicodeMap - Lookup from glyph names to Unicode
 *   values.
 * @returns {string} `name` itself when it is already a key of the map;
 *   otherwise the first key of the map whose value equals the Unicode value
 *   that `getUnicodeForGlyph` reports for `name`; or, failing that, the
 *   unchanged `name` (after emitting a warning).
 */
function recoverGlyphName(name, glyphsUnicodeMap) {
  var isKnownName = glyphsUnicodeMap[name] !== undefined;
  if (!isKnownName) {
    // Non-standard name: obtain its Unicode value and search the map for a
    // glyph name that carries the same value.
    var codePoint = getUnicodeForGlyph(name, glyphsUnicodeMap);
    if (codePoint !== -1) {
      for (var candidate in glyphsUnicodeMap) {
        if (glyphsUnicodeMap[candidate] === codePoint) {
          return candidate;
        }
      }
    }
    warn('Unable to recover a standard glyph name for: ' + name);
  }
  return name;
}
2309+
2310+
22862311
if (properties.type === 'CIDFontType2') {
22872312
var cidToGidMap = properties.cidToGidMap || [];
22882313
var isCidToGidMapEmpty = cidToGidMap.length === 0;
@@ -2337,7 +2362,7 @@ var Font = (function FontClosure() {
23372362
}
23382363
var glyphsUnicodeMap = getGlyphsUnicode();
23392364
for (charCode = 0; charCode < 256; charCode++) {
2340-
var glyphName;
2365+
var glyphName, standardGlyphName;
23412366
if (this.differences && charCode in this.differences) {
23422367
glyphName = this.differences[charCode];
23432368
} else if (charCode in baseEncoding &&
@@ -2349,13 +2374,16 @@ var Font = (function FontClosure() {
23492374
if (!glyphName) {
23502375
continue;
23512376
}
2377+
// Ensure that non-standard glyph names are resolved to valid ones.
2378+
standardGlyphName = recoverGlyphName(glyphName, glyphsUnicodeMap);
2379+
23522380
var unicodeOrCharCode, isUnicode = false;
23532381
if (cmapPlatformId === 3 && cmapEncodingId === 1) {
2354-
unicodeOrCharCode = glyphsUnicodeMap[glyphName];
2382+
unicodeOrCharCode = glyphsUnicodeMap[standardGlyphName];
23552383
isUnicode = true;
23562384
} else if (cmapPlatformId === 1 && cmapEncodingId === 0) {
23572385
// TODO: the encoding needs to be updated with mac os table.
2358-
unicodeOrCharCode = MacRomanEncoding.indexOf(glyphName);
2386+
unicodeOrCharCode = MacRomanEncoding.indexOf(standardGlyphName);
23592387
}
23602388

23612389
var found = false;
@@ -2373,6 +2401,11 @@ var Font = (function FontClosure() {
23732401
if (!found && properties.glyphNames) {
23742402
// Try to map using the post table.
23752403
var glyphId = properties.glyphNames.indexOf(glyphName);
2404+
// The post table ought to use the same kind of glyph names as the
2405+
// `differences` array, but check the standard ones as a fallback.
2406+
if (glyphId === -1 && standardGlyphName !== glyphName) {
2407+
glyphId = properties.glyphNames.indexOf(standardGlyphName);
2408+
}
23762409
if (glyphId > 0 && hasGlyph(glyphId, -1, -1)) {
23772410
charCodeToGlyphId[charCode] = glyphId;
23782411
found = true;
@@ -2686,6 +2719,12 @@ var Font = (function FontClosure() {
26862719
code = +glyphName.substr(1);
26872720
}
26882721
break;
2722+
default:
2723+
// 'uniXXXX'/'uXXXX{XX}' glyphs
2724+
var unicode = getUnicodeForGlyph(glyphName, glyphsUnicodeMap);
2725+
if (unicode !== -1) {
2726+
code = unicode;
2727+
}
26892728
}
26902729
if (code) {
26912730
// If |baseEncodingName| is one of the predefined encodings,

src/core/unicode.js

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,36 @@
6565
return code;
6666
}
6767

68+
/**
 * Returns the Unicode value for a glyph name.
 *
 * The name is first looked up in `glyphsUnicodeMap`. If it is not found
 * there, the function tries to decode names of the form 'uniXXXX' (exactly
 * four hexadecimal digits) or 'uXXXX{XX}' (four to six hexadecimal digits).
 *
 * @param {string} name - The glyph name to resolve.
 * @param {Object} glyphsUnicodeMap - Lookup from glyph names to Unicode
 *   values.
 * @returns {number} The Unicode value, or -1 when `name` is empty/missing or
 *   cannot be resolved.
 */
function getUnicodeForGlyph(name, glyphsUnicodeMap) {
  var unicode = glyphsUnicodeMap[name];
  if (unicode !== undefined) {
    return unicode;
  }
  if (!name) {
    return -1;
  }
  // Try to recover valid Unicode values from 'uniXXXX'/'uXXXX{XX}' glyphs.
  if (name[0] === 'u') {
    var nameLen = name.length, hexStr;

    if (nameLen === 7 && name[1] === 'n' && name[2] === 'i') { // 'uniXXXX'
      hexStr = name.substr(3);
    } else if (nameLen >= 5 && nameLen <= 7) { // 'uXXXX{XX}'
      hexStr = name.substr(1);
    } else {
      return -1;
    }
    // Require the whole suffix to consist of upper-case hexadecimal digits.
    // Comparing against `toUpperCase()` alone is not sufficient, because
    // `parseInt` stops silently at the first invalid character and accepts
    // a '0X' prefix, so e.g. 'uni12G4' or 'u0X1F' would otherwise be decoded
    // to bogus values.
    if (/^[0-9A-F]+$/.test(hexStr)) {
      return parseInt(hexStr, 16);
    }
  }
  return -1;
}
}
97+
6898
var UnicodeRanges = [
6999
{ 'begin': 0x0000, 'end': 0x007F }, // Basic Latin
70100
{ 'begin': 0x0080, 'end': 0x00FF }, // Latin-1 Supplement
@@ -1612,4 +1642,5 @@
16121642
exports.reverseIfRtl = reverseIfRtl;
16131643
exports.getUnicodeRangeFor = getUnicodeRangeFor;
16141644
exports.getNormalizedUnicodes = getNormalizedUnicodes;
1645+
exports.getUnicodeForGlyph = getUnicodeForGlyph;
16151646
}));

test/pdfs/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@
3232
!bug1200096.pdf
3333
!issue5564_reduced.pdf
3434
!canvas.pdf
35+
!bug1132849.pdf
36+
!issue6894.pdf
3537
!issue5804.pdf
3638
!ShowText-ShadingPattern.pdf
3739
!complex_ttf_font.pdf

test/pdfs/bug1132849.pdf

70.9 KB
Binary file not shown.

test/pdfs/issue6894.pdf

1.31 MB
Binary file not shown.

test/test_manifest.json

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -728,6 +728,20 @@
728728
"rounds": 1,
729729
"type": "eq"
730730
},
731+
{ "id": "bug1132849",
732+
"file": "pdfs/bug1132849.pdf",
733+
"md5": "aedfbead1f8feb35cf2e38b279133b47",
734+
"rounds": 1,
735+
"link": false,
736+
"type": "eq"
737+
},
738+
{ "id": "issue6894",
739+
"file": "pdfs/issue6894.pdf",
740+
"md5": "bb84f2025c11f23cf436170049f81215",
741+
"rounds": 1,
742+
"link": false,
743+
"type": "eq"
744+
},
731745
{ "id": "personwithdog",
732746
"file": "pdfs/personwithdog.pdf",
733747
"md5": "cd68fb2ce00dab97801b3e51495b99e3",

0 commit comments

Comments
 (0)