+ toUnicode[code] = 0;
+ }
+ }
+
+ // pass 2: try to fill in the missing chars, looking for names of
+ // the form 'Axx', 'xx', 'Ann', 'ABnn', or 'nn', where 'A' and 'B'
+ // are any letters, 'xx' is two hex digits, and 'nn' is 2-4
+ // decimal digits
+ if (missing && globalParams->getMapNumericCharNames()) {
+ for (code = 0; code < 256; ++code) {
+ if ((charName = enc[code]) && !toUnicode[code] &&
+ strcmp(charName, ".notdef")) {
+ n = strlen(charName);
+ code2 = -1;
+ if (hex && n == 3 && isalpha(charName[0]) &&
+ isxdigit(charName[1]) && isxdigit(charName[2])) {
+ sscanf(charName+1, "%x", &code2);
+ } else if (hex && n == 2 &&
+ isxdigit(charName[0]) && isxdigit(charName[1])) {
+ sscanf(charName, "%x", &code2);
+ } else if (!hex && n >= 2 && n <= 4 &&
+ isdigit(charName[0]) && isdigit(charName[1])) {
+ code2 = atoi(charName);
+ } else if (n >= 3 && n <= 5 &&
+ isdigit(charName[1]) && isdigit(charName[2])) {
+ code2 = atoi(charName+1);
+ } else if (n >= 4 && n <= 6 &&
+ isdigit(charName[2]) && isdigit(charName[3])) {
+ code2 = atoi(charName+2);
+ }
+ if (code2 >= 0 && code2 <= 0xff) {
+ toUnicode[code] = (Unicode)code2;
+ }
+ }
+ }
+ }
+
+ // construct the char code -> Unicode mapping object
+ ctu = CharCodeToUnicode::make8BitToUnicode(toUnicode);
+
+ // merge in a ToUnicode CMap, if there is one -- this overwrites
+ // existing entries in ctu, i.e., the ToUnicode CMap takes
+ // precedence, but the other encoding info is allowed to fill in any
+ // holes
+ readToUnicodeCMap(fontDict, 8, ctu);
+
+ // look for a Unicode-to-Unicode mapping
+ if (name && (utu = globalParams->getUnicodeToUnicode(name))) {
+ for (i = 0; i < 256; ++i) {
+ toUnicode[i] = 0;
+ }
+ ctu2 = CharCodeToUnicode::make8BitToUnicode(toUnicode);
+ for (i = 0; i < 256; ++i) {
+ n = ctu->mapToUnicode((CharCode)i, uBuf, 8);
+ if (n >= 1) {
+ n = utu->mapToUnicode((CharCode)uBuf[0], uBuf, 8);
+ if (n >= 1) {
+ ctu2->setMapping((CharCode)i, uBuf, n);
+ }
+ }
+ }
+ utu->decRefCnt();
+ delete ctu;
+ ctu = ctu2;
+ }
+
+ //----- get the character widths -----
+
+ // initialize all widths
+ for (code = 0; code < 256; ++code) {
+ widths[code] = missingWidth * 0.001;
+ }
+
+ // use widths from font dict, if present
+ fontDict->lookup("FirstChar", &obj1);
+ firstChar = obj1.isInt() ? obj1.getInt() : 0;
+ obj1.free();
+ if (firstChar < 0 || firstChar > 255) {
+ firstChar = 0;
+ }
+ fontDict->lookup("LastChar", &obj1);
+ lastChar = obj1.isInt() ? obj1.getInt() : 255;
+ obj1.free();
+ if (lastChar < 0 || lastChar > 255) {
+ lastChar = 255;
+ }
+ mul = (type == fontType3) ? fontMat[0] : 0.001;
+ fontDict->lookup("Widths", &obj1);
+ if (obj1.isArray()) {
+ flags |= fontFixedWidth;
+ if (obj1.arrayGetLength() < lastChar - firstChar + 1) {
+ lastChar = firstChar + obj1.arrayGetLength() - 1;
+ }
+ for (code = firstChar; code <= lastChar; ++code) {
+ obj1.arrayGet(code - firstChar, &obj2);
+ if (obj2.isNum()) {
+ widths[code] = obj2.getNum() * mul;
+ if (widths[code] != widths[firstChar]) {
+ flags &= ~fontFixedWidth;
+ }
+ }
+ obj2.free();
+ }
+
+ // use widths from built-in font
+ } else if (builtinFont) {
+ // this is a kludge for broken PDF files that encode char 32
+ // as .notdef
+ if (builtinFont->widths->getWidth("space", &w)) {
+ widths[32] = 0.001 * w;
+ }
+ for (code = 0; code < 256; ++code) {
+ if (enc[code] && builtinFont->widths->getWidth(enc[code], &w)) {
+ widths[code] = 0.001 * w;
+ }
+ }
+
+ // couldn't find widths -- use defaults
+ } else {
+ // this is technically an error -- the Widths entry is required
+ // for all but the Base-14 fonts -- but certain PDF generators
+ // apparently don't include widths for Arial and TimesNewRoman
+ if (isFixedWidth()) {
+ i = 0;
+ } else if (isSerif()) {
+ i = 8;
+ } else {
+ i = 4;
+ }
+ if (isBold()) {
+ i += 2;
+ }
+ if (isItalic()) {
+ i += 1;
+ }
+ builtinFont = builtinFontSubst[i];
+ // this is a kludge for broken PDF files that encode char 32
+ // as .notdef
+ if (builtinFont->widths->getWidth("space", &w)) {
+ widths[32] = 0.001 * w;
+ }
+ for (code = 0; code < 256; ++code) {
+ if (enc[code] && builtinFont->widths->getWidth(enc[code], &w)) {
+ widths[code] = 0.001 * w;
+ }
+ }
+ }
+ obj1.free();
+
+ ok = gTrue;
+}
+
+Gfx8BitFont::~Gfx8BitFont() {
+  // Release each non-null encoding name whose encFree flag is set, then
+  // decrement the ctu ref count and free the dict-valued member objects.
+  for (int idx = 0; idx != 256; ++idx) {
+    if (enc[idx] && encFree[idx]) {
+      gfree(enc[idx]);
+    }
+  }
+  ctu->decRefCnt();
+  if (charProcs.isDict()) {
+    charProcs.free();
+  }
+  if (resources.isDict()) {
+    resources.free();
+  }
+}
+
+int Gfx8BitFont::getNextChar(char *s, int len, CharCode *code,
+                             Unicode *u, int uSize, int *uLen,
+                             double *dx, double *dy, double *ox, double *oy) {
+  // Decode exactly one byte of s (len is not consulted) and report 1 used.
+  const CharCode ch = (CharCode)(*s & 0xff);  // 0xff mask: code is 0..255
+  *code = ch;
+  *uLen = ctu->mapToUnicode(ch, u, uSize);
+  *dx = widths[ch];                           // from the per-code widths table
+  *ox = *oy = *dy = 0;                        // remaining outputs are zeroed
+  return 1;
+}
+
+CharCodeToUnicode *Gfx8BitFont::getToUnicode() {
+ ctu->incRefCnt(); // bump the ref count before handing out the font's own ctu
+ return ctu; // NOTE(review): caller presumably balances this with decRefCnt() -- confirm
+}
+
+Gushort *Gfx8BitFont::getCodeToGIDMap(FoFiTrueType *ff) {
+ Gushort *map;
+ int cmapPlatform, cmapEncoding;
+ int unicodeCmap, macRomanCmap, msSymbolCmap, cmap;
+ GBool useMacRoman, useUnicode;
+ char *charName;
+ Unicode u;
+ int code, i, n;
+
+ map = (Gushort *)gmalloc(256 * sizeof(Gushort));
+ for (i = 0; i < 256; ++i) {
+ map[i] = 0;
+ }
+
+ // To match up with the Adobe-defined behaviour, we choose a cmap
+ // like this:
+ // 1. If the PDF font has an encoding:
+ // 1a. If the PDF font specified MacRomanEncoding and the
+ // TrueType font has a Macintosh Roman cmap, use it, and
+ // reverse map the char names through MacRomanEncoding to
+ // get char codes.
+ // 1b. If the TrueType font has a Microsoft Unicode cmap or a
+ // non-Microsoft Unicode cmap, use it, and use the Unicode
+ // indexes, not the char codes.
+ // 1c. If the PDF font is symbolic and the TrueType font has a
+ // Microsoft Symbol cmap, use it, and use char codes
+ // directly (possibly with an offset of 0xf000).
+ // 1d. If the TrueType font has a Macintosh Roman cmap, use it,
+ // as in case 1a.
+ // 2. If the PDF font does not have an encoding:
+ // 2a. If the TrueType font has a Macintosh Roman cmap, use it,
+ // and use char codes directly (possibly with an offset of
+ // 0xf000).
+ // 2b. If the TrueType font has a Microsoft Symbol cmap, use it,
+ // and use char codes directly (possibly with an offset of
+ // 0xf000).
+ // 3. If none of these rules apply, use the first cmap and hope for
+ // the best (this shouldn't happen).
+ unicodeCmap = macRomanCmap = msSymbolCmap = -1;
+ for (i = 0; i < ff->getNumCmaps(); ++i) {
+ cmapPlatform = ff->getCmapPlatform(i);
+ cmapEncoding = ff->getCmapEncoding(i);
+ if ((cmapPlatform == 3 && cmapEncoding == 1) ||
+ cmapPlatform == 0) {
+ unicodeCmap = i;
+ } else if (cmapPlatform == 1 && cmapEncoding == 0) {
+ macRomanCmap = i;
+ } else if (cmapPlatform == 3 && cmapEncoding == 0) {
+ msSymbolCmap = i;
+ }
+ }
+ cmap = 0;
+ useMacRoman = gFalse;
+ useUnicode = gFalse;
+ if (hasEncoding) {
+ if (usesMacRomanEnc && macRomanCmap >= 0) {
+ cmap = macRomanCmap;
+ useMacRoman = gTrue;
+ } else if (unicodeCmap >= 0) {
+ cmap = unicodeCmap;
+ useUnicode = gTrue;
+ } else if ((flags & fontSymbolic) && msSymbolCmap >= 0) {
+ cmap = msSymbolCmap;
+ } else if (macRomanCmap >= 0) {
+ cmap = macRomanCmap;
+ useMacRoman = gTrue;
+ }
+ } else {
+ if (macRomanCmap >= 0) {
+ cmap = macRomanCmap;
+ } else if (msSymbolCmap >= 0) {
+ cmap = msSymbolCmap;
+ }
+ }
+
+ // reverse map the char names through MacRomanEncoding, then map the
+ // char codes through the cmap
+ if (useMacRoman) {
+ for (i = 0; i < 256; ++i) {
+ if ((charName = enc[i])) {
+ if ((code = globalParams->getMacRomanCharCode(charName))) {
+ map[i] = ff->mapCodeToGID(cmap, code);
+ }
+ }
+ }
+
+ // map Unicode through the cmap
+ } else if (useUnicode) {
+ for (i = 0; i < 256; ++i) {
+ if ((n = ctu->mapToUnicode((CharCode)i, &u, 1))) {
+ map[i] = ff->mapCodeToGID(cmap, u);
+ }
+ }
+
+ // map the char codes through the cmap, possibly with an offset of
+ // 0xf000
+ } else {
+ for (i = 0; i < 256; ++i) {
+ if (!(map[i] = ff->mapCodeToGID(cmap, i))) {
+ map[i] = ff->mapCodeToGID(cmap, 0xf000 + i);
+ }