+
+ // check embedded or external font file for base encoding
+ // (only for Type 1 and Type 1C fonts - trying to get an encoding
+ // out of a TrueType font is a losing proposition)
+ ffT1 = NULL;
+ ffT1C = NULL;
+ buf = NULL;
+ if (type == fontType1 && (extFontFile || embFontID.num >= 0)) {
+ if (extFontFile) {
+ ffT1 = FoFiType1::load(extFontFile->getCString());
+ } else {
+ buf = readEmbFontFile(xref, &len);
+ ffT1 = FoFiType1::make(buf, len);
+ }
+ if (ffT1) {
+ if (ffT1->getName()) {
+ if (embFontName) {
+ delete embFontName;
+ }
+ embFontName = new GString(ffT1->getName());
+ }
+ if (!baseEnc) {
+ baseEnc = ffT1->getEncoding();
+ baseEncFromFontFile = gTrue;
+ }
+ }
+ } else if (type == fontType1C && (extFontFile || embFontID.num >= 0)) {
+ if (extFontFile) {
+ ffT1C = FoFiType1C::load(extFontFile->getCString());
+ } else {
+ buf = readEmbFontFile(xref, &len);
+ ffT1C = FoFiType1C::make(buf, len);
+ }
+ if (ffT1C) {
+ if (ffT1C->getName()) {
+ if (embFontName) {
+ delete embFontName;
+ }
+ embFontName = new GString(ffT1C->getName());
+ }
+ if (!baseEnc) {
+ baseEnc = ffT1C->getEncoding();
+ baseEncFromFontFile = gTrue;
+ }
+ }
+ }
+ if (buf) {
+ gfree(buf);
+ }
+
+ // get default base encoding
+ if (!baseEnc) {
+ if (builtinFont && embFontID.num < 0) {
+ baseEnc = builtinFont->defaultBaseEnc;
+ hasEncoding = gTrue;
+ } else if (type == fontTrueType) {
+ baseEnc = winAnsiEncoding;
+ } else {
+ baseEnc = standardEncoding;
+ }
+ }
+
+ // copy the base encoding
+ for (i = 0; i < 256; ++i) {
+ enc[i] = baseEnc[i];
+ if ((encFree[i] = baseEncFromFontFile) && enc[i]) {
+ enc[i] = copyString(baseEnc[i]);
+ }
+ }
+
+ // some Type 1C font files have empty encodings, which can break the
+ // T1C->T1 conversion (since the 'seac' operator depends on having
+ // the accents in the encoding), so we fill in any gaps from
+ // StandardEncoding
+ if (type == fontType1C && (extFontFile || embFontID.num >= 0) &&
+ baseEncFromFontFile) {
+ for (i = 0; i < 256; ++i) {
+ if (!enc[i] && standardEncoding[i]) {
+ enc[i] = standardEncoding[i];
+ encFree[i] = gFalse;
+ }
+ }
+ }
+
+ // merge differences into encoding
+ if (obj1.isDict()) {
+ obj1.dictLookup("Differences", &obj2);
+ if (obj2.isArray()) {
+ hasEncoding = gTrue;
+ code = 0;
+ for (i = 0; i < obj2.arrayGetLength(); ++i) {
+ obj2.arrayGet(i, &obj3);
+ if (obj3.isInt()) {
+ code = obj3.getInt();
+ } else if (obj3.isName()) {
+ if (code >= 0 && code < 256) {
+ if (encFree[code]) {
+ gfree(enc[code]);
+ }
+ enc[code] = copyString(obj3.getName());
+ encFree[code] = gTrue;
+ }
+ ++code;
+ } else {
+ error(-1, "Wrong type in font encoding resource differences (%s)",
+ obj3.getTypeName());
+ }
+ obj3.free();
+ }
+ }
+ obj2.free();
+ }
+ obj1.free();
+ if (ffT1) {
+ delete ffT1;
+ }
+ if (ffT1C) {
+ delete ffT1C;
+ }
+
+ //----- build the mapping to Unicode -----
+
+ // pass 1: use the name-to-Unicode mapping table
+ missing = hex = gFalse;
+ for (code = 0; code < 256; ++code) {
+ if ((charName = enc[code])) {
+ if (!(toUnicode[code] = globalParams->mapNameToUnicode(charName)) &&
+ strcmp(charName, ".notdef")) {
+ // if it wasn't in the name-to-Unicode table, check for a
+ // name that looks like 'Axx' or 'xx', where 'A' is any letter
+ // and 'xx' is two hex digits
+ if ((strlen(charName) == 3 &&
+ isalpha(charName[0]) &&
+ isxdigit(charName[1]) && isxdigit(charName[2]) &&
+ ((charName[1] >= 'a' && charName[1] <= 'f') ||
+ (charName[1] >= 'A' && charName[1] <= 'F') ||
+ (charName[2] >= 'a' && charName[2] <= 'f') ||
+ (charName[2] >= 'A' && charName[2] <= 'F'))) ||
+ (strlen(charName) == 2 &&
+ isxdigit(charName[0]) && isxdigit(charName[1]) &&
+ ((charName[0] >= 'a' && charName[0] <= 'f') ||
+ (charName[0] >= 'A' && charName[0] <= 'F') ||
+ (charName[1] >= 'a' && charName[1] <= 'f') ||
+ (charName[1] >= 'A' && charName[1] <= 'F')))) {
+ hex = gTrue;
+ }
+ missing = gTrue;
+ }
+ } else {
+ toUnicode[code] = 0;
+ }
+ }
+
+ // pass 2: try to fill in the missing chars, looking for names of
+ // the form 'Axx', 'xx', 'Ann', 'ABnn', or 'nn', where 'A' and 'B'
+ // are any letters, 'xx' is two hex digits, and 'nn' is 2-4
+ // decimal digits
+ if (missing && globalParams->getMapNumericCharNames()) {
+ for (code = 0; code < 256; ++code) {
+ if ((charName = enc[code]) && !toUnicode[code] &&
+ strcmp(charName, ".notdef")) {
+ n = strlen(charName);
+ code2 = -1;
+ if (hex && n == 3 && isalpha(charName[0]) &&
+ isxdigit(charName[1]) && isxdigit(charName[2])) {
+ sscanf(charName+1, "%x", &code2);
+ } else if (hex && n == 2 &&
+ isxdigit(charName[0]) && isxdigit(charName[1])) {
+ sscanf(charName, "%x", &code2);
+ } else if (!hex && n >= 2 && n <= 4 &&
+ isdigit(charName[0]) && isdigit(charName[1])) {
+ code2 = atoi(charName);
+ } else if (n >= 3 && n <= 5 &&
+ isdigit(charName[1]) && isdigit(charName[2])) {
+ code2 = atoi(charName+1);
+ } else if (n >= 4 && n <= 6 &&
+ isdigit(charName[2]) && isdigit(charName[3])) {
+ code2 = atoi(charName+2);
+ }
+ if (code2 >= 0 && code2 <= 0xff) {
+ toUnicode[code] = (Unicode)code2;
+ }
+ }
+ }
+ }
+
+ // construct the char code -> Unicode mapping object
+ ctu = CharCodeToUnicode::make8BitToUnicode(toUnicode);
+
+ // merge in a ToUnicode CMap, if there is one -- this overwrites
+ // existing entries in ctu, i.e., the ToUnicode CMap takes
+ // precedence, but the other encoding info is allowed to fill in any
+ // holes
+ readToUnicodeCMap(fontDict, 8, ctu);
+
+ // look for a Unicode-to-Unicode mapping
+ if (name && (utu = globalParams->getUnicodeToUnicode(name))) {
+ for (i = 0; i < 256; ++i) {
+ toUnicode[i] = 0;
+ }
+ ctu2 = CharCodeToUnicode::make8BitToUnicode(toUnicode);
+ for (i = 0; i < 256; ++i) {
+ n = ctu->mapToUnicode((CharCode)i, uBuf, 8);
+ if (n >= 1) {
+ n = utu->mapToUnicode((CharCode)uBuf[0], uBuf, 8);
+ if (n >= 1) {
+ ctu2->setMapping((CharCode)i, uBuf, n);
+ }
+ }
+ }
+ utu->decRefCnt();
+ delete ctu;
+ ctu = ctu2;
+ }
+
+ //----- get the character widths -----
+
+ // initialize all widths
+ for (code = 0; code < 256; ++code) {
+ widths[code] = missingWidth * 0.001;
+ }
+
+ // use widths from font dict, if present
+ fontDict->lookup("FirstChar", &obj1);
+ firstChar = obj1.isInt() ? obj1.getInt() : 0;
+ obj1.free();
+ if (firstChar < 0 || firstChar > 255) {
+ firstChar = 0;
+ }
+ fontDict->lookup("LastChar", &obj1);
+ lastChar = obj1.isInt() ? obj1.getInt() : 255;
+ obj1.free();
+ if (lastChar < 0 || lastChar > 255) {
+ lastChar = 255;
+ }
+ mul = (type == fontType3) ? fontMat[0] : 0.001;
+ fontDict->lookup("Widths", &obj1);
+ if (obj1.isArray()) {
+ flags |= fontFixedWidth;
+ if (obj1.arrayGetLength() < lastChar - firstChar + 1) {
+ lastChar = firstChar + obj1.arrayGetLength() - 1;
+ }
+ for (code = firstChar; code <= lastChar; ++code) {
+ obj1.arrayGet(code - firstChar, &obj2);
+ if (obj2.isNum()) {
+ widths[code] = obj2.getNum() * mul;
+ if (widths[code] != widths[firstChar]) {
+ flags &= ~fontFixedWidth;
+ }
+ }
+ obj2.free();
+ }
+
+ // use widths from built-in font
+ } else if (builtinFont) {
+ // this is a kludge for broken PDF files that encode char 32
+ // as .notdef
+ if (builtinFont->widths->getWidth("space", &w)) {
+ widths[32] = 0.001 * w;
+ }
+ for (code = 0; code < 256; ++code) {
+ if (enc[code] && builtinFont->widths->getWidth(enc[code], &w)) {
+ widths[code] = 0.001 * w;
+ }
+ }
+
+ // couldn't find widths -- use defaults
+ } else {
+ // this is technically an error -- the Widths entry is required
+ // for all but the Base-14 fonts -- but certain PDF generators
+ // apparently don't include widths for Arial and TimesNewRoman
+ if (isFixedWidth()) {
+ i = 0;
+ } else if (isSerif()) {
+ i = 8;