1 //========================================================================
3 // CharCodeToUnicode.cc
5 // Copyright 2001-2002 Glyph & Cog, LLC
7 //========================================================================
10 #pragma implementation
20 #include "GlobalParams.h"
21 #include "PSTokenizer.h"
22 #include "CharCodeToUnicode.h"
24 //------------------------------------------------------------------------
// Maximum number of Unicode values stored per multi-value mapping entry:
// it sizes the u[] array below and bounds the copy loops in parseCMap1().
26 #define maxUnicodeString 8
// One char-code -> Unicode-sequence mapping. parseCMap1() and mapToUnicode()
// also access members named `c` and `len` on this struct.
28 struct CharCodeToUnicodeString {
// Storage for the mapped Unicode values (at most maxUnicodeString of them).
30 Unicode u[maxUnicodeString];
34 //------------------------------------------------------------------------
// Character-source callback in the int (*)(void *) form that PSTokenizer is
// constructed with. parseCMap() passes it with `data` set to the address of a
// cursor variable initialised from buf->getCString().
36 static int getCharFromString(void *data) {
// Character-source callback for file-backed input: `data` is the FILE * that
// parseCMap1() passes along when it follows a "usecmap" reference.
50 static int getCharFromFile(void *data) {
// fgetc() yields the next byte, or EOF at end of file / on read error.
51 return fgetc((FILE *)data);
54 //------------------------------------------------------------------------
// Builds a CharCodeToUnicode for the character collection `collectionA` from
// that collection's cidToUnicode file. The file is read line by line and each
// line is parsed as one hexadecimal value.
56 CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *collectionA) {
// size is the allocated capacity of mapA; mapLenA counts the lines consumed.
59 CharCode size, mapLenA;
62 CharCodeToUnicode *ctu;
// The file handle comes from the global parameters; a missing file is
// reported through error().
64 if (!(f = globalParams->getCIDToUnicodeFile(collectionA))) {
65 error(-1, "Couldn't find cidToUnicode file for the '%s' collection",
66 collectionA->getCString());
71 mapA = (Unicode *)gmalloc(size * sizeof(Unicode));
74 while (getLine(buf, sizeof(buf), f)) {
// Table is full: reallocate it at the (updated) size before storing more.
75 if (mapLenA == size) {
77 mapA = (Unicode *)grealloc(mapA, size * sizeof(Unicode));
// A valid line starts with a hexadecimal number.
79 if (sscanf(buf, "%x", &u) == 1) {
// The reported line number is 1-based, hence mapLenA + 1.
82 error(-1, "Bad line (%d) in cidToUnicode file for the '%s' collection",
83 (int)(mapLenA + 1), collectionA->getCString());
// The new object receives its own copy of the collection name; gTrue is
// passed in the copyMap position of the constructor.
89 ctu = new CharCodeToUnicode(collectionA->copy(), mapA, mapLenA, gTrue,
// Wraps a 256-entry Unicode table (one entry per 8-bit char code) in a new
// CharCodeToUnicode. No collection name and no multi-value entries are
// supplied (NULL, NULL, 0); gTrue is passed as the copyMap argument.
95 CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode) {
96 return new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0);
// Creates a CharCodeToUnicode from the text of a ToUnicode CMap held in
// `buf`; `nBits` is forwarded unchanged to parseCMap1().
99 CharCodeToUnicode *CharCodeToUnicode::parseCMap(GString *buf, int nBits) {
100 CharCodeToUnicode *ctu;
// Start from an object with no collection name.
103 ctu = new CharCodeToUnicode(NULL);
// p is the read cursor; its address is handed to getCharFromString as the
// callback's `data` argument.
104 p = buf->getCString();
105 ctu->parseCMap1(&getCharFromString, &p, nBits);
// Parses a ToUnicode CMap token stream, pulling characters through
// getCharFunc(data), and records the mappings in map / sMap. Three operators
// are recognised: "usecmap", "beginbfchar" and "beginbfrange". Malformed
// entries are reported through error().
109 void CharCodeToUnicode::parseCMap1(int (*getCharFunc)(void *), void *data,
// Token buffers; n1/n2/n3 receive the corresponding token lengths.
112 char tok1[256], tok2[256], tok3[256];
113 int nDigits, n1, n2, n3;
115 CharCode code1, code2;
123 pst = new PSTokenizer(getCharFunc, data);
// tok1 always holds the token that preceded tok2, so an operator found in
// tok2 can look back at its operand.
124 pst->getToken(tok1, sizeof(tok1), &n1);
125 while (pst->getToken(tok2, sizeof(tok2), &n2)) {
// "/Name usecmap": locate the named ToUnicode CMap file and parse it
// recursively into this same object.
126 if (!strcmp(tok2, "usecmap")) {
127 if (tok1[0] == '/') {
// Skip the leading '/' of the name token.
128 name = new GString(tok1 + 1);
129 if ((f = globalParams->findToUnicodeFile(name))) {
130 parseCMap1(&getCharFromFile, f, nBits);
133 error(-1, "Couldn't find ToUnicode CMap file for '%s'",
// Refill the look-behind token before continuing the main loop.
138 pst->getToken(tok1, sizeof(tok1), &n1);
// bfchar section: pairs of <srcCode> <dstUnicode> until "endbfchar".
139 } else if (!strcmp(tok2, "beginbfchar")) {
140 while (pst->getToken(tok1, sizeof(tok1), &n1)) {
141 if (!strcmp(tok1, "endbfchar")) {
// A source code must be followed by a destination token, not by the end
// of input or by the section terminator.
144 if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
145 !strcmp(tok2, "endbfchar")) {
146 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
// The source code must be exactly nDigits characters wrapped in <...>; the
// destination only has to be wrapped in <...>.
149 if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' &&
150 tok2[0] == '<' && tok2[n2 - 1] == '>')) {
151 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
// Overwrite the closing '>' so sscanf sees a plain hex string after the
// leading '<' is skipped with "+ 1".
154 tok1[n1 - 1] = tok2[n2 - 1] = '\0';
155 if (sscanf(tok1 + 1, "%x", &code1) != 1) {
156 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
// Grow map so that index code1 is valid; the new length is the next
// multiple of 256 above code1, and the loop visits the new slots
// [oldLen, mapLen).
159 if (code1 >= mapLen) {
161 mapLen = (code1 + 256) & ~255;
162 map = (Unicode *)grealloc(map, mapLen * sizeof(Unicode));
163 for (i = oldLen; i < mapLen; ++i) {
// Parse the whole destination token as one hex number.
168 if (sscanf(tok2 + 1, "%x", &u) != 1) {
169 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
// Multi-value destination: enlarge sMap when it is full, then append an
// entry for code1.
175 if (sMapLen == sMapSize) {
177 sMap = (CharCodeToUnicodeString *)
178 grealloc(sMap, sMapSize * sizeof(CharCodeToUnicodeString));
180 sMap[sMapLen].c = code1;
// Each Unicode value occupies four hex digits; n2 - 2 drops the brackets.
// NOTE(review): len is stored without being capped at maxUnicodeString,
// although the copy loop below is capped - confirm readers of len cope.
181 sMap[sMapLen].len = (n2 - 2) / 4;
182 for (j = 0; j < sMap[sMapLen].len && j < maxUnicodeString; ++j) {
// Extract the j-th group of four hex digits and convert it.
183 strncpy(uHex, tok2 + 1 + j*4, 4);
185 if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) {
186 error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
// Refill the look-behind token before continuing the main loop.
192 pst->getToken(tok1, sizeof(tok1), &n1);
// bfrange section: triples of <loCode> <hiCode> <dstUnicode> until
// "endbfrange".
193 } else if (!strcmp(tok2, "beginbfrange")) {
194 while (pst->getToken(tok1, sizeof(tok1), &n1)) {
195 if (!strcmp(tok1, "endbfrange")) {
// Two more tokens are required, and neither may be the terminator.
198 if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
199 !strcmp(tok2, "endbfrange") ||
200 !pst->getToken(tok3, sizeof(tok3), &n3) ||
201 !strcmp(tok3, "endbfrange")) {
202 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
// Both range ends must be exactly nDigits characters wrapped in <...>; the
// destination only has to be wrapped in <...>.
205 if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' &&
206 n2 == 2 + nDigits && tok2[0] == '<' && tok2[n2 - 1] == '>' &&
207 tok3[0] == '<' && tok3[n3 - 1] == '>')) {
208 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
// Strip the closing '>' from all three tokens before hex parsing.
211 tok1[n1 - 1] = tok2[n2 - 1] = tok3[n3 - 1] = '\0';
212 if (sscanf(tok1 + 1, "%x", &code1) != 1 ||
213 sscanf(tok2 + 1, "%x", &code2) != 1) {
214 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
// Grow map so that the upper end of the range, code2, is a valid index.
217 if (code2 >= mapLen) {
219 mapLen = (code2 + 256) & ~255;
220 map = (Unicode *)grealloc(map, mapLen * sizeof(Unicode));
221 for (i = oldLen; i < mapLen; ++i) {
226 if (sscanf(tok3 + 1, "%x", &u) != 1) {
227 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
// Walk every code in the inclusive range [code1, code2].
230 for (; code1 <= code2; ++code1) {
// Multi-value destination: make room in sMap for one entry per code in the
// range, rounding the capacity up to a multiple of 8.
234 if (sMapLen + (int)(code2 - code1 + 1) > sMapSize) {
235 sMapSize = (sMapSize + (code2 - code1 + 1) + 7) & ~7;
236 sMap = (CharCodeToUnicodeString *)
237 grealloc(sMap, sMapSize * sizeof(CharCodeToUnicodeString));
// i is the offset of the current code from the start of the range.
239 for (i = 0; code1 <= code2; ++code1, ++i) {
241 sMap[sMapLen].c = code1;
242 sMap[sMapLen].len = (n3 - 2) / 4;
243 for (j = 0; j < sMap[sMapLen].len && j < maxUnicodeString; ++j) {
244 strncpy(uHex, tok3 + 1 + j*4, 4);
246 if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) {
247 error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
// The last Unicode value of the sequence is advanced by the offset i.
// NOTE(review): the index is len - 1 with len uncapped; if the destination
// has more than maxUnicodeString groups this looks like it writes past
// u[], and if len is 0 it indexes u[-1] - confirm and guard if so.
250 sMap[sMapLen].u[sMap[sMapLen].len - 1] += i;
// Refill the look-behind token before continuing the main loop.
255 pst->getToken(tok1, sizeof(tok1), &n1);
// Constructs a mapping with no multi-value entries. The collectionA pointer
// is stored as-is (no copy is made here).
263 CharCodeToUnicode::CharCodeToUnicode(GString *collectionA) {
266 collection = collectionA;
// Allocate the single-value table with mapLen entries and visit each slot
// (presumably to clear it - the loop body is on a line not shown here).
268 map = (Unicode *)gmalloc(mapLen * sizeof(Unicode));
269 for (i = 0; i < mapLen; ++i) {
// The multi-value table starts out empty.
273 sMapLen = sMapSize = 0;
// Constructs a mapping from caller-supplied tables: mapA holds mapLenA
// single-value entries and sMapA holds the multi-value entries. The
// collectionA pointer is stored as-is.
277 CharCodeToUnicode::CharCodeToUnicode(GString *collectionA, Unicode *mapA,
278 CharCode mapLenA, GBool copyMap,
279 CharCodeToUnicodeString *sMapA,
281 collection = collectionA;
// Duplicate the caller's table into freshly allocated storage (presumably
// only when copyMap is set - the condition is on a line not shown here).
284 map = (Unicode *)gmalloc(mapLen * sizeof(Unicode));
285 memcpy(map, mapA, mapLen * sizeof(Unicode));
// The multi-value table is treated as exactly full: length == capacity.
290 sMapLen = sMapSize = sMapLenA;
// Destructor. NOTE(review): the constructors allocate `map` with gmalloc and
// may hold `collection` / `sMap`; confirm all three are released here.
294 CharCodeToUnicode::~CharCodeToUnicode() {
// Presumably adds one reference to this object; CIDToUnicodeCache calls it
// when it hands out an entry that is already cached.
304 void CharCodeToUnicode::incRefCnt() {
// Presumably drops one reference to this object; CIDToUnicodeCache calls it
// when it evicts an entry and from its destructor.
308 void CharCodeToUnicode::decRefCnt() {
// Reports whether this mapping belongs to the collection named collectionA.
// Objects created without a collection name (collection == NULL) never match.
314 GBool CharCodeToUnicode::match(GString *collectionA) {
// True only when a name is stored and cmp() returns 0 for it.
315 return collection && !collection->cmp(collectionA);
// Looks up the Unicode value(s) for char code c; `u` is the caller's output
// buffer and `size` bounds how many values are copied into it.
318 int CharCodeToUnicode::mapToUnicode(CharCode c, Unicode *u, int size) {
// Linear search of the multi-value table for an entry whose code equals c.
328 for (i = 0; i < sMapLen; ++i) {
329 if (sMap[i].c == c) {
// Copy no more than `size` values, however long the stored sequence is.
// NOTE(review): the loop is not bounded by maxUnicodeString, while len is
// stored uncapped by parseCMap1() - confirm u[j] cannot be read out of range.
330 for (j = 0; j < sMap[i].len && j < size; ++j) {
339 //------------------------------------------------------------------------
// Constructs the cache by visiting all cidToUnicodeCacheSize slots
// (presumably setting each to NULL - the loop body is not shown here).
341 CIDToUnicodeCache::CIDToUnicodeCache() {
344 for (i = 0; i < cidToUnicodeCacheSize; ++i) {
// Releases the cache's reference on every cached mapping.
349 CIDToUnicodeCache::~CIDToUnicodeCache() {
352 for (i = 0; i < cidToUnicodeCacheSize; ++i) {
// NOTE(review): slots can be empty (getCIDToUnicode tests cache[i] before
// use); confirm a null check guards this call on the line not shown here.
354 cache[i]->decRefCnt();
// Returns the CharCodeToUnicode for `collection`, consulting the cache first
// and parsing the collection's cidToUnicode file on a miss.
359 CharCodeToUnicode *CIDToUnicodeCache::getCIDToUnicode(GString *collection) {
360 CharCodeToUnicode *ctu;
// Fast path: the front slot already holds this collection; take an extra
// reference on it.
363 if (cache[0] && cache[0]->match(collection)) {
364 cache[0]->incRefCnt();
// Otherwise scan the remaining slots.
367 for (i = 1; i < cidToUnicodeCacheSize; ++i) {
368 if (cache[i] && cache[i]->match(collection)) {
// Hit at slot i: shift slots 0..i-1 down by one, which frees slot 0
// (presumably for the entry that was just found).
370 for (j = i; j >= 1; --j) {
371 cache[j] = cache[j - 1];
// Miss: build the mapping from the collection's cidToUnicode file.
378 if ((ctu = CharCodeToUnicode::parseCIDToUnicode(collection))) {
// Drop the cache's reference on the entry in the last slot, which is about
// to be overwritten by the shift below.
379 if (cache[cidToUnicodeCacheSize - 1]) {
380 cache[cidToUnicodeCacheSize - 1]->decRefCnt();
// Shift every entry down by one slot, freeing slot 0.
382 for (j = cidToUnicodeCacheSize - 1; j >= 1; --j) {
383 cache[j] = cache[j - 1];