1 //========================================================================
5 // Copyright 1996-2003 Glyph & Cog, LLC
7 //========================================================================
11 #ifdef USE_GCC_PRAGMAS
12 #pragma implementation
26 #include "ErrorCodes.h"
29 //------------------------------------------------------------------------
// Size of the tail window scanned by XRef::getStartXref(); that function
// also uses it (plus one) as the size of its local buffer.
31 #define xrefSearchSize 1024 // read this many bytes at end of file
32 // to look for 'startxref'
34 //------------------------------------------------------------------------
36 //------------------------------------------------------------------------
// Permission bit masks. XRef::okToPrint(), okToChange(), okToCopy() and
// okToAddNotes() each test one of these bits against permFlags.
38 #define permPrint (1<<2)
39 #define permChange (1<<3)
40 #define permCopy (1<<4)
41 #define permNotes (1<<5)
// Value the XRef constructor assigns to permFlags. All four permission
// bits above (bits 2..5) are set in it.
42 #define defPermFlags 0xfffc
44 //------------------------------------------------------------------------
46 //------------------------------------------------------------------------
51 // Create an object stream, using object number <objStrNum>,
// The constructor fetches that object (generation 0) through <xref>
// and parses every object stored in it.
53 ObjectStream(XRef *xref, int objStrNumA);
57 // Return the object number of this object stream.
58 int getObjStrNum() { return objStrNum; }
60 // Get the <objIdx>th object from this stream, which should be
61 // object number <objNum>, generation 0.
// The stored object is copied into <obj>. If <objIdx> is out of range
// or <objNum> differs from the number recorded for that index, the
// result of obj->initNull() is returned instead.
62 Object *getObject(int objIdx, int objNum, Object *obj);
// Data filled in by the constructor.
66 int objStrNum; // object number of the object stream
67 int nObjects; // number of objects in the stream
68 Object *objs; // the objects (length = nObjects)
69 int *objNums; // the object numbers (length = nObjects)
// Load object stream number <objStrNumA> through <xref>.
// Steps: fetch the stream object, read the integer dictionary entries
// "N" and "First", parse the header of (object number, offset) pairs,
// then parse each of the N objects into objs[].
72 ObjectStream::ObjectStream(XRef *xref, int objStrNumA) {
76 Object objStr, obj1, obj2;
79 objStrNum = objStrNumA;
// fetch the object (generation 0); it has to be a stream
84 if (!xref->fetch(objStrNum, 0, &objStr)->isStream()) {
// "N" has to be an integer; it becomes the object count
88 if (!objStr.streamGetDict()->lookup("N", &obj1)->isInt()) {
92 nObjects = obj1.getInt();
// "First" has to be an integer; it is passed below as the length
// argument of the header sub-stream and is the starting index of the
// skip loop that runs up to offsets[0]
98 if (!objStr.streamGetDict()->lookup("First", &obj1)->isInt()) {
102 first = obj1.getInt();
// one slot per object: the parsed object, its number and its offset
108 objs = new Object[nObjects];
109 objNums = (int *)gmallocn(nObjects, sizeof(int));
110 offsets = (int *)gmallocn(nObjects, sizeof(int));
112 // parse the header: object numbers and offsets
113 objStr.streamReset();
// presumably a sub-stream limited to <first> bytes -- TODO confirm
// against the EmbedStream constructor
115 str = new EmbedStream(objStr.getStream(), &obj1, gTrue, first);
116 parser = new Parser(xref, new Lexer(xref, str));
117 for (i = 0; i < nObjects; ++i) {
// each header entry is a pair of integers: object number, then offset
118 parser->getObj(&obj1);
119 parser->getObj(&obj2);
120 if (!obj1.isInt() || !obj2.isInt()) {
127 objNums[i] = obj1.getInt();
128 offsets[i] = obj2.getInt();
// reject negative values and offsets smaller than the previous one
131 if (objNums[i] < 0 || offsets[i] < 0 ||
132 (i > 0 && offsets[i] < offsets[i-1])) {
// consume whatever is left of the header sub-stream
138 while (str->getChar() != EOF) ;
141 // skip to the first object - this shouldn't be necessary because
142 // the First key is supposed to be equal to offsets[0], but just in
144 for (i = first; i < offsets[0]; ++i) {
145 objStr.getStream()->getChar();
// parse the objects one by one, each from its own EmbedStream
149 for (i = 0; i < nObjects; ++i) {
// the last object is given (gFalse, 0) instead of an explicit length;
// every other object is given the distance to the next offset
151 if (i == nObjects - 1) {
152 str = new EmbedStream(objStr.getStream(), &obj1, gFalse, 0);
154 str = new EmbedStream(objStr.getStream(), &obj1, gTrue,
155 offsets[i+1] - offsets[i]);
157 parser = new Parser(xref, new Lexer(xref, str));
158 parser->getObj(&objs[i]);
// read to the end of this object's sub-stream before the next one
159 while (str->getChar() != EOF) ;
// Destructor: walks all nObjects entries -- presumably to release each
// objs[i]; TODO confirm against the loop body.
170 ObjectStream::~ObjectStream() {
174 for (i = 0; i < nObjects; ++i) {
// Copy the object at index <objIdx> into <obj> and return the result of
// the copy. The index must lie in [0, nObjects) and <objNum> must equal
// the object number recorded for that index; otherwise obj->initNull()
// is returned.
182 Object *ObjectStream::getObject(int objIdx, int objNum, Object *obj) {
// bounds are checked first, so objNums[objIdx] is only read for a
// valid index
183 if (objIdx < 0 || objIdx >= nObjects || objNum != objNums[objIdx]) {
184 return obj->initNull();
186 return objs[objIdx].copy(obj);
189 //------------------------------------------------------------------------
191 //------------------------------------------------------------------------
// Build the cross-reference data for a document stream.
// Starts with default permissions and no owner password, locates the
// xref position with getStartXref(), reads the xref sections, and falls
// back to constructXRef() at several points. When constructXRef()
// fails, ok is false and errCode is set to errDamaged.
193 XRef::XRef(BaseStream *strA) {
206 permFlags = defPermFlags;
207 ownerPasswordOk = gFalse;
211 start = str->getStart();
212 pos = getStartXref();
214 // if there was a problem with the 'startxref' position, try to
215 // reconstruct the xref table
217 if (!(ok = constructXRef())) {
218 errCode = errDamaged;
222 // read the xref table
// keep calling readXRef() for as long as it returns true; pos is
// passed by address so each call can move it to the next section
224 while (readXRef(&pos)) ;
226 // if there was a problem with the xref table,
227 // try to reconstruct it
229 if (!(ok = constructXRef())) {
230 errCode = errDamaged;
236 // get the root dictionary (catalog) object
// the entry is read with dictLookupNF and its reference number and
// generation are recorded in rootNum / rootGen
237 trailerDict.dictLookupNF("Root", &obj);
239 rootNum = obj.getRefNum();
240 rootGen = obj.getRefGen();
// another reconstruction attempt -- presumably taken when "Root" is
// not a reference; TODO confirm the guarding condition
244 if (!(ok = constructXRef())) {
245 errCode = errDamaged;
250 // now set the trailer dictionary's xref pointer so we can fetch
251 // indirect objects from it
252 trailerDict.getDict()->setXRef(this);
266 // Read the 'startxref' position.
// Reads up to xrefSearchSize bytes into a local buffer, searches it
// backwards for the keyword "startxref", and parses the unsigned number
// that follows it into lastXRefPos.
267 Guint XRef::getStartXref() {
// one byte more than the window -- presumably for a terminating NUL
268 char buf[xrefSearchSize+1];
272 // read last xrefSearchSize bytes
273 str->setPos(xrefSearchSize, -1);
// stop early if the stream ends before the window is full
274 for (n = 0; n < xrefSearchSize; ++n) {
275 if ((c = str->getChar()) == EOF) {
// search from the end of the data so the last occurrence is found
// first; 9 is the length of "startxref"
283 for (i = n - 9; i >= 0; --i) {
284 if (!strncmp(&buf[i], "startxref", 9)) {
// skip whitespace after the keyword, then parse the number
// NOTE(review): isspace() receives a plain char here; a negative value
// other than EOF is undefined behavior -- consider casting to
// unsigned char
291 for (p = &buf[i+9]; isspace(*p); ++p) ;
292 lastXRefPos = strToUnsigned(p);
297 // Read one xref table section. Also reads the associated trailer
298 // dictionary, and returns the prev pointer (if any).
// <pos> is in/out: the section is parsed at offset start + *pos, and
// the helpers called below receive <pos> so they can update it. The
// flag obtained from the helper is kept in <more>.
299 GBool XRef::readXRef(Guint *pos) {
304 // start up a parser, parse one token
// the parser is created with a NULL xref pointer
306 parser = new Parser(NULL,
308 str->makeSubStream(start + *pos, gFalse, 0, &obj)));
309 parser->getObj(&obj);
311 // parse an old-style xref table
312 if (obj.isCmd("xref")) {
314 more = readXRefTable(parser, pos);
316 // parse an xref stream
// expected token sequence: <int> <int> "obj" <stream>
317 } else if (obj.isInt()) {
319 if (!parser->getObj(&obj)->isInt()) {
323 if (!parser->getObj(&obj)->isCmd("obj")) {
327 if (!parser->getObj(&obj)->isStream()) {
330 more = readXRefStream(obj.getStream(), pos);
// Parse an old-style xref table from <parser>: subsections of entries
// up to the "trailer" keyword, then the trailer dictionary. <pos> is
// overwritten from the dictionary's "Prev" entry when that entry is an
// integer (or, as a workaround, a reference).
347 GBool XRef::readXRefTable(Parser *parser, Guint *pos) {
352 int first, n, newSize, i;
// read the next token; the keyword "trailer" marks the end of the
// subsections
355 parser->getObj(&obj);
356 if (obj.isCmd("trailer")) {
// subsection header: first object number, then an integer count
363 first = obj.getInt();
365 if (!parser->getObj(&obj)->isInt()) {
// NOTE(review): "first + n < 0" detects overflow only if signed
// addition wraps; signed overflow is undefined behavior in C++
370 if (first < 0 || n < 0 || first + n < 0) {
// grow the table: start from twice the current size (1024 when the
// table is empty) and keep enlarging until first + n fits; the
// "newSize > 0" test ends the loop if newSize stops being positive
373 if (first + n > size) {
374 for (newSize = size ? 2 * size : 1024;
375 first + n > newSize && newSize > 0;
380 entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry));
// new slots are marked free, with 0xffffffff as the offset sentinel
381 for (i = size; i < newSize; ++i) {
382 entries[i].offset = 0xffffffff;
383 entries[i].type = xrefEntryFree;
// each entry: integer offset, integer generation, then 'n' or 'f'
387 for (i = first; i < first + n; ++i) {
388 if (!parser->getObj(&obj)->isInt()) {
391 entry.offset = (Guint)obj.getInt();
393 if (!parser->getObj(&obj)->isInt()) {
396 entry.gen = obj.getInt();
398 parser->getObj(&obj);
399 if (obj.isCmd("n")) {
400 entry.type = xrefEntryUncompressed;
401 } else if (obj.isCmd("f")) {
402 entry.type = xrefEntryFree;
// act only on slots that still hold the sentinel offset -- presumably
// so an already recorded entry is not overwritten; TODO confirm
407 if (entries[i].offset == 0xffffffff) {
409 // PDF files of patents from the IBM Intellectual Property
410 // Network have a bug: the xref table claims to start at 1
// workaround: copy entry 1 into entry 0 and reset entry 1's offset to
// the sentinel
412 if (i == 1 && first == 1 &&
413 entries[1].offset == 0 && entries[1].gen == 65535 &&
414 entries[1].type == xrefEntryFree) {
416 entries[0] = entries[1];
417 entries[1].offset = 0xffffffff;
423 // read the trailer dictionary
424 if (!parser->getObj(&obj)->isDict()) {
428 // get the 'Prev' pointer
429 obj.getDict()->lookupNF("Prev", &obj2);
431 *pos = (Guint)obj2.getInt();
433 } else if (obj2.isRef()) {
434 // certain buggy PDF generators generate "/Prev NNN 0 R" instead
// in that case the reference's object number is used as the position
436 *pos = (Guint)obj2.getRefNum();
443 // save the first trailer dictionary
// a trailer already stored in trailerDict is left untouched
444 if (trailerDict.isNone()) {
445 obj.copy(&trailerDict);
448 // check for an 'XRefStm' key
// an integer value is taken as the position pos2
449 if (obj.getDict()->lookup("XRefStm", &obj2)->isInt()) {
450 pos2 = (Guint)obj2.getInt();
// Parse a cross-reference stream. The stream dictionary supplies "Size"
// (table size), "W" (three field widths), "Index" (subsection ranges)
// and "Prev"; the entry data is decoded by readXRefStreamSection().
// <pos> is overwritten from "Prev".
468 GBool XRef::readXRefStream(Stream *xrefStr, Guint *pos) {
472 Object obj, obj2, idx;
473 int newSize, first, n, i;
475 dict = xrefStr->getDict();
// "Size" has to be an integer
477 if (!dict->lookupNF("Size", &obj)->isInt()) {
480 newSize = obj.getInt();
// grow the table to newSize entries; new slots are marked free, with
// 0xffffffff as the offset sentinel
485 if (newSize > size) {
486 entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry));
487 for (i = size; i < newSize; ++i) {
488 entries[i].offset = 0xffffffff;
489 entries[i].type = xrefEntryFree;
// "W" has to be an array with at least three elements; the first
// three must be integers in the range 0..4
494 if (!dict->lookupNF("W", &obj)->isArray() ||
495 obj.arrayGetLength() < 3) {
498 for (i = 0; i < 3; ++i) {
499 if (!obj.arrayGet(i, &obj2)->isInt()) {
503 w[i] = obj2.getInt();
505 if (w[i] < 0 || w[i] > 4) {
// "Index" is walked in pairs: element i is the first object number,
// element i+1 has to be an integer as well
512 dict->lookupNF("Index", &idx);
514 for (i = 0; i+1 < idx.arrayGetLength(); i += 2) {
515 if (!idx.arrayGet(i, &obj)->isInt()) {
519 first = obj.getInt();
521 if (!idx.arrayGet(i+1, &obj)->isInt()) {
527 if (first < 0 || n < 0 ||
528 !readXRefStreamSection(xrefStr, w, first, n)) {
// alternative path: one section covering 0..newSize -- presumably used
// when "Index" is not an array; TODO confirm
534 if (!readXRefStreamSection(xrefStr, w, 0, newSize)) {
541 dict->lookupNF("Prev", &obj);
543 *pos = (Guint)obj.getInt();
// the stream dictionary becomes the trailer if none is stored yet
549 if (trailerDict.isNone()) {
550 trailerDict.initDict(dict);
// Decode <n> entries of a cross-reference stream, starting at object
// number <first>. Each entry consists of three fields of w[0], w[1] and
// w[2] bytes: type, offset and generation.
562 GBool XRef::readXRefStreamSection(Stream *xrefStr, int *w, int first, int n) {
564 int type, gen, c, newSize, i, j;
// grow the table the same way as readXRefTable: double from the
// current size (1024 when empty) until first + n fits
569 if (first + n > size) {
570 for (newSize = size ? 2 * size : 1024;
571 first + n > newSize && newSize > 0;
576 entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry));
577 for (i = size; i < newSize; ++i) {
578 entries[i].offset = 0xffffffff;
579 entries[i].type = xrefEntryFree;
583 for (i = first; i < first + n; ++i) {
// each field is accumulated most-significant byte first; a zero width
// leaves the field at its initial value 0
587 for (type = 0, j = 0; j < w[0]; ++j) {
588 if ((c = xrefStr->getChar()) == EOF) {
591 type = (type << 8) + c;
594 for (offset = 0, j = 0; j < w[1]; ++j) {
595 if ((c = xrefStr->getChar()) == EOF) {
598 offset = (offset << 8) + c;
600 for (gen = 0, j = 0; j < w[2]; ++j) {
601 if ((c = xrefStr->getChar()) == EOF) {
604 gen = (gen << 8) + c;
// act only on slots that still hold the sentinel offset
606 if (entries[i].offset == 0xffffffff) {
// three alternatives that differ only in the entry kind stored: free,
// uncompressed, compressed -- presumably selected by <type>; TODO
// confirm the selector values
609 entries[i].offset = offset;
610 entries[i].gen = gen;
611 entries[i].type = xrefEntryFree;
614 entries[i].offset = offset;
615 entries[i].gen = gen;
616 entries[i].type = xrefEntryUncompressed;
619 entries[i].offset = offset;
620 entries[i].gen = gen;
621 entries[i].type = xrefEntryCompressed;
632 // Attempt to construct an xref table for a damaged file.
// Reads the stream line by line and reacts to three kinds of lines:
// "trailer" (parse the dictionary that follows), "<num> <gen> obj"
// (record an uncompressed entry at that position) and "endstream"
// (record the position in streamEnds).
633 GBool XRef::constructXRef() {
635 Object newTrailerDict, obj;
649 error(0, "PDF file is damaged - attempting to reconstruct xref table...");
// start with an empty list of stream end positions
651 streamEndsLen = streamEndsSize = 0;
// lines are read with a 256-byte limit
656 if (!str->getLine(buf, 256)) {
661 // got trailer dictionary
662 if (!strncmp(p, "trailer", 7)) {
// parse what follows the 7-character keyword
664 parser = new Parser(NULL,
666 str->makeSubStream(pos + 7, gFalse, 0, &obj)));
667 parser->getObj(&newTrailerDict);
668 if (newTrailerDict.isDict()) {
// record the "Root" reference and store this dictionary as the trailer
669 newTrailerDict.dictLookupNF("Root", &obj);
671 rootNum = obj.getRefNum();
672 rootGen = obj.getRefGen();
673 if (!trailerDict.isNone()) {
676 newTrailerDict.copy(&trailerDict);
681 newTrailerDict.free();
// a line starting with a digit may be "<num> <gen> obj": scan digits,
// whitespace, digits, whitespace, then the keyword
685 } else if (isdigit(*p)) {
690 } while (*p && isdigit(*p));
694 } while (*p && isspace(*p));
699 } while (*p && isdigit(*p));
703 } while (*p && isspace(*p));
704 if (!strncmp(p, "obj", 3)) {
// round num + 1 up to a multiple of 256
706 newSize = (num + 1 + 255) & ~255;
708 error(-1, "Bad object number");
711 entries = (XRefEntry *)
712 greallocn(entries, newSize, sizeof(XRefEntry));
713 for (i = size; i < newSize; ++i) {
714 entries[i].offset = 0xffffffff;
715 entries[i].type = xrefEntryFree;
// record the object if the slot is free or this generation is at
// least as high as the stored one; the offset is relative to start
719 if (entries[num].type == xrefEntryFree ||
720 gen >= entries[num].gen) {
721 entries[num].offset = pos - start;
722 entries[num].gen = gen;
723 entries[num].type = xrefEntryUncompressed;
731 } else if (!strncmp(p, "endstream", 9)) {
// grow the list in steps of 64 elements
// NOTE(review): the element size is given as sizeof(int) while the
// array is addressed as Guint*; sizeof(Guint) would match the element
// type (presumably the same size -- confirm)
732 if (streamEndsLen == streamEndsSize) {
733 streamEndsSize += 64;
734 streamEnds = (Guint *)greallocn(streamEnds,
735 streamEndsSize, sizeof(int));
737 streamEnds[streamEndsLen++] = pos;
744 error(-1, "Couldn't find trailer dictionary");
// Store the encryption parameters: permission flags, owner-password
// status, the file key and its length, and the encryption version.
748 void XRef::setEncryption(int permFlagsA, GBool ownerPasswordOkA,
749 Guchar *fileKeyA, int keyLengthA, int encVersionA) {
753 permFlags = permFlagsA;
754 ownerPasswordOk = ownerPasswordOkA;
// lengths up to 16 are taken as given -- presumably larger values are
// clamped on the other branch; TODO confirm
755 if (keyLengthA <= 16) {
756 keyLength = keyLengthA;
// copy keyLength bytes of the caller's key into fileKey
760 for (i = 0; i < keyLength; ++i) {
761 fileKey[i] = fileKeyA[i];
763 encVersion = encVersionA;
// Nonzero when the owner password was accepted and <ignoreOwnerPW> is
// false, or when the permPrint bit is set in permFlags.
766 GBool XRef::okToPrint(GBool ignoreOwnerPW) {
767 return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permPrint);
// Nonzero when the owner password was accepted and <ignoreOwnerPW> is
// false, or when the permChange bit is set in permFlags.
770 GBool XRef::okToChange(GBool ignoreOwnerPW) {
771 return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permChange);
// Nonzero when the owner password was accepted and <ignoreOwnerPW> is
// false, or when the permCopy bit is set in permFlags.
774 GBool XRef::okToCopy(GBool ignoreOwnerPW) {
775 return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permCopy);
// Nonzero when the owner password was accepted and <ignoreOwnerPW> is
// false, or when the permNotes bit is set in permFlags.
778 GBool XRef::okToAddNotes(GBool ignoreOwnerPW) {
779 return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permNotes);
// Fetch indirect object <num> <gen> into <obj>. Uncompressed entries
// are parsed from the file at the recorded offset; compressed entries
// are taken from an ObjectStream. The function ends by returning
// obj->initNull().
782 Object *XRef::fetch(int num, int gen, Object *obj) {
785 Object obj1, obj2, obj3;
787 // check for bogus ref - this can happen in corrupted PDF files
788 if (num < 0 || num >= size) {
795 case xrefEntryUncompressed:
// parse at start + e->offset; the object has to begin with
// "<num> <gen> obj" matching the requested reference
800 parser = new Parser(this,
802 str->makeSubStream(start + e->offset, gFalse, 0, &obj1)));
803 parser->getObj(&obj1);
804 parser->getObj(&obj2);
805 parser->getObj(&obj3);
806 if (!obj1.isInt() || obj1.getInt() != num ||
807 !obj2.isInt() || obj2.getInt() != gen ||
808 !obj3.isCmd("obj")) {
// the file key is passed only when <encrypted> is set, NULL otherwise
815 parser->getObj(obj, encrypted ? fileKey : (Guchar *)NULL, keyLength,
823 case xrefEntryCompressed:
// for compressed entries e->offset is the number of the containing
// object stream and e->gen is the index inside it; the current objStr
// is reused when its number already matches
827 if (!objStr || objStr->getObjStrNum() != (int)e->offset) {
831 objStr = new ObjectStream(this, e->offset);
833 objStr->getObject(e->gen, num, obj);
843 return obj->initNull();
// Look up the "Info" entry of the trailer dictionary with dictLookup,
// store it in <obj> and return the lookup's result.
846 Object *XRef::getDocInfo(Object *obj) {
847 return trailerDict.dictLookup("Info", obj);
850 // Added for the pdftex project.
// Same as getDocInfo() but uses dictLookupNF -- presumably returning
// the entry without resolving an indirect reference; TODO confirm.
851 Object *XRef::getDocInfoNF(Object *obj) {
852 return trailerDict.dictLookupNF("Info", obj);
// Find a recorded stream end for <streamStart> by searching
// streamEnds[] and store it in <*streamEnd>. Per the invariant below
// the result is the first recorded end that is >= <streamStart>.
// Assumes streamEnds[] is in ascending order -- TODO confirm;
// constructXRef() appends positions in the order it finds them.
855 GBool XRef::getStreamEnd(Guint streamStart, Guint *streamEnd) {
// the search is skipped when the list is empty or <streamStart> lies
// beyond the last recorded end
858 if (streamEndsLen == 0 ||
859 streamStart > streamEnds[streamEndsLen - 1]) {
864 b = streamEndsLen - 1;
865 // invariant: streamEnds[a] < streamStart <= streamEnds[b]
868 if (streamStart <= streamEnds[m]) {
874 *streamEnd = streamEnds[b];
// Parse the leading decimal digits of <s> into x. At most 10 digits
// are consumed; parsing stops at the first non-digit or at the end of
// the string.
878 Guint XRef::strToUnsigned(char *s) {
// NOTE(review): isdigit() receives a plain char; a negative value is
// undefined behavior -- consider casting to unsigned char. If x is a
// 32-bit unsigned, a 10-digit value can exceed its range and wrap --
// confirm the type of x.
884 for (p = s, i = 0; *p && isdigit(*p) && i < 10; ++p, ++i) {
885 x = 10 * x + (*p - '0');