X-Git-Url: http://git.asbjorn.it/?a=blobdiff_plain;f=lib%2Fxpdf%2FParser.cc;fp=lib%2Fxpdf%2FParser.cc;h=af7c933114a47773f140e42697554c06e45eb4b5;hb=1279e7201aaf80eca58ad982ec11de68c85631af;hp=0000000000000000000000000000000000000000;hpb=e40d573cbf6659328121d0383d6d5ac8acd58c34;p=swftools.git diff --git a/lib/xpdf/Parser.cc b/lib/xpdf/Parser.cc new file mode 100644 index 0000000..af7c933 --- /dev/null +++ b/lib/xpdf/Parser.cc @@ -0,0 +1,214 @@ +//======================================================================== +// +// Parser.cc +// +// Copyright 1996-2003 Glyph & Cog, LLC +// +//======================================================================== + +#include + +#ifdef USE_GCC_PRAGMAS +#pragma implementation +#endif + +#include +#include "Object.h" +#include "Array.h" +#include "Dict.h" +#include "Parser.h" +#include "XRef.h" +#include "Error.h" +#include "Decrypt.h" + +Parser::Parser(XRef *xrefA, Lexer *lexerA) { + xref = xrefA; + lexer = lexerA; + inlineImg = 0; + lexer->getObj(&buf1); + lexer->getObj(&buf2); +} + +Parser::~Parser() { + buf1.free(); + buf2.free(); + delete lexer; +} + +Object *Parser::getObj(Object *obj, + Guchar *fileKey, int keyLength, + int objNum, int objGen) { + char *key; + Stream *str; + Object obj2; + int num; + Decrypt *decrypt; + GString *s; + char *p; + int i; + + // refill buffer after inline image data + if (inlineImg == 2) { + buf1.free(); + buf2.free(); + lexer->getObj(&buf1); + lexer->getObj(&buf2); + inlineImg = 0; + } + + // array + if (buf1.isCmd("[")) { + shift(); + obj->initArray(xref); + while (!buf1.isCmd("]") && !buf1.isEOF()) + obj->arrayAdd(getObj(&obj2, fileKey, keyLength, objNum, objGen)); + if (buf1.isEOF()) + error(getPos(), "End of file inside array"); + shift(); + + // dictionary or stream + } else if (buf1.isCmd("<<")) { + shift(); + obj->initDict(xref); + while (!buf1.isCmd(">>") && !buf1.isEOF()) { + if (!buf1.isName()) { + error(getPos(), "Dictionary key must be a name object"); + shift(); + } else { + key = copyString(buf1.getName()); + shift(); + if (buf1.isEOF() || buf1.isError()) { + gfree(key); + break; + } + obj->dictAdd(key, getObj(&obj2, fileKey, keyLength, objNum, objGen)); + } + } + if (buf1.isEOF()) + error(getPos(), "End of file inside dictionary"); + if (buf2.isCmd("stream")) { + if ((str = makeStream(obj))) { + obj->initStream(str); + if (fileKey) { + str->getBaseStream()->doDecryption(fileKey, keyLength, + objNum, objGen); + } + } else { + obj->free(); + obj->initError(); + } + } else { + shift(); + } + + // indirect reference or integer + } else if (buf1.isInt()) { + num = buf1.getInt(); + shift(); + if (buf1.isInt() && buf2.isCmd("R")) { + obj->initRef(num, buf1.getInt()); + shift(); + shift(); + } else { + obj->initInt(num); + } + + // string + } else if (buf1.isString() && fileKey) { + buf1.copy(obj); + s = obj->getString(); + decrypt = new Decrypt(fileKey, keyLength, objNum, objGen); + for (i = 0, p = obj->getString()->getCString(); + i < s->getLength(); + ++i, ++p) { + *p = decrypt->decryptByte(*p); + } + delete decrypt; + shift(); + + // simple object + } else { + buf1.copy(obj); + shift(); + } + + return obj; +} + +Stream *Parser::makeStream(Object *dict) { + Object obj; + BaseStream *baseStr; + Stream *str; + Guint pos, endPos, length; + + // get stream start position + lexer->skipToNextLine(); + pos = lexer->getPos(); + + // get length + dict->dictLookup("Length", &obj); + if (obj.isInt()) { + length = (Guint)obj.getInt(); + obj.free(); + } else { + error(getPos(), "Bad 'Length' attribute in stream"); + obj.free(); + return NULL; + } + + // check for length in damaged file + if (xref && xref->getStreamEnd(pos, &endPos)) { + length = endPos - pos; + } + + // in badly damaged PDF files, we can run off the end of the input + // stream immediately after the "stream" token + if (!lexer->getStream()) { + return NULL; + } + baseStr = lexer->getStream()->getBaseStream(); + + // skip over stream data + lexer->setPos(pos + length); + + // refill token buffers and check for 'endstream' + shift(); // kill '>>' + shift(); // kill 'stream' + if (buf1.isCmd("endstream")) { + shift(); + } else { + error(getPos(), "Missing 'endstream'"); + // kludge for broken PDF files: just add 5k to the length, and + // hope its enough + length += 5000; + } + + // make base stream + str = baseStr->makeSubStream(pos, gTrue, length, dict); + + // get filters + str = str->addFilters(dict); + + return str; +} + +void Parser::shift() { + if (inlineImg > 0) { + if (inlineImg < 2) { + ++inlineImg; + } else { + // in a damaged content stream, if 'ID' shows up in the middle + // of a dictionary, we need to reset + inlineImg = 0; + } + } else if (buf2.isCmd("ID")) { + lexer->skipChar(); // skip char after 'ID' command + inlineImg = 1; + } + buf1.free(); + buf1 = buf2; + if (inlineImg > 0) // don't buffer inline image data + buf2.initNull(); + else + lexer->getObj(&buf2); +}