[Bf-extensions-cvs] [2f47d3d] master: Fix T49583: Update pdf reader to handle PDF 1.5, which has object streams
Howard Trickey
noreply at git.blender.org
Mon Nov 28 16:04:44 CET 2016
Commit: 2f47d3d7cfea05672f7bb706c6878b7d07d52a1b
Author: Howard Trickey
Date: Mon Nov 28 10:03:01 2016 -0500
Branches: master
https://developer.blender.org/rBAC2f47d3d7cfea05672f7bb706c6878b7d07d52a1b
Fix T49583: Update pdf reader to handle PDF 1.5, which has object streams
===================================================================
M io_vector/__init__.py
M io_vector/pdf.py
===================================================================
diff --git a/io_vector/__init__.py b/io_vector/__init__.py
index c15a684..9c7015d 100644
--- a/io_vector/__init__.py
+++ b/io_vector/__init__.py
@@ -21,8 +21,8 @@
bl_info = {
"name": "Adobe Illustrator / PDF / SVG",
"author": "Howard Trickey",
- "version": (1, 0),
- "blender": (2, 73, 0),
+ "version": (1, 1),
+ "blender": (2, 78, 0),
"location": "File > Import-Export > Vector files (.ai, .pdf, .svg)",
"description": "Import Adobe Illustrator, PDF, and SVG",
"warning": "",
diff --git a/io_vector/pdf.py b/io_vector/pdf.py
index e77649e..57f15b6 100644
--- a/io_vector/pdf.py
+++ b/io_vector/pdf.py
@@ -287,7 +287,7 @@ def GetPDFLiteralString(s, i):
elif c == ord('t'):
v += '\t'
elif ord('0') <= c <= ord('7'):
- x = ord(c) - ord('0')
+ x = c - ord('0')
j += 1
if j < len(s):
c = ordat(s, j)
@@ -408,6 +408,23 @@ def GetPDFDict(s, i):
print('unterminated dict starting at', i)
return ((ODICT, v), j)
+# Crossref dict:
+# Cross references are a way of turning an (object #, generation #) into
+# an actual object in the file, when an indirect reference of the form
+# object# generation# R
+# is found in another object.
+# Cross references are of two types:
+# 1) uncompressed: you find the object at a specified byte offset in the file
+# 2) compressed: you find the object in an object stream which is in turn found
+# by looking for a specified object# with implicit generation 0.
+# We will build a map from (object#, generation#) to a tuple
+# (kind, field2, field3)
+# where if kind==XUNCOMPRESSED then field2 is the file byte offset of the object and field3
+# is its generation #
+# and if kind==XCOMPRESSED then field2 is the object # of the object stream containing it,
+# and field3 is the index of that object within the stream
+XUNCOMPRESSED = 1
+XCOMPRESSED = 2
def GetPDFTrailerAndCrossrefs(s):
"""Find and return the (last) PDF trailer dictionary and cross reference
@@ -435,7 +452,69 @@ def GetPDFTrailerAndCrossrefs(s):
print('cannot find crossref index')
return (None, None)
crossrefs = {}
+ d = None
last_trailerdict = None
+ print("looking for crossref at", crossrefi)
+ if s[crossrefi:crossrefi+4] != b'xref':
+ # Could be Crossref stream
+ (obj, j) = GetPDFObject(s, crossrefi)
+ if PDFObjHasType(obj, OINDIRECTDEF):
+ strobj = obj[1][2]
+ if PDFObjHasType(strobj, OSTREAM):
+ strxrefs = GetPDFStreamContents(strobj, s, {}, False)
+ if strxrefs is None:
+ if WARN:
+ print('cannot decode crossref stream')
+ return (None, {})
+ d = strobj[1][0]
+ w = GetTypedValFromDictEntry(d, 'W', OARRAY, s, {})
+ ty = GetTypedValFromDictEntry(d, 'Type', ONAME, s, {})
+ sz = GetTypedValFromDictEntry(d, 'Size', ONUM, s, {})
+ index = GetTypedValFromDictEntry(d, 'Index', OARRAY, s, {})
+ prev = GetTypedValFromDictEntry(d, 'Prev', ONUM, s, {})
+ if ty != 'XRef' or sz is None or w is None:
+ if WARN:
+ print('something wrong with XRef stream dictionary')
+ return (None, {})
+ n1 = w[0][1]
+ n2 = w[1][1]
+ n3 = w[2][1]
+ ntot = n1 + n2 + n3
+ firstobjnum = 0
+ numobjs = sz
+ if index is not None:
+ firstobjnum = index[0][1]
+ numobjs = index[1][1]
+ k = 0
+ objnum = firstobjnum
+ while k + ntot <= len(strxrefs):
+ if n1 == 0:
+ f1 = 1
+ else:
+ (f1, k) = GetPDFMultiByteInt(strxrefs, k, n1)
+ (f2, k) = GetPDFMultiByteInt(strxrefs, k, n2)
+ if n3 == 0:
+ f3 = 0
+ else:
+ (f3, k) = GetPDFMultiByteInt(strxrefs, k, n3)
+ if f1 == 1:
+ crossrefs[(objnum, f3)] = (XUNCOMPRESSED, f2, f3)
+ elif f1 == 2:
+ crossrefs[(objnum, 0)] = (XCOMPRESSED, f2, f3)
+ elif f1 != 0:
+ if WARN:
+ print('unexpected type in XRef:', f1)
+ return (None, {})
+ objnum += 1
+ else:
+ if WARN:
+ print("no xref and object there is not stream")
+ print (obj)
+ else:
+ if WARN:
+ print("no xref and not indirect def")
+ print(obj)
+ return (d, crossrefs)
while crossrefi > 0:
i = crossrefi
if s[i:i + 4] != b'xref':
@@ -459,7 +538,7 @@ def GetPDFTrailerAndCrossrefs(s):
gen = int(s[i + 11:i + 16])
inuse = (ordat(s, i + 17) == ord('n'))
if inuse:
- crossrefs[(k, gen)] = byteoffset
+ crossrefs[(k, gen)] = (XUNCOMPRESSED, byteoffset, gen)
i += 20
# Should be at 'trailer' now
(w, i) = GetPDFKeyword(s, i)
@@ -481,6 +560,21 @@ def GetPDFTrailerAndCrossrefs(s):
crossrefi = -1
return (last_trailerdict, crossrefs)
+def GetPDFMultiByteInt(s, i, fieldlen):
+ """Get a multibyte int from a string of bytes
+
+ Args:
+ s: string of bytes
+ i: int, offset in s to start getting the result
+ fieldlen: int, how many bytes to get
+ Returns:
+ int: accumulated multibyte value (high order byte first in s)
+ """
+
+ ans = 0
+ for k in range(i, i + fieldlen):
+ ans = ans * 256 + ord(s[k])
+ return (ans, i + fieldlen)
def ReadPDFPageOneContents(filename):
"""Read a PDF file and return Content string for its first page.
@@ -604,7 +698,7 @@ def GetPDFObjFromIndirectRef(obj, s, crossrefs):
Args:
obj: (int, value) - should be (OINDIRECTREF, (obj_number, gen_number))
s: string - contents of PDF file
- crossrefs: dict - maps (obj_number, gen_number) to byte offset in s
+ crossrefs: dict - maps (obj_number, gen_number) to crossref triple
Returns:
(objectid, value) - the referred value (inside containing OINDIRECTDEF)
or None if there is any problem
@@ -615,16 +709,85 @@ def GetPDFObjFromIndirectRef(obj, s, crossrefs):
key = obj[1]
if key not in crossrefs:
return None
- i = crossrefs[key]
- if i < 0 or i >= len(s):
+ (f1, f2, f3) = crossrefs[key]
+ if f1 == XUNCOMPRESSED:
+ if f2 < 0 or f2 >= len(s):
+ return None
+ (o, _) = GetPDFObject(s, f2)
+ elif f1 == XCOMPRESSED:
+ o = GetPDFCompressedObject(s, f2, f3, crossrefs)
+ return o
+ else:
+ if WARN:
+ print("Bad xref type")
return None
- (o, _) = GetPDFObject(s, i)
if PDFObjHasType(o, OINDIRECTDEF):
return o[1][2]
else:
return None
+def GetPDFCompressedObject(s, strnum, oindex, crossrefs):
+ """Get one complete object from compressed stream.
+
+ Args:
+ s : bytes holding contents of a PDF file
+ strnum: object number of object stream where object is
+ oindex: index of object within the stream
+ crossrefs: dict - maps (obj_number, gen_number) to crossref triple
+ Returns:
+ (objectid, value) - or None, if no such object
+ """
+
+ strkey = (strnum, 0)
+ if strkey not in crossrefs:
+ if WARN:
+ print("could not find object", strnum, "in crossrefs")
+ return None
+ (g1, g2, g3) = crossrefs[strkey]
+ if g1 != XUNCOMPRESSED:
+ if WARN:
+ print("stream object is not uncompressed", g1, g2, g3)
+ return None
+ if g2 < 0 or g2 >= len(s):
+ return None
+ (ostream, _) = GetPDFObject(s, g2)
+ if PDFObjHasType(ostream, OINDIRECTDEF):
+ ostream = ostream[1][2]
+ if not PDFObjHasType(ostream, OSTREAM):
+ if WARN:
+ print("stream object does not have type stream")
+ return None
+ streamcont = GetPDFStreamContents(ostream, s, crossrefs, False)
+ d = ostream[1][0]
+ ty = GetTypedValFromDictEntry(d, "Type", ONAME, s, crossrefs)
+ if ty != "ObjStm":
+ if WARN:
+ print("stream object does not have Type ObjStm")
+ return None
+ n = GetTypedValFromDictEntry(d, "N", ONUM, s, crossrefs)
+ first = GetTypedValFromDictEntry(d, "First", ONUM, s, crossrefs)
+ if not n or not first:
+ if WARN:
+ print("required n or first not in object stream")
+ return None
+ i = 0
+ ans = None
+ for count in range(n):
+ (intpair, i) = GetPDFTwoInts(streamcont, i)
+ if not intpair:
+ if WARN:
+ print("stream object did not find int pair at count", count)
+ return None
+ (id, off) = intpair
+ obj = GetPDFObject(streamcont, first + off)
+ if count == oindex:
+ if obj:
+ ans = obj[0]
+ break
+ return ans
+
+
def GetPDFObjFromDictEntry(d, entryname, s, crossrefs):
"""Return the PDF object that should be at given entry in d.
@@ -668,7 +831,7 @@ def PDFDictType(d):
return ''
-def GetPDFStreamContents(contentsobj, s, crossrefs):
+def GetPDFStreamContents(contentsobj, s, crossrefs, dodecode=True):
"""Return the contents of a stream object, applying any needed filters.
For now, only handle FlateDecode filter, and with no DecodeParms.
@@ -677,8 +840,9 @@ def GetPDFStreamContents(contentsobj, s, crossrefs):
contentsobj: (OSTREAM, (dict, istart, iend))
s: bytes - PDF file contents
crossrefs: dict - maps (obj_number, gen_number) to byte offset in s
+ dodecode: bool - should we decode too?
Returns:
- stri
@@ Diff output truncated at 10240 characters. @@
More information about the Bf-extensions-cvs
mailing list