Logo Search packages:      
Sourcecode: zope-backtalk version File versions

PDFClass.py

##############################################################################
#
# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
# All Rights Reserved.
# 
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
# 
##############################################################################
"""
Revision information:
$Id: PDFClass.py,v 1.6 2002/07/08 20:44:06 chrism Exp $
"""
from reportlab.lib.units import inch
from reportlab.lib.pagesizes import LETTER
from reportlab.platypus.doctemplate import SimpleDocTemplate, _doNothing
from reportlab.platypus import Preformatted,Paragraph,Image,Spacer,PageBreak
from reportlab.platypus.xpreformatted import PythonPreformatted
from reportlab.platypus.tableofcontents import TableOfContents
from cgi import escape
import cStringIO, types, string, re, sys
from PDFStylesheet import STYLES
from Retrievers import ZODBImageRetriever
import reportlabmonkeypatch
# The reportlabmonkeypatch import above is used to patch reportlab under
# Python 1.5.2; do not remove it.

DEBUG = 0

def test_bullet():
   """Render the developer's sample bullet-list document to a PDF.

   Reads a hard-coded structured-text file, parses it with StructuredText
   and DocumentWithImages, and passes the resulting document to test()
   with no context.
   """
   source = open('/home/chrism/testbullet.stx')
   try:
      raw = source.read()
   finally:
      # close explicitly rather than relying on garbage collection
      source.close()
   from StructuredText.ST import StructuredText
   from StructuredText.DocumentWithImages import DocumentWithImages
   bullets = StructuredText(raw)
   bulletdoc = DocumentWithImages()(bullets)
   test(bulletdoc, None)

def test_numbered():
   """Render the developer's sample numbered-list document to a PDF.

   Reads a hard-coded structured-text file, parses it with StructuredText
   and DocumentWithImages, and passes the resulting document to test()
   with no context.
   """
   source = open('/home/chrism/testnumbered.stx')
   try:
      raw = source.read()
   finally:
      # close explicitly rather than relying on garbage collection
      source.close()
   from StructuredText.ST import StructuredText
   from StructuredText.DocumentWithImages import DocumentWithImages
   numbers = StructuredText(raw)
   numberdoc = DocumentWithImages()(numbers)
   test(numberdoc, None)

def test_zopebook(context):
   """Render the Zope Book chapters into a single PDF.

   Every chapter file is read from a hard-coded directory, parsed with
   StructuredText and DocumentWithImages, and the list of documents is
   passed to test() together with context, an output filename and the
   book title.
   """
   directory = '/home/chrism/ZopeBook/'
   # chapter files, in the order they appear in the generated book
   chapters = (
      'IntroducingZope',
      'UsingZope',
      'BasicObject',
      'DTML',
      'ZPT',
      'SimpleExamples',
      'Security',
      'AdvDTML',
      'ScriptingZope',
      'SearchingZCatalog',
      'RelationalDatabases',
      'ZEO',
      'CustomZopeObjects',
      'AppendixA',
      'AppendixB',
      'AppendixC',
      'AppendixD',
      )
   raw = []
   for chapter in chapters:
      source = open(directory + chapter + '.stx')
      try:
         raw.append(source.read())
      finally:
         # close each chapter as soon as it has been read
         source.close()
   from StructuredText.ST import StructuredText
   from StructuredText.DocumentWithImages import DocumentWithImages
   # keep the class name intact; bind the instance to its own name
   make_document = DocumentWithImages()
   st = map(StructuredText, raw)
   docs = map(make_document, st)
   test(docs,context,filename='/home/chrism/zopebook.pdf',title='The Zope Book')

def test(docs, context, filename='/home/chrism/temp.pdf', title=''):
   """Build a PDF from docs in memory and write it to filename.

   docs     -- a document or list of documents, passed to PDFClass
   context  -- handed to ZODBImageRetriever, which PDFClass uses to
               retrieve images
   filename -- path of the PDF file to write
   title    -- document title passed to PDFClass
   """
   f = cStringIO.StringIO()
   try:
      finder = ZODBImageRetriever(context)
      pdf = PDFClass(file=f, imageretriever=finder, title=title)
      pdf(docs)
      # PDF output is binary data, so the file must not be opened in
      # text mode (which translates line endings on some platforms)
      out = open(filename, 'wb')
      try:
         out.write(f.getvalue())
      finally:
         out.close()
   finally:
      f.close()

def pageNumber(canvas, doc):
   """Draw the current page number on the canvas.

   The number (doc.page) is drawn in 8 point Helvetica at
   x = page width - 1.5 inch, y = 0.75 inch.  The canvas state is saved
   before drawing and restored afterwards.
   """
   canvas.saveState()
   canvas.setFont('Helvetica', 8)
   page_width, page_height = doc.pagesize
   number_x = page_width - (1.5*inch)
   number_y = 0.75 * inch
   canvas.drawString(number_x, number_y, "%d" % doc.page)
   canvas.restoreState()

def pageNumberAndTitle(canvas, doc):
   """Draw the document title and the current page number on the canvas.

   The title (doc.title) is centred horizontally at
   y = page height - 0.75 inch in 10 point Helvetica-Oblique; the page
   number (doc.page) is drawn in 8 point Helvetica at
   x = page width - 1.5 inch, y = 0.75 inch.  The canvas state is saved
   before drawing and restored afterwards.
   """
   page_width, page_height = doc.pagesize
   heading = doc.title
   canvas.saveState()
   canvas.setFont('Helvetica-Oblique', 10)
   middle = page_width/2
   heading_y = page_height - (.75 * inch)
   canvas.drawCentredString(middle, heading_y, heading)
   canvas.setFont('Helvetica', 8)
   number_x = page_width - (1.5*inch)
   canvas.drawString(number_x, 0.75 * inch, "%d" % doc.page)
   canvas.restoreState()

class Outputter:
   """Ordered collection of wrapper objects that text is written into.

   Calling the instance adds text to the most recently pushed wrapper;
   get_output() turns every wrapper into its finished object.
   """
   def __init__(self):
      self.stack = []

   def push(self, t):
      """Append wrapper t; it becomes the target of subsequent text."""
      self.stack.append(t)

   def pop(self):
      """Discard the most recently pushed wrapper."""
      del self.stack[-1]

   def __call__(self, t):
      """Add text t to the most recently pushed wrapper."""
      current = self.stack[-1]
      current.add_text(t)

   def get_output(self):
      """Return the finished objects with a Spacer between neighbours."""
      finished = []
      for wrapper in self.stack:
         finished.append(wrapper.finish())
      out = []
      for item in finished:
         if out:
            # separate this item from the previous one
            out.append(Spacer(1, .10*inch))
         out.append(item)
      return out

class ParagraphWrapper:
   """Collects text fragments and turns them into a Paragraph."""
   def __init__(self, style):
      self.text  = []
      self.style = style

   def __repr__(self):
      collected = string.join(self.text, '')
      return '<Paragraph wrapper: %s >' % collected

   def add_text(self, t):
      """Append one text fragment."""
      self.text.append(t)

   def finish(self):
      """Return a Paragraph of the fragments joined by newlines."""
      body = string.join(self.text, '\n')
      return Paragraph(body, self.style)

class PreformattedWrapper(ParagraphWrapper):
   """ParagraphWrapper variant that finishes as a Preformatted object."""
   def finish(self):
      """Return a Preformatted of the fragments joined by newlines."""
      body = string.join(self.text, '\n')
      return Preformatted(body, self.style)

class PythonPreformattedWrapper(ParagraphWrapper):
   """ParagraphWrapper variant that finishes as a PythonPreformatted."""
   def finish(self):
      """Return a PythonPreformatted of the fragments joined by newlines."""
      body = string.join(self.text, '\n')
      return PythonPreformatted(body, self.style)

class ImageWrapper:
   """Holds an image file object and the size it should be drawn at.

   f      -- file-like object containing the image data; it is rewound
             to the start on construction
   width  -- requested width
   height -- requested height

   width and height are scaled down together in steps of 0.9 until
   neither exceeds its maximum.
   """
   # largest size an image may be drawn at
   MAX_WIDTH = 400
   MAX_HEIGHT = 240

   def __init__(self, f, width=400, height=240):
      while (width > self.MAX_WIDTH) or (height > self.MAX_HEIGHT):
         # shrink both by the same factor so the proportions are kept
         width = width * .9
         height = height * .9
      f.seek(0)
      self.f  = f
      self.width = width
      self.height = height

   def __repr__(self):
      # the original formatted the undefined name "f", so repr() raised
      # NameError; the file object lives on self.f
      return '<Image wrapper: %s >' % (self.f,)

   def finish(self):
      """Return the Image built from the file object and stored size."""
      return Image(self.f, self.width, self.height, kind='proportional')

class PageBreakWrapper:
   """Wrapper whose finished form is a PageBreak."""
   def finish(self):
      """Return a new PageBreak."""
      page_break = PageBreak()
      return page_break

class TableOfContentsWrapper:
   """Wrapper whose finished form is a TableOfContents."""
   def finish(self):
      """Return a new TableOfContents."""
      contents = TableOfContents()
      return contents

class SpacerWrapper:
   """Wrapper for a Spacer whose width and height are given in inches."""
   def __init__(self, w, h):
      self.w, self.h = w, h

   def finish(self):
      """Return a Spacer of w by h inches."""
      width = self.w * inch
      height = self.h * inch
      return Spacer(width, height)

class PDFClass:
   """Turn StructuredText document trees into a PDF using reportlab.

   An instance is configured once (output file, page geometry, styles,
   page callbacks) and then called with one document or a list/tuple of
   documents.  Every node is rendered by the method that element_types
   names for the node's name; handlers either add text to the current
   wrapper of the Outputter or push a new wrapper onto it.
   """
   # maps a node name to the name of the method that renders it
   element_types={
      '#text': '_text',
      'StructuredTextDocument': 'document',
      'StructuredTextParagraph': 'paragraph',
      'StructuredTextExample': 'example',
      'StructuredTextBullet': 'bullet',
      'StructuredTextNumbered': 'numbered',
      'StructuredTextDescription': 'description',
      'StructuredTextDescriptionTitle': 'descriptionTitle',
      'StructuredTextDescriptionBody': 'descriptionBody',
      'StructuredTextSection': 'section',
      'StructuredTextSectionTitle': 'sectionTitle',
      'StructuredTextLiteral': 'literal',
      'StructuredTextEmphasis': 'emphasis',
      'StructuredTextStrong': 'strong',
      'StructuredTextLink': 'link',
      'StructuredTextXref': 'xref',
      'StructuredTextSGML': 'sgml',
      'StructuredTextImage': 'image',
      'StructuredTextInnerLink': 'innerlink',
      'StructuredTextUnderline': 'emphasis', # no real underline in PDF
      'CommentableExample': 'example',
      'Comment':'comment',
      }

   # wrappers emitted before the chapters; makeTitlePage() and makeTOC()
   # replace these lists on the instance rather than mutating them
   title_page_paragraphs = []
   toc_paragraphs = []

   # style names by nesting level; levels below the first entry use the
   # first name and levels past the last entry use the last name
   _heading_style_names = ("H1", "H2", "H3", "H4")
   _bullet_style_names = ("Bullet1", "Bullet2", "Bullet3", "Bullet4")

   def __init__(self,
                file=None, # filename or file object
                title='',
                pagesize=LETTER,
                leftMargin=inch*.5,
                rightMargin=inch*.5,
                topMargin=inch,
                bottomMargin=inch,
                onFirstPage=_doNothing,
                onLaterPages=pageNumberAndTitle,
                imageretriever=None,
                styles=STYLES,
                do_bookmarks=1,
                do_toc=1):
      """Store the settings used when the instance is called.

      file           -- filename (string) or file object to write to
      title          -- title passed to the document template
      pagesize, leftMargin, rightMargin, topMargin, bottomMargin
                     -- page geometry passed to the document template
      onFirstPage, onLaterPages
                     -- callbacks passed to the template's build method
      imageretriever -- callable taking an image path and returning
                        (data, width, height); used by image()
      styles         -- mapping of style name to style object
      do_bookmarks   -- passed to the template's setBTDoBookmarks()
      do_toc         -- passed to the template's setBTDoToc(); when
                        true the document is built with multiBuild
      """
      self.file = file
      self.title = title
      self.pagesize = pagesize
      self.leftMargin = leftMargin
      self.rightMargin = rightMargin
      self.topMargin = topMargin
      self.bottomMargin = bottomMargin
      self.onFirstPage = onFirstPage
      self.onLaterPages = onLaterPages
      self.imageretriever = imageretriever
      self.styles = styles
      self.do_bookmarks = do_bookmarks
      self.do_toc = do_toc

   def dispatch(self, doc, level, output):
      """Render node doc with the handler registered for its node name.

      Raises KeyError when the node name is not in element_types.
      """
      handler = getattr(self, self.element_types[doc.getNodeName()])
      handler(doc, level, output)

   def _dispatch_children(self, doc, level, output):
      """Render every child node of doc at the given level."""
      for c in doc.getChildNodes():
         self.dispatch(c, level, output)

   def _style_for_level(self, names, level):
      """Return the style whose name is at position level in names.

      level is clamped to the valid index range of names.
      """
      index = max(0, min(level, len(names) - 1))
      return self.styles[names[index]]

   def makeTitlePage(self, titlepage_header='', titlepage_author='',
                     titlepage_center='', titlepage_bottom=''):
      """Prepare a title page that is emitted before everything else.

      The page consists of the header, center, author and bottom texts
      (in that order), each in its own style, followed by a page break.
      """
      header = ParagraphWrapper(self.styles["Title"])
      header.add_text(titlepage_header)
      center = ParagraphWrapper(self.styles["BoldCenteredBodyText"])
      center.add_text(titlepage_center)
      author = ParagraphWrapper(self.styles["Author"])
      author.add_text(titlepage_author)
      bottom = ParagraphWrapper(self.styles["CenteredBodyText"])
      bottom.add_text(titlepage_bottom)
      self.title_page_paragraphs = [header,
                                    center,
                                    author,
                                    bottom,
                                    PageBreakWrapper(),
                                    ]

   def makeTOC(self):
      """Prepare a table of contents page followed by a page break."""
      self.toc_paragraphs = [TableOfContentsWrapper(), PageBreakWrapper()]

   def __call__(self, docs, level=1):
      """Render docs into the configured file.

      docs  -- a single document or a list/tuple of documents; each one
               is followed by a page break, except the last
      level -- chapters are dispatched at level-1
      """
      if type(docs) not in (types.ListType, types.TupleType):
         docs = [docs]
      opened_here = type(self.file) is types.StringType
      if opened_here:
         f = open(self.file, 'wb')
      else:
         f = self.file
      try:
         self.pdf = BookmarkDocTemplate(f,
                                        pagesize=self.pagesize,
                                        leftMargin=self.leftMargin,
                                        rightMargin=self.rightMargin,
                                        topMargin=self.topMargin,
                                        bottomMargin=self.bottomMargin,
                                        title=self.title)
         self.pdf.setBTStyles(self.styles)
         self.pdf.setBTDoBookmarks(self.do_bookmarks)
         self.pdf.setBTDoToc(self.do_toc)
         outputter = Outputter()
         for item in self.title_page_paragraphs:
            outputter.push(item)
         for item in self.toc_paragraphs:
            outputter.push(item)
         for chapter in docs:
            self.dispatch(chapter, level-1, outputter)
            outputter.push(PageBreakWrapper())
         if docs:
            # get rid of the last page break; with no chapters there is
            # none to remove, and popping anyway would drop the page
            # break after the table of contents or fail on an empty stack
            outputter.pop()
         paragraphs = outputter.get_output()
         if self.do_toc:
            m = self.pdf.multiBuild
         else:
            m = self.pdf.build
         m(paragraphs,onFirstPage=self.onFirstPage,
           onLaterPages=self.onLaterPages)
      finally:
         # a file opened here from a filename is ours to close; a file
         # object supplied by the caller is left open for the caller
         if opened_here:
            f.close()

   def _text(self, doc, level, output):
      """Emit the escaped text of a node (stripped unless it is a literal)."""
      DEBUG and dout("_text")
      if doc.getNodeName() == 'StructuredTextLiteral':
         output(escape(doc.getNodeValue()))
      else:
         output(escape(string.strip(doc.getNodeValue())))

   def document(self, doc, level, output):
      """Render the children of a document node at the same level."""
      DEBUG and dout("document")
      self._dispatch_children(doc, level, output)

   def section(self, doc, level, output):
      """Render the children of a section one level deeper."""
      DEBUG and dout("section")
      self._dispatch_children(doc, level+1, output)

   def sectionTitle(self, doc, level, output):
      """Start a heading paragraph styled H1..H4 according to level."""
      DEBUG and dout("sectionTitle")
      style = self._style_for_level(self._heading_style_names, level)
      output.push(ParagraphWrapper(style))
      self._dispatch_children(doc, level, output)

   def example(self, doc, level, output):
      """Start a code block holding the de-indented text of the children."""
      DEBUG and dout("example")
      output.push(PythonPreformattedWrapper(self.styles["Code"]))
      for c in doc.getChildNodes():
         output(prestrip(c.getNodeValue()))

   def paragraph(self, doc, level, output):
      """Start a body-text paragraph and render the children into it."""
      DEBUG and dout("paragraph")
      output.push(ParagraphWrapper(self.styles["BodyText"]))
      self._dispatch_children(doc, level, output)

   def description(self, doc, level, output):
      """Start a definition paragraph and render the children into it."""
      DEBUG and dout("description")
      output.push(ParagraphWrapper(self.styles["Definition"]))
      self._dispatch_children(doc, level, output)

   def descriptionTitle(self, doc, level, output):
      """Render the children in bold italics, followed by chr(151)."""
      DEBUG and dout("descriptionTitle")
      emdash = chr(151)
      output('<b><i>')
      self._dispatch_children(doc, level, output)
      output('</i></b> %s ' % emdash)

   def descriptionBody(self, doc, level, output):
      """Render the children into the current paragraph."""
      DEBUG and dout("descriptionBody")
      self._dispatch_children(doc, level, output)

   def emphasis(self, doc, level, output):
      """Render the children in italics."""
      DEBUG and dout("emphasis")
      output('<i>')
      self._dispatch_children(doc, level, output)
      output('</i> ')

   def literal(self, doc, level, output):
      """Emit the escaped, unstripped child text in the courier face."""
      DEBUG and dout("literal")
      output('<font face="courier">')
      for c in doc.getChildNodes():
         output(escape(c.getNodeValue()))
      output('</font>')

   def strong(self, doc, level, output):
      """Render the children in bold."""
      DEBUG and dout("strong")
      output('<b>')
      self._dispatch_children(doc, level, output)
      output('</b>')

   def bullet(self, doc, level, output):
      """Start a bullet paragraph styled Bullet1..Bullet4 by level.

      The children are rendered one level deeper.
      """
      DEBUG and dout("bullet")
      style = self._style_for_level(self._bullet_style_names, level)
      output.push(ParagraphWrapper(style))
      output('<bullet>')
      output('\267')
      output('</bullet>')
      self._dispatch_children(doc, level+1, output)

   def sgml(self,doc,level,output):
      """Render the children into the current paragraph."""
      DEBUG and dout("sgml")
      self._dispatch_children(doc, level, output)

   def link(self, doc, level, output):
      """Wrap the rendered children in an <a> tag pointing at doc.href."""
      DEBUG and dout("link")
      # NOTE(review): doc.href is inserted without escaping, unlike text
      # nodes -- confirm how the paragraph parser treats '&' and '"' here
      output('<a href="%s">' % doc.href)
      self._dispatch_children(doc, level, output)
      output('</a>')

   def numbered(self, doc, level, output):
      """Start a numbered item; reset the counter after the last one."""
      DEBUG and dout("numbered")
      output.push(ParagraphWrapper(self.styles["Definition"]))
      output('<seq id="number">. ')
      self._dispatch_children(doc, level, output)
      n=doc.getNextSibling()
      # compare node names by value: "is not" tests object identity, and
      # two equal name strings need not be the same object
      if n is None or n.getNodeName() != doc.getNodeName():
         output('<seqreset id="number">')

   def image(self, doc, level, output):
      """Push the image found at doc.href, plus a caption if it has a key."""
      DEBUG and dout("image")
      path = doc.href
      data, width, height = self.imageretriever(path)
      f = cStringIO.StringIO()
      f.write(data)
      f.seek(0)
      output.push(ImageWrapper(f, width=width, height=height))
      if path and hasattr(doc, 'key'):
         output.push(ParagraphWrapper(self.styles["Caption"]))
         output('<b>Figure %s</b> %s\n' % (doc.key, doc.getNodeValue()))

   def xref(self, doc, level, output):
      """Emit a "Figure ..." link to the anchor named by the node value."""
      DEBUG and dout("xref")
      val = doc.getNodeValue()
      output('<a href="#%s">Figure %s</a>' % (val, val) )

   def innerlink(self, doc, level, output):
      """Emit the node value prefixed with the word "innerlink"."""
      DEBUG and dout("innerlink")
      val = doc.getNodeValue()
      output('innerlink %s' % val)

   def comment(self, doc, level, output):
      """Start a comment paragraph holding the marked-up child text."""
      DEBUG and dout("comment")
      output.push(ParagraphWrapper(self.styles["Comment"]))
      for c in doc.getChildNodes():
         output(markupComment(c.getNodeValue()))

class PDFWithoutCommentsClass(PDFClass):
   """PDFClass variant whose comment handler produces no output."""
   def comment(self, doc, level, output):
      """Ignore the comment node."""
      return None

################################################################
# Regexes and constants
################################################################

# The fragments below are spliced into [...] character classes to build
# the `url` pattern.
# Word characters.
letters = r'\w'
# Recognized URL schemes; the parentheses make this capture group 2 of
# `url` (group 1 is the whole URL).
prefix = r'(http|https|ftp|mailto|file|about|wais|gopher)'
# Non-word characters accepted inside a URL.
curses = r'\/\#\~\:\.\?\+\=\&\%\@\!\\\-\,'
# Punctuation that is left out of the end of a URL when only such
# punctuation separates the URL from a character outside `any`.
punc = r'\.\?\-\,;'
# Every character that may appear in a URL.
# NOTE(review): this name shadows the any() builtin on Python versions
# that have one.
any = letters + curses + punc
# Matches, at a word boundary, a scheme followed by ':' and a non-greedy
# run of `any` characters.  The lookahead ends the match where optional
# `punc` characters are followed by a character outside `any`, or at the
# end of the string.
url = re.compile(r'\b(%s:[%s]+?)(?=[%s]*[^%s]|$)' % (prefix, any, punc, any))

################################################################
# Utility functions
################################################################

def markupComment(comment):
    """Return comment escaped, with URLs turned into <a href> links.

    The comment may be broken into paragraphs using the "vertical tab"
    (chr(11)) notation created in insertComment; each vertical tab comes
    out as a newline.  All text is passed through escape() before the
    `url` pattern is applied to it.
    """
    commentlines = []
    # string.split returns the whole comment as one paragraph when it
    # contains no vertical tab, so a single loop covers both the single-
    # and the multi-paragraph case
    for paragraph in string.split(comment, chr(11)):
        commentlines.extend(string.split(escape(paragraph), '\n'))
    comment = string.join(commentlines, '\n')
    return url.sub(r'<a href="\1">\1</a>', comment)

def indentation_plus(t, plus=0, spaces_expr=re.compile(r'^(\s*)').match):
    """Return plus increased by the length of t's leading whitespace.

    spaces_expr is the matcher used to find the leading whitespace; when
    it finds no match, plus is returned unchanged.
    """
    found = spaces_expr(t)
    if not found:
        return plus
    return found.end() - found.start() + plus

def prestrip(v):
   v=string.replace(v, '\r\n', '\n')
   v=string.replace(v, '\r', '\n')
   v=string.replace(v, '\t', '        ')
   lines=string.split(v, '\n')
   indent=len(lines[0])
   for line in lines:
      if not len(line): continue
      i=len(line)-len(string.lstrip(line))
      if i < indent:
         indent=i
   nlines=[]
   for line in lines:
      nlines.append(line[indent:])
   return string.join(nlines, '\n')
      
class BookmarkDocTemplate(SimpleDocTemplate):
   """SimpleDocTemplate that reports heading paragraphs as TOC entries
   and PDF outline bookmarks."""

   BT_do_bookmarks = None
   BT_do_toc = None
   
   def multiBuild(self, story,
                  filename=None,
                  onFirstPage=_doNothing,
                  onLaterPages=_doNothing,
                  maxPasses = 10):
      """Build the story repeatedly until _allSatisfied() is true.

      Raises IndexError when that has not happened once the number of
      passes exceeds maxPasses.  The filename argument is not used.
      """
      self._indexingFlowables = indexing = []
      for flowable in story:
         if flowable.isIndexing():
            indexing.append(flowable)

      passes = 0
      satisfied = 0
      while not satisfied:
         passes = passes + 1

         for fl in self._indexingFlowables:
            fl.beforeBuild()

         # build from a copy of the story, since building consumes it
         self.build(story[:], onFirstPage, onLaterPages)

         # remove the _postponed attribute that may have been tacked
         # onto some paragraphs, so that the next pass does not go wrong
         for elem in story:
            if hasattr(elem, '_postponed'):
               del elem._postponed

         for fl in self._indexingFlowables:
            fl.afterBuild()

         satisfied = self._allSatisfied()
         if not satisfied and passes > maxPasses:
            raise IndexError("Index entries not resolved after %d passes"
                             % maxPasses)

   def setBTStyles(self, styles):
      """Register the H1..H4 styles as heading levels 0..3."""
      self.BT_interesting = {
         styles["H1"]: 0,
         styles["H2"]: 1,
         styles["H3"]: 2,
         styles["H4"]: 3
         }
      # the inverse mapping, from level back to style
      by_level = {}
      for heading_style, depth in self.BT_interesting.items():
         by_level[depth] = heading_style
      self.BT_reverse_interesting = by_level

   def setBTDoBookmarks(self, do_bookmarks):
      """Set whether outline bookmarks are produced."""
      self.BT_do_bookmarks = do_bookmarks

   def setBTDoToc(self, do_toc):
      """Set whether table-of-contents entries are produced."""
      self.BT_do_toc = do_toc

   def afterFlowable(self, flowable):
      """Record a TOC entry and/or bookmark for a heading flowable.

      Flowables without getPlainText or style attributes, or whose style
      is not one of the registered heading styles, are ignored.
      """
      if not (self.BT_do_toc or self.BT_do_bookmarks):
         return # short circuit
      if not hasattr(flowable, 'getPlainText'):
         return
      if not hasattr(flowable, 'style'):
         return
      style = flowable.style
      if style not in self.BT_interesting.keys():
         return
      style_level = self.BT_interesting[style]
      if not hasattr(self, 'firstlevel'):
         # levels are counted relative to the first heading encountered
         self.firstlevel = style_level
      real_level = style_level - self.firstlevel
      if not self.BT_reverse_interesting.get(real_level):
         real_level = 0
      text = flowable.getPlainText()
      key = str(abs(hash(text)))
      page_num = self.page
      if self.BT_do_toc:
         self.notify('TOCEntry', (real_level, text, page_num))
      DEBUG and dout("%s %s" % (page_num, key))
      if not self.BT_do_bookmarks:
         return
      self.canv.bookmarkPage(key)
      try:
         self.canv.addOutlineEntry(text, key, real_level)
      except ValueError: # illegal level jump, e.g. lv. 0 to lv. 2
         DEBUG and dout("Could not add outline entry for key %s, lv %s"
                        % (key, real_level))


   def beforeDocument(self):
      """Show the outline when the document is loaded into a reader."""
      if self.BT_do_bookmarks:
         self.canv.showOutline()

def dout(text):
   """Print text to standard output; called for tracing when DEBUG is set."""
   print text
   

Generated by  Doxygen 1.6.0   Back to index