raise EreaderError('Unknown book compression %i.' % self.header_record.compression)
self.header_record.compression not in (2, 10)
get_metadata = get_metadata
import calibre.ebooks.metadata.pdb
self.mi = get_metadata(stream, False)
def section_data(self, number):
    """Return the raw record stored at index ``number`` of ``self.sections``.

    No bounds checking is done here: whatever ``self.sections[number]``
    raises for an invalid index propagates to the caller.
    """
    return self.sections[number]
def decompress_text(self, number):
    """Decompress section ``number`` and decode it to text.

    The decompressor is selected by ``self.header_record.compression``:
    ``2`` uses calibre's PalmDoc ``decompress_doc`` and ``10`` uses
    ``zlib.decompress``. The bytes are then decoded with ``self.encoding``
    when it is set, otherwise with a fallback codec; undecodable bytes are
    substituted (``'replace'``) rather than raising.

    Returns the decoded text, or ``None`` when the compression value is
    neither 2 nor 10.
    """
    # NOTE(review): the fallback codec literal was missing from the garbled
    # original; 'cp1252' is assumed here -- confirm against the format spec.
    codec = 'cp1252' if self.encoding is None else self.encoding
    compression = self.header_record.compression

    if compression == 2:
        # Imported lazily so the PalmDoc codec is only loaded when needed.
        from calibre.ebooks.compression.palmdoc import decompress_doc
        return decompress_doc(self.section_data(number)).decode(codec, 'replace')

    if compression == 10:
        return zlib.decompress(self.section_data(number)).decode(codec, 'replace')

    # The section is deliberately not read for unsupported schemes.
    return None
def get_image(self, number):
    """Return ``(name, image_data)`` for the image stored in section ``number``.

    Sections from ``header_record.image_data_offset`` up to and including
    ``image_data_offset + num_image_pages - 1`` are treated as images. For
    any other section number the placeholder ``('empty', '')`` is returned
    and the section is not read.

    The name is taken from bytes 4..35 of the record with NUL padding
    stripped from both ends; the image payload is everything from byte 62
    onwards.
    """
    header = self.header_record
    first_image = header.image_data_offset
    last_image = first_image + header.num_image_pages - 1

    if number < first_image or number > last_image:
        return ('empty', '')

    record = self.section_data(number)
    name = record[4:36].strip('\x00')
    image = record[62:]
    return (name, image)
def get_text_page(self, number):
    """Return the decoded text of page ``number``, or ``''`` if out of range.

    Valid pages are ``1`` through ``header_record.num_text_pages`` inclusive.
    Membership in ``range`` is used on purpose, so a value that does not
    equal one of those integers is treated as out of range.
    """
    if number not in range(1, self.header_record.num_text_pages + 1):
        return ''
    return self.decompress_text(number)
def extract_content(self, output_dir):
footnote_to_html = footnote_to_html
sidebar_to_html = sidebar_to_html
import calibre.ebooks.pml.pmlconverter
PML_HTMLizer = PML_HTMLizer
import calibre.ebooks.pml.pmlconverter
output_dir = os.path.abspath(output_dir)
if not os.path.exists(output_dir):
os.makedirs(output_dir)
title = self.mi.title
if not isinstance(title, unicode):
title = title.decode('utf-8', 'replace')
html = u'<html><head><title>%s</title></head><body>' % title
pml = u''
for i in range(1, self.header_record.num_text_pages + 1):
self.log.debug('Extracting text page %i' % i)
pml += self.get_text_page(i)
hizer = PML_HTMLizer()
html += hizer.parse_pml(pml, 'index.html')
toc = hizer.get_toc()
if self.header_record.footnote_count > 0:
html += '<br /><h1>%s</h1>' % _('Footnotes')
footnoteids = None(re.findall, '\\w+(?=\x00)'(self.section_data(self.header_record.footnote_offset).decode if self.encoding is None else self.encoding))
for fid, i in enumerate(range(self.header_record.footnote_offset + 1, self.header_record.footnote_offset + self.header_record.footnote_count)):
self.log.debug('Extracting footnote page %i' % i)
html += footnote_to_html(footnoteids[fid], self.decompress_text(i))
if self.header_record.sidebar_count > 0:
html += '<br /><h1>%s</h1>' % _('Sidebar')
sidebarids = None(re.findall, '\\w+(?=\x00)'(self.section_data(self.header_record.sidebar_offset).decode if self.encoding is None else self.encoding))
for sid, i in enumerate(range(self.header_record.sidebar_offset + 1, self.header_record.sidebar_offset + self.header_record.sidebar_count)):
self.log.debug('Extracting sidebar page %i' % i)
html += sidebar_to_html(sidebarids[sid], self.decompress_text(i))