"""
  PDA.Palm.App.Doc - E-Text Document
  $Id: Doc.py,v 1.9 1998/09/09 16:55:24 rob Exp $

  Copyright 1998 Rob Tillotson <rob@io.com>

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU Library General Public License, version 2,
  as published by the Free Software Foundation.

  This program is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  General Public License for more details.

  You should have received a copy of the GNU Library General Public License
  along with this program; if not, write the Free Software Foundation,
  Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.


  This module handles databases in the standard Palm e-text
  format used by applications such as AportisDoc, JDoc, TealDoc,
  etc.

  DOC databases are an excellent example of a reasonably complex
  file format for the Palm.  DOC databases contain three types
  of records: a header, any number of text records (which may be
  compressed by a simple algorithm that gives 40-50% compression
  on many files), and some bookmark records.  The classes in this
  module show how such a database could be handled; the appropriate
  class for a record is selected according to its position in the
  database and the information in the header.

  A small C module (_Doc) goes with this one; it contains the
  compression and decompression routines from "makedoc", since
  it would be significantly slower to use Python for this.

  Also included are a pair of classes, DOCReader and DOCWriter, which
  implement streamed access to a DOC database.  These classes allow
  you to treat DOCs as ordinary text files, to a certain extent.
  While the reader class is useful, the writer class is even more so,
  since it allows (beware! large memory consumption) you to stream
  data into the file, set bookmarks, etc. without having to know in
  advance how big the text is.  (The util subdirectory of the package
  contains a couple of examples of what this is good for.)
"""

__version__ = '$Id: Doc.py,v 1.9 1998/09/09 16:55:24 rob Exp $'
  
import PDA.Palm

import string, struct

# C module
from PDA.Palm import _Doc, FLD_STRING, FLD_INT

# Number of low-order bits in a two-byte back-reference that hold the
# repeat count (stored as length - 3); the remaining bits of the 14-bit
# payload hold the distance back into the output.  See _uncompress.
COUNT_BITS = 3

# DOC compression is fairly simple, and is optimized for compressing ASCII
# text... for a simple algorithm, it does reasonably well, achieving almost
# 50% compression on most files.  The two main compression techniques are:
#  - collapsing of spaces into an adjacent ASCII character, by setting the
#    high bit... this accounts for all inter-word spaces, and probably
#    10% or more compression.
#  - simple run encoding within a 4096 byte text record; it can encode a repeat
#    sequence of 3-10 bytes (3 count bits) anywhere within 2048 bytes before
#    the current position

# This function has been replaced by C but is left here as a reminder of
# inefficient Python coding :)
def _uncompress(s):
    s = map(ord, s)
    x = 0
    o = []
    try:
	while 1:
	    c = s[x]
	    x = x + 1
	    if c > 0 and c < 9:  # just copy that many bytes
		for y in range(0, c):
		    o.append(s[x])
		    x = x + 1
	    elif c < 128: # a regular ascii character
		o.append(c)
	    elif c > 0xc0: # a regular ascii character with a space before it
		o.append(32)
		o.append(c & 0x7f)
	    else: # a compressed sequence
		c = c << 8
		c = c | s[x]
		x = x + 1
		m = (c & 0x3fff) >> COUNT_BITS
		n = (c & ((1 << COUNT_BITS)-1)) + 3
		for y in range(0, n):
		    o.append(o[len(o)-m])
    except IndexError:
	pass
    return string.join(map(chr, o), '')


# Field table for HeaderRecord: field name -> (field type, default value).
# The six fields correspond, in order, to the '>hhlhhl' struct format
# that HeaderRecord uses to pack and unpack the header record.
_header_fields = {
    'version': (FLD_INT, 0),    # DOCWriter stores 2 for compressed text, 1 for plain
    'spare': (FLD_INT, 0),      # not referenced elsewhere in this module
    'storylen': (FLD_INT, 0),   # DOCWriter stores the total length of the text written
    'textrecs': (FLD_INT, 0),   # DOCWriter stores the number of text records
    'recsize': (FLD_INT, 0),    # DOCWriter stores 4096
    'spare2': (FLD_INT, 0)      # not referenced elsewhere in this module
    }

class HeaderRecord(PDA.Palm.Record):
    """The header record (record 0) of a DOC database.

    Exposes the fields 'version', 'spare', 'storylen', 'textrecs',
    'recsize' and 'spare2', stored on disk with the big-endian struct
    layout '>hhlhhl' (16 bytes).
    """

    def __init__(self, raw='', index=0, id=0, attr=0, category=0):
        # Install the field table, then hand off to the base constructor.
        self.fields = _header_fields
        PDA.Palm.Record.__init__(self, raw, index, id, attr, category)

    def unpack(self, raw):
        """Keep 'raw' and decode its first 16 bytes into the fields."""
        self.raw = raw
        names = ['version', 'spare', 'storylen',
                 'textrecs', 'recsize', 'spare2']
        # hmm, some doc files have a different length of record...
        # so only the leading 16 bytes are handed to the decoder.
        head = raw[0:16]
        self.unpackfields('>hhlhhl', names, head)

    def pack(self):
        """Encode the fields, remember the result in self.raw, return it."""
        names = ['version', 'spare', 'storylen',
                 'textrecs', 'recsize', 'spare2']
        self.raw = self.packfields('>hhlhhl', names)
        return self.raw

class TextRecord(PDA.Palm.Record):
    """A compressed text record.

    The single field 'text' holds the expanded text; the raw record
    holds the same text run through the _Doc C module's compressor.
    """

    def __init__(self, raw='', index=0, id=0, attr=0, category=0):
        self.fields = {'text': (FLD_STRING, '')}
        PDA.Palm.Record.__init__(self, raw, index, id, attr, category)

    def unpack(self, raw):
        """Keep 'raw' and store its decompressed form in 'text'."""
        self.raw = raw
        expanded = _Doc.uncompress(self.raw)
        self['text'] = expanded

    def pack(self):
        """Compress 'text' into self.raw and return the raw data."""
        compressed = _Doc.compress(self['text'])
        self.raw = compressed
        return self.raw

class TextRecordV1(PDA.Palm.Record):
    """An uncompressed text record.

    Used when the header's 'version' field is 1 (see Database.getRecord)
    and by DOCWriter when compression is turned off.  The single field
    'text' and the raw record data are the same string.
    """

    def __init__(self, raw='', index=0, id=0, attr=0, category=0):
        self.fields = {'text': (FLD_STRING, '')}
        PDA.Palm.Record.__init__(self, raw, index, id, attr, category)

    def unpack(self, raw):
        """Keep 'raw' and expose it unchanged as the 'text' field."""
        # Store the argument first, as the other record classes do;
        # previously 'raw' was ignored and whatever self.raw already
        # held (if anything) was used instead.
        self.raw = raw
        self['text'] = raw

    def pack(self):
        """Copy 'text' into self.raw and return the raw data."""
        self.raw = self['text']
        return self.raw
    
class BookmarkRecord(PDA.Palm.Record):
    """A bookmark record: a title ('text') and a position ('pos').

    The raw layout is the struct format '>16sl': a 16-byte title
    followed by a big-endian long.
    """

    def __init__(self, raw='', index=0, id=0, attr=0, category=0):
        self.fields = {'text': (FLD_STRING, '', 16),
                       'pos': (FLD_INT, 0)}
        PDA.Palm.Record.__init__(self, raw, index, id, attr, category)

    def unpack(self, raw):
        """Keep 'raw', decode both fields, and cut the title at its
        first NUL byte, if it has one.
        """
        self.raw = raw
        self.unpackfields('>16sl', ['text', 'pos'], raw)
        title = self['text']
        nul = string.find(title, '\000')
        if nul >= 0:
            self['text'] = title[:nul]

    def pack(self):
        """Encode both fields into self.raw and return the raw data."""
        packed = self.packfields('>16sl', ['text', 'pos'])
        self.raw = packed
        return self.raw
    

class Database(PDA.Palm.Database):
    """A DOC database whose record class depends on record position.

    Record 0 is the header.  The records after it are text records as
    far as the header's 'textrecs' count reaches; everything beyond
    that is a bookmark.
    """

    def __init__(self, db, info):
        PDA.Palm.Database.__init__(self, db, info)
        # note: we don't set a default record class, because we have
        # magic behavior depending on location in the file.
        if self.getRecords() > 0:
            header = self.getRecord(0, HeaderRecord)
        else:
            # nothing stored yet: start from an all-default header
            header = HeaderRecord()
        self.header = header

    def getRecord(self, index, cls=None):
        """Fetch record 'index', choosing a record class when 'cls' is
        not supplied.

        Index 0 gets HeaderRecord.  Other indexes get a text record if
        the header has a 'textrecs' field and either the index is within
        that count or the count is 0; otherwise BookmarkRecord.  Text
        records are TextRecordV1 when the header's 'version' is 1 and
        TextRecord in every other case.
        """
        if not cls:
            if index == 0:
                cls = HeaderRecord
            else:
                # Bookmark unless the header says this is a text record.
                cls = BookmarkRecord
                header = self.header
                if header.has_key('textrecs'):
                    ntext = header['textrecs']
                    if index <= ntext or ntext == 0:
                        cls = TextRecord
                        if header.has_key('version') and header['version'] == 1:
                            cls = TextRecordV1
        return PDA.Palm.Database.getRecord(self, index, cls)
    

# Register this Database class for databases of type 'TEXt' from two
# creators: 'REAd' (the default creator DOCWriter uses) and 'TlDc'
# (presumably TealDoc -- TODO confirm).
PDA.Palm.Types.register(Database, {'type':'TEXt', 'creator':'REAd'})
PDA.Palm.Types.register(Database, {'type':'TEXt', 'creator':'TlDc'})

class DOCWriter:
    """Write to a DOC file as a stream.  Does not support appending or
    seeking.

    Note that this class takes up a lot of memory, because it has to
    buffer all the records before it knows what to put in the header!
    Nothing is written to the target until close() is called.
    """

    # Number of uncompressed text bytes placed in each full text record;
    # also stored in the header's 'recsize' field.
    RECORD_SIZE = 4096

    def __init__(self, title='', target=None, compress=1,
                 category=0, creator='REAd', type='TEXt', backup=0,
                 version=0):
        """Initialize the writing stream.

        'title' is the database name.  'target' says where close()
        writes the result: a PDA.Palm.DLP instance (a new database is
        created through it), a PDA.Palm.Database instance (records are
        added to it), or anything else, which is passed to
        PDA.Palm.createFile as the file to create.

        'compress' selects compressed (TextRecord, header version 2) or
        plain (TextRecordV1, header version 1) text records.  'category'
        is assigned to the header record.  'creator', 'type', 'backup'
        and 'version' describe the database to create; 'version' is only
        used when creating a file.
        """
        self.title = title
        self.target = target
        self.creator = creator
        self.version = version
        self.backup = backup
        self.type = type
        self.compress = compress
        self.buf = ''           # text not yet turned into a record
        self.records = []       # finished text records, in order
        self.index = 0
        self.len = 0            # total number of characters written
        self.bookmark_pos = []  # (title, position) pairs
        self.appinfo = ''
        # testing...
        self.uid = 0x6f8001     # id given to the next record created
        self.opened = 1

        self.header = HeaderRecord()
        if self.compress:
            self.header['version'] = 2
        else:
            self.header['version'] = 1
        self.header['recsize'] = self.RECORD_SIZE
        self.header.id = 0x6f8000
        self.header.modified = 1
        self.header.index = 0
        self.header.category = category

    def set_appinfo(self, raw=''):
        """Set the raw application info block that close() will store."""
        self.appinfo = raw

    def bookmark(self, title, pos = None):
        """Set a bookmark.  If no position is supplied, uses the current
        write position.  Raises IOError if the document is closed.
        """
        if not self.opened:
            raise IOError('document closed')
        if pos is None:
            pos = self.len
        self.bookmark_pos.append((title, pos))

    def __add_text_record(self, text):
        """Wrap 'text' in a new text record and queue it for close()."""
        if self.compress:
            r = TextRecord()
        else:
            r = TextRecordV1()
        r['text'] = text
        r.id = self.uid
        self.uid = self.uid + 1
        r.modified = 1
        self.records.append(r)

    def __output(self):
        """Move every complete RECORD_SIZE chunk of the buffer into a
        text record, leaving any shorter remainder buffered.
        """
        if not self.opened:
            raise IOError('document closed')
        size = self.RECORD_SIZE
        while len(self.buf) >= size:
            chunk = self.buf[:size]
            self.buf = self.buf[size:]
            self.__add_text_record(chunk)

    def write(self, data):
        """Append 'data' to the document.  Raises IOError if closed."""
        if not self.opened:
            raise IOError('document closed')
        self.buf = self.buf + data
        self.len = self.len + len(data)
        self.__output()

    def writelines(self, list):
        """Write each string in 'list' in turn."""
        for l in list:
            self.write(l)

    def close(self):
        """Flush the buffered text and write header, text records,
        bookmarks and application info to the target.

        Raises IOError if the document is already closed.  The document
        is only marked closed once everything has been written.
        """
        if not self.opened:
            raise IOError('document closed')
        self.__output()
        if self.buf:
            tail = self.buf
            # Empty the buffer before queuing the tail, so that a second
            # close() after a failure further down (for instance from
            # __del__) cannot queue the same text twice.
            self.buf = ''
            self.__add_text_record(tail)

        # open the database
        if isinstance(self.target, PDA.Palm.DLP):
            # 0x0008 is passed as the flags argument when a backup is
            # requested (presumably the backup attribute bit -- confirm).
            if self.backup:
                flags = 0x0008
            else:
                flags = 0
            db = self.target.create(self.title, self.creator, self.type,
                                    flags)
        elif isinstance(self.target, PDA.Palm.Database):
            db = self.target
        else:
            i = {'creator':self.creator, 'type':self.type,
                 'flagBackup':self.backup,
                 'version':self.version,
                 'name':self.title or '(untitled)'}
            db = PDA.Palm.createFile(self.target, i)

        # Only now are the total length and record count known.
        self.header['storylen'] = self.len
        self.header['textrecs'] = len(self.records)
        db.addRecord(self.header)
        for r in self.records:
            db.addRecord(r)

        for t, p in self.bookmark_pos:
            r = BookmarkRecord()
            # at most 15 characters of title plus a terminating NUL
            r['text'] = t[:15]+'\000'
            r['pos'] = p
            r.id = self.uid
            self.uid = self.uid + 1
            r.modified = 1
            db.addRecord(r)

        if self.appinfo:
            b = PDA.Palm.AppBlock(self.appinfo)
            db.setAppBlock(b)

        self.opened = 0

    # there seems to be a problem with this?
    def __del__(self):
        # getattr: the instance may never have got as far as setting
        # 'opened' if construction failed early.
        if getattr(self, 'opened', 0):
            self.close()
	
class DOCReader:
    """Read from a DOC file as a stream.

    Does not support bookmarks yet.  They will probably be available as
    a list or dictionary.

    Note that seeking depends quite heavily on the assumption that all
    records (except possibly the last) contain exactly 'recsize' bytes
    of text.
    """
    def __init__(self, pdb):
        """'pdb' is an open DOC database: it must provide a 'header'
        with 'textrecs', 'recsize' and 'storylen' entries and return
        record N (with a 'text' entry) for pdb[N].
        """
        self.db = pdb
        self.rec = 0    # record currently in buffer
        self.buf = ''   # text loaded but not yet handed out

    def __next(self):
        """Append the next text record to the buffer and return it, or
        return None if the last text record has already been loaded.
        """
        if self.rec >= self.db.header['textrecs']:
            return None
        self.rec = self.rec + 1
        r = self.db[self.rec]
        self.buf = self.buf + r['text']
        return r

    def read(self, nbytes=0):
        """Per the standard python behavior, read() reads until 'no more data
        is available'.  That is, it reads at most whatever is in the buffer,
        or one record (if the buffer was empty).

        With a positive 'nbytes', at most that many characters are
        returned.  With the default (or None, or a negative number),
        everything currently buffered is returned.  An empty string
        means end of file.
        """
        # Loop rather than test once, so that an empty record is skipped
        # instead of being mistaken for end of file.
        while not self.buf:
            if self.__next() is None:
                return ''

        if nbytes is None or nbytes <= 0:
            nbytes = len(self.buf)
        e = self.buf[:nbytes]
        self.buf = self.buf[nbytes:]
        return e

    def readline(self):
        """Return the next line including its newline, or the remaining
        text if the file ends without one, or '' at end of file.
        """
        while '\n' not in self.buf:
            # if we get eof while trying to find the end of the line,
            # just return whatever is in the buffer (if it is empty,
            # that is the same as eof)
            if self.__next() is None:
                b = self.buf
                self.buf = ''
                return b

        j = self.buf.find('\n')
        e = self.buf[:j+1]
        self.buf = self.buf[j+1:]
        return e

    def readlines(self):
        """Return a list of all remaining lines."""
        l = []
        while 1:
            m = self.readline()
            if not m:
                break
            l.append(m)
        return l

    def tell(self):
        """Return the current read position within the text."""
        header = self.db.header
        # End of the text loaded so far.  The last record is usually
        # shorter than 'recsize', so never count past 'storylen'.
        loaded = min(self.rec * header['recsize'], header['storylen'])
        return loaded - len(self.buf)

    def seek(self, pos, whence = 0):
        """Move the read position.  'whence' is 0 for absolute, 1 for
        relative to the current position, 2 for relative to the end.
        Positions outside the text are clamped to its start or end.
        """
        header = self.db.header
        total = header['storylen']
        if whence == 1:
            pos = self.tell() + pos
        elif whence == 2:
            pos = total + pos

        if pos < 0:
            pos = 0
        if pos >= total:
            # At end of text there is no record to load; in particular,
            # when the text fills its last record exactly, the computed
            # record number would lie beyond the text records.
            self.rec = header['textrecs']
            self.buf = ''
            return

        rec, offset = divmod(pos, header['recsize'])
        self.rec = rec + 1      # record 0 is the header
        r = self.db[self.rec]
        self.buf = r['text'][offset:]

    def close(self):
        """Drop the database reference and reset the stream state.
        Calling close() more than once is harmless.
        """
        if hasattr(self, 'db'):
            del self.db
        self.rec = 0
        self.buf = ''
	

def openDoc(name, mode='r', title=None):
    """Open the DOC file 'name' as a stream.

    Mode 'r' opens the file with PDA.Palm.openFile and returns a
    DOCReader.  Mode 'w' returns a DOCWriter that will create the file,
    using 'title' as the database name.  Any other mode raises
    ValueError (previously None was returned silently).
    """
    if mode == 'r':
        f = PDA.Palm.openFile(name)
        return DOCReader(f)
    elif mode == 'w':
        return DOCWriter(title, name)
    raise ValueError("unsupported mode %s (expected 'r' or 'w')"
                     % repr(mode))

    
def openDocRemote(dlp, name, mode='r'):
    """Open the DOC database 'name' through the connection 'dlp'.

    Mode 'r' opens the database with dlp.open and returns a DOCReader.
    Mode 'w' returns a DOCWriter titled 'name' whose target is 'dlp'.
    Any other mode raises ValueError (previously None was returned
    silently).
    """
    if mode == 'r':
        f = dlp.open(name)
        return DOCReader(f)
    elif mode == 'w':
        # DOCWriter has no 'remote' argument (passing one raised
        # TypeError); it recognises a DLP target by type in close().
        return DOCWriter(name, dlp)
    raise ValueError("unsupported mode %s (expected 'r' or 'w')"
                     % repr(mode))
    
