"""This module co-ordinates the reading of the source file.
It maintains the current position of the parser in the source file.
(c) Copyright by Jay Love, 2000 (mailto:jsliv@jslove.org)
Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee or royalty is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation or portions thereof, including modifications,
that you make.
This software is based in part on work done by the Jakarta group.
"""
from copy import copy
import os
from PSPUtils import PSPParserException
class Mark(object):
    """The Mark class marks a point in an input stream.

    A mark records which source file is being read (``fileId``), the text
    of that file (``stream``), the position in it (``cursor``), and a stack
    of the positions that were suspended when included files were pushed
    (``includeStack``).
    """

    def __init__(self, reader,
            fileId=None, stream=None, inBaseDir=None, encoding=None):
        """Create a new mark, or duplicate an existing one.

        reader -- either the reader object that owns this mark (it must
            provide ``getFile(fileId)``), or another Mark to be copied.
            When a Mark is passed, all other arguments are ignored.
        fileId -- id of the source file, as understood by the reader.
        stream -- complete text of the source file.
        inBaseDir -- base directory stored with the mark.
        encoding -- encoding stored with the mark.
        """
        if isinstance(reader, Mark):
            # Copy constructor. Rebinding ``self`` (as in ``self = copy(x)``)
            # would only change a local name and leave this instance empty,
            # so the state is copied attribute by attribute instead.
            self.__dict__.update(reader.__dict__)
            # Give the duplicate its own stack so that pushing or popping
            # on one mark never changes the other.
            self.includeStack = list(reader.includeStack)
        else:
            self.reader = reader
            self.fileId = fileId
            self.includeStack = []
            self.cursor = 0
            self.stream = stream
            self.baseDir = inBaseDir
            self.encoding = encoding

    def __copy__(self):
        """Return an independent snapshot of this mark (used by copy.copy).

        The stream text and reader are shared; the include stack is
        duplicated so the snapshot stays valid after later push/pop calls.
        """
        duplicate = self.__class__.__new__(self.__class__)
        duplicate.__dict__.update(self.__dict__)
        duplicate.includeStack = list(self.includeStack)
        return duplicate

    def __str__(self):
        """Return the position as ``file(cursor)``."""
        return '%s(%d)' % (self.getFile(), self.cursor)

    def getFile(self):
        """Return what the reader reports for the current file id."""
        return self.reader.getFile(self.fileId)

    def pushStream(self, inFileId, inStream, inBaseDir, inEncoding):
        """Suspend the current stream and start at the top of a new one."""
        self.includeStack.append((self.cursor, self.fileId, self.baseDir,
            self.encoding, self.stream))
        self.cursor = 0
        self.fileId = inFileId
        self.stream = inStream
        self.baseDir = inBaseDir
        self.encoding = inEncoding

    def popStream(self):
        """Resume the most recently suspended stream.

        Returns True if a stream was restored, False if the include stack
        was empty (in which case the mark is left untouched).
        """
        if not self.includeStack:
            return False
        (self.cursor, self.fileId, self.baseDir,
            self.encoding, self.stream) = self.includeStack.pop()
        return True
class StreamReader(object):
    """This class handles the PSP source file.

    It provides the characters to the other parts of the system.
    It can move forward and backwards in a file and remember locations.

    The reading position is kept in ``self.current``, a Mark holding the
    text of the active file and a cursor into it. Included files are
    pushed on top of the active one and popped again when exhausted.
    """

    def __init__(self, filename, ctxt):
        """Store the file name and context; no file is opened until init()."""
        self._pspfile = filename
        self._ctxt = ctxt
        self.sourcefiles = []
        self.current = None
        self.master = None

    def init(self):
        """Open the file named by the context's getFullPspFileName()."""
        self.pushFile(self._ctxt.getFullPspFileName())

    def registerSourceFile(self, filepath):
        """Record filepath and return its index in ``sourcefiles``."""
        self.sourcefiles.append(filepath)
        return len(self.sourcefiles) - 1

    def pushFile(self, filepath, encoding=None):
        """Read a file completely and make it the active input stream.

        The first file pushed becomes the master file. Relative paths of
        files pushed later are resolved against the master's directory.
        The encoding is only stored with the mark; it is not used for
        reading the file here.
        """
        assert isinstance(filepath, basestring)
        if self.master is None:
            parent = None
            self.master = filepath
        else:
            parent = os.path.split(self.master)[0]
        isAbsolute = os.path.isabs(filepath)
        if parent is not None and not isAbsolute:
            filepath = os.path.join(parent, filepath)
        fileId = self.registerSourceFile(filepath)
        handle = open(filepath, 'rU')
        try:
            stream = handle.read()
        finally:
            # The whole text is held in memory, so the handle is not
            # needed any longer and must not be left open.
            handle.close()
        if self.current is None:
            self.current = Mark(self, fileId, stream,
                self._ctxt.getBaseUri(), encoding)
        else:
            self.current.pushStream(fileId, stream,
                self._ctxt.getBaseUri(), encoding)

    def popFile(self):
        """Return to the including file.

        Returns None if nothing has been pushed yet, otherwise the result
        of the current mark's popStream().
        """
        if self.current is None:
            return None
        return self.current.popStream()

    def getFile(self, i):
        """Return the source file path registered under index i."""
        return self.sourcefiles[i]

    def newSourceFile(self, filename):
        """Register filename unless it is already known.

        Returns None if the file was already registered, otherwise the
        number of registered files after adding it.
        """
        if filename in self.sourcefiles:
            return None
        self.sourcefiles.append(filename)
        # NOTE(review): this is the count, i.e. one more than the index
        # that registerSourceFile() returns -- confirm which one callers
        # expect before relying on it as a file id.
        return len(self.sourcefiles)

    def mark(self):
        """Return a copy of the current position."""
        return copy(self.current)

    def skipUntil(self, s):
        """Greedy search.

        Return the point before the string, but move reader past it.
        If the string is not found in the active file, the search goes on
        in the including file(s). Raises EOFError if it is found nowhere.
        """
        new_cursor = self.current.stream.find(s, self.current.cursor)
        if new_cursor < 0:
            self.current.cursor = len(self.current.stream)
            # hasMoreInput() already pops exhausted files, so no further
            # popFile() is needed here (that would throw away the rest of
            # the file we have just returned to).
            if not self.hasMoreInput():
                raise EOFError
            # Pass the mark found further up back to the caller.
            return self.skipUntil(s)
        self.current.cursor = new_cursor
        mark = self.mark()
        self.current.cursor += len(s)
        return mark

    def reset(self, mark):
        """Move the reader back to a position obtained from mark()."""
        # Work on a copy so that advancing the reader afterwards does not
        # silently move the mark that the caller still holds.
        self.current = copy(mark)

    def matches(self, s):
        """Check whether the input at the cursor starts with s."""
        start = self.current.cursor
        return s == self.current.stream[start:start + len(s)]

    def advance(self, length):
        """Advance length characters.

        If the active file ends first, the remainder is consumed from the
        including file. Raises EOFError if there is not enough input.
        """
        if length + self.current.cursor <= len(self.current.stream):
            self.current.cursor += length
        else:
            prog = len(self.current.stream) - self.current.cursor
            self.current.cursor = len(self.current.stream)
            if self.hasMoreInput():
                self.advance(length - prog)
            else:
                raise EOFError()

    def nextChar(self):
        """Return the character at the cursor and step past it.

        Returns -1 when there is no more input.
        """
        if not self.hasMoreInput():
            return -1
        c = self.current.stream[self.current.cursor]
        self.advance(1)
        return c

    def isSpace(self):
        """Check whether the character at the cursor is a blank or newline.

        No advancing. Returns False at the end of the input instead of
        failing with an IndexError.
        """
        return (self.hasMoreInput()
            and self.current.stream[self.current.cursor] in (' ', '\n'))

    def isDelimiter(self):
        """Check whether the cursor is at the end of an unquoted token.

        Delimiters are white space, one of ``= " ' /``, and a ``-`` that
        is directly followed by ``>``. The cursor is not moved.
        """
        if self.isSpace():
            return True
        c = self.peekChar()
        if c in ('=', '"', "'", '/'):
            return True
        if c == '-':
            mark = self.mark()
            # NOTE(review): nextChar() returns the '-' itself here, so the
            # ``c == '>'`` test can never succeed and only '->' is
            # recognised, not '-->'. Left as is; confirm the intent.
            c = self.nextChar()
            try:
                return c == '>' or (c == '-' and self.nextChar() == '>')
            finally:
                self.reset(mark)
        return False

    def peekChar(self, cnt=1):
        """Return the next cnt characters without advancing.

        Fewer characters are returned if the active file ends earlier.
        Raises EOFError when there is no more input.
        """
        if self.hasMoreInput():
            return self.current.stream[
                self.current.cursor:self.current.cursor+cnt]
        raise EOFError

    def skipSpaces(self):
        """Skip blanks and newlines; return how many were skipped."""
        i = 0
        while self.isSpace():
            self.nextChar()
            i += 1
        return i

    def getChars(self, start, stop):
        """Return the text between two marks.

        The text is taken from the stream of the start mark; only the
        cursor of the stop mark is used. The reader position is unchanged.
        """
        return start.stream[start.cursor:stop.cursor]

    def hasMoreInput(self):
        """Check whether any character is left to read.

        Files that are exhausted are popped as a side effect, so that the
        reader continues in the including file.
        """
        if self.current.cursor >= len(self.current.stream):
            while self.popFile():
                if self.current.cursor < len(self.current.stream):
                    return True
            return False
        return True

    def nextContent(self):
        """Find next < char.

        Returns the text from the cursor up to (not including) the next
        ``<`` after the character at the cursor, or up to the end of the
        active file, and moves the cursor there.
        """
        cur_cursor = self.current.cursor
        # Start searching one character further, so a '<' directly at the
        # cursor becomes part of the returned content.
        self.current.cursor += 1
        new_cursor = self.current.stream.find('<', self.current.cursor)
        if new_cursor < 0:
            new_cursor = len(self.current.stream)
        self.current.cursor = new_cursor
        return self.current.stream[cur_cursor:new_cursor]

    def parseTagAttributes(self):
        """Parse the attributes at the beginning of a tag.

        Reads ``name=value`` pairs until ``>``, ``-->`` or ``%>`` is
        reached and returns them as a dictionary. The terminator itself
        is not consumed.
        """
        values = {}
        while 1:
            self.skipSpaces()
            c = self.peekChar()
            if c == '>':
                return values
            if c == '-':
                # Look ahead for '-->' and restore the position again.
                mark = self.mark()
                self.nextChar()
                try:
                    if self.nextChar() == '-' and self.nextChar() == '>':
                        return values
                finally:
                    self.reset(mark)
            elif c == '%':
                # Look ahead for '%>' and restore the position again.
                mark = self.mark()
                self.nextChar()
                try:
                    if self.peekChar() == '>':
                        return values
                finally:
                    self.reset(mark)
            elif not c:
                # NOTE(review): peekChar() raises EOFError at the end of
                # the input, so this looks unreachable in practice.
                break
            self.parseAttributeValue(values)
        raise PSPParserException('Unterminated attribute')

    def parseAttributeValue(self, valueDict):
        """Parse one ``name=value`` pair and store it in valueDict.

        Raises PSPParserException if the name is not followed by ``=``.
        """
        self.skipSpaces()
        name = self.parseToken(0)
        self.skipSpaces()
        if self.peekChar() != '=':
            raise PSPParserException('No attribute value')
        self.nextChar()
        self.skipSpaces()
        value = self.parseToken(1)
        self.skipSpaces()
        valueDict[name] = value

    def parseToken(self, quoted):
        """Parse a single token and return it as a string.

        If quoted is true, the token must be enclosed in single or double
        quotes; a backslash takes the following character literally. If
        the input does not start with a quote, nothing is consumed and an
        empty string is returned.

        If quoted is false, characters are read up to the next delimiter
        (see isDelimiter); a backslash in front of one of ``" ' > %`` is
        dropped and that character is taken literally.
        """
        buffer = []
        self.skipSpaces()
        c = self.peekChar()
        if quoted:
            if c in ('"', "'"):
                endquote = c
                self.nextChar()
                c = self.peekChar()
                # NOTE(review): peekChar() raises EOFError rather than
                # returning None, so a missing end quote currently shows
                # up as EOFError, not as the exception below.
                while c is not None and c != endquote:
                    c = self.nextChar()
                    if c == '\\':
                        c = self.nextChar()
                    buffer.append(c)
                    c = self.peekChar()
                if c is None:
                    raise PSPParserException('Unterminated attribute value')
                self.nextChar()
        else:
            while not self.isDelimiter():
                c = self.nextChar()
                # Only replace the backslash when it really escapes
                # something; otherwise it is kept as an ordinary character
                # (and the following character is not duplicated).
                if c == '\\' and self.peekChar() in ('"', "'", '>', '%'):
                    c = self.nextChar()
                buffer.append(c)
        return ''.join(buffer)