Initial commit (Clean history)
This commit is contained in:
@@ -0,0 +1,8 @@
|
||||
import collections
|
||||
|
||||
|
||||
# A style mapping: a document matcher paired with the HTML path that
# matched elements should be converted to.
Style = collections.namedtuple("Style", ["document_matcher", "html_path"])


def style(document_matcher, html_path):
    """Build a Style pairing *document_matcher* with *html_path*."""
    return Style(document_matcher, html_path)
|
||||
Binary file not shown.
@@ -0,0 +1,14 @@
|
||||
from .errors import LineParseError
|
||||
from .style_mapping_parser import parse_style_mapping
|
||||
from .tokeniser import tokenise
|
||||
from .token_iterator import TokenIterator
|
||||
from ... import results
|
||||
|
||||
|
||||
def read_style_mapping(string):
    """Parse a single style-mapping line into a results object.

    An unparseable line is not a hard error: the result then carries a
    value of None plus a warning naming the ignored mapping.
    """
    try:
        token_iterator = TokenIterator(tokenise(string))
        return results.success(parse_style_mapping(token_iterator))
    except LineParseError:
        warning = "Did not understand this style mapping, so ignored it: " + string
        return results.Result(None, [results.warning(warning)])
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,130 @@
|
||||
from ... import documents, document_matchers
|
||||
from .errors import LineParseError
|
||||
from .tokeniser import TokenType
|
||||
from .token_parser import try_parse_class_name, parse_string
|
||||
|
||||
|
||||
def parse_document_matcher(tokens):
    """Parse the document-element side of a style mapping.

    Dispatches on the leading identifier ("p", "r", "table", "b", ...)
    and returns the corresponding matcher from document_matchers.
    Each try_skip consumes the identifier only when it matches, so the
    order of the chain determines how input is consumed.

    Raises:
        LineParseError: if the leading token is not a recognised
            document element identifier.
    """
    # Paragraph: optional ".style-id", "[style-name...]" and numbering
    # suffix (see _parse_numbering).
    if tokens.try_skip(TokenType.IDENTIFIER, "p"):
        style_id = try_parse_class_name(tokens)
        style_name = _parse_style_name(tokens)
        numbering = _parse_numbering(tokens)

        return document_matchers.paragraph(
            style_id=style_id,
            style_name=style_name,
            numbering=numbering,
        )

    # Run: optional ".style-id" and "[style-name...]".
    elif tokens.try_skip(TokenType.IDENTIFIER, "r"):
        style_id = try_parse_class_name(tokens)
        style_name = _parse_style_name(tokens)

        return document_matchers.run(
            style_id=style_id,
            style_name=style_name,
        )

    # Table: optional ".style-id" and "[style-name...]".
    elif tokens.try_skip(TokenType.IDENTIFIER, "table"):
        style_id = try_parse_class_name(tokens)
        style_name = _parse_style_name(tokens)

        return document_matchers.table(
            style_id=style_id,
            style_name=style_name,
        )

    # Simple formatting matchers that take no further arguments.
    elif tokens.try_skip(TokenType.IDENTIFIER, "b"):
        return document_matchers.bold

    elif tokens.try_skip(TokenType.IDENTIFIER, "i"):
        return document_matchers.italic

    elif tokens.try_skip(TokenType.IDENTIFIER, "u"):
        return document_matchers.underline

    elif tokens.try_skip(TokenType.IDENTIFIER, "strike"):
        return document_matchers.strikethrough

    elif tokens.try_skip(TokenType.IDENTIFIER, "all-caps"):
        return document_matchers.all_caps

    elif tokens.try_skip(TokenType.IDENTIFIER, "small-caps"):
        return document_matchers.small_caps

    # highlight takes an optional [color='...'] argument.
    elif tokens.try_skip(TokenType.IDENTIFIER, "highlight"):
        return _parse_highlight(tokens)

    elif tokens.try_skip(TokenType.IDENTIFIER, "comment-reference"):
        return document_matchers.comment_reference

    # br requires a [type='...'] argument (line/page/column).
    elif tokens.try_skip(TokenType.IDENTIFIER, "br"):
        return _parse_break(tokens)

    else:
        raise LineParseError("Unrecognised document element: {0}".format(tokens.next_value(TokenType.IDENTIFIER)))
|
||||
|
||||
def _parse_style_name(tokens):
    """Parse an optional "[style-name...]" constraint; None when absent."""
    if not tokens.try_skip(TokenType.SYMBOL, "["):
        return None
    tokens.skip(TokenType.IDENTIFIER, "style-name")
    matcher = _parse_string_matcher(tokens)
    tokens.skip(TokenType.SYMBOL, "]")
    return matcher


def _parse_string_matcher(tokens):
    """Parse a string comparison: "=" for equality, "^=" for prefix."""
    if tokens.try_skip(TokenType.SYMBOL, "="):
        return document_matchers.equal_to(parse_string(tokens))
    if tokens.try_skip(TokenType.SYMBOL, "^="):
        return document_matchers.starts_with(parse_string(tokens))
    raise LineParseError("Unrecognised string matcher: {0}".format(tokens.next_value()))


def _parse_numbering(tokens):
    """Parse an optional list-numbering suffix such as ":ordered-list(1)".

    Falls through (returning None implicitly) when there is no ":" suffix.
    """
    if tokens.try_skip(TokenType.SYMBOL, ":"):
        is_ordered = _parse_list_type(tokens)
        tokens.skip(TokenType.SYMBOL, "(")
        # The mapping syntax counts levels from 1; documents count from 0.
        level = int(tokens.next_value(TokenType.INTEGER)) - 1
        tokens.skip(TokenType.SYMBOL, ")")
        return documents.numbering_level(level, is_ordered=is_ordered)


def _parse_list_type(tokens):
    """Return True for "ordered-list", False for "unordered-list"."""
    list_type = tokens.next_value(TokenType.IDENTIFIER)
    try:
        return {"ordered-list": True, "unordered-list": False}[list_type]
    except KeyError:
        raise LineParseError("Unrecognised list type: {0}".format(list_type))
|
||||
|
||||
|
||||
def _parse_highlight(tokens):
    """Parse a highlight matcher with an optional [color='...'] argument."""
    if tokens.try_skip(TokenType.SYMBOL, "["):
        tokens.skip(TokenType.IDENTIFIER, "color")
        tokens.skip(TokenType.SYMBOL, "=")
        color = parse_string(tokens)
        tokens.skip(TokenType.SYMBOL, "]")
    else:
        color = None

    return document_matchers.highlight(color=color)


def _parse_break(tokens):
    """Parse a br matcher; requires [type='line'|'page'|'column'].

    Raises:
        LineParseError: if the break type is not one of the three
            recognised names.
    """
    tokens.skip(TokenType.SYMBOL, "[")
    tokens.skip(TokenType.IDENTIFIER, "type")
    tokens.skip(TokenType.SYMBOL, "=")
    type_name = parse_string(tokens)
    tokens.skip(TokenType.SYMBOL, "]")

    if type_name == "line":
        return document_matchers.line_break
    elif type_name == "page":
        return document_matchers.page_break
    elif type_name == "column":
        return document_matchers.column_break
    else:
        raise LineParseError("Unrecognised break type: {0}".format(type_name))
|
||||
@@ -0,0 +1,2 @@
|
||||
class LineParseError(Exception):
    """Raised when a single style-mapping line cannot be parsed."""
    pass
|
||||
@@ -0,0 +1,120 @@
|
||||
import cobble
|
||||
|
||||
from ... import html_paths
|
||||
from .tokeniser import TokenType
|
||||
from .token_parser import parse_identifier, parse_string
|
||||
|
||||
|
||||
@cobble.data
class _AttributeOrClassName(object):
    # Name of the HTML attribute this entry sets (e.g. "class").
    name = cobble.field()
    # Attribute value to set or append.
    value = cobble.field()
    # When True, repeated entries for the same name are joined with
    # spaces instead of overwriting (used for class names).
    append = cobble.field()
|
||||
|
||||
|
||||
def parse_html_path(tokens):
    """Parse the HTML side of a style mapping.

    A leading "!" means matched elements are ignored entirely;
    otherwise the path is a ">"-separated chain of elements
    (possibly empty).
    """
    if tokens.try_skip(TokenType.SYMBOL, "!"):
        return html_paths.ignore
    return html_paths.path(_parse_html_path_elements(tokens))


def _parse_html_path_elements(tokens):
    """Parse zero or more elements separated by whitespace-">"-whitespace."""
    path_elements = []

    if tokens.peek_token_type() == TokenType.IDENTIFIER:
        path_elements.append(_parse_element(tokens))

        separator = ((TokenType.WHITESPACE, None), (TokenType.SYMBOL, ">"))
        while tokens.try_skip_many(separator):
            tokens.skip(TokenType.WHITESPACE)
            path_elements.append(_parse_element(tokens))

    return path_elements
|
||||
|
||||
|
||||
def _parse_element(tokens):
    """Parse one HTML element: tag names, attributes/classes, and flags."""
    tag_names = _parse_tag_names(tokens)
    attribute_entries = _parse_attribute_or_class_names(tokens)
    is_fresh = _parse_is_fresh(tokens)
    separator = _parse_separator(tokens)

    # Fold the parsed entries into one dict. Appendable entries (class
    # names) accumulate space-separated; everything else overwrites.
    attributes = {}
    for entry in attribute_entries:
        existing = attributes.get(entry.name)
        if entry.append and existing:
            attributes[entry.name] = existing + " " + entry.value
        else:
            attributes[entry.name] = entry.value

    return html_paths.element(
        tag_names,
        attributes=attributes,
        fresh=is_fresh,
        separator=separator,
    )
|
||||
|
||||
|
||||
def _parse_tag_names(tokens):
    """Parse one or more tag names separated by "|"."""
    names = [parse_identifier(tokens)]
    while tokens.try_skip(TokenType.SYMBOL, "|"):
        names.append(parse_identifier(tokens))
    return names


def _parse_attribute_or_class_names(tokens):
    """Collect attribute/class entries until neither "[" nor "." follows."""
    entries = []
    entry = _try_parse_attribute_or_class_name(tokens)
    while entry is not None:
        entries.append(entry)
        entry = _try_parse_attribute_or_class_name(tokens)
    return entries
|
||||
|
||||
|
||||
def _try_parse_attribute_or_class_name(tokens):
    """Parse "[name='value']" or ".class-name"; None when neither follows."""
    if tokens.is_next(TokenType.SYMBOL, "["):
        return _parse_attribute(tokens)
    if tokens.is_next(TokenType.SYMBOL, "."):
        return _parse_class_name(tokens)
    return None


def _parse_attribute(tokens):
    """Parse "[name='value']" into a non-appending attribute entry."""
    tokens.skip(TokenType.SYMBOL, "[")
    attribute_name = parse_identifier(tokens)
    tokens.skip(TokenType.SYMBOL, "=")
    attribute_value = parse_string(tokens)
    tokens.skip(TokenType.SYMBOL, "]")
    return _AttributeOrClassName(name=attribute_name, value=attribute_value, append=False)


def _parse_class_name(tokens):
    """Parse ".name" into an appending entry on the "class" attribute."""
    tokens.skip(TokenType.SYMBOL, ".")
    return _AttributeOrClassName(name="class", value=parse_identifier(tokens), append=True)
|
||||
|
||||
|
||||
def _parse_is_fresh(tokens):
    """Return True (and consume the tokens) when ":fresh" is present."""
    return tokens.try_skip_many((
        (TokenType.SYMBOL, ":"),
        (TokenType.IDENTIFIER, "fresh"),
    ))


def _parse_separator(tokens):
    """Parse an optional ':separator("...")' suffix; None when absent."""
    if not tokens.try_skip_many((
        (TokenType.SYMBOL, ":"),
        (TokenType.IDENTIFIER, "separator"),
    )):
        return None
    tokens.skip(TokenType.SYMBOL, "(")
    separator = parse_string(tokens)
    tokens.skip(TokenType.SYMBOL, ")")
    return separator
|
||||
@@ -0,0 +1,15 @@
|
||||
from .tokeniser import TokenType
|
||||
from .document_matcher_parser import parse_document_matcher
|
||||
from .html_path_parser import parse_html_path
|
||||
from ...styles import Style
|
||||
|
||||
|
||||
def parse_style_mapping(tokens):
    """Parse a whole mapping line: <document matcher> => <html path>."""
    document_matcher = parse_document_matcher(tokens)

    # "=>" must be preceded by whitespace; whitespace after it is
    # optional so that an empty HTML path can follow.
    tokens.skip(TokenType.WHITESPACE)
    tokens.skip(TokenType.SYMBOL, "=>")
    tokens.try_skip(TokenType.WHITESPACE)

    html_path = parse_html_path(tokens)
    # The whole line must have been consumed.
    tokens.skip(TokenType.END)
    return Style(document_matcher, html_path)
|
||||
@@ -0,0 +1,59 @@
|
||||
# TODO: check indices
|
||||
# TODO: proper tests for unexpected tokens
|
||||
|
||||
from .errors import LineParseError
|
||||
|
||||
|
||||
class TokenIterator(object):
    """Cursor over a token list with peek/skip/backtracking helpers.

    Mandatory reads (next_value with a type, skip) raise LineParseError
    on mismatch; try_* methods leave the cursor untouched and return
    False instead.
    """

    def __init__(self, tokens):
        self._tokens = tokens
        self._index = 0

    def peek_token_type(self):
        """Return the type of the next token without consuming it."""
        return self._tokens[self._index].type

    def next_value(self, token_type=None):
        """Consume the next token and return its value.

        Raises:
            LineParseError: if token_type is given and does not match.
        """
        return self._next(token_type).value

    def _next(self, token_type=None):
        # Consume and return the next token, optionally checking its type.
        token = self._tokens[self._index]
        if token_type is None or token.type == token_type:
            self._index += 1
            return token
        else:
            raise self._unexpected_token_type(token_type, token)

    def skip(self, token_type, token_value=None):
        """Consume one token of the given type (and value, if given).

        Raises:
            LineParseError: on mismatch.
        """
        token = self._tokens[self._index]
        if token.type == token_type and (token_value is None or token.value == token_value):
            self._index += 1
            return True
        else:
            raise self._unexpected_token_type(token_type, token)

    def try_skip(self, token_type, token_value=None):
        """Consume one matching token; otherwise return False, consuming nothing."""
        if self.is_next(token_type, token_value):
            self._index += 1
            return True
        else:
            return False

    def try_skip_many(self, tokens):
        """Atomically consume a sequence of (type, value) pairs.

        On any mismatch the cursor is restored to where it started and
        False is returned. A value of None matches any token value.
        """
        start = self._index
        for token_type, token_value in tokens:
            token = self._tokens[self._index]
            if not (token.type == token_type and (token_value is None or token.value == token_value)):
                self._index = start
                return False
            else:
                self._index += 1

        return True

    def is_next(self, token_type, token_value=None):
        """Check the next token without consuming it; None value matches any."""
        token = self._tokens[self._index]
        return token.type == token_type and (token_value is None or token.value == token_value)

    def _unexpected_token_type(self, token_type, token):
        # Build (rather than raise) the error, matching the call sites'
        # "raise self._unexpected_token_type(...)" shape. Previously this
        # method raised a bare LineParseError itself, which discarded the
        # token details and made the factory-style call sites misleading.
        return LineParseError(
            "Expected token of type {0} but was {1} (value: {2!r})".format(
                token_type, token.type, token.value,
            )
        )
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
import re
|
||||
|
||||
from .tokeniser import TokenType
|
||||
|
||||
|
||||
def try_parse_class_name(tokens):
    """Parse a ".class-name" suffix; return None when there is no "." ."""
    if not tokens.try_skip(TokenType.SYMBOL, "."):
        return None
    return parse_identifier(tokens)


def parse_identifier(tokens):
    """Consume an identifier token and decode its escape sequences."""
    return decode_escape_sequences(tokens.next_value(TokenType.IDENTIFIER))


def parse_string(tokens):
    """Consume a string token, strip its quotes, and decode escapes."""
    raw = tokens.next_value(TokenType.STRING)
    return decode_escape_sequences(raw[1:-1])


_ESCAPE_SEQUENCE_REGEX = re.compile(r"\\(.)")

# Escape codes with special meanings; any other code maps to itself.
_ESCAPE_CODES = {"n": "\n", "r": "\r", "t": "\t"}


def decode_escape_sequences(value):
    """Replace backslash escapes in *value* (\\n, \\r, \\t, \\<char>)."""
    return _ESCAPE_SEQUENCE_REGEX.sub(_decode_escape_sequence, value)


def _decode_escape_sequence(match):
    # Unknown escapes decode to the escaped character itself.
    code = match.group(1)
    return _ESCAPE_CODES.get(code, code)
|
||||
@@ -0,0 +1,61 @@
|
||||
import collections
|
||||
import re
|
||||
|
||||
|
||||
Token = collections.namedtuple("Token", ["character_index", "type", "value"])
|
||||
|
||||
|
||||
class TokenType(object):
|
||||
IDENTIFIER = "identifier"
|
||||
SYMBOL = "symbol"
|
||||
WHITESPACE = "whitespace"
|
||||
STRING = "string"
|
||||
UNTERMINATED_STRING = "unterminated string"
|
||||
INTEGER = "integer"
|
||||
END = "end"
|
||||
|
||||
|
||||
|
||||
def regex_tokeniser(rules):
|
||||
rules = [(token_type, _to_regex(regex)) for token_type, regex in rules]
|
||||
rules.append(("unknown", re.compile(".")))
|
||||
|
||||
def tokenise(value):
|
||||
tokens = []
|
||||
index = 0
|
||||
while index < len(value):
|
||||
for token_type, regex in rules:
|
||||
match = regex.match(value, index)
|
||||
if match is not None:
|
||||
tokens.append(Token(index, token_type, match.group(0)))
|
||||
index = match.end()
|
||||
break
|
||||
else:
|
||||
# Should be impossible
|
||||
raise Exception("Remaining: " + value[index:])
|
||||
|
||||
tokens.append(Token(index, TokenType.END, ""))
|
||||
|
||||
return tokens
|
||||
|
||||
return tokenise
|
||||
|
||||
|
||||
def _to_regex(value):
|
||||
if hasattr(value, "match"):
|
||||
return value
|
||||
else:
|
||||
return re.compile(value)
|
||||
|
||||
|
||||
# A single-quoted string body: escaped characters or anything but "'".
# Deliberately lacks the closing quote so it can be shared by both the
# STRING and UNTERMINATED_STRING rules below.
_string_prefix = r"'(?:\\.|[^'])*"
# One identifier character: a letter, hyphen, underscore, or any
# backslash-escaped character.
_identifier_character = r"(?:[a-zA-Z\-_]|\\.)"


# The style-mapping tokeniser. Rule order matters: STRING is tried
# before UNTERMINATED_STRING so properly terminated strings win.
tokenise = regex_tokeniser([
    (TokenType.IDENTIFIER, _identifier_character + "(?:" + _identifier_character + "|[0-9])*"),
    (TokenType.SYMBOL, r":|>|=>|\^=|=|\(|\)|\[|\]|\||!|\."),
    (TokenType.WHITESPACE, r"\s+"),
    (TokenType.STRING, _string_prefix + "'"),
    (TokenType.UNTERMINATED_STRING, _string_prefix),
    (TokenType.INTEGER, "([0-9]+)"),
])
|
||||
Reference in New Issue
Block a user