1
0
mirror of https://github.com/certbot/certbot.git synced 2026-01-26 07:41:33 +03:00

Add and test new nginx parsing abstractions (#6383)

* feat(nginx): add and test new parsing abstractions

* chore(nginx parser): fix mypy and address small comments

* chore(nginx parser): clean up by removing context object

* fix integration test and lint
This commit is contained in:
sydneyli
2018-10-19 12:30:32 -07:00
committed by ohemorange
parent 0dab41ee13
commit 8dd68a6551
3 changed files with 646 additions and 0 deletions

View File

@@ -0,0 +1,392 @@
""" This file contains parsing routines and object classes to help derive meaning from
raw lists of tokens from pyparsing. """
import abc
import logging
import six
from certbot import errors
from acme.magic_typing import List # pylint: disable=unused-import, no-name-in-module
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Text token of the "managed by Certbot" marker comment (the leading space is part of it).
COMMENT = " managed by Certbot"
# The marker comment as a token list: the "#" token followed by the comment text.
COMMENT_BLOCK = ["#", COMMENT]
class Parsable(object):
    """ Common interface for every node of the parsed configuration tree.

    A node wraps a (possibly nested) list structure and keeps a reference to the
    node that contains it.

    :param .Parsable parent: The node containing this one, or None.
    """

    # NOTE(review): the `__metaclass__` attribute is only honored by Python 2; on
    # Python 3 it is a plain class attribute and abstractness is not enforced.
    __metaclass__ = abc.ABCMeta

    def __init__(self, parent=None):
        self.parent = parent
        self._tabs = None
        self._data = [] # type: List[object]

    @classmethod
    def parsing_hooks(cls):
        """Returns the object types that this class should be able to `parse` recursively.

        The parser tries the returned types in order for every sub-item, so the
        order expresses priority.

        :returns: A tuple of Parsable classes.
        :rtype: tuple
        """
        hooks = (Block, Sentence, Statements)
        return hooks

    @staticmethod
    @abc.abstractmethod
    def should_parse(lists):
        """ Tells whether `lists` has the shape this type knows how to parse.

        :param lists: The raw structure to inspect.

        :returns: Whether `lists` can be parsed as this object.
        :rtype: bool
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def parse(self, raw_list, add_spaces=False):
        """ Populates this object from the raw list structure `raw_list`.

        Each Parsable type may make its own assumptions about the shape of `raw_list`.

        :param list raw_list: A list or sublist of tokens from pyparsing, containing
            whitespace as separate tokens.
        :param bool add_spaces: If set, the method can and should insert spacing
            between non-whitespace tokens and lists to delimit them.

        :raises .errors.MisconfigurationError: when the assumptions about the
            structure of raw_list are not met.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def iterate(self, expanded=False, match=None):
        """ Iterates across this object.

        A leaf object only yields itself. An object that references other parsing
        objects should, when `expanded` is set, yield itself first and then
        recursively iterate across those objects.

        :param bool expanded: Whether to recursively iterate on possible children.
        :param callable match: If provided, an object is only yielded when this
            callable returns True for it.

        :returns: Iterator over the desired objects.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def get_tabs(self):
        """ Guesses the tabbing style of this object from its whitespace.

        A leaf deduces the tabbing from its own contents; other objects may guess
        by calling `get_tabs` on their children.

        :returns: The guessed tabbing: a whitespace string that contains no newlines.
        :rtype: str
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def set_tabs(self, tabs=" "):
        """ Tries to change the tabbing of this object to the given whitespace string.

        Primarily meant for objects that were constructed (rather than parsed), so
        that they can conform to the surrounding whitespace.

        :param str tabs: A whitespace string (not containing newlines).
        """
        raise NotImplementedError()

    def dump(self, include_spaces=False):
        """ Converts this object back to a pyparsing-like list tree; the inverse of `parse`.

        Note: for an unmodified object, `dump` with `include_spaces=True` should
        always return the original input of `parse`.

        :param bool include_spaces: If False, whitespace tokens are hidden from the
            dumped output.

        :returns: Pyparsing-like list tree.
        :rtype: list
        """
        dumped = []
        for child in self._data:
            dumped.append(child.dump(include_spaces))
        return dumped
class Statements(Parsable):
    """ A group or list of "Statements". A Statement is either a Block or a Sentence.

    The underlying representation is simply a list of these Statement objects, with
    an extra `_trailing_whitespace` string to keep track of the whitespace that does not
    precede any more statements.
    """

    def __init__(self, parent=None):
        super(Statements, self).__init__(parent)
        # Whitespace found after the last statement, or None when there is none.
        self._trailing_whitespace = None

    # ======== Begin overridden functions

    @staticmethod
    def should_parse(lists):
        """ Returns True if `lists` can be parsed as `Statements` -- that is, it is a list.

        :param lists: The raw unparsed structure to check.

        :returns: whether `lists` is parseable by `Statements`.
        :rtype: bool
        """
        return isinstance(lists, list)

    def set_tabs(self, tabs=" "):
        """ Sets the tabbing for this set of statements. Does this by calling `set_tabs`
        on each of the child statements.

        Then, if a parent is present, sets trailing whitespace to parent tabbing. This
        is so that the trailing } of any Block that contains Statements lines up
        with parent tabbing.

        :param str tabs: A whitespace string (not containing newlines).
        """
        for statement in self._data:
            statement.set_tabs(tabs)
        if self.parent is not None:
            self._trailing_whitespace = "\n" + self.parent.get_tabs()

    def parse(self, parse_this, add_spaces=False):
        """ Parses a list of statements.

        Expects all elements in `parse_this` to be parseable by `type(self).parsing_hooks`,
        with an optional whitespace string at the last index of `parse_this`.

        :param list parse_this: The raw list of statements.
        :param bool add_spaces: Passed on to the parser of every statement.

        :raises .errors.MisconfigurationError: if `parse_this` is not a list, or if
            one of its elements cannot be parsed.
        """
        if not isinstance(parse_this, list):
            raise errors.MisconfigurationError("Statements parsing expects a list!")
        # If there's a trailing whitespace in the list of statements, keep track of it.
        # Start from None so that whitespace remembered from an earlier `parse` call
        # cannot leak into the dump of this input.
        trailing = None
        if parse_this and isinstance(parse_this[-1], six.string_types) \
                and parse_this[-1].isspace():
            trailing = parse_this[-1]
            parse_this = parse_this[:-1]
        self._trailing_whitespace = trailing
        self._data = [parse_raw(elem, self, add_spaces) for elem in parse_this]

    def get_tabs(self):
        """ Takes a guess at the tabbing of all contained Statements by retrieving the
        tabbing of the first Statement.

        :returns: The tabbing of the first statement, or "" if there are no statements.
        :rtype: str
        """
        if self._data:
            return self._data[0].get_tabs()
        return ""

    def dump(self, include_spaces=False):
        """ Dumps this object by first dumping each statement, then appending its
        trailing whitespace (if `include_spaces` is set).

        :param bool include_spaces: Whether to include whitespace tokens.

        :returns: Pyparsing-like list tree.
        :rtype: list
        """
        data = super(Statements, self).dump(include_spaces)
        if include_spaces and self._trailing_whitespace is not None:
            return data + [self._trailing_whitespace]
        return data

    def iterate(self, expanded=False, match=None):
        """ Combines each statement's iterator.

        :param bool expanded: Passed on to every statement's `iterate`.
        :param callable match: Passed on to every statement's `iterate`.

        :returns: Iterator over the objects yielded by the contained statements.
        """
        for elem in self._data:
            for sub_elem in elem.iterate(expanded, match):
                yield sub_elem

    # ======== End overridden functions
def _space_list(list_):
""" Inserts whitespace between adjacent non-whitespace tokens. """
spaced_statement = [] # type: List[str]
for i in reversed(six.moves.xrange(len(list_))):
spaced_statement.insert(0, list_[i])
if i > 0 and not list_[i].isspace() and not list_[i-1].isspace():
spaced_statement.insert(0, " ")
return spaced_statement
class Sentence(Parsable):
    """ A list of words. Non-whitespace words are typically separated with whitespace tokens. """

    # ======== Begin overridden functions

    @staticmethod
    def should_parse(lists):
        """ Returns True if `lists` can be parseable as a `Sentence`-- that is,
        it is a non-empty list and every element is a string type.

        :param list lists: The raw unparsed list to check.

        :returns: whether this lists is parseable by `Sentence`.
        :rtype: bool
        """
        return isinstance(lists, list) and len(lists) > 0 and \
            all(isinstance(elem, six.string_types) for elem in lists)

    def parse(self, parse_this, add_spaces=False):
        """ Parses a list of string types into this object.

        If add_spaces is set, adds whitespace tokens between adjacent non-whitespace tokens.

        :param list parse_this: The list of string tokens; it is copied, not kept.
        :param bool add_spaces: Whether to separate adjacent non-whitespace tokens.

        :raises .errors.MisconfigurationError: if `parse_this` is not a list of strings.
        """
        # Validate before spacing: spacing a bare string would turn it into a list of
        # characters that passes the check, and spacing non-strings would fail with an
        # unrelated AttributeError.
        if not isinstance(parse_this, list) or \
                any(not isinstance(elem, six.string_types) for elem in parse_this):
            raise errors.MisconfigurationError("Sentence parsing expects a list of string types.")
        if add_spaces:
            parse_this = _space_list(parse_this)
        # Keep a private copy so that `set_tabs` never modifies the caller's list.
        self._data = list(parse_this)

    def iterate(self, expanded=False, match=None):
        """ Simply yields itself, provided `match` (if given) accepts it.

        :param bool expanded: Unused; a sentence has no children.
        :param callable match: Optional filter called with this sentence.
        """
        if match is None or match(self):
            yield self

    def set_tabs(self, tabs=" "):
        """ Sets the tabbing on this sentence. Inserts a newline and `tabs` at the
        beginning of `self._data`, unless the sentence already starts with whitespace.

        :param str tabs: A whitespace string (not containing newlines).
        """
        # The emptiness check keeps an empty sentence from raising IndexError.
        if self._data and self._data[0].isspace():
            return
        self._data.insert(0, "\n" + tabs)

    def dump(self, include_spaces=False):
        """ Dumps this sentence. If include_spaces is set, includes whitespace tokens.

        :param bool include_spaces: Whether to include whitespace tokens.

        :returns: The list of tokens.
        :rtype: list
        """
        if not include_spaces:
            return self.words
        return self._data

    def get_tabs(self):
        """ Guesses at the tabbing of this sentence. If the first element is whitespace,
        returns the whitespace after the rightmost newline in the string.

        :returns: The guessed tabbing, or "" when the sentence is empty or does not
            start with whitespace.
        :rtype: str
        """
        if not self._data:
            return ""
        first = self._data[0]
        if not first.isspace():
            return ""
        # rfind returns -1 when there is no newline, so the whole token is returned.
        rindex = first.rfind("\n")
        return first[rindex+1:]

    # ======== End overridden functions

    @property
    def words(self):
        """ The non-whitespace tokens of this sentence, with surrounding quote
        characters stripped. Like Unspaced List. """
        return [word.strip("\"\'") for word in self._data if not word.isspace()]

    def __getitem__(self, index):
        return self.words[index]

    def __contains__(self, word):
        return word in self.words
class Block(Parsable):
    """ Any sort of block, denoted by a block name and curly braces, like so:
    The parsed block:
        block name {
            content 1;
            content 2;
        }
    might be represented with the list [names, contents], where
        names = ["block", " ", "name", " "]
        contents = [["\n ", "content", " ", "1"], ["\n ", "content", " ", "2"], "\n"]
    """

    def __init__(self, parent=None):
        super(Block, self).__init__(parent)
        # Both are populated by `parse`.
        self.names = None # type: Sentence
        self.contents = None # type: Statements

    @staticmethod
    def should_parse(lists):
        """ Returns True if `lists` can be parseable as a `Block`-- that is,
        it's got a length of 2, the first element is a `Sentence` and the second can be
        a `Statements`.

        :param list lists: The raw unparsed list to check.

        :returns: whether this lists is parseable by `Block`.
        :rtype: bool
        """
        return isinstance(lists, list) and len(lists) == 2 and \
            Sentence.should_parse(lists[0]) and isinstance(lists[1], list)

    def set_tabs(self, tabs=" "):
        """ Sets tabs by setting equivalent tabbing on names, then adding tabbing
        to contents.

        :param str tabs: A whitespace string (not containing newlines).
        """
        self.names.set_tabs(tabs)
        self.contents.set_tabs(tabs + " ")

    def iterate(self, expanded=False, match=None):
        """ Iterator over self, and if expanded is set, over its contents.

        :param bool expanded: Whether to also iterate over the block's contents.
        :param callable match: Optional filter applied to every candidate object.
        """
        if match is None or match(self):
            yield self
        if expanded:
            for elem in self.contents.iterate(expanded, match):
                yield elem

    def parse(self, parse_this, add_spaces=False):
        """ Parses a list that resembles a block.

        The assumptions that this routine makes are:
            1. the first element of `parse_this` is a valid Sentence.
            2. the second element of `parse_this` is a valid Statements.
        If add_spaces is set, we call it recursively on `names` and `contents`, and
        add an extra trailing space to `names` (to separate the block's opening bracket
        and the block name).

        :param list parse_this: The raw `[names, contents]` list; it is not modified.
        :param bool add_spaces: Whether to insert delimiting whitespace.

        :raises .errors.MisconfigurationError: if `parse_this` does not look like a block.
        """
        if not Block.should_parse(parse_this):
            raise errors.MisconfigurationError("Block parsing expects a list of length 2. "
                "First element should be a list of string types (the bloc names), "
                "and second should be another list of statements (the bloc content).")
        raw_names = parse_this[0]
        if add_spaces:
            # Build a new list instead of appending in place, so that the caller's
            # input does not grow an extra token on every call.
            raw_names = raw_names + [" "]
        self.names = Sentence(self)
        self.names.parse(raw_names, add_spaces)
        self.contents = Statements(self)
        self.contents.parse(parse_this[1], add_spaces)
        self._data = [self.names, self.contents]

    def get_tabs(self):
        """ Guesses tabbing by retrieving tabbing guess of self.names.

        :returns: The guessed tabbing.
        :rtype: str
        """
        return self.names.get_tabs()
def _is_comment(parsed_obj):
    """ Checks whether parsed_obj is a comment.

    :param .Parsable parsed_obj: The object to check.

    :returns: whether parsed_obj represents a comment sentence, i.e. a Sentence
        whose first non-whitespace word is "#".
    :rtype: bool
    """
    if not isinstance(parsed_obj, Sentence):
        return False
    words = parsed_obj.words
    # A sentence consisting only of whitespace has no words and is not a comment.
    return bool(words) and words[0] == "#"
def _is_certbot_comment(parsed_obj):
    """ Tells whether parsed_obj is exactly the "managed by Certbot" comment.

    :param .Parsable parsed_obj: The object to check.

    :returns: True when parsed_obj is a comment whose words equal COMMENT_BLOCK,
        word for word.
    :rtype: bool
    """
    if _is_comment(parsed_obj):
        # List equality checks both the number of words and every single word.
        return parsed_obj.words == COMMENT_BLOCK
    return False
def _certbot_comment(parent, preceding_spaces=4):
    """ Builds a "managed by Certbot" comment sentence.

    :param .Parsable parent: The object the new sentence is attached to.
    :param int preceding_spaces: Number of spaces between the end of the previous
        statement and the comment.

    :returns: Sentence containing the comment.
    :rtype: .Sentence
    """
    padding = " " * preceding_spaces
    tokens = [padding]
    tokens.extend(COMMENT_BLOCK)
    comment = Sentence(parent)
    comment.parse(tokens)
    return comment
def _choose_parser(parent, list_):
""" Choose a parser from type(parent).parsing_hooks, depending on whichever hook
returns True first. """
hooks = Parsable.parsing_hooks()
if parent:
hooks = type(parent).parsing_hooks()
for type_ in hooks:
if type_.should_parse(list_):
return type_(parent)
raise errors.MisconfigurationError(
"None of the parsing hooks succeeded, so we don't know how to parse this set of lists.")
def parse_raw(lists_, parent=None, add_spaces=False):
    """ Primary parsing factory function: picks a fitting parser object for
    `lists_` and lets it parse them.

    :param list lists_: raw lists from pyparsing to parse.
    :param .Parsable parent: The parent containing this object.
    :param bool add_spaces: Whether to pass add_spaces to the parser.

    :returns: The parsed object.
    :rtype: .Parsable

    :raises errors.MisconfigurationError: If no parsing hook passes, and we can't
        determine which type to parse the raw lists into.
    """
    parsed = _choose_parser(parent, lists_)
    parsed.parse(lists_, add_spaces)
    return parsed

View File

@@ -0,0 +1,253 @@
""" Tests for functions and classes in parser_obj.py """
import unittest
import mock
from certbot_nginx.parser_obj import parse_raw
from certbot_nginx.parser_obj import COMMENT_BLOCK
class CommentHelpersTest(unittest.TestCase):
    """ Tests for the comment helper functions of parser_obj. """

    def test_is_comment(self):
        from certbot_nginx.parser_obj import _is_comment
        self.assertTrue(_is_comment(parse_raw(['#'])))
        self.assertTrue(_is_comment(parse_raw(['#', ' literally anything else'])))
        self.assertFalse(_is_comment(parse_raw(['not', 'even', 'a', 'comment'])))

    def test_is_certbot_comment(self):
        from certbot_nginx.parser_obj import _is_certbot_comment
        self.assertTrue(_is_certbot_comment(
            parse_raw(COMMENT_BLOCK)))
        self.assertFalse(_is_certbot_comment(
            parse_raw(['#', ' not a certbot comment'])))
        self.assertFalse(_is_certbot_comment(
            parse_raw(['#', ' managed by Certbot', ' also not a certbot comment'])))
        self.assertFalse(_is_certbot_comment(
            parse_raw(['not', 'even', 'a', 'comment'])))

    def test_certbot_comment(self):
        from certbot_nginx.parser_obj import _certbot_comment, _is_certbot_comment
        comment = _certbot_comment(None)
        self.assertTrue(_is_certbot_comment(comment))
        self.assertEqual(comment.dump(), COMMENT_BLOCK)
        # The padding is spelled as a product so that its width is unmistakable:
        # four spaces by default, otherwise as many as requested.
        self.assertEqual(comment.dump(True), [' ' * 4] + COMMENT_BLOCK)
        self.assertEqual(_certbot_comment(None, 2).dump(True),
            [' ' * 2] + COMMENT_BLOCK)
class ParsingHooksTest(unittest.TestCase):
    """ Tests for the `should_parse` hooks and for hook selection in `parse_raw`. """

    def test_is_sentence(self):
        from certbot_nginx.parser_obj import Sentence
        self.assertFalse(Sentence.should_parse([]))
        self.assertTrue(Sentence.should_parse(['']))
        self.assertTrue(Sentence.should_parse(['word']))
        self.assertTrue(Sentence.should_parse(['two', 'words']))
        self.assertFalse(Sentence.should_parse([[]]))
        self.assertFalse(Sentence.should_parse(['word', []]))

    def test_is_block(self):
        from certbot_nginx.parser_obj import Block
        self.assertFalse(Block.should_parse([]))
        self.assertFalse(Block.should_parse(['']))
        self.assertFalse(Block.should_parse(['two', 'words']))
        self.assertFalse(Block.should_parse([[[]], []]))
        self.assertFalse(Block.should_parse([['block_name'], ['hi', []], []]))
        self.assertFalse(Block.should_parse([['block_name'], 'lol']))
        self.assertTrue(Block.should_parse([['block_name'], ['hi', []]]))
        self.assertTrue(Block.should_parse([['hello'], []]))
        self.assertTrue(Block.should_parse([['block_name'], [['many'], ['statements'], 'here']]))
        self.assertTrue(Block.should_parse([['if', ' ', '(whatever)'], ['hi']]))

    @mock.patch("certbot_nginx.parser_obj.Parsable.parsing_hooks")
    def test_parse_raw(self, parsing_hooks):
        fake_parser1 = mock.Mock()
        fake_parser1.should_parse = lambda x: True
        fake_parser2 = mock.Mock()
        fake_parser2.should_parse = lambda x: True
        # The fakes only take part in parsing once they are installed as hooks.
        parsing_hooks.return_value = (fake_parser1, fake_parser2)
        # First encountered "match" should parse. `return_value` is the object that
        # parse_raw obtains by instantiating the hook.
        parse_raw([])
        self.assertEqual(fake_parser1.return_value.parse.call_count, 1)
        self.assertEqual(fake_parser2.return_value.parse.call_count, 0)
        fake_parser1.reset_mock()
        fake_parser2.reset_mock()
        # "match" that returns False shouldn't parse.
        fake_parser1.should_parse = lambda x: False
        parse_raw([])
        self.assertEqual(fake_parser1.return_value.parse.call_count, 0)
        self.assertEqual(fake_parser2.return_value.parse.call_count, 1)

    @mock.patch("certbot_nginx.parser_obj.Parsable.parsing_hooks")
    def test_parse_raw_no_match(self, parsing_hooks):
        from certbot import errors
        fake_parser1 = mock.Mock()
        fake_parser1.should_parse = lambda x: False
        parsing_hooks.return_value = (fake_parser1,)
        self.assertRaises(errors.MisconfigurationError, parse_raw, [])
        parsing_hooks.return_value = tuple()
        self.assertRaises(errors.MisconfigurationError, parse_raw, [])

    @mock.patch("certbot_nginx.parser_obj.Parsable.parsing_hooks")
    def test_parse_raw_passes_add_spaces(self, parsing_hooks):
        fake_parser1 = mock.Mock()
        fake_parser1.should_parse = lambda x: True
        parsing_hooks.return_value = (fake_parser1,)
        parse_raw([])
        fake_parser1.return_value.parse.assert_called_with([], False)
        parse_raw([], add_spaces=True)
        fake_parser1.return_value.parse.assert_called_with([], True)
class SentenceTest(unittest.TestCase):
    """ Tests for the Sentence class. """

    def setUp(self):
        from certbot_nginx.parser_obj import Sentence
        self.sentence = Sentence(None)

    def test_parse_bad_sentence_raises_error(self):
        from certbot import errors
        self.assertRaises(errors.MisconfigurationError, self.sentence.parse, 'lol')
        self.assertRaises(errors.MisconfigurationError, self.sentence.parse, [[]])
        self.assertRaises(errors.MisconfigurationError, self.sentence.parse, [5])

    def test_parse_sentence_words_hides_spaces(self):
        og_sentence = ['\r\n', 'hello', ' ', ' ', '\t\n ', 'lol', ' ', 'spaces']
        self.sentence.parse(og_sentence)
        self.assertEqual(self.sentence.words, ['hello', 'lol', 'spaces'])
        self.assertEqual(self.sentence.dump(), ['hello', 'lol', 'spaces'])
        self.assertEqual(self.sentence.dump(True), og_sentence)

    def test_parse_sentence_with_add_spaces(self):
        self.sentence.parse(['hi', 'there'], add_spaces=True)
        self.assertEqual(self.sentence.dump(True), ['hi', ' ', 'there'])
        self.sentence.parse(['one', ' ', 'space', 'none'], add_spaces=True)
        self.assertEqual(self.sentence.dump(True), ['one', ' ', 'space', ' ', 'none'])

    def test_iterate(self):
        self.sentence.parse(['1', ' ', '2', ' ', '3'])
        # Compare whole lists so that an empty iterator cannot pass unnoticed.
        self.assertEqual([sentence.dump() for sentence in self.sentence.iterate()],
            [['1', '2', '3']])

    def test_set_tabs(self):
        self.sentence.parse(['tabs', 'pls'], add_spaces=True)
        self.sentence.set_tabs()
        self.assertEqual(self.sentence.dump(True)[0], '\n ')
        # A sentence that already starts with whitespace is left alone.
        spaced = list(self.sentence.dump(True))
        self.sentence.set_tabs()
        self.assertEqual(self.sentence.dump(True), spaced)
        # Explicitly requested tabbing is used as given.
        self.sentence.parse(['tabs', 'pls'], add_spaces=True)
        self.sentence.set_tabs('\t\t')
        self.assertEqual(self.sentence.dump(True), ['\n\t\t', 'tabs', ' ', 'pls'])

    def test_get_tabs(self):
        self.sentence.parse(['no', 'tabs'])
        self.assertEqual(self.sentence.get_tabs(), '')
        self.sentence.parse(['\n \n ', 'tabs'])
        self.assertEqual(self.sentence.get_tabs(), ' ')
        self.sentence.parse(['\n\t ', 'tabs'])
        self.assertEqual(self.sentence.get_tabs(), '\t ')
        self.sentence.parse(['\n\t \n', 'tabs'])
        self.assertEqual(self.sentence.get_tabs(), '')
class BlockTest(unittest.TestCase):
    """ Tests for the Block class. """

    def setUp(self):
        from certbot_nginx.parser_obj import Block
        self.bloc = Block(None)
        self.name = ['server', 'name']
        self.contents = [['thing', '1'], ['thing', '2'], ['another', 'one']]
        self.bloc.parse([self.name, self.contents])

    def test_iterate(self):
        # Iterates itself normally
        self.assertEqual(self.bloc, next(self.bloc.iterate()))
        # Iterates contents while expanded
        expected = [self.bloc.dump()] + self.contents
        # Compare whole lists so that missing elements cannot pass unnoticed.
        self.assertEqual([elem.dump() for elem in self.bloc.iterate(expanded=True)],
            expected)

    def test_iterate_match(self):
        # can match on contents while expanded
        from certbot_nginx.parser_obj import Block, Sentence
        expected = [['thing', '1'], ['thing', '2']]
        matched = self.bloc.iterate(expanded=True,
            match=lambda x: isinstance(x, Sentence) and 'thing' in x.words)
        self.assertEqual([elem.dump() for elem in matched], expected)
        # can match on self
        self.assertEqual(self.bloc, next(self.bloc.iterate(
            expanded=True,
            match=lambda x: isinstance(x, Block) and 'server' in x.names)))

    def test_parse_with_added_spaces(self):
        import copy
        self.bloc.parse([copy.copy(self.name), self.contents], add_spaces=True)
        self.assertEqual(self.bloc.dump(), [self.name, self.contents])
        self.assertEqual(self.bloc.dump(True), [
            ['server', ' ', 'name', ' '],
            [['thing', ' ', '1'],
             ['thing', ' ', '2'],
             ['another', ' ', 'one']]])

    def test_bad_parse_raises_error(self):
        from certbot import errors
        self.assertRaises(errors.MisconfigurationError, self.bloc.parse, [[[]], [[]]])
        self.assertRaises(errors.MisconfigurationError, self.bloc.parse, ['lol'])
        self.assertRaises(errors.MisconfigurationError, self.bloc.parse, ['fake', 'news'])

    def test_set_tabs(self):
        tabs = ' ' * 4
        self.bloc.set_tabs(tabs)
        self.assertEqual(self.bloc.names.dump(True)[0], '\n' + tabs)
        contents = self.bloc.contents.dump(True)
        # One entry per statement plus the trailing whitespace.
        self.assertEqual(len(contents), len(self.contents) + 1)
        for elem in contents[:-1]:
            # Contents sit one level deeper than the block's own names.
            self.assertTrue(elem[0].isspace())
            self.assertTrue(elem[0].startswith('\n' + tabs))
            self.assertTrue(len(elem[0]) > len('\n' + tabs))
        # The whitespace before the closing brace lines up with the block itself.
        self.assertEqual(contents[-1], '\n' + tabs)

    def test_get_tabs(self):
        self.bloc.parse([[' \n \t', 'lol'], []])
        self.assertEqual(self.bloc.get_tabs(), ' \t')
class StatementsTest(unittest.TestCase):
    """ Tests for the Statements class. """

    def setUp(self):
        from certbot_nginx.parser_obj import Statements
        self.statements = Statements(None)
        self.raw = [
            ['sentence', 'one'],
            ['sentence', 'two'],
            ['and', 'another']
        ]
        self.raw_spaced = [
            ['\n ', 'sentence', ' ', 'one'],
            ['\n ', 'sentence', ' ', 'two'],
            ['\n ', 'and', ' ', 'another'],
            '\n\n'
        ]

    def test_set_tabs(self):
        self.statements.parse(self.raw)
        self.statements.set_tabs()
        statements = list(self.statements.iterate())
        # Make sure the loop below really checks every statement.
        self.assertEqual(len(statements), len(self.raw))
        for statement in statements:
            self.assertEqual(statement.dump(True)[0], '\n ')

    def test_set_tabs_with_parent(self):
        # Trailing whitespace should inherit from parent tabbing.
        self.statements.parse(self.raw)
        self.statements.parent = mock.Mock()
        self.statements.parent.get_tabs.return_value = '\t\t'
        self.statements.set_tabs()
        statements = list(self.statements.iterate())
        self.assertEqual(len(statements), len(self.raw))
        for statement in statements:
            self.assertEqual(statement.dump(True)[0], '\n ')
        self.assertEqual(self.statements.dump(True)[-1], '\n\t\t')

    def test_get_tabs(self):
        self.raw[0].insert(0, '\n \n \t')
        self.statements.parse(self.raw)
        self.assertEqual(self.statements.get_tabs(), ' \t')
        self.statements.parse([])
        self.assertEqual(self.statements.get_tabs(), '')

    def test_parse_with_added_spaces(self):
        self.statements.parse(self.raw, add_spaces=True)
        self.assertEqual(self.statements.dump(True)[0], ['sentence', ' ', 'one'])

    def test_parse_bad_list_raises_error(self):
        from certbot import errors
        self.assertRaises(errors.MisconfigurationError, self.statements.parse, 'lol not a list')

    def test_parse_hides_trailing_whitespace(self):
        self.statements.parse(self.raw + ['\n\n '])
        self.assertTrue(isinstance(self.statements.dump()[-1], list))
        self.assertTrue(self.statements.dump(True)[-1].isspace())
        self.assertEqual(self.statements.dump(True)[-1], '\n\n ')

    def test_parse_dump_round_trip(self):
        # Dumping with spaces returns exactly what was parsed; without spaces the
        # whitespace tokens are hidden.
        self.statements.parse(self.raw_spaced)
        self.assertEqual(self.statements.dump(True), self.raw_spaced)
        self.assertEqual(self.statements.dump(), self.raw)

    def test_iterate(self):
        self.statements.parse(self.raw)
        expected = [['sentence', 'one'], ['sentence', 'two']]
        matched = self.statements.iterate(match=lambda x: 'sentence' in x)
        # Compare whole lists so that missing elements cannot pass unnoticed.
        self.assertEqual([elem.dump() for elem in matched], expected)
# Run the tests of this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main() # pragma: no cover

View File

@@ -25,6 +25,7 @@ certbot_test_no_force_renew () {
omit_patterns="*/*.egg-info/*,*/dns_common*,*/setup.py,*/test_*,*/tests/*"
omit_patterns="$omit_patterns,*_test.py,*_test_*,certbot-apache/*"
omit_patterns="$omit_patterns,certbot-compatibility-test/*,certbot-dns*/"
omit_patterns="$omit_patterns,certbot-nginx/certbot_nginx/parser_obj.py"
coverage run \
--append \
--source $sources \