From 86a3c2da10491f7cec96a9c2720a5cd92b8aad67 Mon Sep 17 00:00:00 2001 From: Ulf Magnusson Date: Mon, 2 Jul 2018 04:23:50 +0200 Subject: Refactor tokenization a bit Have _tokenize() take the string to tokenize and return a list of tokens, and handle all the token list management outside. Simplifies the internal logic a bit. Likely faster too. --- kconfiglib.py | 38 ++++++++++++++++++-------------------- testsuite.py | 14 +++++++------- 2 files changed, 25 insertions(+), 27 deletions(-) diff --git a/kconfiglib.py b/kconfiglib.py index def3341..6cec108 100644 --- a/kconfiglib.py +++ b/kconfiglib.py @@ -1316,12 +1316,12 @@ class Kconfig(object): self._filename = None - self._line = "if " + s - self._tokenize() - # Remove the "if " to avoid giving confusing error messages + # Don't include the "if " from below to avoid giving confusing error + # messages self._line = s # Remove the _T_IF token - del self._tokens[0] + self._tokens = self._tokenize("if " + s)[1:] + self._tokens_i = -1 return expr_value(self._parse_expr(True)) # transform_m @@ -1523,7 +1523,9 @@ class Kconfig(object): self._line = self._line[:-2] + self._file.readline() self._linenr += 1 - self._tokenize() + self._tokens = self._tokenize(self._line) + self._tokens_i = -1 # Token index (minus one) + return True @@ -1569,24 +1571,18 @@ class Kconfig(object): return sym - def _tokenize(self): - # Parses Kconfig._line, putting the tokens in Kconfig._tokens. - # Registers any new symbols encountered with _lookup(_const)_sym(). + def _tokenize(self, s): + # Parses 's', returning a None-terminated list of tokens. Registers any + # new symbols encountered with _lookup(_const)_sym(). # # Tries to be reasonably speedy by processing chunks of text via # regexes and string operations where possible. This is the biggest # hotspot during parsing. - s = self._line - - # Token index (minus one). Set for later -- not further updated here. - self._tokens_i = -1 - # Initial token on the line command_match = _command_re_match(s) if not command_match: - self._tokens = (None,) - return + return (None,) # Tricky implementation detail: While parsing a token, 'token' refers # to the previous token. See _STRING_LEX for why this is needed. @@ -1594,7 +1590,7 @@ class Kconfig(object): if not token: self._parse_error("expected keyword as first token") - self._tokens = [token] + tokens = [token] # The current index in the string being tokenized i = command_match.end() @@ -1699,7 +1695,7 @@ class Kconfig(object): # refer to a constant symbol named "FOO". token = val \ if token in _STRING_LEX or \ - self._tokens[0] == _T_OPTION else \ + tokens[0] == _T_OPTION else \ self._lookup_const_sym(val) elif c == "&": @@ -1758,11 +1754,13 @@ class Kconfig(object): while i < len(s) and s[i].isspace(): i += 1 - self._tokens.append(token) + tokens.append(token) - # None-terminating token streams makes the token fetching functions + # None-terminating the token list makes the token fetching functions # simpler/faster - self._tokens.append(None) + tokens.append(None) + + return tokens def _next_token(self): self._tokens_i += 1 diff --git a/testsuite.py b/testsuite.py index 64b5bd9..536a98f 100644 --- a/testsuite.py +++ b/testsuite.py @@ -190,15 +190,15 @@ def run_selftests(): # Dummy empty configuration just to get a Kconfig object c = Kconfig("Kconfiglib/tests/empty") - def verify_string_lex(s, res): + def verify_string_lex(s, expected): """ Verifies that a constant symbol with the name 'res' is produced from lexing 's' """ - c.eval_string(s) - verify(c._tokens[0].name == res, + res = c._tokenize("if " + s)[1].name + verify(res == expected, "expected <{}> to produced the constant symbol <{}>, " - 'produced <{}>'.format(s[1:-1], c._tokens[0].name, res)) + 'produced <{}>'.format(s[1:-1], expected, res)) verify_string_lex(r""" "" """, "") verify_string_lex(r""" '' """, "") @@ -1046,9 +1046,9 @@ g def verify_split(to_split, op, operand_strs): # The same hackage as in Kconfig.eval_string() - c._line = "if " + to_split - c._tokenize() - del c._tokens[0] + c._tokens = c._tokenize("if " + to_split)[1:] + c._tokens_i = -1 + operands = split_expr(c._parse_expr(False), op) verify(len(operands) == len(operand_strs), -- cgit v1.2.3