author     Ulf Magnusson <ulfalizer@gmail.com>   2018-07-02 04:23:50 +0200
committer  Ulf Magnusson <ulfalizer@gmail.com>   2018-07-02 04:36:17 +0200
commit     86a3c2da10491f7cec96a9c2720a5cd92b8aad67 (patch)
tree       1848909f81772f554292b96b6f06ce1877542e22
parent     db90cb85094a2cab19a524ad30b603e26cbb76fe (diff)
Refactor tokenization a bit
Have _tokenize() take the string to tokenize and return a list of tokens, and handle all the token list management outside. Simplifies the internal logic a bit. Likely faster too.
-rw-r--r--  kconfiglib.py  38
-rw-r--r--  testsuite.py   14
2 files changed, 25 insertions, 27 deletions
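In rough terms, _tokenize() used to read Kconfig._line and fill in Kconfig._tokens as a side effect; after this commit it takes the string and returns the token list, and the caller assigns self._tokens / self._tokens_i itself. A minimal before/after sketch of the two calling conventions (hypothetical class names and a trivial whitespace tokenizer standing in for the real regex-based one, not the actual kconfiglib code):

```python
# Hypothetical sketch of the calling-convention change described in the
# commit message; the real tokenizer does regex matching and symbol lookup.

class Before:
    def _tokenize(self):
        # Reads self._line and mutates parser state as a side effect
        self._tokens = self._line.split()
        self._tokens_i = -1

    def eval_string(self, s):
        self._line = "if " + s
        self._tokenize()
        del self._tokens[0]        # caller patches the list up afterwards
        return self._tokens

class After:
    def _tokenize(self, s):
        # Function of its argument: just returns the token list
        return s.split()

    def eval_string(self, s):
        # All token-list management now happens at the call site
        self._tokens = self._tokenize("if " + s)[1:]  # drop the leading "if"
        self._tokens_i = -1
        return self._tokens

print(Before().eval_string("FOO && BAR"))  # ['FOO', '&&', 'BAR']
print(After().eval_string("FOO && BAR"))   # ['FOO', '&&', 'BAR']
```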
diff --git a/kconfiglib.py b/kconfiglib.py
index def3341..6cec108 100644
--- a/kconfiglib.py
+++ b/kconfiglib.py
@@ -1316,12 +1316,12 @@ class Kconfig(object):
self._filename = None
- self._line = "if " + s
- self._tokenize()
- # Remove the "if " to avoid giving confusing error messages
+ # Don't include the "if " from below to avoid giving confusing error
+ # messages
self._line = s
# Remove the _T_IF token
- del self._tokens[0]
+ self._tokens = self._tokenize("if " + s)[1:]
+ self._tokens_i = -1
return expr_value(self._parse_expr(True)) # transform_m
@@ -1523,7 +1523,9 @@ class Kconfig(object):
self._line = self._line[:-2] + self._file.readline()
self._linenr += 1
- self._tokenize()
+ self._tokens = self._tokenize(self._line)
+ self._tokens_i = -1 # Token index (minus one)
+
return True
@@ -1569,24 +1571,18 @@ class Kconfig(object):
return sym
- def _tokenize(self):
- # Parses Kconfig._line, putting the tokens in Kconfig._tokens.
- # Registers any new symbols encountered with _lookup(_const)_sym().
+ def _tokenize(self, s):
+ # Parses 's', returning a None-terminated list of tokens. Registers any
+ # new symbols encountered with _lookup(_const)_sym().
#
# Tries to be reasonably speedy by processing chunks of text via
# regexes and string operations where possible. This is the biggest
# hotspot during parsing.
- s = self._line
-
- # Token index (minus one). Set for later -- not further updated here.
- self._tokens_i = -1
-
# Initial token on the line
command_match = _command_re_match(s)
if not command_match:
- self._tokens = (None,)
- return
+ return (None,)
# Tricky implementation detail: While parsing a token, 'token' refers
# to the previous token. See _STRING_LEX for why this is needed.
@@ -1594,7 +1590,7 @@ class Kconfig(object):
if not token:
self._parse_error("expected keyword as first token")
- self._tokens = [token]
+ tokens = [token]
# The current index in the string being tokenized
i = command_match.end()
@@ -1699,7 +1695,7 @@ class Kconfig(object):
# refer to a constant symbol named "FOO".
token = val \
if token in _STRING_LEX or \
- self._tokens[0] == _T_OPTION else \
+ tokens[0] == _T_OPTION else \
self._lookup_const_sym(val)
elif c == "&":
@@ -1758,11 +1754,13 @@ class Kconfig(object):
while i < len(s) and s[i].isspace():
i += 1
- self._tokens.append(token)
+ tokens.append(token)
- # None-terminating token streams makes the token fetching functions
+ # None-terminating the token list makes the token fetching functions
# simpler/faster
- self._tokens.append(None)
+ tokens.append(None)
+
+ return tokens
def _next_token(self):
self._tokens_i += 1
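The trailing None is what keeps the token-fetching helpers simple: _next_token() can just bump the index and return whatever is there, and callers test the token for None instead of comparing the index against the list length. A standalone sketch of that pattern (hypothetical TokenStream class, not the actual kconfiglib helpers):

```python
# Illustration of None-terminated token fetching; the tokens and the cursor
# live on a small object here rather than on the Kconfig parser.

class TokenStream:
    def __init__(self, tokens):
        self.tokens = tokens + [None]  # None terminator, as _tokenize() appends
        self.i = -1                    # "token index (minus one)"

    def next_token(self):
        # No bounds check needed: past the last real token we return None
        self.i += 1
        return self.tokens[self.i]

stream = TokenStream(["if", "FOO", "&&", "BAR"])
token = stream.next_token()
while token is not None:        # end of line is signalled by the token itself
    print(token)
    token = stream.next_token()
```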
diff --git a/testsuite.py b/testsuite.py
index 64b5bd9..536a98f 100644
--- a/testsuite.py
+++ b/testsuite.py
@@ -190,15 +190,15 @@ def run_selftests():
# Dummy empty configuration just to get a Kconfig object
c = Kconfig("Kconfiglib/tests/empty")
- def verify_string_lex(s, res):
+ def verify_string_lex(s, expected):
"""
Verifies that a constant symbol with the name 'res' is produced from
lexing 's'
"""
- c.eval_string(s)
- verify(c._tokens[0].name == res,
+ res = c._tokenize("if " + s)[1].name
+ verify(res == expected,
"expected <{}> to produced the constant symbol <{}>, "
- 'produced <{}>'.format(s[1:-1], c._tokens[0].name, res))
+ 'produced <{}>'.format(s[1:-1], expected, res))
verify_string_lex(r""" "" """, "")
verify_string_lex(r""" '' """, "")
@@ -1046,9 +1046,9 @@ g
def verify_split(to_split, op, operand_strs):
# The same hackage as in Kconfig.eval_string()
- c._line = "if " + to_split
- c._tokenize()
- del c._tokens[0]
+ c._tokens = c._tokenize("if " + to_split)[1:]
+ c._tokens_i = -1
+
operands = split_expr(c._parse_expr(False), op)
verify(len(operands) == len(operand_strs),