From 86a3c2da10491f7cec96a9c2720a5cd92b8aad67 Mon Sep 17 00:00:00 2001
From: Ulf Magnusson <ulfalizer@gmail.com>
Date: Mon, 2 Jul 2018 04:23:50 +0200
Subject: Refactor tokenization a bit

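Have _tokenize() take the string to tokenize as an argument and return
the list of tokens, instead of reading self._line and writing
self._tokens. The callers now assign self._tokens and reset
self._tokens_i themselves.

This simplifies the internal logic a bit, and is likely faster too.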
---
 kconfiglib.py | 38 ++++++++++++++++++--------------------
 1 file changed, 18 insertions(+), 20 deletions(-)

(limited to 'kconfiglib.py')

diff --git a/kconfiglib.py b/kconfiglib.py
index def3341..6cec108 100644
--- a/kconfiglib.py
+++ b/kconfiglib.py
@@ -1316,12 +1316,12 @@ class Kconfig(object):
 
         self._filename = None
 
-        self._line = "if " + s
-        self._tokenize()
-        # Remove the "if " to avoid giving confusing error messages
+        # Don't include the "if " from below to avoid giving confusing error
+        # messages
         self._line = s
         # Remove the _T_IF token
-        del self._tokens[0]
+        self._tokens = self._tokenize("if " + s)[1:]
+        self._tokens_i = -1
 
         return expr_value(self._parse_expr(True))  # transform_m
 
@@ -1523,7 +1523,9 @@ class Kconfig(object):
             self._line = self._line[:-2] + self._file.readline()
             self._linenr += 1
 
-        self._tokenize()
+        self._tokens = self._tokenize(self._line)
+        self._tokens_i = -1  # Token index (minus one)
+
         return True
 
 
@@ -1569,24 +1571,18 @@ class Kconfig(object):
 
         return sym
 
-    def _tokenize(self):
-        # Parses Kconfig._line, putting the tokens in Kconfig._tokens.
-        # Registers any new symbols encountered with _lookup(_const)_sym().
+    def _tokenize(self, s):
+        # Parses 's', returning a None-terminated list of tokens. Registers any
+        # new symbols encountered with _lookup(_const)_sym().
         #
         # Tries to be reasonably speedy by processing chunks of text via
         # regexes and string operations where possible. This is the biggest
         # hotspot during parsing.
 
-        s = self._line
-
-        # Token index (minus one). Set for later -- not further updated here.
-        self._tokens_i = -1
-
         # Initial token on the line
         command_match = _command_re_match(s)
         if not command_match:
-            self._tokens = (None,)
-            return
+            return (None,)
 
         # Tricky implementation detail: While parsing a token, 'token' refers
         # to the previous token. See _STRING_LEX for why this is needed.
@@ -1594,7 +1590,7 @@ class Kconfig(object):
         if not token:
             self._parse_error("expected keyword as first token")
 
-        self._tokens = [token]
+        tokens = [token]
         # The current index in the string being tokenized
         i = command_match.end()
 
@@ -1699,7 +1695,7 @@ class Kconfig(object):
                     # refer to a constant symbol named "FOO".
                     token = val \
                             if token in _STRING_LEX or \
-                                self._tokens[0] == _T_OPTION else \
+                                tokens[0] == _T_OPTION else \
                             self._lookup_const_sym(val)
 
                 elif c == "&":
@@ -1758,11 +1754,13 @@ class Kconfig(object):
                 while i < len(s) and s[i].isspace():
                     i += 1
 
-            self._tokens.append(token)
+            tokens.append(token)
 
-        # None-terminating token streams makes the token fetching functions
+        # None-terminating the token list makes the token fetching functions
         # simpler/faster
-        self._tokens.append(None)
+        tokens.append(None)
+
+        return tokens
 
     def _next_token(self):
         self._tokens_i += 1
-- 
cgit v1.2.3