Clean up and optimize tokenization some more.

- Get rid of the weird strip/reindex dance and keep 's' intact throughout. This also fixes _tokenization_error() output, as it would previously report the stripped string.
- Strip trailing whitespace in the regexes. This makes the stripping loop very cheap.
- Shorten a comment that might've been more confusing than helpful.
author: Ulf Magnusson <ulfalizer@gmail.com> 2015-06-16 16:10:55 +0200
committer: Ulf Magnusson <ulfalizer@gmail.com> 2015-06-16 17:29:03 +0200
commit: b7b5474954e3af31fd4da222a8d84795c917c136 (patch)
tree: 7f14184f4ef13698288d81eeb5a1f00663c4f021
parent: eedfb571d08347c5c6bf35afa87bc7538686455e (diff)
1 files changed, 13 insertions, 11 deletions
diff --git a/kconfiglib.py b/kconfiglib.py
index 2296211..8379182 100644
--- a/kconfiglib.py
+++ b/kconfiglib.py
@@ -696,14 +696,14 @@ class Config(object):
                     append(sym)
 
             else:
-                # This restrips whitespace that could have been stripped in the
-                # regex above, but it's worth it since identifiers/keywords are
-                # more common
-                s = s[i:].lstrip()
-                if s == "":
+                # Not an identifier/keyword
+
+                while i < strlen and s[i].isspace():
+                    i += 1
+                if i == strlen:
                     break
-                c = s[0]
-                i = 1
+                c = s[i]
+                i += 1
 
                 # String literal (constant symbol)
                 if c == '"' or c == "'":
@@ -3446,11 +3446,13 @@ BOOL_STR = {False: "false", True: "true"}
 STRING_LEX = frozenset((T_BOOL, T_TRISTATE, T_INT, T_HEX, T_STRING, T_CHOICE,
                         T_PROMPT, T_MENU, T_COMMENT, T_SOURCE, T_MAINMENU))
 
-# Matches the initial token on a line; see _tokenize().
-_initial_token_re_match = re.compile(r"[^\w]*(\w+)").match
+# Matches the initial token on a line; see _tokenize(). Also eats trailing
+# whitespace as an optimization.
+_initial_token_re_match = re.compile(r"[^\w]*(\w+)\s*").match
 
-# Matches an identifier/keyword optionally preceded by whitespace
-_id_keyword_re_match = re.compile(r"\s*([\w./-]+)").match
+# Matches an identifier/keyword optionally preceded by whitespace. Also eats
+# trailing whitespace as an optimization.
+_id_keyword_re_match = re.compile(r"\s*([\w./-]+)\s*").match
 
 # Regular expressions for parsing .config files
 _set_re_match = re.compile(r"CONFIG_(\w+)=(.*)").match
author	Ulf Magnusson <ulfalizer@gmail.com>	2015-06-16 16:10:55 +0200
committer	Ulf Magnusson <ulfalizer@gmail.com>	2015-06-16 17:29:03 +0200
commit	b7b5474954e3af31fd4da222a8d84795c917c136 (patch)
tree	7f14184f4ef13698288d81eeb5a1f00663c4f021
parent	eedfb571d08347c5c6bf35afa87bc7538686455e (diff)