From b7b5474954e3af31fd4da222a8d84795c917c136 Mon Sep 17 00:00:00 2001
From: Ulf Magnusson <ulfalizer@gmail.com>
Date: Tue, 16 Jun 2015 16:10:55 +0200
Subject: Clean up and optimize tokenization some more.

 - Get rid of the weird strip/reindex dance and keep 's' intact
   throughout. This also fixes _tokenization_error() output, as it would
   previously report the stripped string.

 - Consume trailing whitespace in the token regexes. This makes the
   whitespace-skipping loop very cheap, as the whitespace has typically
   already been eaten by the preceding regex match.

 - Shorten a comment that might've been more confusing than helpful.
---
 kconfiglib.py | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

(limited to 'kconfiglib.py')

diff --git a/kconfiglib.py b/kconfiglib.py
index 2296211..8379182 100644
--- a/kconfiglib.py
+++ b/kconfiglib.py
@@ -696,14 +696,14 @@ class Config(object):
                     append(sym)
 
             else:
-                # This restrips whitespace that could have been stripped in the
-                # regex above, but it's worth it since identifiers/keywords are
-                # more common
-                s = s[i:].lstrip()
-                if s == "":
+                # Not an identifier/keyword
+
+                while i < strlen and s[i].isspace():
+                    i += 1
+                if i == strlen:
                     break
-                c = s[0]
-                i = 1
+                c = s[i]
+                i += 1
 
                 # String literal (constant symbol)
                 if c == '"' or c == "'":
@@ -3446,11 +3446,13 @@ BOOL_STR = {False: "false", True: "true"}
 STRING_LEX = frozenset((T_BOOL, T_TRISTATE, T_INT, T_HEX, T_STRING, T_CHOICE,
                         T_PROMPT, T_MENU, T_COMMENT, T_SOURCE, T_MAINMENU))
 
-# Matches the initial token on a line; see _tokenize().
-_initial_token_re_match = re.compile(r"[^\w]*(\w+)").match
+# Matches the initial token on a line; see _tokenize(). Also eats trailing
+# whitespace as an optimization.
+_initial_token_re_match = re.compile(r"[^\w]*(\w+)\s*").match
 
-# Matches an identifier/keyword optionally preceded by whitespace
-_id_keyword_re_match = re.compile(r"\s*([\w./-]+)").match
+# Matches an identifier/keyword optionally preceded by whitespace. Also eats
+# trailing whitespace as an optimization.
+_id_keyword_re_match = re.compile(r"\s*([\w./-]+)\s*").match
 
 # Regular expressions for parsing .config files
 _set_re_match = re.compile(r"CONFIG_(\w+)=(.*)").match
-- 
cgit v1.2.3