From b7b5474954e3af31fd4da222a8d84795c917c136 Mon Sep 17 00:00:00 2001
From: Ulf Magnusson
Date: Tue, 16 Jun 2015 16:10:55 +0200
Subject: Clean up and optimize tokenization some more.

- Get rid of the weird strip/reindex dance and keep 's' intact throughout.
  This also fixes _tokenization_error() output, as it would previously
  report the stripped string.

- Strip trailing whitespace in the regexes. This makes the stripping loop
  very cheap.

- Shorten a comment that might've been more confusing than helpful.
---
 kconfiglib.py | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

(limited to 'kconfiglib.py')

diff --git a/kconfiglib.py b/kconfiglib.py
index 2296211..8379182 100644
--- a/kconfiglib.py
+++ b/kconfiglib.py
@@ -696,14 +696,14 @@ class Config(object):
                     append(sym)
 
             else:
-                # This restrips whitespace that could have been stripped in the
-                # regex above, but it's worth it since identifiers/keywords are
-                # more common
-                s = s[i:].lstrip()
-                if s == "":
+                # Not an identifier/keyword
+
+                while i < strlen and s[i].isspace():
+                    i += 1
+                if i == strlen:
                     break
-                c = s[0]
-                i = 1
+                c = s[i]
+                i += 1
 
                 # String literal (constant symbol)
                 if c == '"' or c == "'":
@@ -3446,11 +3446,13 @@ BOOL_STR = {False: "false", True: "true"}
 STRING_LEX = frozenset((T_BOOL, T_TRISTATE, T_INT, T_HEX, T_STRING, T_CHOICE,
                         T_PROMPT, T_MENU, T_COMMENT, T_SOURCE, T_MAINMENU))
 
-# Matches the initial token on a line; see _tokenize().
-_initial_token_re_match = re.compile(r"[^\w]*(\w+)").match
+# Matches the initial token on a line; see _tokenize(). Also eats trailing
+# whitespace as an optimization.
+_initial_token_re_match = re.compile(r"[^\w]*(\w+)\s*").match
 
-# Matches an identifier/keyword optionally preceded by whitespace
-_id_keyword_re_match = re.compile(r"\s*([\w./-]+)").match
+# Matches an identifier/keyword optionally preceded by whitespace. Also eats
+# trailing whitespace as an optimization.
+_id_keyword_re_match = re.compile(r"\s*([\w./-]+)\s*").match
 
 # Regular expressions for parsing .config files
 _set_re_match = re.compile(r"CONFIG_(\w+)=(.*)").match
-- 
cgit v1.2.3