diff options
| author | Ulf Magnusson <ulfalizer@gmail.com> | 2015-06-16 16:10:55 +0200 |
|---|---|---|
| committer | Ulf Magnusson <ulfalizer@gmail.com> | 2015-06-16 17:29:03 +0200 |
| commit | b7b5474954e3af31fd4da222a8d84795c917c136 (patch) | |
| tree | 7f14184f4ef13698288d81eeb5a1f00663c4f021 | |
| parent | eedfb571d08347c5c6bf35afa87bc7538686455e (diff) | |
Clean up and optimize tokenization some more.
- Get rid of the weird strip/reindex dance and keep 's' intact
throughout. This also fixes _tokenization_error() output, as it would
previously report the stripped string.
- Strip trailing whitespace in the regexes. This makes the stripping
loop very cheap.
- Shorten a comment that might've been more confusing than helpful.
| -rw-r--r-- | kconfiglib.py | 24 |
1 file changed, 13 insertions, 11 deletions
```diff
diff --git a/kconfiglib.py b/kconfiglib.py
index 2296211..8379182 100644
--- a/kconfiglib.py
+++ b/kconfiglib.py
@@ -696,14 +696,14 @@ class Config(object):
                     append(sym)
 
             else:
-                # This restrips whitespace that could have been stripped in the
-                # regex above, but it's worth it since identifiers/keywords are
-                # more common
-                s = s[i:].lstrip()
-                if s == "":
+                # Not an identifier/keyword
+
+                while i < strlen and s[i].isspace():
+                    i += 1
+                if i == strlen:
                     break
-                c = s[0]
-                i = 1
+                c = s[i]
+                i += 1
 
                 # String literal (constant symbol)
                 if c == '"' or c == "'":
@@ -3446,11 +3446,13 @@ BOOL_STR = {False: "false", True: "true"}
 STRING_LEX = frozenset((T_BOOL, T_TRISTATE, T_INT, T_HEX, T_STRING, T_CHOICE,
                         T_PROMPT, T_MENU, T_COMMENT, T_SOURCE, T_MAINMENU))
 
-# Matches the initial token on a line; see _tokenize().
-_initial_token_re_match = re.compile(r"[^\w]*(\w+)").match
+# Matches the initial token on a line; see _tokenize(). Also eats trailing
+# whitespace as an optimization.
+_initial_token_re_match = re.compile(r"[^\w]*(\w+)\s*").match
 
-# Matches an identifier/keyword optionally preceded by whitespace
-_id_keyword_re_match = re.compile(r"\s*([\w./-]+)").match
+# Matches an identifier/keyword optionally preceded by whitespace. Also eats
+# trailing whitespace as an optimization.
+_id_keyword_re_match = re.compile(r"\s*([\w./-]+)\s*").match
 
 # Regular expressions for parsing .config files
 _set_re_match = re.compile(r"CONFIG_(\w+)=(.*)").match
```
