diff options
| author | Ulf Magnusson <ulfalizer@gmail.com> | 2017-09-29 03:49:35 +0200 |
|---|---|---|
| committer | Ulf Magnusson <ulfalizer@gmail.com> | 2017-09-29 06:15:01 +0200 |
| commit | 5fea36ffd71d8f908133d091860ef4b139cc7e81 (patch) | |
| tree | 3b78d0c6a8724ed741fbeec535b803f33bd07a6b /kconfiglib.py | |
| parent | 3d27a14dbcb057eb99491babe1238bde6cfe66e6 (diff) | |
Reorganize whitespace stripping in _tokenize()
By consistently stripping trailing whitespace instead of stripping
initial whitespace in the string/operator case, we do less redundant
work and handle the '\n's at the end of lines better. Shaves a few % off
the _tokenize() runtime in cProfile and line_profiler.
Diffstat (limited to 'kconfiglib.py')
| -rw-r--r-- | kconfiglib.py | 37 |
1 file changed, 25 insertions, 12 deletions
```diff
diff --git a/kconfiglib.py b/kconfiglib.py
index df7591f..274f0fd 100644
--- a/kconfiglib.py
+++ b/kconfiglib.py
@@ -1350,11 +1350,16 @@ class Config(object):
             else:
                 # Not an identifier/keyword
 
-                # Find the next non-whitespace character
-                while i < len(s) and s[i].isspace():
-                    i += 1
-                if i == len(s):
-                    break
+                # Note: _id_keyword_match and _initial_token_match strip
+                # trailing whitespace, making it safe to assume s[i] is the
+                # start of a token here. We manually strip trailing whitespace
+                # below as well.
+                #
+                # An old version stripped whitespace in this spot instead, but
+                # that leads to some redundancy and would cause
+                # _id_keyword_match to be tried against just "\n" fairly often
+                # (because file.readlines() keeps newlines).
+
                 c = s[i]
                 i += 1
 
@@ -1436,7 +1441,13 @@ class Config(object):
                     else:
                         token = _T_GREATER
 
-                else: continue # Invalid characters are ignored
+                else:
+                    # Invalid characters are ignored
+                    continue
+
+                # Skip trailing whitespace
+                while i < len(s) and s[i].isspace():
+                    i += 1
 
             tokens.append(token)
 
@@ -3627,14 +3638,16 @@ _STRING_LEX = frozenset((
 # command_chars characters.
 # This is why things like "----help--" are accepted.
 #
-# As an optimization, this regex also fails to match for lines containing just
-# a comment, and also matches trailing whitespace so it can be jumped over
-# immediately.
+# In addition to the initial token, the regex also matches trailing whitespace
+# so that we can jump straight to the next token (or to the end of the line if
+# there's just a single token).
+#
+# As an optimization, this regex fails to match for lines containing just a
+# comment.
 _initial_token_re_match = re.compile(r"[^\w#]*(\w+)\s*").match
 
-# Matches an identifier/keyword optionally preceded by whitespace. Also eats
-# trailing whitespace as an optimization.
-_id_keyword_re_match = re.compile(r"\s*([\w./-]+)\s*").match
+# Matches an identifier/keyword, also eating trailing whitespace
+_id_keyword_re_match = re.compile(r"([\w./-]+)\s*").match
 
 # Regular expression for finding $-references to symbols in strings
 _sym_ref_re_search = re.compile(r"\$[A-Za-z0-9_]+").search
```
