diff options
| author | Ulf Magnusson <ulfalizer@gmail.com> | 2015-06-04 02:26:17 +0200 |
|---|---|---|
| committer | Ulf Magnusson <ulfalizer@gmail.com> | 2015-06-04 03:07:46 +0200 |
| commit | 8e77ba8bd7c13aa0b39fb991dca7175e793f17de (patch) | |
| tree | 1e96b6b950c269754dac0eda7d978d902a00bfaf | |
| parent | 23dc02085e5ad167cff76c4a078a9d300feadf9b (diff) | |
Small regex-related optimization.
Prefetch the bound SRE_Pattern.match() methods of the compiled regexes at
module level instead of looking up .match on each call. Shaves a few % off
Config._tokenize().
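Do the same for the other module-level regexes (the .config set/unset
patterns and the .search() method of the $-reference pattern) for
consistency, though I'm not sure if it's a meaningful win there.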
| -rw-r--r-- | kconfiglib.py | 40 |
1 file changed, 19 insertions, 21 deletions
diff --git a/kconfiglib.py b/kconfiglib.py index 5cefdd2..2a74d92 100644 --- a/kconfiglib.py +++ b/kconfiglib.py @@ -252,11 +252,9 @@ class Config(object): self.config_header = None def is_header_line(line): - return line.startswith("#") and \ - not unset_re.match(line) + return line.startswith("#") and not unset_re_match(line) first_line = line_feeder.get_next() - if first_line is None: return @@ -295,9 +293,9 @@ class Config(object): line = line.strip() - set_re_match = set_re.match(line) - if set_re_match: - name, val = set_re_match.groups() + set_match = set_re_match(line) + if set_match: + name, val = set_match.groups() # The unescaping producedure below should be safe since " can # only appear as \" inside the string val = _strip_quotes(val, line, filename, linenr)\ @@ -328,9 +326,9 @@ class Config(object): linenr) else: - unset_re_match = unset_re.match(line) - if unset_re_match: - name = unset_re_match.group(1) + unset_match = unset_re_match(line) + if unset_match: + name = unset_match.group(1) if name in self.syms: sym = self.syms[name] @@ -632,7 +630,7 @@ class Config(object): # characters. # This is why things like "----help--" are accepted. - initial_token_match = initial_token_re.match(s) + initial_token_match = initial_token_re_match(s) if initial_token_match is None: return _Feed([]) # The current index in the string being tokenized @@ -658,7 +656,7 @@ class Config(object): while i < strlen: # Test for an identifier/keyword preceded by whitespace first; this # is the most common case. - id_keyword_match = id_keyword_re.match(s, i) + id_keyword_match = id_keyword_re_match(s, i) if id_keyword_match: # We have an identifier or keyword. The above also stripped any # whitespace for us. 
@@ -1637,17 +1635,17 @@ error, and you should email ulfalizer a.t Google's email service.""" empty string for undefined symbols.""" while 1: - sym_ref_re_match = sym_ref_re.search(s) - if sym_ref_re_match is None: + sym_ref_match = sym_ref_re_search(s) + if sym_ref_match is None: return s - sym_name = sym_ref_re_match.group(0)[1:] + sym_name = sym_ref_match.group(0)[1:] sym = self.syms.get(sym_name) expansion = "" if sym is None else sym.get_value() - s = s[:sym_ref_re_match.start()] + \ + s = s[:sym_ref_match.start()] + \ expansion + \ - s[sym_ref_re_match.end():] + s[sym_ref_match.end():] def _get_sym_or_choice_str(self, sc): """Symbols and choices have many properties in common, so we factor out @@ -2089,17 +2087,17 @@ string_lex = frozenset((T_BOOL, T_TRISTATE, T_INT, T_HEX, T_STRING, T_CHOICE, T_PROMPT, T_MENU, T_COMMENT, T_SOURCE, T_MAINMENU)) # Matches the initial token on a line; see _tokenize(). -initial_token_re = re.compile(r"[^\w]*(\w+)") +initial_token_re_match = re.compile(r"[^\w]*(\w+)").match # Matches an identifier/keyword optionally preceded by whitespace -id_keyword_re = re.compile(r"\s*([\w./-]+)") +id_keyword_re_match = re.compile(r"\s*([\w./-]+)").match # Regular expressions for parsing .config files -set_re = re.compile(r"CONFIG_(\w+)=(.*)") -unset_re = re.compile(r"# CONFIG_(\w+) is not set") +set_re_match = re.compile(r"CONFIG_(\w+)=(.*)").match +unset_re_match = re.compile(r"# CONFIG_(\w+) is not set").match # Regular expression for finding $-references to symbols in strings -sym_ref_re = re.compile(r"\$[A-Za-z0-9_]+") +sym_ref_re_search = re.compile(r"\$[A-Za-z0-9_]+").search # Integers representing symbol types UNKNOWN, BOOL, TRISTATE, STRING, HEX, INT = range(0, 6) |
