summaryrefslogtreecommitdiff
path: root/kconfiglib.py
diff options
context:
space:
mode:
authorUlf Magnusson <ulfalizer@gmail.com>2012-12-08 02:04:56 +0100
committerUlf Magnusson <ulfalizer@gmail.com>2012-12-08 05:28:50 +0100
commitbce7ded2528b90eef91e4ac872332d1bc1bd55b5 (patch)
tree62893e603899b2f2669294ffb114c8831fede3b1 /kconfiglib.py
parent3006be3e3a147d3f6efaf2097adde950efd62dc2 (diff)
Arrange _tokenize() cases by frequency.
Now arranged by the frequency different cases are encountered while parsing the x86 configuration (likely the same as for all archs). Gives a small speed-up. Also get rid of an unnecessary sym_chars membership test and add/fix some comments.
Diffstat (limited to 'kconfiglib.py')
-rw-r--r--kconfiglib.py147
1 file changed, 76 insertions, 71 deletions
diff --git a/kconfiglib.py b/kconfiglib.py
index 0a3f364..0c812f9 100644
--- a/kconfiglib.py
+++ b/kconfiglib.py
@@ -609,6 +609,7 @@ class Config():
previous = None
strlen = len(s)
+ # This is a hotspot during parsing, and this speeds things up a bit
append = tokens.append
# The current index in the string being tokenized
@@ -647,31 +648,69 @@ class Config():
while i < strlen:
c = s[i]
+ # Arranged by the frequency the cases are encountered, which gives
+ # a small speed-up
+
if c.isspace():
i += 1
continue
- elif c == "=":
- append(T_EQUAL)
+ if c in sym_chars:
+ name_start = i
+
+ # Locate the end of the symbol/keyword
i += 1
+ while i < strlen and s[i] in sym_chars:
+ i += 1
- elif c == "!":
- if i + 1 >= strlen:
- _tokenization_error(s, strlen, filename, linenr)
- if s[i + 1] == "=":
- append(T_UNEQUAL)
- i += 2
+ name = s[name_start:i]
+
+ keyword = keywords.get(name)
+
+ if keyword is not None:
+ append(keyword)
+ # What would ordinarily be considered a name is treated as a
+ # string after certain tokens.
+ elif previous in string_lex:
+ append(name)
else:
- append(T_NOT)
- i += 1
+ # We're dealing with a symbol. _sym_lookup() will take care
+ # of allocating a new Symbol instance if it's the first
+ # time we see it.
+ sym = self._sym_lookup(name, not for_eval)
- elif c == "(":
- append(T_OPEN_PAREN)
- i += 1
+ if previous == T_CONFIG or previous == T_MENUCONFIG:
+ # If the previous token is T_(MENU)CONFIG
+ # ("(menu)config"), we're tokenizing the first line of
+ # a symbol definition, and should remember this as a
+ # location where the symbol is defined.
+ sym.def_locations.append((filename, linenr))
+ else:
+ # Otherwise, it's a reference to the symbol
+ sym.ref_locations.append((filename, linenr))
- elif c == ")":
- append(T_CLOSE_PAREN)
+ append(sym)
+
+ elif c == '"' or c == "'":
+ quote = c
+ value = ""
+ i += 1
+ while True:
+ if i >= strlen:
+ _tokenization_error(s, strlen, filename, linenr)
+ c = s[i]
+ if c == quote:
+ break
+ elif c == "\\":
+ if i + 1 >= strlen:
+ _tokenization_error(s, strlen, filename, linenr)
+ value += s[i + 1]
+ i += 2
+ else:
+ value += c
+ i += 1
i += 1
+ append(value)
elif c == "&":
if i + 1 >= strlen:
@@ -695,71 +734,36 @@ class Config():
append(T_OR)
i += 2
- elif c == '"' or c == "'":
- quote = c
- value = ""
+ elif c == "!":
+ if i + 1 >= strlen:
+ _tokenization_error(s, strlen, filename, linenr)
+ if s[i + 1] == "=":
+ append(T_UNEQUAL)
+ i += 2
+ else:
+ append(T_NOT)
+ i += 1
+
+ elif c == "=":
+ append(T_EQUAL)
i += 1
- while True:
- if i >= strlen:
- _tokenization_error(s, strlen, filename, linenr)
- c = s[i]
- if c == quote:
- break
- elif c == "\\":
- if i + 1 >= strlen:
- _tokenization_error(s, strlen, filename, linenr)
- value += s[i + 1]
- i += 2
- else:
- value += c
- i += 1
+
+ elif c == "(":
+ append(T_OPEN_PAREN)
+ i += 1
+
+ elif c == ")":
+ append(T_CLOSE_PAREN)
i += 1
- append(value)
elif c == "#":
break
- elif c not in sym_chars:
+ else:
# Invalid characters are ignored
i += 1
continue
- else: # Symbol or keyword
- name_start = i
-
- # Locate the end of the symbol/keyword
- i += 1
- while i < strlen and s[i] in sym_chars:
- i += 1
-
- name = s[name_start:i]
-
- keyword = keywords.get(name)
-
- if keyword is not None:
- append(keyword)
- # What would ordinarily be considered a name is treated as a
- # string after certain tokens.
- elif previous in string_lex:
- append(name)
- else:
- # We're dealing with a symbol. _sym_lookup() will take care
- # of allocating a new Symbol instance if it's the first
- # time we see it.
- sym = self._sym_lookup(name, not for_eval)
-
- if previous in (T_CONFIG, T_MENUCONFIG):
- # If the previous token is T_CONFIG ("config"), we're
- # tokenizing the first line of a symbol definition, and
- # should remember this as a location where the symbol
- # is defined.
- sym.def_locations.append((filename, linenr))
- else:
- # Otherwise, it's a reference to the symbol
- sym.ref_locations.append((filename, linenr))
-
- append(sym)
-
previous = tokens[-1]
return _Feed(tokens)
@@ -860,7 +864,8 @@ class Config():
isinstance(sym_or_string, Symbol):
self.parse_expr_cur_sym_or_choice.referenced_syms.add(sym_or_string)
- if feed.peek_next() not in (T_EQUAL, T_UNEQUAL):
+ next_token = feed.peek_next()
+ if next_token != T_EQUAL and next_token != T_UNEQUAL:
if self.parse_expr_transform_m and (sym_or_string is self.m or
sym_or_string == "m"):
return (AND, ["m", self._sym_lookup("MODULES")])