summaryrefslogtreecommitdiff
path: root/kconfiglib.py
diff options
context:
space:
mode:
authorUlf Magnusson <ulfalizer@gmail.com>2012-11-08 14:34:25 +0100
committerUlf Magnusson <ulfalizer@gmail.com>2012-11-08 14:34:25 +0100
commitb9409a7ca140f5f677b46293f0c8db52b14280d9 (patch)
tree95c1b7b5005b2c33adfc4f468a48c97990194582 /kconfiglib.py
parent00c10acf00140f5f14324c7ebbc316b10d0e4688 (diff)
Skip invalid characters when tokenizing.
Emulate zconf.l w.r.t. invalid characters at different locations within a line. This eliminates the need for special hacks to handle -*help-* "tokens". The Linux 3.7.0 configuration files now parse without errors.
Diffstat (limited to 'kconfiglib.py')
-rw-r--r--kconfiglib.py71
1 file changed, 57 insertions, 14 deletions
diff --git a/kconfiglib.py b/kconfiglib.py
index db8feb4..ee81c50 100644
--- a/kconfiglib.py
+++ b/kconfiglib.py
@@ -812,7 +812,7 @@ class Config():
implementation and by kconfiglib, so that MODULES needs to be enabled
for the expression to be true. Pass True here if you want that to
happen; otherwise, pass False."""
- return self._eval_expr(self._parse_expr(self._tokenize(s, False),
+ return self._eval_expr(self._parse_expr(self._tokenize(s, True),
None,
s,
transform_m = transform_m))
@@ -899,7 +899,7 @@ class Config():
def _tokenize(self,
s,
- add_sym_if_not_exists = True,
+ for_eval = False,
filename = None,
linenr = None):
"""Returns a _Feed instance containing tokens derived from the string
@@ -908,9 +908,9 @@ class Config():
(I experimented with a regular expression implementation, but it came
out 5% _slower_ and wouldn't have been as flexible.)
- add_sym_if_not_exists -- False when we do not want to register new
- symbols, such as when called from
- Config.eval()."""
+ for_eval -- True when parsing an expression for a call to
+ Config.eval(), in which case we should not treat the first
+ token specially nor register new symbols."""
tokens = []
previous = None
@@ -920,6 +920,36 @@ class Config():
# The current index in the string being tokenized
i = 0
+ if not for_eval:
+ # The initial word on a line is parsed specially. Let
+ # command_chars = [A-Za-z0-9_]. Then
+ # - leading non-command_chars characters on the line are ignored, and
+ # - the first token consists of the following one or more command_chars
+ # characters.
+ # This is why things like "----help--" are accepted.
+
+ # Ignore leading non-command_chars characters
+ while i < strlen and s[i] not in command_chars:
+ # We need to look out for comments
+ if s[i] == "#":
+ return _Feed([])
+ i += 1
+ keyword_start = i
+ # Locate the end of the keyword
+ while i < strlen and s[i] in command_chars:
+ i += 1
+ keyword_lexeme = s[keyword_start:i]
+ # Blank line?
+ if keyword_lexeme == "":
+ return _Feed([])
+ keyword = keywords.get(keyword_lexeme)
+ if keyword is None:
+ # We expect a keyword as the first token
+ _tokenization_error(s, strlen, filename, linenr)
+ append(keyword)
+ previous = keyword
+
+ # Main tokenization loop (handles tokens past the first one)
while i < strlen:
c = s[i]
@@ -951,17 +981,23 @@ class Config():
elif c == "&":
if i + 1 >= strlen:
- _tokenization_error(s, strlen, filename, linenr)
+ # Invalid characters are ignored
+ continue
if s[i + 1] != "&":
- _tokenization_error(s, i + 1, filename, linenr)
+ # Invalid characters are ignored
+ i += 1
+ continue
append(T_AND)
i += 2
elif c == "|":
if i + 1 >= strlen:
- _tokenization_error(s, strlen, filename, linenr)
+ # Invalid characters are ignored
+ continue
if s[i + 1] != "|":
- _tokenization_error(s, i + 1, filename, linenr)
+ # Invalid characters are ignored
+ i += 1
+ continue
append(T_OR)
i += 2
@@ -989,6 +1025,11 @@ class Config():
elif c == "#":
break
+ elif c not in sym_chars:
+ # Invalid characters are ignored
+ i += 1
+ continue
+
else: # Symbol or keyword
name_start = i
@@ -1011,7 +1052,7 @@ class Config():
# We're dealing with a symbol. _sym_lookup() will take care
# of allocating a new Symbol instance if it's the first
# time we see it.
- sym = self._sym_lookup(name, add_sym_if_not_exists)
+ sym = self._sym_lookup(name, not for_eval)
if previous == T_CONFIG:
# If the previous token is T_CONFIG ("config"), we're
@@ -1194,7 +1235,7 @@ class Config():
linenr = line_feeder.get_linenr()
- tokens = self._tokenize(line, True, filename, linenr)
+ tokens = self._tokenize(line, False, filename, linenr)
if tokens.is_empty():
continue
@@ -1386,7 +1427,7 @@ class Config():
filename = line_feeder.get_filename()
linenr = line_feeder.get_linenr()
- tokens = self._tokenize(line, True, filename, linenr)
+ tokens = self._tokenize(line, False, filename, linenr)
if tokens.is_empty():
continue
@@ -2249,8 +2290,6 @@ keywords = {
"comment" : T_COMMENT,
"menuconfig" : T_MENUCONFIG,
"help" : T_HELP,
- "---help---" : T_HELP,
- "---" : T_HELP, # Hack to handle CONFIG_W1_CON
"if" : T_IF,
"depends" : T_DEPENDS,
"on" : T_ON,
@@ -2289,6 +2328,10 @@ string_lex = (T_BOOL, T_TRISTATE, T_INT, T_HEX, T_STRING,
# Characters that may appear in symbol names
sym_chars = frozenset(string.ascii_letters + string.digits + "._/-")
+# Characters that might be the first significant character on a line. Other
+# characters are ignored at the beginning of a line.
+command_chars = frozenset(string.ascii_letters + string.digits + "_");
+
# Regular expressions for parsing .config files
set_re = re.compile(r"CONFIG_(\w+)=(.*)")
unset_re = re.compile(r"# CONFIG_(\w+) is not set")