From efb0a239c4aaee370d97caf216859a724d7f72bd Mon Sep 17 00:00:00 2001
From: Jacob McDonnell
Date: Sun, 15 Mar 2026 20:03:26 -0400
Subject: feat: Parsing modifiers and directives

Support added for parsing a limited set of directives and for all access
modifiers.
---
Review notes (not part of the commit message). The diff below differs from
commit efb0a239 in the lines listed here, so the blob ids on the "index"
lines no longer describe the new side. Indentation was reconstructed from a
whitespace-collapsed copy of the patch.

- lexer: hex literals were converted with atoi(), which stops at the 'x'
  and yields 0. They now go through strtol() with base 16.
- lexer: "-*[0-9]+" accepted any number of leading minus signs ("--5"),
  which atoi() turns into 0. The sign is now a single optional '-'.
- lexer: the leading '.' of every directive pattern is escaped. Unescaped
  it matches any character, so an identifier such as "sword" tied in length
  with the symbol rule and, being listed first, ".word" won.
- lexer: the T_SYMBOL action stores into yylval.symbol rather than
  yylval.modifier (both are char *, so this is a consistency fix).
- parser: the <member> tags on the value-carrying %token lines are written
  out. The actions print $n with %s / %d and need a declared type. The tags
  were missing from the copy under review (everything in angle brackets had
  been stripped); member names are taken from the %union and from what the
  lexer assigns -- NOTE(review): confirm against the repository.
- follow-up, not changed here: the grammar only accepts directives with an
  integer or string operand, so ".text" or ".globl name" lex as T_DIRECTIVE
  but have no matching rule.

 src/lexer.l  | 52 ++++++++++++++++++++++++++++++++++---
 src/parser.y | 80 ++++++++++++++++++++++++++++++++++++++++++++++++------------
 2 files changed, 111 insertions(+), 21 deletions(-)

(limited to 'src')

diff --git a/src/lexer.l b/src/lexer.l
index 26be828..ac579e7 100644
--- a/src/lexer.l
+++ b/src/lexer.l
@@ -7,11 +7,53 @@ extern size_t line_number;
 %}
 %option noyywrap
 %%
-^[a-zA-Z]+ { yylval.symbol = strdup(yytext); return T_INSTRUCTION; }
-%*[a-zA-Z\_\-]+[a-zA-Z0-9\_\-]* { yylval.symbol = strdup(yytext); return T_SYMBOL; }
-0x[0-9A-Fa-f]+ { yylval.i_val = atoi(yytext); return T_INTEGER; }
-[0-9]+ { yylval.i_val = atoi(yytext); return T_INTEGER; }
--*[0-9]*\.*[0-9]* { yylval.i_val = atof(yytext); return T_FLOAT; }
+0x[0-9A-Fa-f]+ { yylval.i_val = (int)strtol(yytext, NULL, 16); return T_INTEGER; }
+-?[0-9]+ { yylval.i_val = (int)strtol(yytext, NULL, 10); return T_INTEGER; }
+^[a-zA-Z0-9\.\_]+: {
+    yylval.symbol = strdup(yytext);
+    char *const colon = strrchr(yylval.symbol, ':');
+    if (colon != NULL) {
+        *colon = '\0';
+    }
+    return T_LABEL;
+}
+\.align { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.globl { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.local { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.ident { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.section { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.size { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.text { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.data { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.rodata { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.bss { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.string { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.asciz { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.equ { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.byte { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.2byte { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.half { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.short { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.4byte { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.word { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.long { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.8byte { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.dword { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.quad { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.float { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.double { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.zero { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+%hi { yylval.modifier = strdup(yytext); return T_MODIFIER; }
+%lo { yylval.modifier = strdup(yytext); return T_MODIFIER; }
+%pcrel_hi { yylval.modifier = strdup(yytext); return T_MODIFIER; }
+%pcrel_lo { yylval.modifier = strdup(yytext); return T_MODIFIER; }
+%tprel_hi { yylval.modifier = strdup(yytext); return T_MODIFIER; }
+%tprel_lo { yylval.modifier = strdup(yytext); return T_MODIFIER; }
+%tprel_add { yylval.modifier = strdup(yytext); return T_MODIFIER; }
+%tls_ie_pcrel_hi { yylval.modifier = strdup(yytext); return T_MODIFIER; }
+%tls_gd_pcrel_hi { yylval.modifier = strdup(yytext); return T_MODIFIER; }
+%got_pcrel_hi { yylval.modifier = strdup(yytext); return T_MODIFIER; }
+[a-zA-Z\_\.]+[a-zA-Z0-9\_\.]* { yylval.symbol = strdup(yytext); return T_SYMBOL; }
 \".*\" {
     yylval.s_val = strdup(yytext+1);
     char *const close_paren = strrchr(yylval.s_val, '"');
diff --git a/src/parser.y b/src/parser.y
index e63935a..5f7b4bb 100644
--- a/src/parser.y
+++ b/src/parser.y
@@ -14,20 +14,22 @@ void yyerror(const char *s);
 
 %union {
     int i_val;
-    float f_val;
     char *s_val;
     char *symbol;
+    char *modifier;
+    char *directive;
 }
 
-%token <i_val>  T_INTEGER
-%token <f_val>  T_FLOAT
-%token <s_val>  T_STRING
-%token <symbol> T_SYMBOL
-%token <symbol> T_INSTRUCTION
-%token          T_ENDL
-%token          T_COMMA
-%token          T_OPENPAREN
-%token          T_CLOSEPAREN
+%token <i_val>     T_INTEGER
+%token <s_val>     T_STRING
+%token <symbol>    T_SYMBOL
+%token <directive> T_DIRECTIVE
+%token <symbol>    T_LABEL
+%token <modifier>  T_MODIFIER
+%token             T_ENDL
+%token             T_COMMA
+%token             T_OPENPAREN
+%token             T_CLOSEPAREN
 
 %%
 asm: statements
@@ -39,48 +41,94 @@ statements:
     ;
 
 statement:
-    instructions
+    label
+    | directives
+    | instructions
     | T_ENDL
     ;
 
+label:
+    T_LABEL T_ENDL
+    {
+        printf("Found Label: %s\n", $1);
+    };
+
 instructions:
     rb_type
     | i_type
     | s_type
     | u_type
     | j_type
+    | i_type_modifier
+    | s_type_modifier
+    | pseudo_type
     ;
 
 rb_type:
-    T_INSTRUCTION T_SYMBOL T_COMMA T_SYMBOL T_COMMA T_SYMBOL T_ENDL
+    T_SYMBOL T_SYMBOL T_COMMA T_SYMBOL T_COMMA T_SYMBOL T_ENDL
     {
         printf("Read instruction: %s(%s, %s, %s)\n", $1, $2, $4, $6);
     };
 
 i_type:
-    T_INSTRUCTION T_SYMBOL T_COMMA T_SYMBOL T_COMMA T_INTEGER T_ENDL
+    T_SYMBOL T_SYMBOL T_COMMA T_SYMBOL T_COMMA T_INTEGER T_ENDL
     {
         printf("Read instruction: %s(%s, %s, %d)\n", $1, $2, $4, $6);
     };
 
+i_type_modifier:
+    T_SYMBOL T_SYMBOL T_COMMA T_SYMBOL T_COMMA T_MODIFIER T_OPENPAREN T_SYMBOL T_CLOSEPAREN T_ENDL
+    {
+        printf("Read instruction: %s(%s, %s, %s of %s)\n", $1, $2, $4, $6, $8);
+    };
+
 s_type:
-    T_INSTRUCTION T_SYMBOL T_COMMA T_INTEGER T_OPENPAREN T_SYMBOL T_CLOSEPAREN T_ENDL
+    T_SYMBOL T_SYMBOL T_COMMA T_INTEGER T_OPENPAREN T_SYMBOL T_CLOSEPAREN T_ENDL
     {
         printf("Read instruction: %s(%s, %s + %d)\n", $1, $2, $6, $4);
     };
 
+s_type_modifier:
+    T_SYMBOL T_SYMBOL T_COMMA T_MODIFIER T_OPENPAREN T_SYMBOL T_CLOSEPAREN T_ENDL
+    {
+        printf("Read instruction: %s(%s, %s of %s)\n", $1, $2, $4, $6);
+    };
+
 u_type:
-    T_INSTRUCTION T_SYMBOL T_COMMA T_INTEGER T_ENDL
+    T_SYMBOL T_SYMBOL T_COMMA T_INTEGER T_ENDL
     {
         printf("Read instruction: %s(%s, %d)\n", $1, $2, $4);
     };
 
 j_type:
-    T_INSTRUCTION T_SYMBOL T_COMMA T_SYMBOL T_ENDL
+    T_SYMBOL T_SYMBOL T_COMMA T_SYMBOL T_ENDL
     {
         printf("Read instruction: %s(%s, %s)\n", $1, $2, $4);
     };
 
+pseudo_type:
+    T_SYMBOL T_SYMBOL T_ENDL
+    {
+        printf("Read Pseudo Instruction: %s(%s)\n", $1, $2);
+    };
+
+directives:
+    directive_int
+    | directive_string
+    ;
+
+directive_int:
+    T_DIRECTIVE T_INTEGER T_ENDL
+    {
+        printf("Read directive %s %d\n", $1, $2);
+    };
+
+directive_string:
+    T_DIRECTIVE T_STRING T_ENDL
+    {
+        printf("Read directive %s %s\n", $1, $2);
+    };
+
 %%
 
 void yyerror(const char *s) {
-- 
cgit v1.2.3