summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJacob McDonnell <jacob@jacobmcdonnell.com>2026-03-15 20:03:26 -0400
committerJacob McDonnell <jacob@jacobmcdonnell.com>2026-03-15 20:03:26 -0400
commitefb0a239c4aaee370d97caf216859a724d7f72bd (patch)
treed13e9523103fab6c7c2eebbc654847f4924ea603 /src
parent68b9f87bc3b6bd6ccb3773641578b0d914e61104 (diff)
feat: Parsing modifiers and directives
Support added for parsing a limited set of assembler directives and for all operand modifiers (%hi, %lo, %pcrel_hi, etc.).
Diffstat (limited to 'src')
-rw-r--r--src/lexer.l50
-rw-r--r--src/parser.y80
2 files changed, 110 insertions, 20 deletions
diff --git a/src/lexer.l b/src/lexer.l
index 26be828..ac579e7 100644
--- a/src/lexer.l
+++ b/src/lexer.l
@@ -7,11 +7,53 @@ extern size_t line_number;
%}
%option noyywrap
%%
-^[a-zA-Z]+ { yylval.symbol = strdup(yytext); return T_INSTRUCTION; }
-%*[a-zA-Z\_\-]+[a-zA-Z0-9\_\-]* { yylval.symbol = strdup(yytext); return T_SYMBOL; }
0x[0-9A-Fa-f]+ { /* atoi() stops at the 'x' and yields 0; strtoul with base 16 accepts the 0x prefix */ yylval.i_val = (int)strtoul(yytext, NULL, 16); return T_INTEGER; }
-[0-9]+ { yylval.i_val = atoi(yytext); return T_INTEGER; }
--*[0-9]*\.*[0-9]* { yylval.i_val = atof(yytext); return T_FLOAT; }
+-*[0-9]+ { yylval.i_val = atoi(yytext); return T_INTEGER; }
+^[a-zA-Z0-9\.\_]+: {
+ yylval.symbol = strdup(yytext);
+ char *const colon = strrchr(yylval.symbol, ':');
+ if (colon != NULL) {
+ *colon = '\0';
+ }
+ return T_LABEL;
+}
+\.align { /* the dot is escaped in every directive rule: a bare '.' matches any character, so " zero" in an operand list would lex as a directive */ yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.globl { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.local { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.ident { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.section { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.size { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.text { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.data { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.rodata { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.bss { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.string { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.asciz { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.equ { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.byte { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.2byte { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.half { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.short { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.4byte { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.word { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.long { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.8byte { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.dword { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.quad { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.float { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.double { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+\.zero { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
+%hi { yylval.modifier = strdup(yytext); return T_MODIFIER; }
+%lo { yylval.modifier = strdup(yytext); return T_MODIFIER; }
+%pcrel_hi { yylval.modifier = strdup(yytext); return T_MODIFIER; }
+%pcrel_lo { yylval.modifier = strdup(yytext); return T_MODIFIER; }
+%tprel_hi { yylval.modifier = strdup(yytext); return T_MODIFIER; }
+%tprel_lo { yylval.modifier = strdup(yytext); return T_MODIFIER; }
+%tprel_add { yylval.modifier = strdup(yytext); return T_MODIFIER; }
+%tls_ie_pcrel_hi { yylval.modifier = strdup(yytext); return T_MODIFIER; }
+%tls_gd_pcrel_hi { yylval.modifier = strdup(yytext); return T_MODIFIER; }
+%got_pcrel_hi { yylval.modifier = strdup(yytext); return T_MODIFIER; }
+[a-zA-Z\_\.]+[a-zA-Z0-9\_\.]* { /* T_SYMBOL is declared <symbol> in parser.y, so fill that union member rather than .modifier */ yylval.symbol = strdup(yytext); return T_SYMBOL; }
\".*\" {
yylval.s_val = strdup(yytext+1);
char *const close_paren = strrchr(yylval.s_val, '"');
diff --git a/src/parser.y b/src/parser.y
index e63935a..5f7b4bb 100644
--- a/src/parser.y
+++ b/src/parser.y
@@ -14,20 +14,22 @@ void yyerror(const char *s);
%union {
int i_val;
- float f_val;
char *s_val;
char *symbol;
+ char *modifier;
+ char *directive;
}
-%token <i_val> T_INTEGER
-%token <f_val> T_FLOAT
-%token <s_val> T_STRING
-%token <symbol> T_SYMBOL
-%token <symbol> T_INSTRUCTION
-%token T_ENDL
-%token T_COMMA
-%token T_OPENPAREN
-%token T_CLOSEPAREN
+%token <i_val> T_INTEGER
+%token <s_val> T_STRING
+%token <symbol> T_SYMBOL
+%token <directive> T_DIRECTIVE
+%token <symbol> T_LABEL
+%token <modifier> T_MODIFIER
+%token T_ENDL
+%token T_COMMA
+%token T_OPENPAREN
+%token T_CLOSEPAREN
%%
asm:
statements
@@ -39,48 +41,94 @@ statements:
;
statement:
- instructions
+ label
+ | directives
+ | instructions
| T_ENDL
;
+label:
+ T_LABEL T_ENDL
+ {
+ printf("Found Label: %s\n", $1);
+ };
+
instructions:
rb_type
| i_type
| s_type
| u_type
| j_type
+ | i_type_modifier
+ | s_type_modifier
+ | pseudo_type
;
rb_type:
- T_INSTRUCTION T_SYMBOL T_COMMA T_SYMBOL T_COMMA T_SYMBOL T_ENDL
+ T_SYMBOL T_SYMBOL T_COMMA T_SYMBOL T_COMMA T_SYMBOL T_ENDL
{
printf("Read instruction: %s(%s, %s, %s)\n", $1, $2, $4, $6);
};
i_type:
- T_INSTRUCTION T_SYMBOL T_COMMA T_SYMBOL T_COMMA T_INTEGER T_ENDL
+ T_SYMBOL T_SYMBOL T_COMMA T_SYMBOL T_COMMA T_INTEGER T_ENDL
{
printf("Read instruction: %s(%s, %s, %d)\n", $1, $2, $4, $6);
};
+i_type_modifier:
+ T_SYMBOL T_SYMBOL T_COMMA T_SYMBOL T_COMMA T_MODIFIER T_OPENPAREN T_SYMBOL T_CLOSEPAREN T_ENDL
+ {
+ printf("Read instruction: %s(%s, %s, %s of %s)\n", $1, $2, $4, $6, $8);
+ };
+
s_type:
- T_INSTRUCTION T_SYMBOL T_COMMA T_INTEGER T_OPENPAREN T_SYMBOL T_CLOSEPAREN T_ENDL
+ T_SYMBOL T_SYMBOL T_COMMA T_INTEGER T_OPENPAREN T_SYMBOL T_CLOSEPAREN T_ENDL
{
printf("Read instruction: %s(%s, %s + %d)\n", $1, $2, $6, $4);
};
+s_type_modifier:
+ T_SYMBOL T_SYMBOL T_COMMA T_MODIFIER T_OPENPAREN T_SYMBOL T_CLOSEPAREN T_ENDL
+ {
+ printf("Read instruction: %s(%s, %s of %s)\n", $1, $2, $4, $6);
+ };
+
u_type:
- T_INSTRUCTION T_SYMBOL T_COMMA T_INTEGER T_ENDL
+ T_SYMBOL T_SYMBOL T_COMMA T_INTEGER T_ENDL
{
printf("Read instruction: %s(%s, %d)\n", $1, $2, $4);
};
j_type:
- T_INSTRUCTION T_SYMBOL T_COMMA T_SYMBOL T_ENDL
+ T_SYMBOL T_SYMBOL T_COMMA T_SYMBOL T_ENDL
{
printf("Read instruction: %s(%s, %s)\n", $1, $2, $4);
};
+pseudo_type:
+ T_SYMBOL T_SYMBOL T_ENDL
+ {
+ printf("Read Pseudo Instruction: %s(%s)\n", $1, $2);
+ };
+
+directives:
+ directive_int
+ | directive_string
+ ;
+
+directive_int:
+ T_DIRECTIVE T_INTEGER T_ENDL
+ {
+ printf("Read directive %s %d\n", $1, $2);
+ };
+
+directive_string:
+ T_DIRECTIVE T_STRING T_ENDL
+ {
+ printf("Read directive %s %s\n", $1, $2);
+ };
+
%%
void yyerror(const char *s) {