diff options
| author | Jacob McDonnell <jacob@jacobmcdonnell.com> | 2026-03-15 20:59:54 -0400 |
|---|---|---|
| committer | Jacob McDonnell <jacob@jacobmcdonnell.com> | 2026-03-15 20:59:54 -0400 |
| commit | 7025f30be3b6cccf9f419daec9f0b7aeeaa2d6d3 (patch) | |
| tree | 75bf8ff2b8bfcd5f7766203e63c2a4be67e99546 | |
| parent | efb0a239c4aaee370d97caf216859a724d7f72bd (diff) | |
refactor: Explicit register names & instructions
Explicit tokenization of register names and instructions. This forces
stricter syntax conformance and prevents errors from slipping through.
It also reserves these keywords, so the user cannot use them as names
for anything other than their intended purpose.
| -rw-r--r-- | examples/instructions.s | 2 | ||||
| -rw-r--r-- | src/lexer.l | 229 | ||||
| -rw-r--r-- | src/parser.y | 89 |
3 files changed, 268 insertions, 52 deletions
diff --git a/examples/instructions.s b/examples/instructions.s index 5379657..d3a7fab 100644 --- a/examples/instructions.s +++ b/examples/instructions.s @@ -2,7 +2,7 @@ add x5, x6, x7 sub x10, x11, x12 and x1, x2, x3 -sll x8, x9, x10o +sll x8, x9, x10 # I-Type addi x5, x6, 10 diff --git a/src/lexer.l b/src/lexer.l index ac579e7..6daaf6b 100644 --- a/src/lexer.l +++ b/src/lexer.l @@ -7,16 +7,209 @@ extern size_t line_number; %} %option noyywrap %% -0x[0-9A-Fa-f]+ { yylval.i_val = atoi(yytext); return T_INTEGER; } --*[0-9]+ { yylval.i_val = atoi(yytext); return T_INTEGER; } -^[a-zA-Z0-9\.\_]+: { - yylval.symbol = strdup(yytext); - char *const colon = strrchr(yylval.symbol, ':'); - if (colon != NULL) { - *colon = '\0'; +\".*\" { + yylval.s_val = strdup(yytext+1); + char *const close_paren = strrchr(yylval.s_val, '"'); + if (close_paren != NULL) { + *close_paren = '\0'; } - return T_LABEL; + return T_STRING; } +, { return T_COMMA; } +\( { return T_OPENPAREN; } +\) { return T_CLOSEPAREN; } +#.* ; +[ \t] ; +\n { ++line_number; return T_ENDL; } +0x[0-9A-Fa-f]+ { yylval.i_val = atoi(yytext); return T_INTEGER; } +-*[0-9]+ { yylval.i_val = atoi(yytext); return T_INTEGER; } +lb { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +lh { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +lw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +ld { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +lbu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +lhu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +lwu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +sb { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +sh { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +sw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +sd { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +li { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +lui { 
yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +auipc { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +mv { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +sext.b { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +sext.h { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +sext.w { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +zext.b { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +zext.h { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +zext.w { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +rev8 { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +czero.eqz { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +czero.nez { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +addi { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +add { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +sh1add { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +sh2add { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +sh3add { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +add.wu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +sh1add.wu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +sh2add.wu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +sh3add.wu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +addiw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +addw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +sub { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +subw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +neg { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +negw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +mul { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +mulw { yylval.instruction = 
strdup(yytext); return T_INSTRUCTION; } +mulh { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +mulhu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +mulhsu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +div { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +divu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +rem { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +remu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +min { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +max { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +minu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +maxu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +seqz { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +snez { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +slti { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +slt { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +sltiu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +sltu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +bexti { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +bext { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +andi { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +and { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +andn { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +bclri { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +bclr { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +ori { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +or { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +orn { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +bseti { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +bset { yylval.instruction = 
strdup(yytext); return T_INSTRUCTION; } +xori { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +xor { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +xnor { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +binvi { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +binv { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +not { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +orc.b { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +slli { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +sll { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +slliw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +sllw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +slli.wu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +srli { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +srl { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +srliw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +srlw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +srai { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +sra { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +sraiw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +sraw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +rori { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +ror { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +rol { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +roriw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +rorw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +rolw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +clz { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +clzw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +ctz { yylval.instruction 
= strdup(yytext); return T_INSTRUCTION; } +ctzw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +cpop { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +cpopw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +j { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +jal { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +jr { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +jalr { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +call { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +tail { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +ret { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +beq { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +bne { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +blt { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +bgt { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +bge { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +ble { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +bltu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +bgtu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +bgeu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +bleu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +nop { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +ecall { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +ebreak { yylval.instruction = strdup(yytext); return T_INSTRUCTION; } +zero { yylval.reg = strdup(yytext); return T_REGISTER; } +ra { yylval.reg = strdup(yytext); return T_REGISTER; } +sp { yylval.reg = strdup(yytext); return T_REGISTER; } +gp { yylval.reg = strdup(yytext); return T_REGISTER; } +tp { yylval.reg = strdup(yytext); return T_REGISTER; } +t0 { yylval.reg = strdup(yytext); return T_REGISTER; } +t1 { yylval.reg = strdup(yytext); return 
T_REGISTER; } +t2 { yylval.reg = strdup(yytext); return T_REGISTER; } +s0 { yylval.reg = strdup(yytext); return T_REGISTER; } +fp { yylval.reg = strdup(yytext); return T_REGISTER; } +s1 { yylval.reg = strdup(yytext); return T_REGISTER; } +a0 { yylval.reg = strdup(yytext); return T_REGISTER; } +a1 { yylval.reg = strdup(yytext); return T_REGISTER; } +a2 { yylval.reg = strdup(yytext); return T_REGISTER; } +a3 { yylval.reg = strdup(yytext); return T_REGISTER; } +a4 { yylval.reg = strdup(yytext); return T_REGISTER; } +a5 { yylval.reg = strdup(yytext); return T_REGISTER; } +a6 { yylval.reg = strdup(yytext); return T_REGISTER; } +a7 { yylval.reg = strdup(yytext); return T_REGISTER; } +s2 { yylval.reg = strdup(yytext); return T_REGISTER; } +s3 { yylval.reg = strdup(yytext); return T_REGISTER; } +s4 { yylval.reg = strdup(yytext); return T_REGISTER; } +s5 { yylval.reg = strdup(yytext); return T_REGISTER; } +s6 { yylval.reg = strdup(yytext); return T_REGISTER; } +s7 { yylval.reg = strdup(yytext); return T_REGISTER; } +s8 { yylval.reg = strdup(yytext); return T_REGISTER; } +s9 { yylval.reg = strdup(yytext); return T_REGISTER; } +s10 { yylval.reg = strdup(yytext); return T_REGISTER; } +s11 { yylval.reg = strdup(yytext); return T_REGISTER; } +t3 { yylval.reg = strdup(yytext); return T_REGISTER; } +t4 { yylval.reg = strdup(yytext); return T_REGISTER; } +t5 { yylval.reg = strdup(yytext); return T_REGISTER; } +t6 { yylval.reg = strdup(yytext); return T_REGISTER; } +x0 { yylval.reg = strdup(yytext); return T_REGISTER; } +x1 { yylval.reg = strdup(yytext); return T_REGISTER; } +x2 { yylval.reg = strdup(yytext); return T_REGISTER; } +x3 { yylval.reg = strdup(yytext); return T_REGISTER; } +x4 { yylval.reg = strdup(yytext); return T_REGISTER; } +x5 { yylval.reg = strdup(yytext); return T_REGISTER; } +x6 { yylval.reg = strdup(yytext); return T_REGISTER; } +x7 { yylval.reg = strdup(yytext); return T_REGISTER; } +x8 { yylval.reg = strdup(yytext); return T_REGISTER; } +x9 { yylval.reg = 
strdup(yytext); return T_REGISTER; } +x10 { yylval.reg = strdup(yytext); return T_REGISTER; } +x11 { yylval.reg = strdup(yytext); return T_REGISTER; } +x12 { yylval.reg = strdup(yytext); return T_REGISTER; } +x13 { yylval.reg = strdup(yytext); return T_REGISTER; } +x14 { yylval.reg = strdup(yytext); return T_REGISTER; } +x15 { yylval.reg = strdup(yytext); return T_REGISTER; } +x16 { yylval.reg = strdup(yytext); return T_REGISTER; } +x17 { yylval.reg = strdup(yytext); return T_REGISTER; } +x18 { yylval.reg = strdup(yytext); return T_REGISTER; } +x19 { yylval.reg = strdup(yytext); return T_REGISTER; } +x20 { yylval.reg = strdup(yytext); return T_REGISTER; } +x21 { yylval.reg = strdup(yytext); return T_REGISTER; } +x22 { yylval.reg = strdup(yytext); return T_REGISTER; } +x23 { yylval.reg = strdup(yytext); return T_REGISTER; } +x24 { yylval.reg = strdup(yytext); return T_REGISTER; } +x25 { yylval.reg = strdup(yytext); return T_REGISTER; } +x26 { yylval.reg = strdup(yytext); return T_REGISTER; } +x27 { yylval.reg = strdup(yytext); return T_REGISTER; } +x28 { yylval.reg = strdup(yytext); return T_REGISTER; } +x29 { yylval.reg = strdup(yytext); return T_REGISTER; } +x30 { yylval.reg = strdup(yytext); return T_REGISTER; } +x31 { yylval.reg = strdup(yytext); return T_REGISTER; } .align { yylval.directive = strdup(yytext); return T_DIRECTIVE; } .globl { yylval.directive = strdup(yytext); return T_DIRECTIVE; } .local { yylval.directive = strdup(yytext); return T_DIRECTIVE; } @@ -53,20 +246,14 @@ extern size_t line_number; %tls_ie_pcrel_hi { yylval.modifier = strdup(yytext); return T_MODIFIER; } %tls_gd_pcrel_hi { yylval.modifier = strdup(yytext); return T_MODIFIER; } %got_pcrel_hi { yylval.modifier = strdup(yytext); return T_MODIFIER; } -[a-zA-Z\_\.]+[a-zA-Z0-9\_\.]* { yylval.modifier = strdup(yytext); return T_SYMBOL; } -\".*\" { - yylval.s_val = strdup(yytext+1); - char *const close_paren = strrchr(yylval.s_val, '"'); - if (close_paren != NULL) { - *close_paren = '\0'; 
+^[a-zA-Z0-9\.\_]+: { + yylval.symbol = strdup(yytext); + char *const colon = strrchr(yylval.symbol, ':'); + if (colon != NULL) { + *colon = '\0'; } - return T_STRING; + return T_LABEL; } -, { return T_COMMA; } -\( { return T_OPENPAREN; } -\) { return T_CLOSEPAREN; } -#.* ; -[ \t] ; -\n { ++line_number; return T_ENDL; } +[a-zA-Z\_\.]+[a-zA-Z0-9\_\.]* { yylval.modifier = strdup(yytext); return T_SYMBOL; } . ; %% diff --git a/src/parser.y b/src/parser.y index 5f7b4bb..6998ddd 100644 --- a/src/parser.y +++ b/src/parser.y @@ -4,7 +4,7 @@ #include <stdbool.h> #include <getopt.h> -size_t line_number = 1; +size_t line_number = 0; extern int yylex(); extern int yyparse(); @@ -18,18 +18,22 @@ void yyerror(const char *s); char *symbol; char *modifier; char *directive; + char *reg; + char *instruction; } -%token <i_val> T_INTEGER -%token <s_val> T_STRING -%token <symbol> T_SYMBOL -%token <directive> T_DIRECTIVE -%token <symbol> T_LABEL -%token <modifier> T_MODIFIER -%token T_ENDL -%token T_COMMA -%token T_OPENPAREN -%token T_CLOSEPAREN +%token <i_val> T_INTEGER +%token <s_val> T_STRING +%token <symbol> T_SYMBOL +%token <directive> T_DIRECTIVE +%token <symbol> T_LABEL +%token <modifier> T_MODIFIER +%token <reg> T_REGISTER +%token <instruction> T_INSTRUCTION +%token T_ENDL +%token T_COMMA +%token T_OPENPAREN +%token T_CLOSEPAREN %% asm: statements @@ -54,8 +58,9 @@ label: }; instructions: - rb_type - | i_type + r_type + | ib_type_symbol + | i_type_integer | s_type | u_type | j_type @@ -64,50 +69,74 @@ instructions: | pseudo_type ; -rb_type: - T_SYMBOL T_SYMBOL T_COMMA T_SYMBOL T_COMMA T_SYMBOL T_ENDL +r_type: + T_INSTRUCTION T_REGISTER T_COMMA T_REGISTER T_COMMA T_REGISTER T_ENDL { - printf("Read instruction: %s(%s, %s, %s)\n", $1, $2, $4, $6); + printf("Read R-Type instruction: %s(%s, %s, %s)\n", $1, $2, $4, $6); }; -i_type: - T_SYMBOL T_SYMBOL T_COMMA T_SYMBOL T_COMMA T_INTEGER T_ENDL +ib_type_symbol: + T_INSTRUCTION T_REGISTER T_COMMA T_REGISTER T_COMMA T_SYMBOL T_ENDL { - 
printf("Read instruction: %s(%s, %s, %d)\n", $1, $2, $4, $6); + printf("Read I/B-Type instruction: %s(%s, %s, %s)\n", $1, $2, $4, $6); + }; + +i_type_integer: + T_INSTRUCTION T_REGISTER T_COMMA T_REGISTER T_COMMA T_INTEGER T_ENDL + { + printf("Read I-Type instruction: %s(%s, %s, %d)\n", $1, $2, $4, $6); }; i_type_modifier: - T_SYMBOL T_SYMBOL T_COMMA T_SYMBOL T_COMMA T_MODIFIER T_OPENPAREN T_SYMBOL T_CLOSEPAREN T_ENDL + T_INSTRUCTION T_REGISTER T_COMMA T_REGISTER T_COMMA T_MODIFIER T_OPENPAREN T_SYMBOL T_CLOSEPAREN T_ENDL { - printf("Read instruction: %s(%s, %s, %s of %s)\n", $1, $2, $4, $6, $8); + printf("Read I-Type instruction: %s(%s, %s, %s of %s)\n", $1, $2, $4, $6, $8); }; s_type: - T_SYMBOL T_SYMBOL T_COMMA T_INTEGER T_OPENPAREN T_SYMBOL T_CLOSEPAREN T_ENDL + T_INSTRUCTION T_REGISTER T_COMMA T_INTEGER T_OPENPAREN T_REGISTER T_CLOSEPAREN T_ENDL { - printf("Read instruction: %s(%s, %s + %d)\n", $1, $2, $6, $4); + printf("Read S-Type instruction: %s(%s, %s + %d)\n", $1, $2, $6, $4); }; s_type_modifier: - T_SYMBOL T_SYMBOL T_COMMA T_MODIFIER T_OPENPAREN T_SYMBOL T_CLOSEPAREN T_ENDL + T_INSTRUCTION T_REGISTER T_COMMA T_MODIFIER T_OPENPAREN T_SYMBOL T_CLOSEPAREN T_ENDL { - printf("Read instruction: %s(%s, %s of %s)\n", $1, $2, $4, $6); + printf("Read S-Type instruction: %s(%s, %s of %s)\n", $1, $2, $4, $6); }; u_type: - T_SYMBOL T_SYMBOL T_COMMA T_INTEGER T_ENDL + T_INSTRUCTION T_REGISTER T_COMMA T_INTEGER T_ENDL { - printf("Read instruction: %s(%s, %d)\n", $1, $2, $4); + printf("Read U-Type instruction: %s(%s, %d)\n", $1, $2, $4); }; j_type: - T_SYMBOL T_SYMBOL T_COMMA T_SYMBOL T_ENDL + T_INSTRUCTION T_REGISTER T_COMMA T_SYMBOL T_ENDL { - printf("Read instruction: %s(%s, %s)\n", $1, $2, $4); + printf("Read J-Type instruction: %s(%s, %s)\n", $1, $2, $4); }; pseudo_type: - T_SYMBOL T_SYMBOL T_ENDL + pseudo_one_reg + | pseudo_two_reg + | pseudo_one_label + ; + +pseudo_one_reg: + T_INSTRUCTION T_REGISTER T_ENDL + { + printf("Read Pseudo Instruction: %s(%s)\n", $1, 
$2); + }; + +pseudo_two_reg: + T_INSTRUCTION T_REGISTER T_COMMA T_REGISTER T_ENDL + { + printf("Read Pseudo Instruction: %s(%s, %s)\n", $1, $2, $4); + }; + +pseudo_one_label: + T_INSTRUCTION T_SYMBOL T_ENDL { printf("Read Pseudo Instruction: %s(%s)\n", $1, $2); }; |
