summary | refs | log | tree | commit | diff
path: root/src/parser.y
diff options
context:
space:
mode:
author: Jacob McDonnell <jacob@jacobmcdonnell.com>, 2026-03-15 20:59:54 -0400
committer: Jacob McDonnell <jacob@jacobmcdonnell.com>, 2026-03-15 20:59:54 -0400
commit: 7025f30be3b6cccf9f419daec9f0b7aeeaa2d6d3 (patch)
tree: 75bf8ff2b8bfcd5f7766203e63c2a4be67e99546 /src/parser.y
parent: efb0a239c4aaee370d97caf216859a724d7f72bd (diff)
refactor: Explicit register names & instructions
Explicit tokenization of register names and instructions. This forces stricter syntax conformance and prevents errors from slipping through. This also prevents the user from using these keywords as names other than for their intended use.
Diffstat (limited to 'src/parser.y')
-rw-r--r-- src/parser.y 89
1 files changed, 59 insertions, 30 deletions
diff --git a/src/parser.y b/src/parser.y
index 5f7b4bb..6998ddd 100644
--- a/src/parser.y
+++ b/src/parser.y
@@ -4,7 +4,7 @@
#include <stdbool.h>
#include <getopt.h>
-size_t line_number = 1;
+size_t line_number = 0;
extern int yylex();
extern int yyparse();
@@ -18,18 +18,22 @@ void yyerror(const char *s);
char *symbol;
char *modifier;
char *directive;
+ char *reg;
+ char *instruction;
}
-%token <i_val> T_INTEGER
-%token <s_val> T_STRING
-%token <symbol> T_SYMBOL
-%token <directive> T_DIRECTIVE
-%token <symbol> T_LABEL
-%token <modifier> T_MODIFIER
-%token T_ENDL
-%token T_COMMA
-%token T_OPENPAREN
-%token T_CLOSEPAREN
+%token <i_val> T_INTEGER
+%token <s_val> T_STRING
+%token <symbol> T_SYMBOL
+%token <directive> T_DIRECTIVE
+%token <symbol> T_LABEL
+%token <modifier> T_MODIFIER
+%token <reg> T_REGISTER
+%token <instruction> T_INSTRUCTION
+%token T_ENDL
+%token T_COMMA
+%token T_OPENPAREN
+%token T_CLOSEPAREN
%%
asm:
statements
@@ -54,8 +58,9 @@ label:
};
instructions:
- rb_type
- | i_type
+ r_type
+ | ib_type_symbol
+ | i_type_integer
| s_type
| u_type
| j_type
@@ -64,50 +69,74 @@ instructions:
| pseudo_type
;
-rb_type:
- T_SYMBOL T_SYMBOL T_COMMA T_SYMBOL T_COMMA T_SYMBOL T_ENDL
+r_type:
+ T_INSTRUCTION T_REGISTER T_COMMA T_REGISTER T_COMMA T_REGISTER T_ENDL
{
- printf("Read instruction: %s(%s, %s, %s)\n", $1, $2, $4, $6);
+ printf("Read R-Type instruction: %s(%s, %s, %s)\n", $1, $2, $4, $6);
};
-i_type:
- T_SYMBOL T_SYMBOL T_COMMA T_SYMBOL T_COMMA T_INTEGER T_ENDL
+ib_type_symbol:
+ T_INSTRUCTION T_REGISTER T_COMMA T_REGISTER T_COMMA T_SYMBOL T_ENDL
{
- printf("Read instruction: %s(%s, %s, %d)\n", $1, $2, $4, $6);
+ printf("Read I/B-Type instruction: %s(%s, %s, %s)\n", $1, $2, $4, $6);
+ };
+
+i_type_integer:
+ T_INSTRUCTION T_REGISTER T_COMMA T_REGISTER T_COMMA T_INTEGER T_ENDL
+ {
+ printf("Read I-Type instruction: %s(%s, %s, %d)\n", $1, $2, $4, $6);
};
i_type_modifier:
- T_SYMBOL T_SYMBOL T_COMMA T_SYMBOL T_COMMA T_MODIFIER T_OPENPAREN T_SYMBOL T_CLOSEPAREN T_ENDL
+ T_INSTRUCTION T_REGISTER T_COMMA T_REGISTER T_COMMA T_MODIFIER T_OPENPAREN T_SYMBOL T_CLOSEPAREN T_ENDL
{
- printf("Read instruction: %s(%s, %s, %s of %s)\n", $1, $2, $4, $6, $8);
+ printf("Read I-Type instruction: %s(%s, %s, %s of %s)\n", $1, $2, $4, $6, $8);
};
s_type:
- T_SYMBOL T_SYMBOL T_COMMA T_INTEGER T_OPENPAREN T_SYMBOL T_CLOSEPAREN T_ENDL
+ T_INSTRUCTION T_REGISTER T_COMMA T_INTEGER T_OPENPAREN T_REGISTER T_CLOSEPAREN T_ENDL
{
- printf("Read instruction: %s(%s, %s + %d)\n", $1, $2, $6, $4);
+ printf("Read S-Type instruction: %s(%s, %s + %d)\n", $1, $2, $6, $4);
};
s_type_modifier:
- T_SYMBOL T_SYMBOL T_COMMA T_MODIFIER T_OPENPAREN T_SYMBOL T_CLOSEPAREN T_ENDL
+ T_INSTRUCTION T_REGISTER T_COMMA T_MODIFIER T_OPENPAREN T_SYMBOL T_CLOSEPAREN T_ENDL
{
- printf("Read instruction: %s(%s, %s of %s)\n", $1, $2, $4, $6);
+ printf("Read S-Type instruction: %s(%s, %s of %s)\n", $1, $2, $4, $6);
};
u_type:
- T_SYMBOL T_SYMBOL T_COMMA T_INTEGER T_ENDL
+ T_INSTRUCTION T_REGISTER T_COMMA T_INTEGER T_ENDL
{
- printf("Read instruction: %s(%s, %d)\n", $1, $2, $4);
+ printf("Read U-Type instruction: %s(%s, %d)\n", $1, $2, $4);
};
j_type:
- T_SYMBOL T_SYMBOL T_COMMA T_SYMBOL T_ENDL
+ T_INSTRUCTION T_REGISTER T_COMMA T_SYMBOL T_ENDL
{
- printf("Read instruction: %s(%s, %s)\n", $1, $2, $4);
+ printf("Read J-Type instruction: %s(%s, %s)\n", $1, $2, $4);
};
pseudo_type:
- T_SYMBOL T_SYMBOL T_ENDL
+ pseudo_one_reg
+ | pseudo_two_reg
+ | pseudo_one_label
+ ;
+
+pseudo_one_reg:
+ T_INSTRUCTION T_REGISTER T_ENDL
+ {
+ printf("Read Pseudo Instruction: %s(%s)\n", $1, $2);
+ };
+
+pseudo_two_reg:
+ T_INSTRUCTION T_REGISTER T_COMMA T_REGISTER T_ENDL
+ {
+ printf("Read Pseudo Instruction: %s(%s, %s)\n", $1, $2, $4);
+ };
+
+pseudo_one_label:
+ T_INSTRUCTION T_SYMBOL T_ENDL
{
printf("Read Pseudo Instruction: %s(%s)\n", $1, $2);
};