summary | refs | log | tree | commit | diff
path: root/src/lexer.l
diff options
context:
space:
mode:
author Jacob McDonnell <jacob@jacobmcdonnell.com> 2026-03-15 20:59:54 -0400
committer Jacob McDonnell <jacob@jacobmcdonnell.com> 2026-03-15 20:59:54 -0400
commit 7025f30be3b6cccf9f419daec9f0b7aeeaa2d6d3 (patch)
tree 75bf8ff2b8bfcd5f7766203e63c2a4be67e99546 /src/lexer.l
parent efb0a239c4aaee370d97caf216859a724d7f72bd (diff)
refactor: Explicit register names & instructions
Explicit tokenization of register names and instructions. This forces stricter syntax conformance and prevents errors from slipping through. This also prevents the user from using these keywords as names other than for their intended use.
Diffstat (limited to 'src/lexer.l')
-rw-r--r-- src/lexer.l 229
1 files changed, 208 insertions, 21 deletions
diff --git a/src/lexer.l b/src/lexer.l
index ac579e7..6daaf6b 100644
--- a/src/lexer.l
+++ b/src/lexer.l
@@ -7,16 +7,209 @@ extern size_t line_number;
%}
%option noyywrap
%%
-0x[0-9A-Fa-f]+ { yylval.i_val = atoi(yytext); return T_INTEGER; }
--*[0-9]+ { yylval.i_val = atoi(yytext); return T_INTEGER; }
-^[a-zA-Z0-9\.\_]+: {
- yylval.symbol = strdup(yytext);
- char *const colon = strrchr(yylval.symbol, ':');
- if (colon != NULL) {
- *colon = '\0';
+\"([^\"\\\n]|\\.)*\" { /* stop at the first unescaped quote so two strings on one line stay separate tokens */
+ yylval.s_val = strdup(yytext+1);
+ char *const closing_quote = strrchr(yylval.s_val, '"');
+ if (closing_quote != NULL) {
+ *closing_quote = '\0';
}
- return T_LABEL;
+ return T_STRING;
}
+, { return T_COMMA; }
+\( { return T_OPENPAREN; }
+\) { return T_CLOSEPAREN; }
+#.* ;
+[ \t] ;
+\n { ++line_number; return T_ENDL; }
+0x[0-9A-Fa-f]+ { /* atoi() stops at the 'x' and yields 0; parse the literal as base 16 */ yylval.i_val = strtol(yytext, NULL, 16); return T_INTEGER; }
+-*[0-9]+ { yylval.i_val = atoi(yytext); return T_INTEGER; }
+lb { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+lh { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+lw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+ld { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+lbu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+lhu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+lwu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+sb { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+sh { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+sw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+sd { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+li { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+lui { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+auipc { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+mv { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+sext\.b { /* dot escaped: a bare '.' matches any character, so "sext_b" or "sext b" lexed as an instruction */ yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+sext\.h { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+sext\.w { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+zext\.b { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+zext\.h { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+zext\.w { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+rev8 { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+czero\.eqz { /* dot escaped: a bare '.' matches any character, not just a literal dot */ yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+czero\.nez { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+addi { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+add { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+sh1add { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+sh2add { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+sh3add { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+add\.wu { /* dot escaped: a bare '.' matches any character, so "add_wu" lexed as an instruction */ yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+sh1add\.wu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+sh2add\.wu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+sh3add\.wu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+addiw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+addw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+sub { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+subw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+neg { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+negw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+mul { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+mulw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+mulh { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+mulhu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+mulhsu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+div { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+divu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+rem { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+remu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+min { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+max { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+minu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+maxu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+seqz { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+snez { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+slti { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+slt { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+sltiu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+sltu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+bexti { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+bext { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+andi { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+and { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+andn { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+bclri { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+bclr { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+ori { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+or { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+orn { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+bseti { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+bset { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+xori { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+xor { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+xnor { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+binvi { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+binv { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+not { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+orc\.b { /* dot escaped: a bare '.' matches any character, so "orc_b" lexed as an instruction */ yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+slli { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+sll { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+slliw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+sllw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+slli\.wu { /* dot escaped: a bare '.' matches any character, so "slli_wu" lexed as an instruction */ yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+srli { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+srl { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+srliw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+srlw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+srai { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+sra { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+sraiw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+sraw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+rori { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+ror { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+rol { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+roriw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+rorw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+rolw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+clz { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+clzw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+ctz { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+ctzw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+cpop { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+cpopw { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+j { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+jal { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+jr { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+jalr { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+call { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+tail { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+ret { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+beq { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+bne { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+blt { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+bgt { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+bge { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+ble { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+bltu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+bgtu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+bgeu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+bleu { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+nop { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+ecall { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+ebreak { yylval.instruction = strdup(yytext); return T_INSTRUCTION; }
+zero { yylval.reg = strdup(yytext); return T_REGISTER; }
+ra { yylval.reg = strdup(yytext); return T_REGISTER; }
+sp { yylval.reg = strdup(yytext); return T_REGISTER; }
+gp { yylval.reg = strdup(yytext); return T_REGISTER; }
+tp { yylval.reg = strdup(yytext); return T_REGISTER; }
+t0 { yylval.reg = strdup(yytext); return T_REGISTER; }
+t1 { yylval.reg = strdup(yytext); return T_REGISTER; }
+t2 { yylval.reg = strdup(yytext); return T_REGISTER; }
+s0 { yylval.reg = strdup(yytext); return T_REGISTER; }
+fp { yylval.reg = strdup(yytext); return T_REGISTER; }
+s1 { yylval.reg = strdup(yytext); return T_REGISTER; }
+a0 { yylval.reg = strdup(yytext); return T_REGISTER; }
+a1 { yylval.reg = strdup(yytext); return T_REGISTER; }
+a2 { yylval.reg = strdup(yytext); return T_REGISTER; }
+a3 { yylval.reg = strdup(yytext); return T_REGISTER; }
+a4 { yylval.reg = strdup(yytext); return T_REGISTER; }
+a5 { yylval.reg = strdup(yytext); return T_REGISTER; }
+a6 { yylval.reg = strdup(yytext); return T_REGISTER; }
+a7 { yylval.reg = strdup(yytext); return T_REGISTER; }
+s2 { yylval.reg = strdup(yytext); return T_REGISTER; }
+s3 { yylval.reg = strdup(yytext); return T_REGISTER; }
+s4 { yylval.reg = strdup(yytext); return T_REGISTER; }
+s5 { yylval.reg = strdup(yytext); return T_REGISTER; }
+s6 { yylval.reg = strdup(yytext); return T_REGISTER; }
+s7 { yylval.reg = strdup(yytext); return T_REGISTER; }
+s8 { yylval.reg = strdup(yytext); return T_REGISTER; }
+s9 { yylval.reg = strdup(yytext); return T_REGISTER; }
+s10 { yylval.reg = strdup(yytext); return T_REGISTER; }
+s11 { yylval.reg = strdup(yytext); return T_REGISTER; }
+t3 { yylval.reg = strdup(yytext); return T_REGISTER; }
+t4 { yylval.reg = strdup(yytext); return T_REGISTER; }
+t5 { yylval.reg = strdup(yytext); return T_REGISTER; }
+t6 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x0 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x1 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x2 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x3 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x4 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x5 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x6 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x7 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x8 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x9 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x10 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x11 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x12 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x13 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x14 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x15 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x16 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x17 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x18 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x19 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x20 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x21 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x22 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x23 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x24 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x25 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x26 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x27 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x28 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x29 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x30 { yylval.reg = strdup(yytext); return T_REGISTER; }
+x31 { yylval.reg = strdup(yytext); return T_REGISTER; }
.align { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
.globl { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
.local { yylval.directive = strdup(yytext); return T_DIRECTIVE; }
@@ -53,20 +246,14 @@ extern size_t line_number;
%tls_ie_pcrel_hi { yylval.modifier = strdup(yytext); return T_MODIFIER; }
%tls_gd_pcrel_hi { yylval.modifier = strdup(yytext); return T_MODIFIER; }
%got_pcrel_hi { yylval.modifier = strdup(yytext); return T_MODIFIER; }
-[a-zA-Z\_\.]+[a-zA-Z0-9\_\.]* { yylval.modifier = strdup(yytext); return T_SYMBOL; }
-\".*\" {
- yylval.s_val = strdup(yytext+1);
- char *const close_paren = strrchr(yylval.s_val, '"');
- if (close_paren != NULL) {
- *close_paren = '\0';
+^[a-zA-Z0-9\.\_]+: {
+ yylval.symbol = strdup(yytext);
+ char *const colon = strrchr(yylval.symbol, ':');
+ if (colon != NULL) {
+ *colon = '\0';
}
- return T_STRING;
+ return T_LABEL;
}
-, { return T_COMMA; }
-\( { return T_OPENPAREN; }
-\) { return T_CLOSEPAREN; }
-#.* ;
-[ \t] ;
-\n { ++line_number; return T_ENDL; }
+[a-zA-Z\_\.]+[a-zA-Z0-9\_\.]* { yylval.modifier = strdup(yytext); return T_SYMBOL; }
. ;
%%