summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJacob McDonnell <jacob@jacobmcdonnell.com>2026-03-14 18:44:18 -0400
committerJacob McDonnell <jacob@jacobmcdonnell.com>2026-03-14 18:44:18 -0400
commit0c81ee06d19d1d7f67dab3c2c9da268b0c55e3c0 (patch)
tree9a10aae3c5de847ca1235c048d886c5b64929d80
parentecffc124bbc5d6c9c089eb6d914565119d254a4d (diff)
feat: Initial parsing of instructions
Add initial support for parsing basic instructions. Known issues: hexadecimal literals are parsed incorrectly, and immediates that reference labels (e.g. movl $str, %ecx) are not yet handled.
-rw-r--r--.gitignore4
-rw-r--r--Makefile36
-rw-r--r--examples/instructions.s8
-rw-r--r--src/lexer.l28
-rw-r--r--src/parser.y101
5 files changed, 177 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6cbb1b5
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+jas
+src/*.o
+src/parser.tab.*
+src/lex.yy.c
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..c10ffc6
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,36 @@
+# Builds the jas executable from the bison grammar and the flex lexer
+# that live in $(SRC_DIR).
+CC = gcc-15
+SRC_DIR = src
+PROG = jas
+
+# Generated C sources. bison -d also writes parser.tab.h next to
+# parser.tab.c, which the lexer includes.
+SRCS = $(SRC_DIR)/lex.yy.c \
+       $(SRC_DIR)/parser.tab.c
+
+# One object per source, whatever the source's extension is.
+OBJS = $(addsuffix .o,$(basename $(SRCS)))
+
+# Default goal. Declared in .PHONY below, so it needs a real rule.
+all: $(PROG)
+
+$(PROG): $(OBJS)
+	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)
+
+$(SRC_DIR)/parser.tab.c: $(SRC_DIR)/parser.y
+	bison $(BISONFLAGS) -d $< -o $@
+
+# The lexer includes parser.tab.h, so the parser must be generated first.
+$(SRC_DIR)/lex.yy.c: $(SRC_DIR)/lexer.l $(SRC_DIR)/parser.tab.c
+	flex $(FLEXFLAGS) -o $@ -l $<
+
+%.o: %.c
+	$(CC) $(CFLAGS) -c $< -o $@
+
+%.o: %.cpp
+	$(CXX) $(CXXFLAGS) -c $< -o $@
+
+%.o: %.cxx
+	$(CXX) $(CXXFLAGS) -c $< -o $@
+
+%.o: %.cc
+	$(CXX) $(CXXFLAGS) -c $< -o $@
+
+# Only regular files are removed here, so -r is not needed.
+clean:
+	rm -f $(PROG) $(OBJS) $(SRC_DIR)/lex.yy.c $(SRC_DIR)/parser.tab.c $(SRC_DIR)/parser.tab.h
+
+.PHONY: all clean
+
diff --git a/examples/instructions.s b/examples/instructions.s
new file mode 100644
index 0000000..f16e6c1
--- /dev/null
+++ b/examples/instructions.s
@@ -0,0 +1,8 @@
+movl $4, %eax
+movl $1, %ebx
+movl $1, %eax
+movl $0, %ebx
+int $0x80
+
+movl $str, %ecx
+movl $str_len, %edx
diff --git a/src/lexer.l b/src/lexer.l
new file mode 100644
index 0000000..46f6bc6
--- /dev/null
+++ b/src/lexer.l
@@ -0,0 +1,28 @@
+%{
+/*
+ * Lexer for the jas assembler (AT&T-style operands).
+ *
+ * Tokens produced:
+ *   T_INSTRUCTION  alphabetic word at the very start of a line
+ *   T_LABEL        symbol name, optionally prefixed with '%' (e.g. %eax)
+ *   T_INTEGER      decimal or 0x-prefixed hexadecimal literal (yylval.i_val)
+ *   T_FLOAT        decimal literal containing a '.' (yylval.f_val)
+ *   T_STRING       double-quoted string without its quotes (yylval.s_val)
+ *   T_COMMA        ','
+ *   T_IMMEDIATE    '$'
+ *   T_ENDL         newline; also advances line_number
+ *
+ * Blanks and any other character are skipped silently.
+ * Strings stored in yylval come from strdup(); the parser owns them.
+ */
+#include <string.h>
+#include <stdlib.h>
+#include "parser.tab.h"
+extern int yylex();
+extern size_t line_number;
+%}
+%option noyywrap
+%%
+^[a-zA-Z]+ { yylval.symbol = strdup(yytext); return T_INSTRUCTION; }
+%*[a-zA-Z\_\-]+[a-zA-Z0-9\_\-]* { yylval.symbol = strdup(yytext); return T_LABEL; }
+0x[0-9A-Fa-f]+ {
+    /* atoi() stops at the 'x' and yields 0, so convert explicitly as base 16. */
+    yylval.i_val = (int)strtoul(yytext, NULL, 16);
+    return T_INTEGER;
+}
+[0-9]+ { yylval.i_val = atoi(yytext); return T_INTEGER; }
+-?([0-9]+\.[0-9]*|\.[0-9]+) {
+    /* Requires a '.' and at least one digit, so "" and "." are not floats.
+       T_FLOAT is declared <f_val>, so the value must be stored there. */
+    yylval.f_val = (float)atof(yytext);
+    return T_FLOAT;
+}
+\"(\\.|[^"\\\n])*\" {
+    /* The match ends at the first unescaped quote, so two strings on one
+       line stay separate. Copy the text between the quotes;
+       yytext[yyleng - 1] is the closing quote. */
+    yylval.s_val = strdup(yytext + 1);
+    if (yylval.s_val != NULL) {
+        yylval.s_val[yyleng - 2] = '\0';
+    }
+    return T_STRING;
+}
+, { return T_COMMA; }
+\$ { return T_IMMEDIATE; }
+[ \t] ;
+\n { ++line_number; return T_ENDL; }
+. ;
+%%
diff --git a/src/parser.y b/src/parser.y
new file mode 100644
index 0000000..719643d
--- /dev/null
+++ b/src/parser.y
@@ -0,0 +1,101 @@
+%{
+/*
+ * Parser prologue: C declarations shared with the generated parser and
+ * with main()/yyerror() at the bottom of this file.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <getopt.h>
+
+/* Input line counter, starting at 1. The lexer increments it on every
+   newline and yyerror() prints it. */
+size_t line_number = 1;
+
+extern int yylex();
+extern int yyparse();
+extern FILE *yyin;
+void yyerror(const char *s);
+%}
+
+/* Semantic value carried by a token; the member used depends on the token type. */
+%union {
+ /* value of a T_INTEGER literal */
+ int i_val;
+ /* value of a T_FLOAT literal */
+ float f_val;
+ /* contents of a T_STRING (allocated by the lexer with strdup) */
+ char *s_val;
+ /* text of a T_LABEL or T_INSTRUCTION (allocated by the lexer with strdup) */
+ char *symbol;
+}
+
+/* Tokens with a semantic value. */
+%token <i_val> T_INTEGER
+%token <f_val> T_FLOAT
+%token <s_val> T_STRING
+%token <symbol> T_LABEL
+%token <symbol> T_INSTRUCTION
+/* Value-less tokens: newline, ',' and the '$' immediate prefix. */
+%token T_ENDL
+%token T_COMMA
+%token T_IMMEDIATE
+%%
+/*
+ * Grammar: a program is a sequence of newline-terminated statements.
+ * Each statement is either empty or one instruction with one or two
+ * operands. An operand is a symbol (T_LABEL, which also covers %registers)
+ * or an immediate: '$' followed by an integer or by a symbol.
+ *
+ * Every T_INSTRUCTION / T_LABEL value was allocated by the lexer with
+ * strdup(), so each action frees the strings once it has printed them.
+ */
+asm:
+    statements
+    ;
+
+statements:
+    statements statement
+    | statement
+    ;
+
+statement:
+    instruction_one_param_symbol T_ENDL
+    | instruction_one_param_immediate T_ENDL
+    | instruction_one_param_immediate_symbol T_ENDL
+    | instruction_two_param_symbols T_ENDL
+    | instruction_symbol_immediate T_ENDL
+    | instruction_immediate_symbol T_ENDL
+    | instruction_immediate_symbol_symbol T_ENDL
+    | T_ENDL
+    ;
+
+/* op sym */
+instruction_one_param_symbol:
+    T_INSTRUCTION T_LABEL {
+        printf("Found instruction %s(%s)\n", $1, $2);
+        free($1);
+        free($2);
+    }
+    ;
+
+/* op $int */
+instruction_one_param_immediate:
+    T_INSTRUCTION T_IMMEDIATE T_INTEGER {
+        printf("Found instruction %s(%d)\n", $1, $3);
+        free($1);
+    }
+    ;
+
+/* op $sym */
+instruction_one_param_immediate_symbol:
+    T_INSTRUCTION T_IMMEDIATE T_LABEL {
+        printf("Found instruction %s($%s)\n", $1, $3);
+        free($1);
+        free($3);
+    }
+    ;
+
+/* op sym, sym */
+instruction_two_param_symbols:
+    T_INSTRUCTION T_LABEL T_COMMA T_LABEL {
+        printf("Found instruction %s(%s, %s)\n", $1, $2, $4);
+        free($1);
+        free($2);
+        free($4);
+    }
+    ;
+
+/* op sym, $int */
+instruction_symbol_immediate:
+    T_INSTRUCTION T_LABEL T_COMMA T_IMMEDIATE T_INTEGER {
+        printf("Found instruction %s(%s, %d)\n", $1, $2, $5);
+        free($1);
+        free($2);
+    }
+    ;
+
+/* op $int, sym */
+instruction_immediate_symbol:
+    T_INSTRUCTION T_IMMEDIATE T_INTEGER T_COMMA T_LABEL {
+        printf("Found instruction %s(%d, %s)\n", $1, $3, $5);
+        free($1);
+        free($5);
+    }
+    ;
+
+/* op $sym, sym  (e.g. movl $str, %ecx) */
+instruction_immediate_symbol_symbol:
+    T_INSTRUCTION T_IMMEDIATE T_LABEL T_COMMA T_LABEL {
+        printf("Found instruction %s($%s, %s)\n", $1, $3, $5);
+        free($1);
+        free($3);
+        free($5);
+    }
+    ;
+%%
+
+/*
+ * Error callback invoked by the generated parser.
+ *
+ * Prints the message together with the current value of line_number.
+ * The output goes to stderr so diagnostics do not mix with the
+ * "Found instruction" lines on stdout.
+ *
+ * s: message text supplied by the parser.
+ */
+void yyerror(const char *s) {
+    /* line_number is a size_t, whose matching conversion is %zu. */
+    fprintf(stderr, "Parser Error: %zu: %s\n", line_number, s);
+}
+
+/*
+ * Program entry point.
+ *
+ * Options:
+ *   -o FILE   read the assembly source from FILE instead of stdin.
+ *
+ * Returns EXIT_SUCCESS when the whole input parsed, EXIT_FAILURE when the
+ * input file could not be opened or yyparse() reported an error.
+ */
+int main(int argc, char *argv[]) {
+    FILE *input = stdin;
+    int opt;
+
+    while ((opt = getopt(argc, argv, "o:")) != -1) {
+        switch (opt) {
+        case 'o':
+            /* A repeated -o replaces the earlier file; do not leak it. */
+            if (input != stdin) {
+                fclose(input);
+            }
+            input = fopen(optarg, "r");
+            if (input == NULL) {
+                perror(argv[0]);
+                return EXIT_FAILURE;
+            }
+            break;
+        default:
+            /* getopt() has already reported the unknown option. */
+            break;
+        }
+    }
+
+    yyin = input;
+    const int parse_status = yyparse();
+
+    if (input != stdin) {
+        fclose(input);
+    }
+
+    /* yyparse() returns 0 only when the input was accepted. */
+    return parse_status == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
+}
+