diff options
| author | Jacob McDonnell <jacob@jacobmcdonnell.com> | 2026-03-14 18:44:18 -0400 |
|---|---|---|
| committer | Jacob McDonnell <jacob@jacobmcdonnell.com> | 2026-03-14 18:44:18 -0400 |
| commit | 0c81ee06d19d1d7f67dab3c2c9da268b0c55e3c0 (patch) | |
| tree | 9a10aae3c5de847ca1235c048d886c5b64929d80 | |
| parent | ecffc124bbc5d6c9c089eb6d914565119d254a4d (diff) | |
feat: Initial parsing of instructions
Initial ability to parse basic instructions. Currently hex numbers are
broken and immediates with labels (movl $str, %ecx) are broken.
| -rw-r--r-- | .gitignore | 4 | ||||
| -rw-r--r-- | Makefile | 36 | ||||
| -rw-r--r-- | examples/instructions.s | 8 | ||||
| -rw-r--r-- | src/lexer.l | 28 | ||||
| -rw-r--r-- | src/parser.y | 101 |
5 files changed, 177 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6cbb1b5
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+jas
+src/*.o
+src/parser.tab.*
+src/lex.yy.c
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..c10ffc6
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,44 @@
+# Build the jas program from the generated lexer and parser sources.
+CC = gcc-15
+SRC_DIR = src
+# Both sources are generated by the flex and bison rules below.
+SRCS = $(SRC_DIR)/lex.yy.c \
+	$(SRC_DIR)/parser.tab.c
+PROG = jas
+# Map each source, whatever its extension, to the matching object file.
+OBJS = $(patsubst %.c, %.o, \
+	   $(patsubst %.cpp, %.o, \
+	   $(patsubst %.cxx, %.o, \
+	   $(patsubst %.cc, %.o, $(SRCS)))))
+
+# Default goal; .PHONY below names all, so it has to exist as a target.
+all: $(PROG)
+
+$(PROG): $(OBJS)
+	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)
+
+# -d also writes parser.tab.h, which lexer.l includes for the token codes.
+$(SRC_DIR)/parser.tab.c: $(SRC_DIR)/parser.y
+	bison $(BISONFLAGS) -d $< -o $@
+
+# Listing parser.tab.c as a prerequisite runs bison before flex.
+$(SRC_DIR)/lex.yy.c: $(SRC_DIR)/lexer.l $(SRC_DIR)/parser.tab.c
+	flex $(FLEXFLAGS) -o $@ -l $<
+
+%.o: %.c
+	$(CC) $(CFLAGS) -c $< -o $@
+
+%.o: %.cpp
+	$(CXX) $(CXXFLAGS) -c $< -o $@
+
+%.o: %.cxx
+	$(CXX) $(CXXFLAGS) -c $< -o $@
+
+%.o: %.cc
+	$(CXX) $(CXXFLAGS) -c $< -o $@
+
+clean:
+	rm -rf $(PROG) $(OBJS) $(SRC_DIR)/lex.yy.c $(SRC_DIR)/parser.tab.c $(SRC_DIR)/parser.tab.h
+
+.PHONY: all clean
+
diff --git a/examples/instructions.s b/examples/instructions.s
new file mode 100644
index 0000000..f16e6c1
--- /dev/null
+++ b/examples/instructions.s
@@ -0,0 +1,8 @@
+movl $4, %eax
+movl $1, %ebx
+movl $1, %eax
+movl $0, %ebx
+int $0x80
+
+movl $str, %ecx
+movl $str_len, %edx
diff --git a/src/lexer.l b/src/lexer.l
new file mode 100644
index 0000000..46f6bc6
--- /dev/null
+++ b/src/lexer.l
@@ -0,0 +1,41 @@
+%{
+/*
+ * Lexer for jas. Each rule hands one token to the bison parser; semantic
+ * values are passed through yylval, whose members come from parser.tab.h.
+ * Symbol and string values are allocated with strdup() for the parser.
+ */
+#include <string.h>
+#include <stdlib.h>
+#include "parser.tab.h"
+extern int yylex(void);
+extern size_t line_number;
+%}
+%option noyywrap
+%%
+^[a-zA-Z]+                      { yylval.symbol = strdup(yytext); return T_INSTRUCTION; }
+%*[a-zA-Z\_\-]+[a-zA-Z0-9\_\-]* { yylval.symbol = strdup(yytext); return T_LABEL; }
+0x[0-9A-Fa-f]+ {
+    /* atoi() stops at the x and yields 0; base-16 strtol() accepts the 0x prefix. */
+    yylval.i_val = (int)strtol(yytext, NULL, 16);
+    return T_INTEGER;
+}
+[0-9]+ { yylval.i_val = (int)strtol(yytext, NULL, 10); return T_INTEGER; }
+-?([0-9]+\.[0-9]*|\.[0-9]+) {
+    /* A decimal point and a digit are required, so a lone minus or dot is not a number. */
+    yylval.f_val = strtof(yytext, NULL);
+    return T_FLOAT;
+}
+\"([^\"\\\n]|\\.)*\" {
+    /* Copy the text after the opening quote, then cut off the closing quote. */
+    yylval.s_val = strdup(yytext + 1);
+    if (yylval.s_val != NULL) {
+        yylval.s_val[yyleng - 2] = '\0';
+    }
+    return T_STRING;
+}
+,      { return T_COMMA; }
+\$     { return T_IMMEDIATE; }
+[ \t]  ;
+\n     { ++line_number; return T_ENDL; }
+.      ;
+%%
diff --git a/src/parser.y b/src/parser.y
new file mode 100644
index 0000000..719643d
--- /dev/null
+++ b/src/parser.y
@@ -0,0 +1,141 @@
+%{
+/*
+ * Grammar and driver for jas. Each recognised instruction is echoed to
+ * stdout; nothing else is done with it yet.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <getopt.h>
+
+/* Current input line; the lexer increments it on every newline. */
+size_t line_number = 1;
+
+extern int yylex(void);
+extern int yyparse(void);
+extern FILE *yyin;
+void yyerror(const char *s);
+%}
+
+%union {
+    int i_val;
+    float f_val;
+    char *s_val;
+    char *symbol;
+}
+
+%token <i_val> T_INTEGER
+%token <f_val> T_FLOAT
+%token <s_val> T_STRING
+%token <symbol> T_LABEL
+%token <symbol> T_INSTRUCTION
+%token T_ENDL
+%token T_COMMA
+%token T_IMMEDIATE
+
+/* Release lexer-allocated strings that the parser discards on an error. */
+%destructor { free($$); } <s_val> <symbol>
+%%
+asm:
+    statements
+    ;
+
+/* Zero or more statements, so an empty input is accepted. */
+statements:
+    /* empty */
+    | statements statement
+    ;
+
+/* One instruction per line; a bare newline is an empty statement. */
+statement:
+    instruction_one_param_symbol T_ENDL
+    | instruction_one_param_immediate T_ENDL
+    | instruction_two_param_symbols T_ENDL
+    | instruction_symbol_immediate T_ENDL
+    | instruction_immediate_symbol T_ENDL
+    | T_ENDL
+    ;
+
+/*
+ * Every action prints the operands and then frees the symbol strings,
+ * which the lexer allocated with strdup() and nothing else references.
+ * An immediate must be T_IMMEDIATE followed by T_INTEGER, so a symbolic
+ * immediate such as $str is still reported as a syntax error.
+ */
+instruction_one_param_symbol:
+    T_INSTRUCTION T_LABEL {
+        printf("Found instruction %s(%s)\n", $1, $2);
+        free($1);
+        free($2);
+    };
+
+instruction_one_param_immediate:
+    T_INSTRUCTION T_IMMEDIATE T_INTEGER {
+        printf("Found instruction %s(%d)\n", $1, $3);
+        free($1);
+    };
+
+instruction_two_param_symbols:
+    T_INSTRUCTION T_LABEL T_COMMA T_LABEL {
+        printf("Found instruction %s(%s, %s)\n", $1, $2, $4);
+        free($1);
+        free($2);
+        free($4);
+    };
+
+instruction_symbol_immediate:
+    T_INSTRUCTION T_LABEL T_COMMA T_IMMEDIATE T_INTEGER {
+        printf("Found instruction %s(%s, %d)\n", $1, $2, $5);
+        free($1);
+        free($2);
+    };
+
+instruction_immediate_symbol:
+    T_INSTRUCTION T_IMMEDIATE T_INTEGER T_COMMA T_LABEL {
+        printf("Found instruction %s(%d, %s)\n", $1, $3, $5);
+        free($1);
+        free($5);
+    };
+%%
+
+/* Report a parser error on stderr, prefixed with the current line number. */
+void yyerror(const char *s) {
+    fprintf(stderr, "Parser Error: %zu: %s\n", line_number, s);
+}
+
+/*
+ * Parse the file named by -o, or stdin when no -o is given.
+ * Returns EXIT_SUCCESS if the input parsed, EXIT_FAILURE otherwise.
+ */
+int main(int argc, char *argv[]) {
+    FILE *input = stdin;
+    int opt;
+
+    while ((opt = getopt(argc, argv, "o:")) != -1) {
+        switch (opt) {
+        case 'o':
+            /* A repeated -o replaces the earlier file, so close that one. */
+            if (input != stdin) {
+                fclose(input);
+            }
+            input = fopen(optarg, "r");
+            if (input == NULL) {
+                perror(optarg);
+                return EXIT_FAILURE;
+            }
+            break;
+        default:
+            /* Any other option is ignored and parsing still goes ahead. */
+            break;
+        }
+    }
+
+    yyin = input;
+    /* yyparse() returns 0 only when the whole input was accepted. */
+    const int status = yyparse();
+    if (input != stdin) {
+        fclose(input);
+    }
+    return status == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
+}
+
