From 05d10fb3a941aa384340d2999033aacb31e5f32f Mon Sep 17 00:00:00 2001 From: Jacob McDonnell Date: Thu, 23 May 2024 18:12:40 -0400 Subject: Initial Implementation of ELF --- README.md | 18 +++++-- assemble.go | 105 +++++------------------------------------ elf.go | 125 ++++++++++++++++++++++++++++++++++++++++++++++++ qdma.go | 94 ++++++++++++++++++++++++++++++++++++- symbol.go | 154 +++++++++++++++++++++++++++++++++++++++++++++++++++++++----- test6.asm | 16 +++++-- test6.bin | Bin 28 -> 246 bytes 7 files changed, 395 insertions(+), 117 deletions(-) create mode 100755 elf.go diff --git a/README.md b/README.md index 453c4bd..e3335f3 100755 --- a/README.md +++ b/README.md @@ -4,9 +4,8 @@ QDMA is a simple MIPS assembler written in Go for [QDME](https://github.com/JacobMcDonnell/QDME), which is my simple MIPS -emulator. Currently this assembler can only handle standard instructions, -which means no sections and no predefined data outside of an I-Type -instruction. +emulator. QDMA implements a partial version of the Executable and Linkable +Format (ELF), [See Partial ELF for more information](#Partial-ELF). ## TODO @@ -14,7 +13,7 @@ instruction. - ~Handle Labels and Jumps~ -- Encode Data Section into Binary +- ~Encode Data Section into Binary~ - ~Ignore Comments~ @@ -62,3 +61,14 @@ Here is a list of supported instructions. |lw |I |rs = imm(rt)| |sw |I |imm(rt) = rs| +## Partial ELF + +QDMA is partially compliant with ELF, meaning that it supports segments but +not sections. The supported segments are `.text`, `.data`, `.rodata`, and +`.bss`. `.text` is for the instructions. `.data` and `.rodata` are for defined +data types with initial values, `.rodata` being read only. Supported data types +are `.word` for a 4 byte value, `.half` for a 2 byte value, `.byte` for a byte, +`.asciiz` for a null terminated string, and `.ascii` for a non null terminated +string. `.bss` is for variables where there is no initial value. 
In this +section you define the space that a variable will take up in bytes using +`.space`. diff --git a/assemble.go b/assemble.go index 98af916..e3bb2e5 100755 --- a/assemble.go +++ b/assemble.go @@ -106,7 +106,8 @@ func Encode(inst []string, pc int32) ([]byte, error) { 16) | (RegNums[inst[1]] << 11) | function.opcode } else if function.opcode == 2 || function.opcode == 3 { label, _ := labels[inst[1]] - ret = (function.opcode << 26) | uint32(label&0x03FFFFFF) + addr := label.pos + SectionPos[label.section] + ret = (function.opcode << 26) | uint32(addr&0x03FFFFFF) } else if function.opcode == 35 || function.opcode == 43 { immReg, err := regexp.Compile("^[0-9]+") if err != nil { @@ -125,9 +126,10 @@ func Encode(inst []string, pc int32) ([]byte, error) { ret = (function.opcode << 26) | (RegNums[reg] << 21) | (RegNums[inst[1]] << 16) | (0xFFFF & uint32(imm)) } else { - addr, isLabel := labels[inst[3]] + label, isLabel := labels[inst[3]] + addr := int32(label.pos + SectionPos[label.section]) if isLabel { - imm = int16(int32(addr) - pc - 12) + imm = int16(addr - pc - 12) } else { i, err := strconv.Atoi(inst[3]) if err != nil { @@ -142,95 +144,20 @@ func Encode(inst []string, pc int32) ([]byte, error) { return bytes, nil } -func EncodeData(line []string) ([]byte, error) { - var size int - var err error = nil - var data int64 - isString := false - isData := false - nullTerm := false - - line[1] = strings.ReplaceAll(line[1], "\"", "") - - switch line[0] { - case ".space": - size, err = strconv.Atoi(line[1]) - case ".word": - size = 4 - isData = true - case ".byte": - size = 1 - isData = true - case ".half": - size = 2 - isData = true - case ".asciiz": - size = len(line[1]) + 1 - isString = true - nullTerm = true - case ".ascii": - size = len(line[1]) - isString = true - } - if err != nil { - return nil, err - } - bytes := make([]byte, size) - - if isData { - if strings.Contains(line[1], "0x") { - line[1] = strings.ReplaceAll(line[1], "0x", "") - var t uint64 - t, err = 
strconv.ParseUint(line[1], 16, size*8) - data = int64(t) - } else { - data, err = strconv.ParseInt(line[1], 10, size*8) - } - if err != nil { - return nil, err - } - switch size { - case 1: - bytes[0] = uint8(data) - case 2: - binary.NativeEndian.PutUint16(bytes, uint16(data)) - case 4: - binary.NativeEndian.PutUint32(bytes, uint32(data)) - } - } else if isString { - if nullTerm { - for i, b := range []byte(line[1]) { - bytes[i] = b - } - bytes[size-1] = 0 - } else { - bytes = []byte(line[1]) - } - } - - return bytes, nil -} - -func Assemble(path string) { +func Assemble(path string, out *os.File) { file, err := os.Open(path) if err != nil { panic(err) } defer file.Close() - out, err := os.Create(strings.ReplaceAll(path, ".asm", ".bin")) - if err != nil { - panic(err) - } - defer out.Close() - scanner := bufio.NewScanner(file) for i := 0; scanner.Scan(); i += 4 { var s string = scanner.Text() if s == "" { i -= 4 continue - } else if s == ".data" { + } else if s == ".data" || s == ".rodata" || s == ".bss" { isText = false continue } else if s == ".text" { @@ -238,6 +165,10 @@ func Assemble(path string) { continue } + if !isText { + continue + } + inst, err := Parse(s) if err != nil { panic(err) @@ -253,12 +184,7 @@ func Assemble(path string) { continue } - var bytes []byte - if isText { - bytes, err = Encode(inst, int32(i)) - } else { - bytes, err = EncodeData(inst) - } + bytes, err := Encode(inst, int32(i)) if err != nil { fmt.Fprintf(os.Stderr, "while encoding %s: %v", s, err) panic(err) @@ -268,13 +194,6 @@ func Assemble(path string) { } } - /* - // Write the new starting PC into the header - if _, err := out.Seek(0, io.SeekStart); err != nil { - panic(err) - } - */ - if err := scanner.Err(); err != nil { panic(err) } diff --git a/elf.go b/elf.go new file mode 100755 index 0000000..ea90180 --- /dev/null +++ b/elf.go @@ -0,0 +1,125 @@ +package main + +import ( + "encoding/binary" +) + +const ( + EI_NIDENT = 16 + ET_NONE = 0 + ET_REL = 1 + ET_EXEC = 2 + ET_DYN = 3 
+ ET_CORE = 4 + MIPS = 0x08 + EHSIZE = 52 + PHENSIZE = 0x20 + SHENTSIZE = 0x28 + PT_NULL = 0 + PT_LOAD = 1 + PT_DYNAMIC = 2 + PT_INTERP = 3 + PT_NOTE = 4 + PT_SHLIB = 5 + PT_PHDR = 6 + PT_TLS = 7 + PF_READ = 4 + PF_WRITE = 2 + PF_EXEC = 1 +) + +type ElfHeader struct { + e_ident [EI_NIDENT]byte + e_type uint16 + e_machine uint16 + e_version uint32 + e_entry uint32 + e_phoff uint32 + e_shoff uint32 + e_flags uint32 + e_ehsize uint16 + e_phentsize uint16 + e_phnum uint16 + e_shentsize uint16 + e_shnum uint16 + e_shstrndx uint16 +} + +type ProgramHeader struct { + ptype uint32 + offset uint32 + vaddr uint32 + paddr uint32 + filesz uint32 + memsz uint32 + flags uint32 + align uint32 +} + +func EHInit(e_entry uint32, e_phnum uint16) ElfHeader { + var e ElfHeader + e.e_ident = [EI_NIDENT]byte{0x7f, 0x45, 0x4c, 0x46, // ELF magic number + 0x01, // 32-bit format + 0x01, // Little Endian + 0x01, // ELF Version + 0x00, // Target OS + 0x00, // Target ABI + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} // Padding + e.e_type = ET_EXEC + e.e_machine = MIPS + e.e_version = 0x01 + e.e_entry = e_entry + e.e_phoff = EHSIZE // Program headers will be after ELF Header + e.e_shoff = 0 + e.e_ehsize = EHSIZE + e.e_phentsize = PHENSIZE + e.e_phnum = e_phnum + e.e_shentsize = SHENTSIZE + e.e_shnum = 0 + e.e_shstrndx = 0 + return e +} + +/* NOTES: On the ToBytes methods + * I understand that these two methods are not the prettiest, but I need + * something to convert these structs to byte arrays. From what I've read so + * far there seems to be no way to do this. It appears that encoding/gob + * ignores the size of data types, which is needed, and json serialization + * would not produce the proper output. 
+ */ + +func (e *ElfHeader) ToBytes() []byte { + bytes := make([]byte, EHSIZE) + i := 0 + for _, b := range e.e_ident { + bytes[i] = b + i++ + } + binary.NativeEndian.PutUint16(bytes[EI_NIDENT:], e.e_type) + binary.NativeEndian.PutUint16(bytes[18:], e.e_machine) + binary.NativeEndian.PutUint32(bytes[20:], e.e_version) + binary.NativeEndian.PutUint32(bytes[24:], e.e_entry) + binary.NativeEndian.PutUint32(bytes[28:], e.e_phoff) + binary.NativeEndian.PutUint32(bytes[32:], e.e_shoff) + binary.NativeEndian.PutUint32(bytes[36:], e.e_flags) + binary.NativeEndian.PutUint16(bytes[40:], e.e_ehsize) + binary.NativeEndian.PutUint16(bytes[42:], e.e_phentsize) + binary.NativeEndian.PutUint16(bytes[44:], e.e_phnum) + binary.NativeEndian.PutUint16(bytes[46:], e.e_shentsize) + binary.NativeEndian.PutUint16(bytes[48:], e.e_shnum) + binary.NativeEndian.PutUint16(bytes[50:], e.e_shstrndx) + return bytes +} + +func (p *ProgramHeader) ToBytes() []byte { + bytes := make([]byte, PHENSIZE) + binary.NativeEndian.PutUint32(bytes[0:], p.ptype) + binary.NativeEndian.PutUint32(bytes[4:], p.offset) + binary.NativeEndian.PutUint32(bytes[8:], p.vaddr) + binary.NativeEndian.PutUint32(bytes[12:], p.paddr) + binary.NativeEndian.PutUint32(bytes[16:], p.filesz) + binary.NativeEndian.PutUint32(bytes[20:], p.memsz) + binary.NativeEndian.PutUint32(bytes[24:], p.flags) + binary.NativeEndian.PutUint32(bytes[28:], p.align) + return bytes +} diff --git a/qdma.go b/qdma.go index b28dfe9..5a7a7a4 100755 --- a/qdma.go +++ b/qdma.go @@ -2,11 +2,101 @@ package main import ( "os" + "strings" ) func main() { for _, arg := range os.Args[1:] { - LabelFind(arg) // First pass to find all labels. - Assemble(arg) // Second pass to assemble the instructions. 
+ tempData, err := os.CreateTemp("", "data*") + Check(err) + dName := tempData.Name() + defer os.Remove(dName) + + tempRoData, err := os.CreateTemp("", "rodata*") + Check(err) + rName := tempRoData.Name() + defer os.Remove(rName) + + tempText, err := os.CreateTemp("", "text*") + Check(err) + tName := tempText.Name() + defer os.Remove(tName) + + // First pass to find the address of all labels and encode the data + LabelFind(arg, tempData, tempRoData) + + var names [3]string + var PHeaders [4]ProgramHeader + i := 0 + n := 0 + + if SectionPos[TEXT] > 0 { + PHeaders[i] = ProgramHeader{PT_LOAD, 0, 0, 0, + uint32(SectionPos[TEXT]), uint32(SectionPos[TEXT]), PF_READ + + PF_EXEC, 0} + names[i] = tName + i++ + n++ + // Second pass to assemble the instructions. + Assemble(arg, tempText) + } + + if SectionPos[DATA] > 0 { + PHeaders[i] = ProgramHeader{PT_LOAD, 0, 0, 0, + uint32(SectionPos[DATA]), uint32(SectionPos[DATA]), PF_READ + + PF_WRITE, 0} + names[i] = dName + i++ + n++ + } + + if SectionPos[RODATA] > 0 { + PHeaders[i] = ProgramHeader{PT_LOAD, 0, 0, 0, + uint32(SectionPos[RODATA]), uint32(SectionPos[RODATA]), PF_READ, 0} + names[i] = rName + i++ + n++ + } + + if SectionPos[BSS] > 0 { + PHeaders[i] = ProgramHeader{PT_LOAD, 0, 0, 0, 0, + uint32(SectionPos[BSS]), PF_READ + PF_WRITE, 0} + i++ + } + + var fOffset, mOffset uint32 = 0, 0 + for j, p := range PHeaders[:i] { + PHeaders[j].offset = uint32(i*PHENSIZE) + fOffset + fOffset += p.filesz + PHeaders[j].vaddr = mOffset + mOffset += p.memsz + } + + EHeader := EHInit(0, uint16(i)) + WriteBinary(arg, EHeader, PHeaders[:i], names[:n]) + } +} + +func WriteBinary(path string, e ElfHeader, ps []ProgramHeader, ns []string) { + out, err := os.Create(strings.ReplaceAll(path, ".asm", ".bin")) + Check(err) + defer out.Close() + _, err = out.Write(e.ToBytes()) + Check(err) + for _, p := range ps { + _, err = out.Write(p.ToBytes()) + Check(err) + } + for _, name := range ns { + b, err := os.ReadFile(name) + Check(err) + _, err = 
out.Write(b) + Check(err) + } +} + +func Check(err error) { + if err != nil { + panic(err) } } diff --git a/symbol.go b/symbol.go index f63bb8e..5a71473 100755 --- a/symbol.go +++ b/symbol.go @@ -2,15 +2,34 @@ package main import ( "bufio" + "encoding/binary" "os" "regexp" + "strconv" "strings" ) +type PosPair struct { + pos uint + section uint8 +} + // Map for labels and their addresses -var labels map[string]uint = make(map[string]uint) +var labels map[string]PosPair = make(map[string]PosPair) + +var SectionPos [4]uint + +const ( + TEXT = 0 + DATA = 1 + RODATA = 2 + BSS = 3 +) + +func LabelFind(path string, dTemp, rTemp *os.File) { + // This will track the current section, default is .text + var Section uint8 = TEXT -func LabelFind(path string) { file, err := os.Open(path) if err != nil { panic(err) @@ -18,23 +37,31 @@ func LabelFind(path string) { defer file.Close() input := bufio.NewScanner(file) - - for i := 0; input.Scan(); i += 4 { - k := input.Text() - if k == "" { - i -= 4 + for input.Scan() { + i := &SectionPos[Section] + switch input.Text() { + case "": + continue + case ".text": + Section = TEXT + continue + case ".data": + Section = DATA + continue + case ".rodata": + Section = RODATA continue - } else if k == ".text" || k == ".data" { - i += 4 + case ".bss": + Section = BSS continue } - s, err := Parse(k) + + s, err := Parse(input.Text()) if err != nil { panic(err) } if len(s) == 1 && s[0] == "" { - i -= 4 continue } @@ -43,10 +70,111 @@ func LabelFind(path string) { panic(err) } if hasLabel { - labels[strings.ReplaceAll(s[0], ":", "")] = uint(i) + labels[strings.ReplaceAll(s[0], ":", "")] = PosPair{*i, Section} if len(s) == 1 { - i -= 4 + continue + } else { + s = s[1:] + } + } + + offset, err := CalcOffset(s, Section, dTemp, rTemp) + if err != nil { + panic(err) + } + *i += offset + } +} + +func CalcOffset(s []string, Section uint8, dTemp, rTemp *os.File) (uint, error) { + var offset uint + if Section != TEXT { + b, err := EncodeData(s) + if err != 
nil { + return 0, err + } + if Section == DATA { + _, err := dTemp.Write(b) + if err != nil { + return 0, err + } + } else if Section == RODATA { + _, err := rTemp.Write(b) + if err != nil { + return 0, err + } + } + offset = uint(len(b)) + } else { + offset = 4 + } + return offset, nil +} + +func EncodeData(line []string) ([]byte, error) { + var size int + var err error = nil + var data int64 + isString := false + isData := false + nullTerm := false + + line[1] = strings.ReplaceAll(line[1], "\"", "") + + switch line[0] { + case ".space": + size, err = strconv.Atoi(line[1]) + case ".word": + size = 4 + isData = true + case ".byte": + size = 1 + isData = true + case ".half": + size = 2 + isData = true + case ".asciiz": + size = len(line[1]) + 1 + isString = true + nullTerm = true + case ".ascii": + size = len(line[1]) + isString = true + } + if err != nil { + return nil, err + } + bytes := make([]byte, size) + + if isData { + if strings.Contains(line[1], "0x") { + line[1] = strings.ReplaceAll(line[1], "0x", "") + var t uint64 + t, err = strconv.ParseUint(line[1], 16, size*8) + data = int64(t) + } else { + data, err = strconv.ParseInt(line[1], 10, size*8) + } + if err != nil { + return nil, err + } + switch size { + case 1: + bytes[0] = uint8(data) + case 2: + binary.NativeEndian.PutUint16(bytes, uint16(data)) + case 4: + binary.NativeEndian.PutUint32(bytes, uint32(data)) + } + } else if isString { + if nullTerm { + for i, b := range []byte(line[1]) { + bytes[i] = b } + bytes[size-1] = 0 + } else { + bytes = []byte(line[1]) } } + return bytes, nil } diff --git a/test6.asm b/test6.asm index 3a9a21f..3c9ab62 100755 --- a/test6.asm +++ b/test6.asm @@ -10,9 +10,15 @@ syscall .data x: .word 0xFFFFFFFF -#fib: .space 40 -#h: .half 255 -#c: .byte 0 -#dogs: .ascii "Dogs are cool\n" -#cats: .asciiz "cats are cool\n" +cats: .asciiz "cats are cool\n" +c: .byte 0 + +.rodata + +g: .word 50 +dogs: .ascii "Dogs are cool\n" +h: .half 255 + +.bss +fib: .space 40 diff --git a/test6.bin 
b/test6.bin index 8e47a8d..e81d82c 100755 Binary files a/test6.bin and b/test6.bin differ -- cgit v1.2.3