summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xREADME.md18
-rwxr-xr-xassemble.go105
-rwxr-xr-xelf.go125
-rwxr-xr-xqdma.go94
-rwxr-xr-xsymbol.go154
-rwxr-xr-xtest6.asm16
-rwxr-xr-xtest6.binbin28 -> 246 bytes
7 files changed, 395 insertions, 117 deletions
diff --git a/README.md b/README.md
index 453c4bd..e3335f3 100755
--- a/README.md
+++ b/README.md
@@ -4,9 +4,8 @@
QDMA is a simple MIPS assembler written in Go for
[QDME](https://github.com/JacobMcDonnell/QDME), which is my simple MIPS
-emulator. Currently this assembler can only handle standard instructions,
-which means no sections and no predefined data outside of an I-Type
-instruction.
+emulator. QDMA implements a partial version of the Executable and Linkable
+Format (ELF); [see Partial ELF for more information](#Partial-ELF).
## TODO
@@ -14,7 +13,7 @@ instruction.
- ~Handle Labels and Jumps~
-- Encode Data Section into Binary
+- ~Encode Data Section into Binary~
- ~Ignore Comments~
@@ -62,3 +61,14 @@ Here is a list of supported instructions.
|lw |I |rs = imm(rt)|
|sw |I |imm(rt) = rs|
+## Partial ELF
+
+QDMA is partially compliant with ELF, meaning that it supports segments but
+not sections. The supported segments are `.text`, `.data`, `.rodata`, and
+`.bss`. `.text` is for the instructions. `.data` and `.rodata` are for defined
+data types with initial values, `.rodata` being read only. Supported data types
+are `.word` for a 4-byte value, `.half` for a 2-byte value, `.byte` for a byte,
+`.asciiz` for a null-terminated string, and `.ascii` for a non-null-terminated
+string. `.bss` is for variables where there is no initial value. In this
+section you define the space that a variable will take up in bytes using
+`.space`.
diff --git a/assemble.go b/assemble.go
index 98af916..e3bb2e5 100755
--- a/assemble.go
+++ b/assemble.go
@@ -106,7 +106,8 @@ func Encode(inst []string, pc int32) ([]byte, error) {
16) | (RegNums[inst[1]] << 11) | function.opcode
} else if function.opcode == 2 || function.opcode == 3 {
label, _ := labels[inst[1]]
- ret = (function.opcode << 26) | uint32(label&0x03FFFFFF)
+ addr := label.pos + SectionPos[label.section]
+ ret = (function.opcode << 26) | uint32(addr&0x03FFFFFF)
} else if function.opcode == 35 || function.opcode == 43 {
immReg, err := regexp.Compile("^[0-9]+")
if err != nil {
@@ -125,9 +126,10 @@ func Encode(inst []string, pc int32) ([]byte, error) {
ret = (function.opcode << 26) | (RegNums[reg] << 21) |
(RegNums[inst[1]] << 16) | (0xFFFF & uint32(imm))
} else {
- addr, isLabel := labels[inst[3]]
+ label, isLabel := labels[inst[3]]
+ addr := int32(label.pos + SectionPos[label.section])
if isLabel {
- imm = int16(int32(addr) - pc - 12)
+ imm = int16(addr - pc - 12)
} else {
i, err := strconv.Atoi(inst[3])
if err != nil {
@@ -142,95 +144,20 @@ func Encode(inst []string, pc int32) ([]byte, error) {
return bytes, nil
}
-func EncodeData(line []string) ([]byte, error) {
- var size int
- var err error = nil
- var data int64
- isString := false
- isData := false
- nullTerm := false
-
- line[1] = strings.ReplaceAll(line[1], "\"", "")
-
- switch line[0] {
- case ".space":
- size, err = strconv.Atoi(line[1])
- case ".word":
- size = 4
- isData = true
- case ".byte":
- size = 1
- isData = true
- case ".half":
- size = 2
- isData = true
- case ".asciiz":
- size = len(line[1]) + 1
- isString = true
- nullTerm = true
- case ".ascii":
- size = len(line[1])
- isString = true
- }
- if err != nil {
- return nil, err
- }
- bytes := make([]byte, size)
-
- if isData {
- if strings.Contains(line[1], "0x") {
- line[1] = strings.ReplaceAll(line[1], "0x", "")
- var t uint64
- t, err = strconv.ParseUint(line[1], 16, size*8)
- data = int64(t)
- } else {
- data, err = strconv.ParseInt(line[1], 10, size*8)
- }
- if err != nil {
- return nil, err
- }
- switch size {
- case 1:
- bytes[0] = uint8(data)
- case 2:
- binary.NativeEndian.PutUint16(bytes, uint16(data))
- case 4:
- binary.NativeEndian.PutUint32(bytes, uint32(data))
- }
- } else if isString {
- if nullTerm {
- for i, b := range []byte(line[1]) {
- bytes[i] = b
- }
- bytes[size-1] = 0
- } else {
- bytes = []byte(line[1])
- }
- }
-
- return bytes, nil
-}
-
-func Assemble(path string) {
+func Assemble(path string, out *os.File) {
file, err := os.Open(path)
if err != nil {
panic(err)
}
defer file.Close()
- out, err := os.Create(strings.ReplaceAll(path, ".asm", ".bin"))
- if err != nil {
- panic(err)
- }
- defer out.Close()
-
scanner := bufio.NewScanner(file)
for i := 0; scanner.Scan(); i += 4 {
var s string = scanner.Text()
if s == "" {
i -= 4
continue
- } else if s == ".data" {
+ } else if s == ".data" || s == ".rodata" || s == ".bss" {
isText = false
continue
} else if s == ".text" {
@@ -238,6 +165,10 @@ func Assemble(path string) {
continue
}
+ if !isText {
+ continue
+ }
+
inst, err := Parse(s)
if err != nil {
panic(err)
@@ -253,12 +184,7 @@ func Assemble(path string) {
continue
}
- var bytes []byte
- if isText {
- bytes, err = Encode(inst, int32(i))
- } else {
- bytes, err = EncodeData(inst)
- }
+ bytes, err := Encode(inst, int32(i))
if err != nil {
fmt.Fprintf(os.Stderr, "while encoding %s: %v", s, err)
panic(err)
@@ -268,13 +194,6 @@ func Assemble(path string) {
}
}
- /*
- // Write the new starting PC into the header
- if _, err := out.Seek(0, io.SeekStart); err != nil {
- panic(err)
- }
- */
-
if err := scanner.Err(); err != nil {
panic(err)
}
diff --git a/elf.go b/elf.go
new file mode 100755
index 0000000..ea90180
--- /dev/null
+++ b/elf.go
@@ -0,0 +1,125 @@
+package main
+
+import (
+ "encoding/binary"
+)
+
+const (
+ EI_NIDENT = 16
+ ET_NONE = 0
+ ET_REL = 1
+ ET_EXEC = 2
+ ET_DYN = 3
+ ET_CORE = 4
+ MIPS = 0x08
+ EHSIZE = 52
+ PHENSIZE = 0x20
+ SHENTSIZE = 0x28
+ PT_NULL = 0
+ PT_LOAD = 1
+ PT_DYNAMIC = 2
+ PT_INTERP = 3
+ PT_NOTE = 4
+ PT_SHLIB = 5
+ PT_PHDR = 6
+ PT_TLS = 7
+ PF_READ = 4
+ PF_WRITE = 2
+ PF_EXEC = 1
+)
+
+type ElfHeader struct {
+ e_ident [EI_NIDENT]byte
+ e_type uint16
+ e_machine uint16
+ e_version uint32
+ e_entry uint32
+ e_phoff uint32
+ e_shoff uint32
+ e_flags uint32
+ e_ehsize uint16
+ e_phentsize uint16
+ e_phnum uint16
+ e_shentsize uint16
+ e_shnum uint16
+ e_shstrndx uint16
+}
+
+type ProgramHeader struct {
+ ptype uint32
+ offset uint32
+ vaddr uint32
+ paddr uint32
+ filesz uint32
+ memsz uint32
+ flags uint32
+ align uint32
+}
+
+func EHInit(e_entry uint32, e_phnum uint16) ElfHeader {
+ var e ElfHeader
+ e.e_ident = [EI_NIDENT]byte{0x7f, 0x45, 0x4c, 0x46, // ELF magic number
+ 0x01, // 32-bit format
+ 0x01, // Little Endian
+ 0x01, // ELF Version
+ 0x00, // Target OS
+ 0x00, // Target ABI
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} // Padding
+ e.e_type = ET_EXEC
+ e.e_machine = MIPS
+ e.e_version = 0x01
+ e.e_entry = e_entry
+ e.e_phoff = EHSIZE // Program headers will be after ELF Header
+ e.e_shoff = 0
+ e.e_ehsize = EHSIZE
+ e.e_phentsize = PHENSIZE
+ e.e_phnum = e_phnum
+ e.e_shentsize = SHENTSIZE
+ e.e_shnum = 0
+ e.e_shstrndx = 0
+ return e
+}
+
+/* NOTES: On the ToBytes methods
+ * I understand that these two methods are not the prettiest, but I need
+ * something to convert these structs to byte arrays. From what I've read so
+ * far there seems to be no way to do this. It appears that encoding/gob
+ * ignores the size of data types, which is needed, and json serialization
+ * would not produce the proper output.
+ */
+
+func (e *ElfHeader) ToBytes() []byte {
+ bytes := make([]byte, EHSIZE)
+ i := 0
+ for _, b := range e.e_ident {
+ bytes[i] = b
+ i++
+ }
+ binary.NativeEndian.PutUint16(bytes[EI_NIDENT:], e.e_type)
+ binary.NativeEndian.PutUint16(bytes[18:], e.e_machine)
+ binary.NativeEndian.PutUint32(bytes[20:], e.e_version)
+ binary.NativeEndian.PutUint32(bytes[24:], e.e_entry)
+ binary.NativeEndian.PutUint32(bytes[28:], e.e_phoff)
+ binary.NativeEndian.PutUint32(bytes[32:], e.e_shoff)
+ binary.NativeEndian.PutUint32(bytes[36:], e.e_flags)
+ binary.NativeEndian.PutUint16(bytes[40:], e.e_ehsize)
+ binary.NativeEndian.PutUint16(bytes[42:], e.e_phentsize)
+ binary.NativeEndian.PutUint16(bytes[44:], e.e_phnum)
+ binary.NativeEndian.PutUint16(bytes[46:], e.e_shentsize)
+ binary.NativeEndian.PutUint16(bytes[48:], e.e_shnum)
+ binary.NativeEndian.PutUint16(bytes[50:], e.e_shstrndx)
+ return bytes
+}
+
+func (p *ProgramHeader) ToBytes() []byte {
+ bytes := make([]byte, PHENSIZE)
+ binary.NativeEndian.PutUint32(bytes[0:], p.ptype)
+ binary.NativeEndian.PutUint32(bytes[4:], p.offset)
+ binary.NativeEndian.PutUint32(bytes[8:], p.vaddr)
+ binary.NativeEndian.PutUint32(bytes[12:], p.paddr)
+ binary.NativeEndian.PutUint32(bytes[16:], p.filesz)
+ binary.NativeEndian.PutUint32(bytes[20:], p.memsz)
+ binary.NativeEndian.PutUint32(bytes[24:], p.flags)
+ binary.NativeEndian.PutUint32(bytes[28:], p.align)
+ return bytes
+}
diff --git a/qdma.go b/qdma.go
index b28dfe9..5a7a7a4 100755
--- a/qdma.go
+++ b/qdma.go
@@ -2,11 +2,101 @@ package main
import (
"os"
+ "strings"
)
func main() {
for _, arg := range os.Args[1:] {
- LabelFind(arg) // First pass to find all labels.
- Assemble(arg) // Second pass to assemble the instructions.
+ tempData, err := os.CreateTemp("", "data*")
+ Check(err)
+ dName := tempData.Name()
+ defer os.Remove(dName)
+
+ tempRoData, err := os.CreateTemp("", "rodata*")
+ Check(err)
+ rName := tempRoData.Name()
+ defer os.Remove(rName)
+
+ tempText, err := os.CreateTemp("", "text*")
+ Check(err)
+ tName := tempText.Name()
+ defer os.Remove(tName)
+
+ // First pass to find the address of all labels and encode the data
+ LabelFind(arg, tempData, tempRoData)
+
+ var names [3]string
+ var PHeaders [4]ProgramHeader
+ i := 0
+ n := 0
+
+ if SectionPos[TEXT] > 0 {
+ PHeaders[i] = ProgramHeader{PT_LOAD, 0, 0, 0,
+ uint32(SectionPos[TEXT]), uint32(SectionPos[TEXT]), PF_READ +
+ PF_EXEC, 0}
+ names[i] = tName
+ i++
+ n++
+ // Second pass to assemble the instructions.
+ Assemble(arg, tempText)
+ }
+
+ if SectionPos[DATA] > 0 {
+ PHeaders[i] = ProgramHeader{PT_LOAD, 0, 0, 0,
+ uint32(SectionPos[DATA]), uint32(SectionPos[DATA]), PF_READ +
+ PF_WRITE, 0}
+ names[i] = dName
+ i++
+ n++
+ }
+
+ if SectionPos[RODATA] > 0 {
+ PHeaders[i] = ProgramHeader{PT_LOAD, 0, 0, 0,
+ uint32(SectionPos[RODATA]), uint32(SectionPos[RODATA]), PF_READ, 0}
+ names[i] = rName
+ i++
+ n++
+ }
+
+ if SectionPos[BSS] > 0 {
+ PHeaders[i] = ProgramHeader{PT_LOAD, 0, 0, 0, 0,
+ uint32(SectionPos[BSS]), PF_READ + PF_WRITE, 0}
+ i++
+ }
+
+ var fOffset, mOffset uint32 = 0, 0
+ for j, p := range PHeaders[:i] {
+ PHeaders[j].offset = uint32(i*PHENSIZE) + fOffset
+ fOffset += p.filesz
+ PHeaders[j].vaddr = mOffset
+ mOffset += p.memsz
+ }
+
+ EHeader := EHInit(0, uint16(i))
+ WriteBinary(arg, EHeader, PHeaders[:i], names[:n])
+ }
+}
+
+func WriteBinary(path string, e ElfHeader, ps []ProgramHeader, ns []string) {
+ out, err := os.Create(strings.ReplaceAll(path, ".asm", ".bin"))
+ Check(err)
+ defer out.Close()
+ _, err = out.Write(e.ToBytes())
+ Check(err)
+ for _, p := range ps {
+ _, err = out.Write(p.ToBytes())
+ Check(err)
+ }
+ for _, name := range ns {
+ b, err := os.ReadFile(name)
+ Check(err)
+ _, err = out.Write(b)
+ Check(err)
+ }
+}
+
+func Check(err error) {
+ if err != nil {
+ panic(err)
}
}
diff --git a/symbol.go b/symbol.go
index f63bb8e..5a71473 100755
--- a/symbol.go
+++ b/symbol.go
@@ -2,15 +2,34 @@ package main
import (
"bufio"
+ "encoding/binary"
"os"
"regexp"
+ "strconv"
"strings"
)
+type PosPair struct {
+ pos uint
+ section uint8
+}
+
// Map for labels and their addresses
-var labels map[string]uint = make(map[string]uint)
+var labels map[string]PosPair = make(map[string]PosPair)
+
+var SectionPos [4]uint
+
+const (
+ TEXT = 0
+ DATA = 1
+ RODATA = 2
+ BSS = 3
+)
+
+func LabelFind(path string, dTemp, rTemp *os.File) {
+ // This will track the current section, default is .text
+ var Section uint8 = TEXT
-func LabelFind(path string) {
file, err := os.Open(path)
if err != nil {
panic(err)
@@ -18,23 +37,31 @@ func LabelFind(path string) {
defer file.Close()
input := bufio.NewScanner(file)
-
- for i := 0; input.Scan(); i += 4 {
- k := input.Text()
- if k == "" {
- i -= 4
+ for input.Scan() {
+ i := &SectionPos[Section]
+ switch input.Text() {
+ case "":
+ continue
+ case ".text":
+ Section = TEXT
+ continue
+ case ".data":
+ Section = DATA
+ continue
+ case ".rodata":
+ Section = RODATA
continue
- } else if k == ".text" || k == ".data" {
- i += 4
+ case ".bss":
+ Section = BSS
continue
}
- s, err := Parse(k)
+
+ s, err := Parse(input.Text())
if err != nil {
panic(err)
}
if len(s) == 1 && s[0] == "" {
- i -= 4
continue
}
@@ -43,10 +70,111 @@ func LabelFind(path string) {
panic(err)
}
if hasLabel {
- labels[strings.ReplaceAll(s[0], ":", "")] = uint(i)
+ labels[strings.ReplaceAll(s[0], ":", "")] = PosPair{*i, Section}
if len(s) == 1 {
- i -= 4
+ continue
+ } else {
+ s = s[1:]
+ }
+ }
+
+ offset, err := CalcOffset(s, Section, dTemp, rTemp)
+ if err != nil {
+ panic(err)
+ }
+ *i += offset
+ }
+}
+
+func CalcOffset(s []string, Section uint8, dTemp, rTemp *os.File) (uint, error) {
+ var offset uint
+ if Section != TEXT {
+ b, err := EncodeData(s)
+ if err != nil {
+ return 0, err
+ }
+ if Section == DATA {
+ _, err := dTemp.Write(b)
+ if err != nil {
+ return 0, err
+ }
+ } else if Section == RODATA {
+ _, err := rTemp.Write(b)
+ if err != nil {
+ return 0, err
+ }
+ }
+ offset = uint(len(b))
+ } else {
+ offset = 4
+ }
+ return offset, nil
+}
+
+func EncodeData(line []string) ([]byte, error) {
+ var size int
+ var err error = nil
+ var data int64
+ isString := false
+ isData := false
+ nullTerm := false
+
+ line[1] = strings.ReplaceAll(line[1], "\"", "")
+
+ switch line[0] {
+ case ".space":
+ size, err = strconv.Atoi(line[1])
+ case ".word":
+ size = 4
+ isData = true
+ case ".byte":
+ size = 1
+ isData = true
+ case ".half":
+ size = 2
+ isData = true
+ case ".asciiz":
+ size = len(line[1]) + 1
+ isString = true
+ nullTerm = true
+ case ".ascii":
+ size = len(line[1])
+ isString = true
+ }
+ if err != nil {
+ return nil, err
+ }
+ bytes := make([]byte, size)
+
+ if isData {
+ if strings.Contains(line[1], "0x") {
+ line[1] = strings.ReplaceAll(line[1], "0x", "")
+ var t uint64
+ t, err = strconv.ParseUint(line[1], 16, size*8)
+ data = int64(t)
+ } else {
+ data, err = strconv.ParseInt(line[1], 10, size*8)
+ }
+ if err != nil {
+ return nil, err
+ }
+ switch size {
+ case 1:
+ bytes[0] = uint8(data)
+ case 2:
+ binary.NativeEndian.PutUint16(bytes, uint16(data))
+ case 4:
+ binary.NativeEndian.PutUint32(bytes, uint32(data))
+ }
+ } else if isString {
+ if nullTerm {
+ for i, b := range []byte(line[1]) {
+ bytes[i] = b
}
+ bytes[size-1] = 0
+ } else {
+ bytes = []byte(line[1])
}
}
+ return bytes, nil
}
diff --git a/test6.asm b/test6.asm
index 3a9a21f..3c9ab62 100755
--- a/test6.asm
+++ b/test6.asm
@@ -10,9 +10,15 @@ syscall
.data
x: .word 0xFFFFFFFF
-#fib: .space 40
-#h: .half 255
-#c: .byte 0
-#dogs: .ascii "Dogs are cool\n"
-#cats: .asciiz "cats are cool\n"
+cats: .asciiz "cats are cool\n"
+c: .byte 0
+
+.rodata
+
+g: .word 50
+dogs: .ascii "Dogs are cool\n"
+h: .half 255
+
+.bss
+fib: .space 40
diff --git a/test6.bin b/test6.bin
index 8e47a8d..e81d82c 100755
--- a/test6.bin
+++ b/test6.bin
Binary files differ