diff --git a/Makefile b/Makefile index a66306f..4360200 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,9 @@ all: $(PROGRAMS) m16vm : src/vm.o src/cpu.o src/mm.o src/instr_decode.o src/syscall.o src/program.o $(LD) $(LDFLAGS) -o $@ $^ -as : src/as/as.o src/as/parser.o src/as/lexer.o src/as/instr_encode.o src/as/error.o +as : src/as/as.o src/as/parser.o src/as/lexer.o \ + src/as/codegen.o src/as/error.o src/as/symtab.o \ + src/as/ast.o lib/libm16.a $(LD) $(LDFLAGS) -o $@ $^ lib/libm16.a : lib/src/vector.o diff --git a/src/as/parser.c b/src/as/parser.c index 7b95283..eca613b 100644 --- a/src/as/parser.c +++ b/src/as/parser.c @@ -21,7 +21,8 @@ #include #include #include "error.h" -#include "instr_encode.h" +#include "codegen.h" +#include "ast.h" #include "lexer.h" #include "parser.h" @@ -37,26 +38,30 @@ static int match_type(struct lexer* lex, enum token_type type) { return lex->token.type == type ? 0 : -1; } -// Same as match_type() but extracts a number from the token. -static int match_type_num(struct lexer* lex, enum token_type type, - void* out, size_t size) { +// Same as match_type() but also generates a operand in the AST. +static int match_operand(struct lexer* lex, enum token_type type, struct ast *ast) { if (match_type(lex, type) < 0) return -1; - memcpy(out, &lex->token.value.n, size); + if (type == TOKEN_REG) { + ast_instr_operand(ast, DATATYPE_REGISTER, lex->token.value.n); + } else { + ast_instr_operand(ast, DATATYPE_NUMBER, lex->token.value.n); + } + return 0; } /* * Helper macros for matching tokens. */ -#define match_reg(pos, out) \ - if (match_type_num(lex, TOKEN_REG, out, sizeof(uint8_t))) \ +#define match_reg(pos, ast) \ + if (match_operand(lex, TOKEN_REG, ast) < 0) \ return asm_error((lex)->lineno, "Expected number at argument %i", pos) -#define match_imm(pos, out) \ - if (match_type_num(lex, TOKEN_NUMBER, out, sizeof(int16_t)) < 0) \ +#define match_imm(pos, ast) \ + if (match_operand(lex, TOKEN_NUMBER, ast) < 0) \ return asm_error((lex)->lineno, "Expected number at argument %i", pos) #define match_arg(pos) \ @@ -72,54 +77,54 @@ static int match_type_num(struct lexer* lex, enum token_type type, */ // R-Type (rs : u8, r0 : u8, r1 : u8) -static int match_typeR(struct instr_R *instr, struct lexer* lex) { +static int match_typeR(struct lexer* lex, struct ast *ast) { - match_reg(1, &instr->rs); match_arg(1); - match_reg(2, &instr->r0); match_arg(2); - match_reg(3, &instr->r1); + match_reg(1, ast); match_arg(1); + match_reg(2, ast); match_arg(2); + match_reg(3, ast); match_end; return 0; } // RI-Type (rs : u8, r0 : u8, offset : s8) -static int match_typeRI(struct instr_RI *instr, struct lexer* lex) { +static int match_typeRI(struct lexer* lex, struct ast *ast) { - match_reg(1, &instr->rs); match_arg(1); - match_reg(2, &instr->r0); match_arg(2); - match_imm(3, &instr->offset); + match_reg(1, ast); match_arg(1); + match_reg(2, ast); match_arg(2); + match_imm(3, ast); match_end; return 1; } // I-Type (rs : u8, imm : s8) -static int match_typeI(struct instr_I *instr, struct lexer* lex) { +static int match_typeI(struct lexer* lex, struct ast *ast) { - match_reg(1, &instr->rs); match_arg(1); - match_imm(2, &instr->imm); + match_reg(1, ast); match_arg(1); + match_imm(2, ast); match_end; return 1; } // J-Type (rs : u8, addr : u16) -static int match_typeJ(struct instr_J *instr, struct lexer* lex) { +static int match_typeJ(struct lexer* lex, struct ast *ast) { - match_imm(1, &instr->addr); + match_imm(1, ast); match_end; return 1; } -#define opcode_guard(op, v) \ - if (op == OP_NONE) op = v +#define opcode_guard(op) \ + if (op_set == 0) { op_set = 1; ast_instr(ast, op); } /* * Parse a single line. */ -static int parse_line(struct lexer* lex, struct instr *instr) { +static int parse_line(struct lexer* lex, struct ast *ast) { - instr->opcode = OP_NONE; + int op_set = 0; if (lexer_get_next(lex) < 0) return -1; @@ -128,26 +133,26 @@ static int parse_line(struct lexer* lex, struct instr *instr) { switch(lex->token.type) { case TOKEN_EOI: return -1; case TOKEN_EOL: break; - case TOKEN_OPCODE_NOOP : instr->opcode = OP_NOOP; + case TOKEN_OPCODE_NOOP : ast_instr(ast, OP_NOOP); match_end; break; // Type-R - case TOKEN_OPCODE_ADD : opcode_guard(instr->opcode, OP_ADD); - return match_typeR(&instr->r, lex); + case TOKEN_OPCODE_ADD : opcode_guard(OP_ADD); + return match_typeR(lex, ast); // Type-I - case TOKEN_OPCODE_MOVL : opcode_guard(instr->opcode, OP_MOVL); - case TOKEN_OPCODE_MOVH : opcode_guard(instr->opcode, OP_MOVH); - case TOKEN_OPCODE_JR : opcode_guard(instr->opcode, OP_JR); - case TOKEN_OPCODE_INT : opcode_guard(instr->opcode, OP_INT); - return match_typeI(&instr->i, lex); + case TOKEN_OPCODE_MOVL : opcode_guard(OP_MOVL); + case TOKEN_OPCODE_MOVH : opcode_guard(OP_MOVH); + case TOKEN_OPCODE_JR : opcode_guard(OP_JR); + case TOKEN_OPCODE_INT : opcode_guard(OP_INT); + return match_typeI(lex, ast); // Type-RI - case TOKEN_OPCODE_LD : opcode_guard(instr->opcode, OP_LW); - case TOKEN_OPCODE_SW : opcode_guard(instr->opcode, OP_SW); - case TOKEN_OPCODE_BEQ : opcode_guard(instr->opcode, OP_BEQ); - return match_typeRI(&instr->ri, lex); + case TOKEN_OPCODE_LD : opcode_guard(OP_LW); + case TOKEN_OPCODE_SW : opcode_guard(OP_SW); + case TOKEN_OPCODE_BEQ : opcode_guard(OP_BEQ); + return match_typeRI(lex, ast); // Type-J - case TOKEN_OPCODE_JMP : opcode_guard(instr->opcode, OP_JMP); - return match_typeJ(&instr->j, lex); + case TOKEN_OPCODE_JMP : opcode_guard(OP_JMP); + return match_typeJ(lex, ast); case TOKEN_LABEL_DECL : asm_warn(lex->lineno, "labels are not supported yet. ignoring."); break; @@ -158,17 +163,6 @@ static int parse_line(struct lexer* lex, struct instr *instr) { return 0; } -static int gencode(FILE *fd, struct instr *instructions, int len) { - - uint16_t buf; // 2-bytes (16-bit) per instruction. - - for(int i = 0; i < len; i++) { - instr_encode(instructions + i, &buf); - - fwrite(&buf, sizeof(buf), 1, fd); - } -} - /* * Main parser function. */ @@ -176,23 +170,31 @@ int parse(FILE *source_fd, FILE *dest_fd) { int rc; struct lexer lex; - struct instr instr[256]; - int n = 0; + struct ast ast; + ast_init(&ast); lexer_init(&lex, source_fd); + // Parse and build AST. do { - rc = parse_line(&lex, instr + n); - - if (instr[n].opcode != OP_NONE) { - n++; - if (n >= 256) - // TODO: Dynamic allocs :) - return asm_error(-1, "Oops, parser ran out of memory."); - } + rc = parse_line(&lex, &ast); } while(rc >= 0); - gencode(dest_fd, instr, n); + // TODO: Second pass validation + // make sure all referenced labels are actually defined. + + // Code generation + for(int i = 0; i < ast.instr.size; i += sizeof(struct ast_instr)) { + struct ast_instr *instr = ast.instr.base + i; + uint8_t code[2] = { 0 }; + + codegen_emit(instr, ast.symbols, &code); + + fwrite(&code, sizeof(code), 1, dest_fd); + } + + // Cleanup + ast_free(&ast); return 0; }