1
0
Fork 0
mirror of https://github.com/pnx/m16vm synced 2026-06-16 03:44:55 +02:00

asm: build AST in parser.

This commit is contained in:
Henrik Hautakoski 2018-12-10 10:09:31 +01:00
parent 96220537de
commit 7646d63736
No known key found for this signature in database
GPG key ID: 839F3A7EAFAEAFAA
2 changed files with 67 additions and 63 deletions

View file

@ -10,7 +10,9 @@ all: $(PROGRAMS)
m16vm : src/vm.o src/cpu.o src/mm.o src/instr_decode.o src/syscall.o src/program.o m16vm : src/vm.o src/cpu.o src/mm.o src/instr_decode.o src/syscall.o src/program.o
$(LD) $(LDFLAGS) -o $@ $^ $(LD) $(LDFLAGS) -o $@ $^
as : src/as/as.o src/as/parser.o src/as/lexer.o src/as/instr_encode.o src/as/error.o as : src/as/as.o src/as/parser.o src/as/lexer.o \
src/as/codegen.o src/as/error.o src/as/symtab.o \
src/as/ast.o lib/libm16.a
$(LD) $(LDFLAGS) -o $@ $^ $(LD) $(LDFLAGS) -o $@ $^
lib/libm16.a : lib/src/vector.o lib/libm16.a : lib/src/vector.o

View file

@ -21,7 +21,8 @@
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include "error.h" #include "error.h"
#include "instr_encode.h" #include "codegen.h"
#include "ast.h"
#include "lexer.h" #include "lexer.h"
#include "parser.h" #include "parser.h"
@ -37,26 +38,30 @@ static int match_type(struct lexer* lex, enum token_type type) {
return lex->token.type == type ? 0 : -1; return lex->token.type == type ? 0 : -1;
} }
// Same as match_type() but extracts a number from the token. // Same as match_type() but also generates an operand in the AST.
static int match_type_num(struct lexer* lex, enum token_type type, static int match_operand(struct lexer* lex, enum token_type type, struct ast *ast) {
void* out, size_t size) {
if (match_type(lex, type) < 0) if (match_type(lex, type) < 0)
return -1; return -1;
memcpy(out, &lex->token.value.n, size); if (type == TOKEN_REG) {
ast_instr_operand(ast, DATATYPE_REGISTER, lex->token.value.n);
} else {
ast_instr_operand(ast, DATATYPE_NUMBER, lex->token.value.n);
}
return 0; return 0;
} }
/* /*
* Helper macros for matching tokens. * Helper macros for matching tokens.
*/ */
#define match_reg(pos, out) \ #define match_reg(pos, ast) \
if (match_type_num(lex, TOKEN_REG, out, sizeof(uint8_t))) \ if (match_operand(lex, TOKEN_REG, ast) < 0) \
return asm_error((lex)->lineno, "Expected number at argument %i", pos) return asm_error((lex)->lineno, "Expected number at argument %i", pos)
#define match_imm(pos, out) \ #define match_imm(pos, ast) \
if (match_type_num(lex, TOKEN_NUMBER, out, sizeof(int16_t)) < 0) \ if (match_operand(lex, TOKEN_NUMBER, ast) < 0) \
return asm_error((lex)->lineno, "Expected number at argument %i", pos) return asm_error((lex)->lineno, "Expected number at argument %i", pos)
#define match_arg(pos) \ #define match_arg(pos) \
@ -72,54 +77,54 @@ static int match_type_num(struct lexer* lex, enum token_type type,
*/ */
// R-Type (rs : u8, r0 : u8, r1 : u8) // R-Type (rs : u8, r0 : u8, r1 : u8)
static int match_typeR(struct instr_R *instr, struct lexer* lex) { static int match_typeR(struct lexer* lex, struct ast *ast) {
match_reg(1, &instr->rs); match_arg(1); match_reg(1, ast); match_arg(1);
match_reg(2, &instr->r0); match_arg(2); match_reg(2, ast); match_arg(2);
match_reg(3, &instr->r1); match_reg(3, ast);
match_end; match_end;
return 0; return 0;
} }
// RI-Type (rs : u8, r0 : u8, offset : s8) // RI-Type (rs : u8, r0 : u8, offset : s8)
static int match_typeRI(struct instr_RI *instr, struct lexer* lex) { static int match_typeRI(struct lexer* lex, struct ast *ast) {
match_reg(1, &instr->rs); match_arg(1); match_reg(1, ast); match_arg(1);
match_reg(2, &instr->r0); match_arg(2); match_reg(2, ast); match_arg(2);
match_imm(3, &instr->offset); match_imm(3, ast);
match_end; match_end;
return 1; return 1;
} }
// I-Type (rs : u8, imm : s8) // I-Type (rs : u8, imm : s8)
static int match_typeI(struct instr_I *instr, struct lexer* lex) { static int match_typeI(struct lexer* lex, struct ast *ast) {
match_reg(1, &instr->rs); match_arg(1); match_reg(1, ast); match_arg(1);
match_imm(2, &instr->imm); match_imm(2, ast);
match_end; match_end;
return 1; return 1;
} }
// J-Type (rs : u8, addr : u16) // J-Type (rs : u8, addr : u16)
static int match_typeJ(struct instr_J *instr, struct lexer* lex) { static int match_typeJ(struct lexer* lex, struct ast *ast) {
match_imm(1, &instr->addr); match_imm(1, ast);
match_end; match_end;
return 1; return 1;
} }
#define opcode_guard(op, v) \ #define opcode_guard(op) \
if (op == OP_NONE) op = v if (op_set == 0) { op_set = 1; ast_instr(ast, op); }
/* /*
* Parse a single line. * Parse a single line.
*/ */
static int parse_line(struct lexer* lex, struct instr *instr) { static int parse_line(struct lexer* lex, struct ast *ast) {
instr->opcode = OP_NONE; int op_set = 0;
if (lexer_get_next(lex) < 0) if (lexer_get_next(lex) < 0)
return -1; return -1;
@ -128,26 +133,26 @@ static int parse_line(struct lexer* lex, struct instr *instr) {
switch(lex->token.type) { switch(lex->token.type) {
case TOKEN_EOI: return -1; case TOKEN_EOI: return -1;
case TOKEN_EOL: break; case TOKEN_EOL: break;
case TOKEN_OPCODE_NOOP : instr->opcode = OP_NOOP; case TOKEN_OPCODE_NOOP : ast_instr(ast, OP_NOOP);
match_end; match_end;
break; break;
// Type-R // Type-R
case TOKEN_OPCODE_ADD : opcode_guard(instr->opcode, OP_ADD); case TOKEN_OPCODE_ADD : opcode_guard(OP_ADD);
return match_typeR(&instr->r, lex); return match_typeR(lex, ast);
// Type-I // Type-I
case TOKEN_OPCODE_MOVL : opcode_guard(instr->opcode, OP_MOVL); case TOKEN_OPCODE_MOVL : opcode_guard(OP_MOVL);
case TOKEN_OPCODE_MOVH : opcode_guard(instr->opcode, OP_MOVH); case TOKEN_OPCODE_MOVH : opcode_guard(OP_MOVH);
case TOKEN_OPCODE_JR : opcode_guard(instr->opcode, OP_JR); case TOKEN_OPCODE_JR : opcode_guard(OP_JR);
case TOKEN_OPCODE_INT : opcode_guard(instr->opcode, OP_INT); case TOKEN_OPCODE_INT : opcode_guard(OP_INT);
return match_typeI(&instr->i, lex); return match_typeI(lex, ast);
// Type-RI // Type-RI
case TOKEN_OPCODE_LD : opcode_guard(instr->opcode, OP_LW); case TOKEN_OPCODE_LD : opcode_guard(OP_LW);
case TOKEN_OPCODE_SW : opcode_guard(instr->opcode, OP_SW); case TOKEN_OPCODE_SW : opcode_guard(OP_SW);
case TOKEN_OPCODE_BEQ : opcode_guard(instr->opcode, OP_BEQ); case TOKEN_OPCODE_BEQ : opcode_guard(OP_BEQ);
return match_typeRI(&instr->ri, lex); return match_typeRI(lex, ast);
// Type-J // Type-J
case TOKEN_OPCODE_JMP : opcode_guard(instr->opcode, OP_JMP); case TOKEN_OPCODE_JMP : opcode_guard(OP_JMP);
return match_typeJ(&instr->j, lex); return match_typeJ(lex, ast);
case TOKEN_LABEL_DECL : case TOKEN_LABEL_DECL :
asm_warn(lex->lineno, "labels are not supported yet. ignoring."); asm_warn(lex->lineno, "labels are not supported yet. ignoring.");
break; break;
@ -158,17 +163,6 @@ static int parse_line(struct lexer* lex, struct instr *instr) {
return 0; return 0;
} }
static int gencode(FILE *fd, struct instr *instructions, int len) {
uint16_t buf; // 2-bytes (16-bit) per instruction.
for(int i = 0; i < len; i++) {
instr_encode(instructions + i, &buf);
fwrite(&buf, sizeof(buf), 1, fd);
}
}
/* /*
* Main parser function. * Main parser function.
*/ */
@ -176,23 +170,31 @@ int parse(FILE *source_fd, FILE *dest_fd) {
int rc; int rc;
struct lexer lex; struct lexer lex;
struct instr instr[256]; struct ast ast;
int n = 0;
ast_init(&ast);
lexer_init(&lex, source_fd); lexer_init(&lex, source_fd);
// Parse and build AST.
do { do {
rc = parse_line(&lex, instr + n); rc = parse_line(&lex, &ast);
if (instr[n].opcode != OP_NONE) {
n++;
if (n >= 256)
// TODO: Dynamic allocs :)
return asm_error(-1, "Oops, parser ran out of memory.");
}
} while(rc >= 0); } while(rc >= 0);
gencode(dest_fd, instr, n); // TODO: Second pass validation
// make sure all referenced labels are actually defined.
// Code generation
for(int i = 0; i < ast.instr.size; i += sizeof(struct ast_instr)) {
struct ast_instr *instr = ast.instr.base + i;
uint8_t code[2] = { 0 };
codegen_emit(instr, ast.symbols, &code);
fwrite(&code, sizeof(code), 1, dest_fd);
}
// Cleanup
ast_free(&ast);
return 0; return 0;
} }