asm: build AST in parser.

2026-06-16 03:44:55 +02:00 · 2018-12-10 10:09:31 +01:00 · 2018-12-10 10:09:31 +01:00 · 7646d63736
commit 7646d63736
parent 96220537de
2 changed files with 67 additions and 63 deletions
--- a/4
+++ b/4
@ -10,7 +10,9 @@ all: $(PROGRAMS)
 m16vm : src/vm.o src/cpu.o src/mm.o src/instr_decode.o src/syscall.o src/program.o
 	$(LD) $(LDFLAGS) -o $@ $^
-as : src/as/as.o src/as/parser.o src/as/lexer.o src/as/instr_encode.o src/as/error.o
+as : src/as/as.o src/as/parser.o src/as/lexer.o \
 	src/as/codegen.o src/as/error.o src/as/symtab.o \
 	src/as/ast.o lib/libm16.a
 	$(LD) $(LDFLAGS) -o $@ $^
 lib/libm16.a : lib/src/vector.o
--- a/src/as/parser.c
+++ b/src/as/parser.c
@ -21,7 +21,8 @@
 #include <stdio.h>
 #include <string.h>
 #include "error.h"
-#include "instr_encode.h"
+#include "codegen.h"
 #include "ast.h"
 #include "lexer.h"
 #include "parser.h"
@ -37,26 +38,30 @@ static int match_type(struct lexer* lex, enum token_type type) {
 	return lex->token.type == type ? 0 : -1;
 }
-// Same as match_type() but extracts a number from the token.
+// Same as match_type() but also generates an operand in the AST.
-static int match_type_num(struct lexer* lex, enum token_type type,
+static int match_operand(struct lexer* lex, enum token_type type, struct ast *ast) {
 			void* out, size_t size) {
 	if (match_type(lex, type) < 0)
 		return -1;
-	memcpy(out, &lex->token.value.n, size);
+	if (type == TOKEN_REG) {
 		ast_instr_operand(ast, DATATYPE_REGISTER, lex->token.value.n);
 	} else {
 		ast_instr_operand(ast, DATATYPE_NUMBER, lex->token.value.n);
 	}
 	return 0;
 }
 /*
 * Helper macros for matching tokens.
 */
-#define match_reg(pos, out) \
+#define match_reg(pos, ast) \
-	if (match_type_num(lex, TOKEN_REG, out, sizeof(uint8_t))) \
+	if (match_operand(lex, TOKEN_REG, ast) < 0) \
 		return asm_error((lex)->lineno, "Expected number at argument %i", pos)
-#define match_imm(pos, out)  \
+#define match_imm(pos, ast)  \
-	if (match_type_num(lex, TOKEN_NUMBER, out, sizeof(int16_t)) < 0) \
+	if (match_operand(lex, TOKEN_NUMBER, ast) < 0) \
 		return asm_error((lex)->lineno, "Expected number at argument %i", pos)
 #define match_arg(pos) \
@ -72,54 +77,54 @@ static int match_type_num(struct lexer* lex, enum token_type type,
 */
 // R-Type (rs : u8, r0 : u8, r1 : u8)
-static int match_typeR(struct instr_R *instr, struct lexer* lex) {
+static int match_typeR(struct lexer* lex, struct ast *ast) {
-	match_reg(1, &instr->rs); match_arg(1);
+	match_reg(1, ast); match_arg(1);
-	match_reg(2, &instr->r0); match_arg(2);
+	match_reg(2, ast); match_arg(2);
-	match_reg(3, &instr->r1);
+	match_reg(3, ast);
 	match_end;
 	return 0;
 }
 // RI-Type (rs : u8, r0 : u8, offset : s8)
-static int match_typeRI(struct instr_RI *instr, struct lexer* lex) {
+static int match_typeRI(struct lexer* lex, struct ast *ast) {
-	match_reg(1, &instr->rs); match_arg(1);
+	match_reg(1, ast); match_arg(1);
-	match_reg(2, &instr->r0); match_arg(2);
+	match_reg(2, ast); match_arg(2);
-	match_imm(3, &instr->offset);
+	match_imm(3, ast);
 	match_end;
 	return 1;
 }
 // I-Type (rs : u8, imm : s8)
-static int match_typeI(struct instr_I *instr, struct lexer* lex) {
+static int match_typeI(struct lexer* lex, struct ast *ast) {
-	match_reg(1, &instr->rs); match_arg(1);
+	match_reg(1, ast); match_arg(1);
-	match_imm(2, &instr->imm);
+	match_imm(2, ast);
 	match_end;
 	return 1;
 }
 // J-Type (rs : u8, addr : u16)
-static int match_typeJ(struct instr_J *instr, struct lexer* lex) {
+static int match_typeJ(struct lexer* lex, struct ast *ast) {
-	match_imm(1, &instr->addr);
+	match_imm(1, ast);
 	match_end;
 	return 1;
 }
-#define opcode_guard(op, v) \
+#define opcode_guard(op) \
-	if (op == OP_NONE) op = v
+	if (op_set == 0) { op_set = 1; ast_instr(ast, op); }
 /*
 * Parse a single line.
 */
-static int parse_line(struct lexer* lex, struct instr *instr) {
+static int parse_line(struct lexer* lex, struct ast *ast) {
-	instr->opcode = OP_NONE;
+	int op_set = 0;
 	if (lexer_get_next(lex) < 0)
 		return -1;
@ -128,26 +133,26 @@ static int parse_line(struct lexer* lex, struct instr *instr) {
 	switch(lex->token.type) {
 	case TOKEN_EOI: return -1;
 	case TOKEN_EOL: break;
-	case TOKEN_OPCODE_NOOP : instr->opcode = OP_NOOP;
+	case TOKEN_OPCODE_NOOP : ast_instr(ast, OP_NOOP);
 		match_end;
 		break;
 	// Type-R
-	case TOKEN_OPCODE_ADD : opcode_guard(instr->opcode, OP_ADD);
+	case TOKEN_OPCODE_ADD : opcode_guard(OP_ADD);
-		return match_typeR(&instr->r, lex);
+		return match_typeR(lex, ast);
 	// Type-I
-	case TOKEN_OPCODE_MOVL : opcode_guard(instr->opcode, OP_MOVL);
+	case TOKEN_OPCODE_MOVL : opcode_guard(OP_MOVL);
-	case TOKEN_OPCODE_MOVH : opcode_guard(instr->opcode, OP_MOVH);
+	case TOKEN_OPCODE_MOVH : opcode_guard(OP_MOVH);
-	case TOKEN_OPCODE_JR   : opcode_guard(instr->opcode, OP_JR);
+	case TOKEN_OPCODE_JR   : opcode_guard(OP_JR);
-	case TOKEN_OPCODE_INT  : opcode_guard(instr->opcode, OP_INT);
+	case TOKEN_OPCODE_INT  : opcode_guard(OP_INT);
-		return match_typeI(&instr->i, lex);
+		return match_typeI(lex, ast);
 	// Type-RI
-	case TOKEN_OPCODE_LD  : opcode_guard(instr->opcode, OP_LW);
+	case TOKEN_OPCODE_LD  : opcode_guard(OP_LW);
-	case TOKEN_OPCODE_SW  : opcode_guard(instr->opcode, OP_SW);
+	case TOKEN_OPCODE_SW  : opcode_guard(OP_SW);
-	case TOKEN_OPCODE_BEQ : opcode_guard(instr->opcode, OP_BEQ);
+	case TOKEN_OPCODE_BEQ : opcode_guard(OP_BEQ);
-		return match_typeRI(&instr->ri, lex);
+		return match_typeRI(lex, ast);
 	// Type-J
-	case TOKEN_OPCODE_JMP : opcode_guard(instr->opcode, OP_JMP);
+	case TOKEN_OPCODE_JMP : opcode_guard(OP_JMP);
-		return match_typeJ(&instr->j, lex);
+		return match_typeJ(lex, ast);
 	case TOKEN_LABEL_DECL :
 		asm_warn(lex->lineno, "labels are not supported yet. ignoring.");
 		break;
@ -158,17 +163,6 @@ static int parse_line(struct lexer* lex, struct instr *instr) {
 	return 0;
 }
 static int gencode(FILE *fd, struct instr *instructions, int len) {
 	uint16_t buf; // 2-bytes (16-bit) per instruction.
 	for(int i = 0; i < len; i++) {
 		instr_encode(instructions + i, &buf);
 		fwrite(&buf, sizeof(buf), 1, fd);
 	}
 }
 /*
 * Main parser function.
 */
@ -176,23 +170,31 @@ int parse(FILE *source_fd, FILE *dest_fd) {
 	int rc;
 	struct lexer lex;
-	struct instr instr[256];
+	struct ast ast;
 	int n = 0;
 	ast_init(&ast);
 	lexer_init(&lex, source_fd);
 	// Parse and build AST.
 	do {
-		rc = parse_line(&lex, instr + n);
+		rc = parse_line(&lex, &ast);
 		if (instr[n].opcode != OP_NONE) {
 			n++;
 			if (n >= 256)
 				// TODO: Dynamic allocs :)
 				return asm_error(-1, "Oops, parser ran out of memory.");
 		}
 	} while(rc >= 0);
-	gencode(dest_fd, instr, n);
+	// TODO: Second pass validation
 	// make sure all referenced labels are actually defined.
 	// Code generation
 	for(int i = 0; i < ast.instr.size; i += sizeof(struct ast_instr)) {
 		struct ast_instr *instr = ast.instr.base + i;
 		uint8_t code[2] = { 0 };
 		codegen_emit(instr, ast.symbols, &code);
 		fwrite(&code, sizeof(code), 1, dest_fd);
 	}
 	// Cleanup
 	ast_free(&ast);
 	return 0;
 }