asm: adding the parser

2026-06-16 03:44:55 +02:00 · 2018-11-24 19:58:13 +01:00 · 2018-11-24 19:58:13 +01:00 · 3528c467ac
commit 3528c467ac
parent 16501e40be
4 changed files with 254 additions and 17 deletions
--- a/2
+++ b/2
@ -10,7 +10,7 @@ all: $(PROGRAMS)
 m16vm : src/vm.o src/cpu.o src/mm.o src/instr_decode.o src/syscall.o src/program.o
 	$(LD) $(LDFLAGS) -o $@ $^

-as : src/as/as.o src/as/lexer.o
+as : src/as/as.o src/as/parser.o src/as/lexer.o src/as/instr_encode.o src/as/error.o
 	$(LD) $(LDFLAGS) -o $@ $^

 clean :
--- a/src/as/as.c
+++ b/src/as/as.c
@ -19,36 +19,42 @@
 */
 #include <stdio.h>
 #include <stdlib.h>
-#include "lexer.h"
+#include "parser.h"

 int usage(char *program) {

-	fprintf(stderr, "Usage: %s <string>\n", program);
+	fprintf(stderr, "Usage: %s <inputfile> [ <outputfile ]\n", program);
 	return -1;
 }

 int main(int argc, char **argv) {

-	FILE *fd;
-	struct lexer lex;
+	FILE *fd_in;
+	FILE *fd_out = stdout;

 	if (argc < 2)
 		return usage(argv[0]);

-	fd = fopen(argv[1], "r");
-	if (fd == NULL)
+	fd_in = fopen(argv[1], "r");
+	if (fd_in == NULL) {
+		perror("Could not open input file");
 		return -1;
-
-	lexer_init(&lex, fd);
-
-	do {
-		if (lexer_get_next(&lex) < 0)
-			break;
-
-		lexer_print_token(&lex.token);
-	} while(lex.token.type != TOKEN_EOI);
+	}


-	fclose(fd);
+	// If we have a output file.
+	if (argc > 2) {
+		fd_out = fopen(argv[2], "w");
+		if (fd_out == NULL) {
+			perror("Could not open output file");
+			fclose(fd_in);
+			return -1;
+		}
+	}
+
+	parse(fd_in, fd_out);
+
+	fclose(fd_in);
+	fclose(fd_out);
 	return 0;
 }
--- a/src/as/parser.c
+++ b/src/as/parser.c
@ -0,0 +1,198 @@
+/* parser.c
+ *
+ *   Copyright (C) 2012   Henrik Hautakoski <henrik@fiktivkod.org>
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *   MA 02110-1301, USA.
+ */
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include "error.h"
+#include "instr_encode.h"
+#include "lexer.h"
+#include "parser.h"
+
+/*
+ * Helper functions/macros for defining parser rules.
+ */
+
+// Fetch the next token and check its type.
+// Returns 0 if the token has the expected type, -1 if it does not or if
+// the lexer failed to produce a token.
+static int match_type(struct lexer* lex, enum token_type type) {
+
+	// A failed lexer call may leave a stale token behind; never match it.
+	if (lexer_get_next(lex) < 0)
+		return -1;
+
+	return lex->token.type == type ? 0 : -1;
+}
+
+// Like match_type(), but on a match also copies the first `size` bytes of
+// the token's numeric value (token.value.n) into `out`.
+// Returns 0 on a match, -1 otherwise (`out` is then left untouched).
+// NOTE(review): copying a byte prefix of `n` only yields its low-order
+// bytes on little-endian hosts -- confirm the type of `n` and the hosts
+// this assembler is expected to run on.
+static int match_type_num(struct lexer* lex, enum token_type type,
+			void* out, size_t size) {
+
+	int matched = match_type(lex, type) >= 0;
+
+	if (matched)
+		memcpy(out, &lex->token.value.n, size);
+
+	return matched ? 0 : -1;
+}
+
+/*
+ * Helper macros for matching tokens.
+ *
+ * Each macro expects a `struct lexer *lex` in the enclosing scope and makes
+ * the enclosing function return the result of asm_error() when the next
+ * token is not the expected one. `pos` is the 1-based argument position
+ * used in the error message.
+ *
+ * The bodies are wrapped in do { } while (0) so that every macro expands to
+ * exactly one statement and is safe inside an unbraced if/else.
+ *
+ * NOTE(review): match_imm() always copies sizeof(int16_t) bytes into `out`,
+ * while the comments on the RI/I instruction types describe those fields as
+ * s8 -- verify the field widths in instr.h.
+ */
+#define match_reg(pos, out) \
+	do { \
+		if (match_type_num(lex, TOKEN_REG, (out), sizeof(uint8_t)) < 0) \
+			return asm_error(lex->lineno, "Expected register at argument %i", (pos)); \
+	} while (0)
+
+#define match_imm(pos, out)  \
+	do { \
+		if (match_type_num(lex, TOKEN_NUMBER, (out), sizeof(int16_t)) < 0) \
+			return asm_error(lex->lineno, "Expected number at argument %i", (pos)); \
+	} while (0)
+
+#define match_arg(pos) \
+	do { \
+		if (match_type(lex, TOKEN_ARG_SEP) < 0) \
+			return asm_error(lex->lineno, "Expected separator after argument %i", (pos)); \
+	} while (0)
+
+#define match_end \
+	do { \
+		if (match_type(lex, TOKEN_EOL) < 0) \
+			return asm_error(lex->lineno, "Expected newline"); \
+	} while (0)
+
+/*
+ * Functions for matching complete instructions.
+ */
+
+// R-Type operands: rs, r0, r1 -- three registers separated by argument
+// separators, followed by the end of the line.
+// Returns 0 when the whole operand list matched; on the first mismatch the
+// match_* macros return asm_error()'s result from this function.
+static int match_typeR(struct instr_R *instr, struct lexer* lex) {
+
+	match_reg(1, &instr->rs);
+	match_arg(1);
+
+	match_reg(2, &instr->r0);
+	match_arg(2);
+
+	match_reg(3, &instr->r1);
+	match_end;
+
+	return 0;
+}
+
+// RI-Type operands: rs, r0, offset -- two registers and an immediate,
+// followed by the end of the line.
+// Returns 0 when the whole operand list matched (same convention as
+// match_typeR()); on the first mismatch the match_* macros return
+// asm_error()'s result from this function.
+// NOTE(review): the offset is documented as s8 but match_imm() stores
+// sizeof(int16_t) bytes -- confirm the width of instr_RI.offset.
+static int match_typeRI(struct instr_RI *instr, struct lexer* lex) {
+
+	match_reg(1, &instr->rs); match_arg(1);
+	match_reg(2, &instr->r0); match_arg(2);
+	match_imm(3, &instr->offset);
+	match_end;
+
+	return 0;
+}
+
+// I-Type operands: rs, imm -- one register and an immediate, followed by
+// the end of the line.
+// Returns 0 when the whole operand list matched (same convention as
+// match_typeR()); on the first mismatch the match_* macros return
+// asm_error()'s result from this function.
+// NOTE(review): the immediate is documented as s8 but match_imm() stores
+// sizeof(int16_t) bytes -- confirm the width of instr_I.imm.
+static int match_typeI(struct instr_I *instr, struct lexer* lex) {
+
+	match_reg(1, &instr->rs); match_arg(1);
+	match_imm(2, &instr->imm);
+	match_end;
+
+	return 0;
+}
+
+// J-Type operand: addr -- a single immediate, followed by the end of the
+// line.
+// Returns 0 when the operand matched (same convention as match_typeR());
+// on a mismatch the match_* macros return asm_error()'s result from this
+// function.
+static int match_typeJ(struct instr_J *instr, struct lexer* lex) {
+
+	match_imm(1, &instr->addr);
+	match_end;
+
+	return 0;
+}
+
+// Assign `v` to `op` only while `op` is still OP_NONE. Meant for switch
+// cases that fall through to a shared operand matcher: the first guard that
+// runs picks the opcode and every later guard is a no-op.
+// `op` is evaluated twice, so it must be a side-effect free lvalue.
+// Wrapped in do { } while (0) so it expands to exactly one statement.
+#define opcode_guard(op, v) \
+	do { if ((op) == OP_NONE) (op) = (v); } while (0)
+
+/*
+ * Parse one line of input into `instr`.
+ *
+ * instr->opcode is reset to OP_NONE first and is only changed when the line
+ * starts with an opcode token, so the caller can tell instruction lines from
+ * empty lines and labels.
+ *
+ * Returns -1 at end of input or when the lexer fails, the result of
+ * asm_error() on a syntax error, and otherwise the result of the operand
+ * matcher (or 0 for lines without operands).
+ */
+static int parse_line(struct lexer* lex, struct instr *instr) {
+
+	instr->opcode = OP_NONE;
+
+	if (lexer_get_next(lex) < 0)
+		return -1;
+
+	// The first token of a line selects both the opcode and the operand
+	// layout that has to follow it.
+	switch(lex->token.type) {
+	case TOKEN_EOI:
+		return -1;
+
+	case TOKEN_EOL:
+		return 0;
+
+	case TOKEN_LABEL_DECL:
+		asm_warn(lex->lineno, "labels are not supported yet. ignoring.");
+		return 0;
+
+	// No operands.
+	case TOKEN_OPCODE_NOOP:
+		instr->opcode = OP_NOOP;
+		match_end;
+		return 0;
+
+	// Type-R
+	case TOKEN_OPCODE_ADD:
+		instr->opcode = OP_ADD;
+		return match_typeR(&instr->r, lex);
+
+	// Type-I
+	case TOKEN_OPCODE_MOVL:
+		instr->opcode = OP_MOVL;
+		return match_typeI(&instr->i, lex);
+	case TOKEN_OPCODE_MOVH:
+		instr->opcode = OP_MOVH;
+		return match_typeI(&instr->i, lex);
+	case TOKEN_OPCODE_JR:
+		instr->opcode = OP_JR;
+		return match_typeI(&instr->i, lex);
+	case TOKEN_OPCODE_INT:
+		instr->opcode = OP_INT;
+		return match_typeI(&instr->i, lex);
+
+	// Type-RI
+	case TOKEN_OPCODE_LD:
+		instr->opcode = OP_LW;
+		return match_typeRI(&instr->ri, lex);
+	case TOKEN_OPCODE_SW:
+		instr->opcode = OP_SW;
+		return match_typeRI(&instr->ri, lex);
+	case TOKEN_OPCODE_BEQ:
+		instr->opcode = OP_BEQ;
+		return match_typeRI(&instr->ri, lex);
+
+	// Type-J
+	case TOKEN_OPCODE_JMP:
+		instr->opcode = OP_JMP;
+		return match_typeJ(&instr->j, lex);
+
+	default:
+		return asm_error(lex->lineno, "Opcode or label expected");
+	}
+}
+
+// Upper bound on the number of instructions in one program.
+// TODO: Dynamic allocs :)
+enum { PARSER_MAX_INSTR = 256 };
+
+/*
+ * Encode `len` instructions and write them to `fd`, one 16-bit word per
+ * instruction. The word is written with a raw fwrite() of a uint16_t, i.e.
+ * in the byte order of the host.
+ *
+ * Returns 0 on success, -1 as soon as a word could not be written.
+ */
+static int gencode(FILE *fd, struct instr *instructions, int len) {
+
+	uint16_t buf; // 2-bytes (16-bit) per instruction.
+
+	for(int i = 0; i < len; i++) {
+		instr_encode(instructions + i, &buf);
+
+		if (fwrite(&buf, sizeof(buf), 1, fd) != 1)
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Main parser function.
+ *
+ * Reads the program from `source_fd` line by line and, if the whole input
+ * was accepted, writes the encoded instructions to `dest_fd`.
+ *
+ * Returns 0 on success. On failure nothing is written and the result is
+ * either -1 or the value returned by asm_error().
+ */
+int parse(FILE *source_fd, FILE *dest_fd) {
+
+	struct lexer lex;
+	struct instr program[PARSER_MAX_INSTR];
+	struct instr cur;
+	int n = 0;
+
+	lexer_init(&lex, source_fd);
+
+	// Parse into a scratch slot so that the capacity check only fires when
+	// there really is one instruction too many, and so that a line which
+	// failed half way never ends up in the program.
+	while (parse_line(&lex, &cur) >= 0) {
+		if (cur.opcode == OP_NONE)
+			continue; // empty line or label, nothing to store.
+
+		if (n >= PARSER_MAX_INSTR)
+			return asm_error(-1, "Oops, parser ran out of memory.");
+
+		program[n++] = cur;
+	}
+
+	// parse_line() reports end of input and errors alike with a negative
+	// value. A clean end of input is the only case that leaves no opcode
+	// behind and has TOKEN_EOI as the current token; anything else is a
+	// lexer or syntax error, so no code is generated from the broken input.
+	if (cur.opcode != OP_NONE || lex.token.type != TOKEN_EOI)
+		return -1;
+
+	if (gencode(dest_fd, program, n) < 0)
+		return asm_error(-1, "Could not write output.");
+
+	return 0;
+}
--- a/src/as/parser.h
+++ b/src/as/parser.h
@ -0,0 +1,33 @@
+/* parser.h
+ *
+ *   Copyright (C) 2012   Henrik Hautakoski <henrik@fiktivkod.org>
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *   MA 02110-1301, USA.
+ */
+#ifndef ASM_PARSER_H
+#define ASM_PARSER_H
+
+#include <instr.h>
+#include <stdio.h>
+
+/*
+ * Parser bookkeeping: a pointer to instructions plus a count.
+ * NOTE(review): parser.c as added by this change does not reference this
+ * struct -- presumably it is reserved for the planned dynamic allocation of
+ * the instruction buffer; confirm or remove.
+ */
+struct parse_state {
+	struct instr *instr_ptr; /* presumably the instruction buffer -- confirm */
+	unsigned int num;        /* presumably the number of entries -- confirm */
+};
+
+/*
+ * Assemble the program read from `source_fd` and write the encoded
+ * instructions (one 16-bit word each) to `dest_fd`. Neither stream is
+ * closed; both remain owned by the caller.
+ *
+ * Returns 0 on success, otherwise a failure code such as the value returned
+ * by asm_error() (presumably negative -- confirm in error.h).
+ */
+int parse(FILE *source_fd, FILE *dest_fd);
+
+#endif /* ASM_PARSER_H */