From 377c008ebeb5894586397eab9ca7e059bb735b67 Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Sat, 10 Nov 2018 12:05:10 +0100
Subject: [PATCH 01/32] asm: lexer implementation.

---
 .gitignore     |   1 +
 Makefile       |  14 +++-
 src/as/as.c    |  54 ++++++++++++
 src/as/lexer.c | 221 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/as/lexer.h |  95 +++++++++++++++++++++
 5 files changed, 381 insertions(+), 4 deletions(-)
 create mode 100644 src/as/as.c
 create mode 100644 src/as/lexer.c
 create mode 100644 src/as/lexer.h
diff --git a/.gitignore b/.gitignore
index 563e7eb..b979155 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 *.o
 m16vm
+/as
diff --git a/Makefile b/Makefile
index 422bd6f..45518e7 100644
--- a/Makefile
+++ b/Makefile
@@ -3,13 +3,19 @@ CC = gcc
 CFLAGS = -Iinclude -DMEM_SIZE=32 -DM16_DEBUG_MEM
 LD = $(CC)
 
-VM = m16vm
+PROGRAMS = m16vm as
 
-$(VM) : src/vm.o src/cpu.o src/mm.o src/instr_decode.o src/syscall.o src/program.o
-	$(LD) $(LDFLAGS)-o $@ $^
+all: $(PROGRAMS)
+
+m16vm : src/vm.o src/cpu.o src/mm.o src/instr_decode.o src/syscall.o src/program.o
+	$(LD) $(LDFLAGS) -o $@ $^
+
+as : src/as/as.o src/as/lexer.o
+	$(LD) $(LDFLAGS) -o $@ $^
 
 clean :
 	$(RM) src/*.o
+	$(RM) src/as/*.o
 
 distclean : clean
-	$(RM) $(VM)
+	$(RM) $(PROGRAMS)
diff --git a/src/as/as.c b/src/as/as.c
new file mode 100644
index 0000000..3e2052e
--- /dev/null
+++ b/src/as/as.c
@@ -0,0 +1,54 @@
+/* as.c
+ *
+ *   Copyright (C) 2012   Henrik Hautakoski <henrik@fiktivkod.org>
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *   MA 02110-1301, USA.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include "lexer.h"
+
+int usage(char *program) {
+
+	fprintf(stderr, "Usage: %s <string>\n", program);
+	return -1;
+}
+
+int main(int argc, char **argv) {
+
+	FILE *fd;
+	struct lexer lex;
+
+	if (argc < 2)
+		return usage(argv[0]);
+
+	fd = fopen(argv[1], "r");
+	if (fd == NULL)
+		return -1;
+
+	lexer_init(&lex, fd);
+
+	do {
+		if (lexer_get_next(&lex) < 0)
+			break;
+
+		lexer_print_token(&lex.token);
+	} while(lex.token.type != TOKEN_EOI);
+
+
+	fclose(fd);
+	return 0;
+}
diff --git a/src/as/lexer.c b/src/as/lexer.c
new file mode 100644
index 0000000..7a1f7aa
--- /dev/null
+++ b/src/as/lexer.c
@@ -0,0 +1,221 @@
+
+#include <stdio.h>
+#include <string.h>
+#include "lexer.h"
+
+/**
+ * macros for the grammar.
+ */
+
+// Numbers is defined as [0-9]
+#define number(x) ((x) >= '0' && (x) <= '9')
+
+// The first digit can however also contain '-'
+#define first_number(x) (number(x) || (x) == '-' )
+
+// First character in strings can be [a-z][A-Z] or '_'
+#define first_string(x)			\
+	(  ((x) >= 'a' && (x) <= 'z')   \
+	|| ((x) >= 'A' && (x) <= 'Z')	\
+	||  (x) == '_'			)
+
+// All characters after can also include numbers or ':'
+#define string(x) \
+	(first_string(x) || number(x))
+
+#define space(x) ((x) == ' ' || (x) == '\t' || (x) == '\r')
+
+
+/**
+ * Helper functions
+ */
+
+static int read_next(struct lexer *lex) {
+
+ 	int c, comment = 0;
+
+ 	while((c = fgetc(lex->fp)) != EOF) {
+
+ 		if (c == '\n')
+			break;
+
+ 		if (comment)
+ 			continue;
+
+ 		if (c == ';') {
+ 			comment = 1;
+ 		} else if (!space(c)) {
+ 			break;
+ 		}
+ 	}
+ 	return c;
+ }
+
+static int read_number(FILE *fp) {
+
+	int c, neg = 0, val = 0;
+	while((c = fgetc(fp)) != EOF) {
+		if (neg == 0 && c == '-') {
+			neg = 1;
+			continue;
+		}
+		if (!number(c)) {
+			ungetc(c, fp);
+			break;
+		}
+		val = (val * 10) + (c - '0');
+	}
+
+	if (neg)
+		return -1 * val;
+	return val;
+}
+
+static int read_string(FILE *fp) {
+
+	int c, label_decl = 0, i = 0;
+	char buf[64];
+
+	while((c = fgetc(fp)) != EOF && i < 64) {
+
+		if (string(c)) {
+			buf[i++] = c;
+		} else {
+			if (c == ':') {
+				label_decl = 1;
+			} else {
+				ungetc(c, fp);
+			}
+			break;
+		}
+	}
+	buf[i] = '\0';
+
+	if (label_decl) {
+		return TOKEN_LABEL_DECL;
+	} else if (!strcmp("noop", buf)) {
+		return TOKEN_OPCODE_NOOP;
+	} else if (!strcmp("add", buf)) {
+		return TOKEN_OPCODE_ADD;
+	} else if (!strcmp("movl", buf)) {
+		return TOKEN_OPCODE_MOVL;
+	} else if (!strcmp("movh", buf)) {
+		return TOKEN_OPCODE_MOVH;
+	} else if (!strcmp("ld", buf)) {
+		return TOKEN_OPCODE_LD;
+	} else if (!strcmp("sw", buf)) {
+		return TOKEN_OPCODE_SW;
+	} else if (!strcmp("beq", buf)) {
+		return TOKEN_OPCODE_BEQ;
+	} else if (!strcmp("jmp", buf)) {
+		return TOKEN_OPCODE_JMP;
+	} else if (!strcmp("jr", buf)) {
+		return TOKEN_OPCODE_JR;
+	} else if (!strcmp("int", buf)) {
+		return TOKEN_OPCODE_INT;
+	}
+	return TOKEN_LABEL;
+}
+
+/**
+ * Exposed functions
+ */
+
+void lexer_init(struct lexer *lex, FILE *fp) {
+
+	lex->lineno = 1;
+	lex->fp = fp;
+	lex->token.type = TOKEN_EOI;
+}
+
+int lexer_get_next(struct lexer *lex) {
+
+	uint16_t num;
+	int ch = read_next(lex);
+
+	if (lex->token.type == TOKEN_EOL)
+		lex->lineno++;
+
+	switch(ch) {
+	case EOF : lex->token.type = TOKEN_EOI;
+		break;
+	case '\n' :
+		lex->token.type = TOKEN_EOL;
+		break;
+	case ',' : lex->token.type = TOKEN_ARG_SEP;
+		break;
+	case '$' :
+		lex->token.type = TOKEN_REG;
+		num = read_number(lex->fp);
+		// Registers is 8-bit only.
+		if (num > 0xF) {
+			fprintf(stderr, "ERROR: Invalid register value '%i' on line: %i\n", num, lex->lineno);
+			return -1;
+		}
+		lex->token.value.n = num;
+		break;
+	default:
+		if (first_number(ch)) {
+			ungetc(ch, lex->fp);
+			lex->token.type = TOKEN_NUMBER;
+			lex->token.value.n = read_number(lex->fp);
+		} else if (first_string(ch)) {
+			ungetc(ch, lex->fp);
+			lex->token.type = read_string(lex->fp);
+		} else {
+			fprintf(stderr, "ERROR: Invalid character '%c' on line: %i\n", ch, lex->lineno);
+			return -1;
+		}
+	}
+
+	lex->token.lineno = lex->lineno;
+	return 0;
+}
+
+void lexer_print_token(struct token *token) {
+
+	static int lineno = 0;
+
+	if (token->lineno != lineno) {
+		lineno = token->lineno;
+		printf("\n%i: ", lineno);
+	}
+
+	switch(token->type) {
+	case TOKEN_OPCODE_NOOP : printf(" [OP NOOP] ");
+		break;
+	case TOKEN_OPCODE_ADD : printf(" [OP ADD] ");
+		break;
+	case TOKEN_OPCODE_MOVL : printf(" [OP MOVL] ");
+		break;
+	case TOKEN_OPCODE_MOVH : printf(" [OP MOVH] ");
+		break;
+	case TOKEN_OPCODE_LD : printf(" [OP LD] ");
+		break;
+	case TOKEN_OPCODE_SW : printf(" [OP SW] ");
+		break;
+	case TOKEN_OPCODE_BEQ : printf(" [OP BEQ] ");
+		break;
+	case TOKEN_OPCODE_JMP : printf(" [OP JMP] ");
+		break;
+	case TOKEN_OPCODE_JR : printf(" [OP JR] ");
+		break;
+	case TOKEN_OPCODE_INT : printf(" [OP INT] ");
+		break;
+	case TOKEN_LABEL : printf(" [LABEL] ");
+		break;
+	case TOKEN_LABEL_DECL : printf(" [LABEL DECL] ");
+		break;
+	case TOKEN_REG : printf(" [REG %i] ", token->value.n);
+		break;
+	case TOKEN_ARG_SEP : printf(" [SEP] ");
+		break;
+	case TOKEN_NUMBER : printf(" [NUM %i] ", token->value.n);
+		break;
+	case TOKEN_EOI : printf(" [EOI] ");
+		break;
+	case TOKEN_EOL : printf(" [EOL] ");
+		break;
+	default: printf(" [U] ");
+	}
+}
diff --git a/src/as/lexer.h b/src/as/lexer.h
new file mode 100644
index 0000000..e2a5319
--- /dev/null
+++ b/src/as/lexer.h
@@ -0,0 +1,95 @@
+/* lexer.h
+ *
+ *   Copyright (C) 2018   Henrik Hautakoski <henrik@fiktivkod.org>
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *   MA 02110-1301, USA.
+ */
+#ifndef ASM_LEXER_H
+#define ASM_LEXER_H
+
+#include <stdlib.h>
+#include <stdint.h>
+
+/**
+ * All token types.
+ */
+enum token_type {
+	TOKEN_EOI = -1,
+	TOKEN_EOL,		// Newline
+	TOKEN_OPCODE_NOOP,
+	TOKEN_OPCODE_ADD,
+	TOKEN_OPCODE_MOVL,
+	TOKEN_OPCODE_MOVH,
+	TOKEN_OPCODE_LD,
+	TOKEN_OPCODE_SW,
+	TOKEN_OPCODE_BEQ,
+	TOKEN_OPCODE_JMP,
+	TOKEN_OPCODE_JR,
+	TOKEN_OPCODE_INT,
+	TOKEN_LABEL,
+	TOKEN_LABEL_DECL,
+	TOKEN_REG,
+	TOKEN_NUMBER,
+	TOKEN_ARG_SEP
+};
+
+/**
+ * Token structure.
+ *
+ * Holds information about a single token.
+ */
+struct token {
+	// Line number where the token was extracted from.
+	uint16_t 	lineno;
+
+	enum token_type	type;
+
+	/*
+	 * Token value, depending on type
+	 * this can be a string or unsigned short
+	 */
+	union {
+		uint16_t n;
+		char	 s[32];
+	} value;
+};
+
+/**
+ * Lexer state
+ */
+struct lexer {
+	uint16_t 	lineno;		// current line number
+	FILE *		fp;		// File being lexed.
+	struct token 	token;		// Current token
+};
+
+/**
+ * Initialize the lexer with a file pointer to the file
+ * that should be lexed.
+ */
+void lexer_init(struct lexer *lex, FILE *fp);
+
+/**
+ * Advance the lexer to the next token.
+ */
+int lexer_get_next(struct lexer *lex);
+
+/**
+ * For debugging, prints the token to standard output.
+ */
+void lexer_print_token(struct token *token);
+
+#endif /* ASM_LEXER_H */

From d530dfda63ced48402e58849450fa0b04263ae61 Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Sat, 24 Nov 2018 17:45:20 +0100
Subject: [PATCH 02/32] include/instr.h: adding a special OP_NONE opcode.

---
 include/instr.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/instr.h b/include/instr.h
index 4f776e5..4dfe36c 100644
--- a/include/instr.h
+++ b/include/instr.h
@@ -33,6 +33,7 @@
 #define OP_JMP  	7
 #define OP_JR   	8
 #define OP_INT 		15
+#define OP_NONE		0xFF
 
 /* Register type */
 struct instr_R {

From 95f2e38c2d54c5be37c54386831138e9eded144f Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Sat, 24 Nov 2018 19:57:10 +0100
Subject: [PATCH 03/32] asm: adding error module

---
 src/as/error.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 src/as/error.h | 27 +++++++++++++++++++++++++++
 2 files changed, 73 insertions(+)
 create mode 100644 src/as/error.c
 create mode 100644 src/as/error.h

diff --git a/src/as/error.c b/src/as/error.c
new file mode 100644
index 0000000..704658b
--- /dev/null
+++ b/src/as/error.c
@@ -0,0 +1,46 @@
+/* error.c
+ *
+ *   Copyright (C) 2018   Henrik Hautakoski <henrik.hautakoski@gmail.com>
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *   MA 02110-1301, USA.
+ */
+#include <stdio.h>
+#include <stdarg.h>
+#include "error.h"
+
+void asm_warn(int lineno, const char *fmt, ...) {
+
+	va_list vl;
+
+	fprintf(stderr, "Line %i: Warning: ", lineno);
+	va_start(vl, fmt);
+	vfprintf(stderr, fmt, vl);
+	va_end(vl);
+	fprintf(stderr, "\n");
+}
+
+int asm_error(int lineno, const char *fmt, ...) {
+
+	va_list vl;
+
+	fprintf(stderr, "Line %i: Error: ", lineno);
+	va_start(vl, fmt);
+	vfprintf(stderr, fmt, vl);
+	va_end(vl);
+	fprintf(stderr, "\n");
+
+	return -1;
+}
diff --git a/src/as/error.h b/src/as/error.h
new file mode 100644
index 0000000..70ebb71
--- /dev/null
+++ b/src/as/error.h
@@ -0,0 +1,27 @@
+/* error.h
+ *
+ *   Copyright (C) 2018   Henrik Hautakoski <henrik.hautakoski@gmail.com>
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *   MA 02110-1301, USA.
+ */
+#ifndef ASM_ERROR_H
+#define ASM_ERROR_H
+
+void asm_warn(int lineno, const char *fmt, ...);
+
+int asm_error(int lineno, const char *fmt, ...);
+
+#endif /* ASM_ERROR_H */

From 16501e40befcc136cd5e002878c1260ff0aa7efb Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Sat, 24 Nov 2018 19:57:36 +0100
Subject: [PATCH 04/32] asm: adding instr_encode module

---
 src/as/instr_encode.c | 50 +++++++++++++++++++++++++++++++++++++++++++
 src/as/instr_encode.h | 27 +++++++++++++++++++++++
 2 files changed, 77 insertions(+)
 create mode 100644 src/as/instr_encode.c
 create mode 100644 src/as/instr_encode.h

diff --git a/src/as/instr_encode.c b/src/as/instr_encode.c
new file mode 100644
index 0000000..1478928
--- /dev/null
+++ b/src/as/instr_encode.c
@@ -0,0 +1,50 @@
+/* instr_encode.c
+ *
+ *   Copyright (C) 2018   Henrik Hautakoski <henrik@fiktivkod.org>
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *   MA 02110-1301, USA.
+ */
+#include <stdio.h>
+#include <string.h>
+#include "instr_encode.h"
+
+void instr_encode(struct instr* instr, uint16_t *out) {
+
+	uint8_t *p = (uint8_t *) out;
+
+	*out = 0;
+	*p = instr->opcode << 4;
+
+	if (instr->opcode == OP_NOOP) {
+		// Do nothing.
+	} else if (instr->opcode == OP_JMP) {
+		*p |= (instr->j.addr >> 8);
+		*(p+1) = instr->j.addr;
+	} else {
+		*p |= instr->r.rs;
+
+		// I-Type
+		if (instr->opcode == OP_MOVL || instr->opcode == OP_MOVH
+			|| instr->opcode == OP_JR || instr->opcode == OP_INT) {
+
+			*(p+1) = instr->i.imm;
+		}
+		// R/RI-Type
+		else {
+			*(p+1) = (instr->r.r0 << 4) | instr->r.r1;
+		}
+	}
+}
diff --git a/src/as/instr_encode.h b/src/as/instr_encode.h
new file mode 100644
index 0000000..0f01304
--- /dev/null
+++ b/src/as/instr_encode.h
@@ -0,0 +1,27 @@
+/* instr_encode.h
+ *
+ *   Copyright (C) 2018   Henrik Hautakoski <henrik.hautakoski@gmail.com>
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *   MA 02110-1301, USA.
+ */
+#ifndef ASM_INSTR_ENCODE_H
+#define ASM_INSTR_ENCODE_H
+
+#include <instr.h>
+
+void instr_encode(struct instr* instr, uint16_t *out);
+
+#endif /* ASM_INSTR_ENCODE_H */

From 3528c467ac8f81c23cd3f8944fccc40b9d25a0ee Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Sat, 24 Nov 2018 19:58:13 +0100
Subject: [PATCH 05/32] asm: adding the parser

---
 Makefile        |   2 +-
 src/as/as.c     |  38 ++++++----
 src/as/parser.c | 198 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/as/parser.h |  33 ++++++++
 4 files changed, 254 insertions(+), 17 deletions(-)
 create mode 100644 src/as/parser.c
 create mode 100644 src/as/parser.h

diff --git a/Makefile b/Makefile
index 45518e7..6fe2b9c 100644
--- a/Makefile
+++ b/Makefile
@@ -10,7 +10,7 @@ all: $(PROGRAMS)
 m16vm : src/vm.o src/cpu.o src/mm.o src/instr_decode.o src/syscall.o src/program.o
 	$(LD) $(LDFLAGS) -o $@ $^
 
-as : src/as/as.o src/as/lexer.o
+as : src/as/as.o src/as/parser.o src/as/lexer.o src/as/instr_encode.o src/as/error.o
 	$(LD) $(LDFLAGS) -o $@ $^
 
 clean :
diff --git a/src/as/as.c b/src/as/as.c
index 3e2052e..c7aa24c 100644
--- a/src/as/as.c
+++ b/src/as/as.c
@@ -19,36 +19,42 @@
  */
 #include <stdio.h>
 #include <stdlib.h>
-#include "lexer.h"
+#include "parser.h"
 
 int usage(char *program) {
 
-	fprintf(stderr, "Usage: %s <string>\n", program);
+	fprintf(stderr, "Usage: %s <inputfile> [ <outputfile ]\n", program);
 	return -1;
 }
 
 int main(int argc, char **argv) {
 
-	FILE *fd;
-	struct lexer lex;
+	FILE *fd_in;
+	FILE *fd_out = stdout;
 
 	if (argc < 2)
 		return usage(argv[0]);
 
-	fd = fopen(argv[1], "r");
-	if (fd == NULL)
+	fd_in = fopen(argv[1], "r");
+	if (fd_in == NULL) {
+		perror("Could not open input file");
 		return -1;
-
-	lexer_init(&lex, fd);
-
-	do {
-		if (lexer_get_next(&lex) < 0)
-			break;
-
-		lexer_print_token(&lex.token);
-	} while(lex.token.type != TOKEN_EOI);
+	}
 
 
-	fclose(fd);
+	// If we have a output file.
+	if (argc > 2) {
+		fd_out = fopen(argv[2], "w");
+		if (fd_out == NULL) {
+			perror("Could not open output file");
+			fclose(fd_in);
+			return -1;
+		}
+	}
+
+	parse(fd_in, fd_out);
+
+	fclose(fd_in);
+	fclose(fd_out);
 	return 0;
 }
diff --git a/src/as/parser.c b/src/as/parser.c
new file mode 100644
index 0000000..7b95283
--- /dev/null
+++ b/src/as/parser.c
@@ -0,0 +1,198 @@
+/* parser.c
+ *
+ *   Copyright (C) 2012   Henrik Hautakoski <henrik@fiktivkod.org>
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *   MA 02110-1301, USA.
+ */
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include "error.h"
+#include "instr_encode.h"
+#include "lexer.h"
+#include "parser.h"
+
+/*
+ * Helper functions/macros for defining parser rules.
+ */
+
+// match the next token.
+// returns 0 if the token was of the correct type. -1 otherwise
+static int match_type(struct lexer* lex, enum token_type type) {
+
+	lexer_get_next(lex);
+	return lex->token.type == type ? 0 : -1;
+}
+
+// Same as match_type() but extracts a number from the token.
+static int match_type_num(struct lexer* lex, enum token_type type,
+			void* out, size_t size) {
+
+	if (match_type(lex, type) < 0)
+		return -1;
+
+	memcpy(out, &lex->token.value.n, size);
+	return 0;
+}
+
+/*
+ * Helper macros for matching tokens.
+ */
+#define match_reg(pos, out) \
+	if (match_type_num(lex, TOKEN_REG, out, sizeof(uint8_t))) \
+		return asm_error((lex)->lineno, "Expected number at argument %i", pos)
+
+#define match_imm(pos, out)  \
+	if (match_type_num(lex, TOKEN_NUMBER, out, sizeof(int16_t)) < 0) \
+		return asm_error((lex)->lineno, "Expected number at argument %i", pos)
+
+#define match_arg(pos) \
+	if (match_type(lex, TOKEN_ARG_SEP) < 0) \
+		return asm_error((lex)->lineno, "Expected separator after argument %i", pos)
+
+#define match_end \
+	if (match_type(lex, TOKEN_EOL) < 0) \
+		return asm_error(lex->lineno, "Expected newline")
+
+/*
+ * Functions for matching complete instructions.
+ */
+
+// R-Type (rs : u8, r0 : u8, r1 : u8)
+static int match_typeR(struct instr_R *instr, struct lexer* lex) {
+
+	match_reg(1, &instr->rs); match_arg(1);
+	match_reg(2, &instr->r0); match_arg(2);
+	match_reg(3, &instr->r1);
+	match_end;
+
+	return 0;
+}
+
+// RI-Type (rs : u8, r0 : u8, offset : s8)
+static int match_typeRI(struct instr_RI *instr, struct lexer* lex) {
+
+	match_reg(1, &instr->rs); match_arg(1);
+	match_reg(2, &instr->r0); match_arg(2);
+	match_imm(3, &instr->offset);
+	match_end;
+
+	return 1;
+}
+
+// I-Type (rs : u8, imm : s8)
+static int match_typeI(struct instr_I *instr, struct lexer* lex) {
+
+	match_reg(1, &instr->rs); match_arg(1);
+	match_imm(2, &instr->imm);
+	match_end;
+
+	return 1;
+}
+
+// J-Type (rs : u8, addr : u16)
+static int match_typeJ(struct instr_J *instr, struct lexer* lex) {
+
+	match_imm(1, &instr->addr);
+	match_end;
+	return 1;
+}
+
+#define opcode_guard(op, v) \
+	if (op == OP_NONE) op = v
+
+/*
+ * Parse a single line.
+ */
+static int parse_line(struct lexer* lex, struct instr *instr) {
+
+	instr->opcode = OP_NONE;
+
+	if (lexer_get_next(lex) < 0)
+		return -1;
+
+	// Opcode should come first.
+	switch(lex->token.type) {
+	case TOKEN_EOI: return -1;
+	case TOKEN_EOL: break;
+	case TOKEN_OPCODE_NOOP : instr->opcode = OP_NOOP;
+		match_end;
+		break;
+	// Type-R
+	case TOKEN_OPCODE_ADD : opcode_guard(instr->opcode, OP_ADD);
+		return match_typeR(&instr->r, lex);
+	// Type-I
+	case TOKEN_OPCODE_MOVL : opcode_guard(instr->opcode, OP_MOVL);
+	case TOKEN_OPCODE_MOVH : opcode_guard(instr->opcode, OP_MOVH);
+	case TOKEN_OPCODE_JR   : opcode_guard(instr->opcode, OP_JR);
+	case TOKEN_OPCODE_INT  : opcode_guard(instr->opcode, OP_INT);
+		return match_typeI(&instr->i, lex);
+	// Type-RI
+	case TOKEN_OPCODE_LD  : opcode_guard(instr->opcode, OP_LW);
+	case TOKEN_OPCODE_SW  : opcode_guard(instr->opcode, OP_SW);
+	case TOKEN_OPCODE_BEQ : opcode_guard(instr->opcode, OP_BEQ);
+		return match_typeRI(&instr->ri, lex);
+	// Type-J
+	case TOKEN_OPCODE_JMP : opcode_guard(instr->opcode, OP_JMP);
+		return match_typeJ(&instr->j, lex);
+	case TOKEN_LABEL_DECL :
+		asm_warn(lex->lineno, "labels are not supported yet. ignoring.");
+		break;
+	default:
+		return asm_error(lex->lineno, "Opcode or label expected");
+	}
+
+	return 0;
+}
+
+static int gencode(FILE *fd, struct instr *instructions, int len) {
+
+	uint16_t buf; // 2-bytes (16-bit) per instruction.
+
+	for(int i = 0; i < len; i++) {
+		instr_encode(instructions + i, &buf);
+
+		fwrite(&buf, sizeof(buf), 1, fd);
+	}
+}
+
+/*
+ * Main parser function.
+ */
+int parse(FILE *source_fd, FILE *dest_fd) {
+
+	int rc;
+	struct lexer lex;
+	struct instr instr[256];
+	int n = 0;
+
+	lexer_init(&lex, source_fd);
+
+	do {
+		rc = parse_line(&lex, instr + n);
+
+		if (instr[n].opcode != OP_NONE) {
+			n++;
+			if (n >= 256)
+				// TODO: Dynamic allocs :)
+				return asm_error(-1, "Oops, parser ran out of memory.");
+		}
+	} while(rc >= 0);
+
+	gencode(dest_fd, instr, n);
+
+	return 0;
+}
diff --git a/src/as/parser.h b/src/as/parser.h
new file mode 100644
index 0000000..29f2fe3
--- /dev/null
+++ b/src/as/parser.h
@@ -0,0 +1,33 @@
+/* parser.h
+ *
+ *   Copyright (C) 2012   Henrik Hautakoski <henrik@fiktivkod.org>
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *   MA 02110-1301, USA.
+ */
+#ifndef ASM_PARSER_H
+#define ASM_PARSER_H
+
+#include <instr.h>
+#include <stdio.h>
+
+struct parse_state {
+	struct instr *instr_ptr;
+	unsigned int num;
+};
+
+int parse(FILE *source_fd, FILE *dest_fd);
+
+#endif /* ASM_PARSER_H */

From 0f41fc0870e4e9c5e366a71184cdb04afd282927 Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Sat, 24 Nov 2018 20:01:40 +0100
Subject: [PATCH 06/32] asm: adding example files

---
 asm/hello_world.as | 33 +++++++++++++++++++++++++++++++++
 asm/test.as        | 15 +++++++++++++++
 2 files changed, 48 insertions(+)
 create mode 100644 asm/hello_world.as
 create mode 100644 asm/test.as

diff --git a/asm/hello_world.as b/asm/hello_world.as
new file mode 100644
index 0000000..dc101c1
--- /dev/null
+++ b/asm/hello_world.as
@@ -0,0 +1,33 @@
+
+; -- Store "Hello World" string in memory.
+movl 	$1, 72 ; H
+sw	$0, $1, 0
+movl 	$1, 69 ; E
+sw	$0, $1, 1
+movl 	$1, 76 ; L
+sw	$0, $1, 2
+sw	$0, $1, 3
+movl 	$1, 79 ; O
+sw	$0, $1, 4
+movl 	$1, 32 ; Space
+sw	$0, $1, 5
+movl 	$1, 87 ; W
+sw	$0, $1, 6
+movl 	$1, 79 ; O
+sw	$0, $1, 7
+movl 	$1, 82 ; R
+sw	$0, $1, 8
+movl 	$1, 76 ; L
+sw	$0, $1, 9
+movl 	$1, 68 ; D
+sw	$0, $1, 10
+
+; -- setup print loop.
+movl 	$1, 1		; Load 1 in R1 (used for increment the counter)
+_start:
+ld	$15, $0, 0	; Load memory address stored in R0 into R15
+int     $10, 2		; Print character
+add 	$0, $0, $1	; Add 1 (R1) to counter (R0)
+beq	$15, $2, 1	; Branch to "noop" (skipping next instruction) if R15 = 0 (R2 holds 0)
+jmp     22		; jump back to "_start" label (not implemented atm)
+noop
diff --git a/asm/test.as b/asm/test.as
new file mode 100644
index 0000000..0ff196c
--- /dev/null
+++ b/asm/test.as
@@ -0,0 +1,15 @@
+; NOTE: This is just to check the syntax.
+; No logic behind any of the instructions (will prob crash)
+movl 	$0, 3
+movl 	$5, 6500
+; Comment
+noop    ; Comment
+start0: add  	$3, $0, $1
+sw	$2, $3, $2
+ld	$5, $0, $3
+beq	$0, $1, 1
+beq	$0, $1, -25
+beq	$0, $1, -4000
+jr	$5, 0
+int 	$0,    1
+jmp	5

From adbe0006f50c1f4a5c1d7444ab44baf29ee64b1e Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Wed, 28 Nov 2018 19:41:09 +0100
Subject: [PATCH 07/32] asm: adding symbol table datastructure.

---
 src/as/symtab.c | 121 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/as/symtab.h |  37 +++++++++++++++
 2 files changed, 158 insertions(+)
 create mode 100644 src/as/symtab.c
 create mode 100644 src/as/symtab.h

diff --git a/src/as/symtab.c b/src/as/symtab.c
new file mode 100644
index 0000000..9ac9867
--- /dev/null
+++ b/src/as/symtab.c
@@ -0,0 +1,121 @@
+/* symtab.c
+ *
+ *   Copyright (C) 2018   Henrik Hautakoski <henrik.hautakoski@gmail.com>
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *   MA 02110-1301, USA.
+ */
+#include <stdlib.h>
+#include <string.h>
+#include "symtab.h"
+
+// TODO: Right now, the symtab is implemented as a simple dynamic growing array.
+// This is fine for now, but for lookup speed. A hashtable is more efficient.
+
+// How much memory that should be allocated each time.
+// Note: this should be defined as number of `struct symbol` objects, not bytes.
+#define BLOCK_SIZE 32
+
+// Helper macro to calculate number of bytes the table need.
+#define __M_SIZE(t) \
+	((t)->size * sizeof(struct symbol))
+
+struct symbol {
+	// The label associated with the location.
+	char label[SYMTAB_LABEL_MAXLEN];
+
+	// Location (program adress)
+	uint16_t loc;
+};
+
+// Symbol table structure.
+struct symtab {
+	// Array of symbols.
+	struct symbol *data;
+
+	// Number of symbols in the array.
+	size_t num;
+
+	// Total number of symbols the array can hold.
+	size_t size;
+};
+
+symtab_t* symtab_init() {
+
+	struct symtab *table = malloc(sizeof(struct symtab));
+
+	table->num = 0;
+	table->size = BLOCK_SIZE;
+	table->data = malloc(__M_SIZE(table));
+
+	return table;
+}
+
+void symtab_free(symtab_t *table) {
+
+	if (!table)
+		return;
+
+	if (table->data)
+		free(table->data);
+
+	memset(table, 0, sizeof(struct symtab));
+	free(table);
+}
+
+int symtab_set(symtab_t *table, const char *label, uint16_t loc) {
+
+	// Check if it exist first.
+	int index = symtab_get(table, label, NULL);
+
+	// Entry did not exists. Insert
+	if (index < 0) {
+		struct symbol *sym;
+
+		// We have one more.
+		table->num += 1;
+
+		// Make sure we resize the memory.
+		if (table->num > table->size) {
+			table->size += BLOCK_SIZE;
+			table->data = realloc(table->data, __M_SIZE(table));
+		}
+
+		// insert the symbol at the end.
+		sym = table->data + (table->num - 1);
+		strncpy(sym->label, label, SYMTAB_LABEL_MAXLEN);
+		sym->loc = loc;
+
+		return 0;
+	}
+
+	// Could not insert. return as error.
+	return -1;
+}
+
+int symtab_get(symtab_t *table, const char *label, uint16_t *loc) {
+
+	// Linear search here for simplicity.
+	for(size_t i = 0; i < table->num; i++) {
+		struct symbol *sym = table->data + i;
+
+		if (!strncmp(sym->label, label, SYMTAB_LABEL_MAXLEN)) {
+
+			if (loc) *loc = sym->loc;
+			return i;
+		}
+	}
+	return -1;
+}
diff --git a/src/as/symtab.h b/src/as/symtab.h
new file mode 100644
index 0000000..cd1cd68
--- /dev/null
+++ b/src/as/symtab.h
@@ -0,0 +1,37 @@
+/* symtab.h
+ *
+ *   Copyright (C) 2018   Henrik Hautakoski <henrik.hautakoski@gmail.com>
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *   MA 02110-1301, USA.
+ */
+#ifndef ASM_SYMTAB_H
+#define ASM_SYMTAB_H
+
+#include <stdint.h>
+
+#define SYMTAB_LABEL_MAXLEN 32
+
+typedef struct symtab symtab_t;
+
+symtab_t* symtab_init();
+
+void symtab_free(symtab_t *table);
+
+int symtab_set(symtab_t *table, const char *label, uint16_t loc);
+
+int symtab_get(symtab_t *table, const char *label, uint16_t *loc);
+
+#endif /* ASM_SYMTAB_H */

From a305c34b3946d460ad27e27b8b775c054c8cd541 Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Sun, 9 Dec 2018 18:31:35 +0100
Subject: [PATCH 08/32] lib: adding vector module

---
 lib/include/vector.h | 41 +++++++++++++++++++++++++++++++++
 lib/src/vector.c     | 54 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 95 insertions(+)
 create mode 100644 lib/include/vector.h
 create mode 100644 lib/src/vector.c

diff --git a/lib/include/vector.h b/lib/include/vector.h
new file mode 100644
index 0000000..cf2d2b9
--- /dev/null
+++ b/lib/include/vector.h
@@ -0,0 +1,41 @@
+/* vector.h
+ *
+ *   Copyright (C) 2018   Henrik Hautakoski <henrik@fiktivkod.org>
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *   MA 02110-1301, USA.
+ */
+#ifndef VECTOR_H
+#define VECTOR_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+struct vector  {
+	size_t blk_sz;
+	size_t alloc;
+	size_t size;
+	void  *base;
+};
+
+#define VECTOR_INIT(block_size) { block_size, 0, 0, NULL }
+
+void vector_init(struct vector* v, size_t block_size);
+
+void vector_destory(struct vector* v);
+
+void vector_append(struct vector* v, const void *ptr, size_t n);
+
+#endif /* VECTOR_H */
diff --git a/lib/src/vector.c b/lib/src/vector.c
new file mode 100644
index 0000000..b49e020
--- /dev/null
+++ b/lib/src/vector.c
@@ -0,0 +1,54 @@
+/* vector.c
+ *
+ *   Copyright (C) 2018   Henrik Hautakoski <henrik@fiktivkod.org>
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *   MA 02110-1301, USA.
+ */
+#include <string.h>
+#include <stdlib.h>
+#include <vector.h>
+
+static void ensure_size(struct vector* v, size_t n) {
+
+	size_t old_alloc = v->alloc;
+	while(v->alloc < v->size + n)
+		v->alloc += v->blk_sz;
+
+	if (old_alloc != v->alloc)
+		v->base = realloc(v->base, v->alloc);
+}
+
+void vector_init(struct vector* v, size_t block_size) {
+
+	v->blk_sz = block_size > 0 ? block_size : 64;
+	v->alloc  = 0;
+	v->size   = 0;
+	v->base   = NULL;
+}
+
+void vector_destory(struct vector *v) {
+
+	if (v->base)
+		free(v->base);
+}
+
+void vector_append(struct vector *v, const void *ptr, size_t n) {
+
+	ensure_size(v, n);
+
+	memcpy(v->base + v->size, ptr, n);
+	v->size += n;
+}

From 3122da08c916a077cfd713aadb2e81b7b27c355e Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Sun, 9 Dec 2018 18:32:46 +0100
Subject: [PATCH 09/32] move include/ to lib/include

---
 Makefile                         | 2 +-
 {include => lib/include}/instr.h | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename {include => lib/include}/instr.h (100%)

diff --git a/Makefile b/Makefile
index 6fe2b9c..c7a46a6 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 
 CC = gcc
-CFLAGS = -Iinclude -DMEM_SIZE=32 -DM16_DEBUG_MEM
+CFLAGS = -g -Ilib/include -DMEM_SIZE=32 -DM16_DEBUG_MEM
 LD = $(CC)
 
 PROGRAMS = m16vm as
diff --git a/include/instr.h b/lib/include/instr.h
similarity index 100%
rename from include/instr.h
rename to lib/include/instr.h

From aca8d7937e1ebb3655f5b85865f1f0bb602e605b Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Sun, 9 Dec 2018 19:27:26 +0100
Subject: [PATCH 10/32] asm: adding AST

---
 src/as/ast.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/as/ast.h | 81 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 176 insertions(+)
 create mode 100644 src/as/ast.c
 create mode 100644 src/as/ast.h

diff --git a/src/as/ast.c b/src/as/ast.c
new file mode 100644
index 0000000..392517e
--- /dev/null
+++ b/src/as/ast.c
@@ -0,0 +1,95 @@
+
+#include <string.h>
+#include <stdlib.h>
+#include <vector.h>
+#include <stdio.h>
+#include "ast.h"
+
+void ast_init(struct ast* ast) {
+
+	vector_init(&ast->instr, 16 * sizeof(struct ast));
+	vector_init(&ast->labels, 16 * sizeof(char**));
+	ast->symbols = symtab_init();
+}
+
+void ast_free(struct ast* ast) {
+
+	int i;
+
+	// Free all label strings
+	for(int i = 0; i < ast->labels.size; i++) {
+		char *ptr = *((char**) ast->labels.base + i);
+		free(ptr);
+	}
+	vector_destory(&ast->labels);
+	vector_destory(&ast->instr);
+	symtab_free(ast->symbols);
+}
+
+void ast_instr(struct ast* ast, uint8_t opcode) {
+
+	struct ast_instr instr = { 0 };
+
+	instr.opcode = opcode;
+	vector_append(&ast->instr, &instr, sizeof(struct ast_instr));
+}
+
+void ast_instr_operand(struct ast* ast, enum ast_datatype type, void* value) {
+
+	struct ast_instr *instr;
+
+	if (ast->instr.size < 1)
+		return;
+
+	// Fetch latest instruction.
+	instr = ast->instr.base + (ast->instr.size - sizeof(struct ast_instr));
+
+	if (instr->n_operands < 3) {
+		struct ast_instr_operand *n = instr->operands + instr->n_operands;
+
+		n->type = type;
+		if (n->type == DATATYPE_STRING) {
+			n->s = strdup(value);
+			vector_append(&ast->labels, &n->s, sizeof(char**));
+		} else {
+			n->r = ((intptr_t) value) & 0xFF;
+		}
+
+		instr->n_operands++;
+	}
+}
+
+void ast_location(struct ast* ast, const char *label, uint16_t loc) {
+
+	symtab_set(ast->symbols, label, loc);
+}
+
+void ast_print(struct ast* ast) {
+
+	printf("{\n");
+
+	for(int i = 0; i < ast->instr.size; i += sizeof(struct ast_instr)) {
+		struct ast_instr *instr = ast->instr.base + i;
+
+		printf("\topcode: %u", instr->opcode);
+		for(int j = 0; j < instr->n_operands; j++) {
+			struct ast_instr_operand *op = instr->operands + j;
+			printf(", operand%i: ", j);
+			switch(op->type) {
+			case DATATYPE_REGISTER :
+				printf("%u (reg)", op->r);
+				break;
+			case DATATYPE_NUMBER :
+				printf("%i (num)", op->n);
+				break;
+			case DATATYPE_STRING :
+				printf("%s (string)", op->s);
+				break;
+			}
+		}
+		printf("\n");
+	}
+
+	printf("}\n");
+
+}
diff --git a/src/as/ast.h b/src/as/ast.h
new file mode 100644
index 0000000..6c23334
--- /dev/null
+++ b/src/as/ast.h
@@ -0,0 +1,81 @@
+/* ast.h
+ *
+ *   Copyright (C) 2018   Henrik Hautakoski <henrik.hautakoski@gmail.com>
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *   MA 02110-1301, USA.
+ */
+#ifndef ASM_AST_H
+#define ASM_AST_H
+
+#include <vector.h>
+#include "symtab.h"
+
+// All datatypes in the language.
+enum ast_datatype {
+	DATATYPE_NONE = 0,   // Not an actual type.
+	DATATYPE_REGISTER,   // 4 bit unsigned.
+	DATATYPE_NUMBER,     // 8 bit signed.
+	DATATYPE_STRING
+};
+
+// Struct to represent a single operand to a instruction.
+struct ast_instr_operand {
+	enum ast_datatype type;
+	union {
+		uint8_t     r;
+		int8_t      n;
+		uint16_t    addr;
+		const char *s;
+	};
+};
+
+// Struct to represent a single instruction.
+struct ast_instr {
+	uint8_t opcode;
+	uint8_t n_operands;
+	struct ast_instr_operand operands[3];
+};
+
+// Structure to represent a program.
+struct ast {
+
+	// List of instructions.
+	struct vector instr;
+
+	// List for storing label pointers (so we can free)
+	struct vector labels;
+
+	// Symbol table with label - location mapping.
+	symtab_t *symbols;
+};
+
+void ast_init(struct ast* ast);
+
+void ast_free(struct ast* ast);
+
+// Add an instruction
+void ast_instr(struct ast* ast, uint8_t opcode);
+
+// Add an operand to the latest instruction.
+//void ast_instr_operand(struct ast* ast, const struct ast_instr_operand* op);
+void ast_instr_operand(struct ast* ast, enum ast_datatype type, void* value);
+
+void ast_location(struct ast* ast, const char *label, uint16_t loc);
+
+// For debugging :)
+void ast_print(struct ast* ast);
+
+#endif /* ASM_AST_H */

From f2d54204ca9022158984bc2021913de370b62e02 Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Sun, 9 Dec 2018 19:50:04 +0100
Subject: [PATCH 11/32] Makefile: add rule for building libm16

---
 .gitignore | 1 +
 Makefile   | 5 +++++
 2 files changed, 6 insertions(+)

diff --git a/.gitignore b/.gitignore
index b979155..0983487 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 *.o
+*.a
 m16vm
 /as
diff --git a/Makefile b/Makefile
index c7a46a6..a66306f 100644
--- a/Makefile
+++ b/Makefile
@@ -13,9 +13,14 @@ m16vm : src/vm.o src/cpu.o src/mm.o src/instr_decode.o src/syscall.o src/program
 as : src/as/as.o src/as/parser.o src/as/lexer.o src/as/instr_encode.o src/as/error.o
 	$(LD) $(LDFLAGS) -o $@ $^
 
+lib/libm16.a : lib/src/vector.o
+	$(AR) rcs $@ $^
+
 clean :
 	$(RM) src/*.o
 	$(RM) src/as/*.o
+	$(RM) lib/*.o
+	$(RM) lib/*.a
 
 distclean : clean
 	$(RM) $(PROGRAMS)

From 96220537de10d424efe1ce214d8b66257b63498e Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Mon, 10 Dec 2018 10:06:12 +0100
Subject: [PATCH 12/32] asm: rename instr_encode to codegen and do code
 generation from AST.

---
 src/as/codegen.c                     | 61 ++++++++++++++++++++++++++++
 src/as/{instr_encode.h => codegen.h} | 12 +++---
 src/as/instr_encode.c                | 50 -----------------------
 3 files changed, 67 insertions(+), 56 deletions(-)
 create mode 100644 src/as/codegen.c
 rename src/as/{instr_encode.h => codegen.h} (82%)
 delete mode 100644 src/as/instr_encode.c

diff --git a/src/as/codegen.c b/src/as/codegen.c
new file mode 100644
index 0000000..3d8c76c
--- /dev/null
+++ b/src/as/codegen.c
@@ -0,0 +1,61 @@
+/* codegen.c
+ *
+ *   Copyright (C) 2018   Henrik Hautakoski <henrik@fiktivkod.org>
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the Free Software
+ *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ *   MA 02110-1301, USA.
+ */
+#include "codegen.h"
+
+void codegen_emit(struct ast_instr* instr, symtab_t* symbols, uint8_t* out) {
+
+	out[0] = instr->opcode << 4;
+
+	if (instr->n_operands < 1)
+		// No operands, return
+		return;
+
+	// J-Type (We don't have labels yet, so this is just a address)
+	if (instr->operands[0].type == DATATYPE_NUMBER) {
+	//if (instr->operands[0].type == DATATYPE_STRING) {
+		uint16_t addr;
+
+		/* if (symtab_get(symbols, instr->operands[0].s, &addr)) {
+			out[0] |= (addr >> 8);
+			out[1]  =  addr;
+		}*/
+		addr = instr->operands[0].n;
+
+		out[0] |= (addr >> 8);
+		out[1]  =  addr;
+	}
+	// R/RI or I
+	else if (instr->operands[0].type == DATATYPE_REGISTER) {
+
+		out[0] |= instr->operands[0].r & 0xF;
+
+		// I
+		if (instr->operands[1].type == DATATYPE_NUMBER) {
+
+			out[1] = instr->operands[1].n;
+		}
+		// R/RI-Type
+		else if (instr->operands[1].type == DATATYPE_REGISTER &&
+			(instr->operands[2].type == DATATYPE_REGISTER || instr->operands[2].type == DATATYPE_NUMBER)) {
+
+			out[1] = (instr->operands[1].r << 4) | (instr->operands[2].r & 0xF);
+		}
+	}
+}
diff --git a/src/as/instr_encode.h b/src/as/codegen.h
similarity index 82%
rename from src/as/instr_encode.h
rename to src/as/codegen.h
index 0f01304..8964321 100644
--- a/src/as/instr_encode.h
+++ b/src/as/codegen.h
@@ -1,4 +1,4 @@
-/* instr_encode.h
+/* codegen.h
  *
  *   Copyright (C) 2018   Henrik Hautakoski <henrik.hautakoski@gmail.com>
  *
@@ -17,11 +17,11 @@
  *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  *   MA 02110-1301, USA.
  */
-#ifndef ASM_INSTR_ENCODE_H
-#define ASM_INSTR_ENCODE_H
+#ifndef ASM_CODEGEN_H
+#define ASM_CODEGEN_H
 
-#include <instr.h>
+#include "ast.h"
 
-void instr_encode(struct instr* instr, uint16_t *out);
+void codegen_emit(struct ast_instr* ast, symtab_t* symbols, uint8_t* out);
 
-#endif /* ASM_INSTR_ENCODE_H */
+#endif /* ASM_CODEGEN_H */
diff --git a/src/as/instr_encode.c b/src/as/instr_encode.c
deleted file mode 100644
index 1478928..0000000
--- a/src/as/instr_encode.c
+++ /dev/null
@@ -1,50 +0,0 @@
-/* instr_encode.c
- *
- *   Copyright (C) 2018   Henrik Hautakoski <henrik@fiktivkod.org>
- *
- *   This program is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation; either version 2 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program; if not, write to the Free Software
- *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
- *   MA 02110-1301, USA.
- */
-#include <stdio.h>
-#include <string.h>
-#include "instr_encode.h"
-
-void instr_encode(struct instr* instr, uint16_t *out) {
-
-	uint8_t *p = (uint8_t *) out;
-
-	*out = 0;
-	*p = instr->opcode << 4;
-
-	if (instr->opcode == OP_NOOP) {
-		// Do nothing.
-	} else if (instr->opcode == OP_JMP) {
-		*p |= (instr->j.addr >> 8);
-		*(p+1) = instr->j.addr;
-	} else {
-		*p |= instr->r.rs;
-
-		// I-Type
-		if (instr->opcode == OP_MOVL || instr->opcode == OP_MOVH
-			|| instr->opcode == OP_JR || instr->opcode == OP_INT) {
-
-			*(p+1) = instr->i.imm;
-		}
-		// R/RI-Type
-		else {
-			*(p+1) = (instr->r.r0 << 4) | instr->r.r1;
-		}
-	}
-}

From 7646d63736fee3f541c80234bbbc388871ca9220 Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Mon, 10 Dec 2018 10:09:31 +0100
Subject: [PATCH 13/32] asm: build AST in parser.

---
 Makefile        |   4 +-
 src/as/parser.c | 126 ++++++++++++++++++++++++------------------------
 2 files changed, 67 insertions(+), 63 deletions(-)

diff --git a/Makefile b/Makefile
index a66306f..4360200 100644
--- a/Makefile
+++ b/Makefile
@@ -10,7 +10,9 @@ all: $(PROGRAMS)
 m16vm : src/vm.o src/cpu.o src/mm.o src/instr_decode.o src/syscall.o src/program.o
 	$(LD) $(LDFLAGS) -o $@ $^
 
-as : src/as/as.o src/as/parser.o src/as/lexer.o src/as/instr_encode.o src/as/error.o
+as : src/as/as.o src/as/parser.o src/as/lexer.o \
+	src/as/codegen.o src/as/error.o src/as/symtab.o \
+	src/as/ast.o lib/libm16.a
 	$(LD) $(LDFLAGS) -o $@ $^
 
 lib/libm16.a : lib/src/vector.o
diff --git a/src/as/parser.c b/src/as/parser.c
index 7b95283..eca613b 100644
--- a/src/as/parser.c
+++ b/src/as/parser.c
@@ -21,7 +21,8 @@
 #include <stdio.h>
 #include <string.h>
 #include "error.h"
-#include "instr_encode.h"
+#include "codegen.h"
+#include "ast.h"
 #include "lexer.h"
 #include "parser.h"
 
@@ -37,26 +38,30 @@ static int match_type(struct lexer* lex, enum token_type type) {
 	return lex->token.type == type ? 0 : -1;
 }
 
-// Same as match_type() but extracts a number from the token.
-static int match_type_num(struct lexer* lex, enum token_type type,
-			void* out, size_t size) {
+// Same as match_type() but also generates a operand in the AST.
+static int match_operand(struct lexer* lex, enum token_type type, struct ast *ast) {
 
 	if (match_type(lex, type) < 0)
 		return -1;
 
-	memcpy(out, &lex->token.value.n, size);
+	if (type == TOKEN_REG) {
+		ast_instr_operand(ast, DATATYPE_REGISTER, lex->token.value.n);
+	} else {
+		ast_instr_operand(ast, DATATYPE_NUMBER, lex->token.value.n);
+	}
+
 	return 0;
 }
 
 /*
  * Helper macros for matching tokens.
  */
-#define match_reg(pos, out) \
-	if (match_type_num(lex, TOKEN_REG, out, sizeof(uint8_t))) \
+#define match_reg(pos, ast) \
+	if (match_operand(lex, TOKEN_REG, ast) < 0) \
 		return asm_error((lex)->lineno, "Expected number at argument %i", pos)
 
-#define match_imm(pos, out)  \
-	if (match_type_num(lex, TOKEN_NUMBER, out, sizeof(int16_t)) < 0) \
+#define match_imm(pos, ast)  \
+	if (match_operand(lex, TOKEN_NUMBER, ast) < 0) \
 		return asm_error((lex)->lineno, "Expected number at argument %i", pos)
 
 #define match_arg(pos) \
@@ -72,54 +77,54 @@ static int match_type_num(struct lexer* lex, enum token_type type,
  */
 
 // R-Type (rs : u8, r0 : u8, r1 : u8)
-static int match_typeR(struct instr_R *instr, struct lexer* lex) {
+static int match_typeR(struct lexer* lex, struct ast *ast) {
 
-	match_reg(1, &instr->rs); match_arg(1);
-	match_reg(2, &instr->r0); match_arg(2);
-	match_reg(3, &instr->r1);
+	match_reg(1, ast); match_arg(1);
+	match_reg(2, ast); match_arg(2);
+	match_reg(3, ast);
 	match_end;
 
 	return 0;
 }
 
 // RI-Type (rs : u8, r0 : u8, offset : s8)
-static int match_typeRI(struct instr_RI *instr, struct lexer* lex) {
+static int match_typeRI(struct lexer* lex, struct ast *ast) {
 
-	match_reg(1, &instr->rs); match_arg(1);
-	match_reg(2, &instr->r0); match_arg(2);
-	match_imm(3, &instr->offset);
+	match_reg(1, ast); match_arg(1);
+	match_reg(2, ast); match_arg(2);
+	match_imm(3, ast);
 	match_end;
 
 	return 1;
 }
 
 // I-Type (rs : u8, imm : s8)
-static int match_typeI(struct instr_I *instr, struct lexer* lex) {
+static int match_typeI(struct lexer* lex, struct ast *ast) {
 
-	match_reg(1, &instr->rs); match_arg(1);
-	match_imm(2, &instr->imm);
+	match_reg(1, ast); match_arg(1);
+	match_imm(2, ast);
 	match_end;
 
 	return 1;
 }
 
 // J-Type (rs : u8, addr : u16)
-static int match_typeJ(struct instr_J *instr, struct lexer* lex) {
+static int match_typeJ(struct lexer* lex, struct ast *ast) {
 
-	match_imm(1, &instr->addr);
+	match_imm(1, ast);
 	match_end;
 	return 1;
 }
 
-#define opcode_guard(op, v) \
-	if (op == OP_NONE) op = v
+#define opcode_guard(op) \
+	if (op_set == 0) { op_set = 1; ast_instr(ast, op); }
 
 /*
  * Parse a single line.
  */
-static int parse_line(struct lexer* lex, struct instr *instr) {
+static int parse_line(struct lexer* lex, struct ast *ast) {
 
-	instr->opcode = OP_NONE;
+	int op_set = 0;
 
 	if (lexer_get_next(lex) < 0)
 		return -1;
@@ -128,26 +133,26 @@ static int parse_line(struct lexer* lex, struct instr *instr) {
 	switch(lex->token.type) {
 	case TOKEN_EOI: return -1;
 	case TOKEN_EOL: break;
-	case TOKEN_OPCODE_NOOP : instr->opcode = OP_NOOP;
+	case TOKEN_OPCODE_NOOP : ast_instr(ast, OP_NOOP);
 		match_end;
 		break;
 	// Type-R
-	case TOKEN_OPCODE_ADD : opcode_guard(instr->opcode, OP_ADD);
-		return match_typeR(&instr->r, lex);
+	case TOKEN_OPCODE_ADD : opcode_guard(OP_ADD);
+		return match_typeR(lex, ast);
 	// Type-I
-	case TOKEN_OPCODE_MOVL : opcode_guard(instr->opcode, OP_MOVL);
-	case TOKEN_OPCODE_MOVH : opcode_guard(instr->opcode, OP_MOVH);
-	case TOKEN_OPCODE_JR   : opcode_guard(instr->opcode, OP_JR);
-	case TOKEN_OPCODE_INT  : opcode_guard(instr->opcode, OP_INT);
-		return match_typeI(&instr->i, lex);
+	case TOKEN_OPCODE_MOVL : opcode_guard(OP_MOVL);
+	case TOKEN_OPCODE_MOVH : opcode_guard(OP_MOVH);
+	case TOKEN_OPCODE_JR   : opcode_guard(OP_JR);
+	case TOKEN_OPCODE_INT  : opcode_guard(OP_INT);
+		return match_typeI(lex, ast);
 	// Type-RI
-	case TOKEN_OPCODE_LD  : opcode_guard(instr->opcode, OP_LW);
-	case TOKEN_OPCODE_SW  : opcode_guard(instr->opcode, OP_SW);
-	case TOKEN_OPCODE_BEQ : opcode_guard(instr->opcode, OP_BEQ);
-		return match_typeRI(&instr->ri, lex);
+	case TOKEN_OPCODE_LD  : opcode_guard(OP_LW);
+	case TOKEN_OPCODE_SW  : opcode_guard(OP_SW);
+	case TOKEN_OPCODE_BEQ : opcode_guard(OP_BEQ);
+		return match_typeRI(lex, ast);
 	// Type-J
-	case TOKEN_OPCODE_JMP : opcode_guard(instr->opcode, OP_JMP);
-		return match_typeJ(&instr->j, lex);
+	case TOKEN_OPCODE_JMP : opcode_guard(OP_JMP);
+		return match_typeJ(lex, ast);
 	case TOKEN_LABEL_DECL :
 		asm_warn(lex->lineno, "labels are not supported yet. ignoring.");
 		break;
@@ -158,17 +163,6 @@ static int parse_line(struct lexer* lex, struct instr *instr) {
 	return 0;
 }
 
-static int gencode(FILE *fd, struct instr *instructions, int len) {
-
-	uint16_t buf; // 2-bytes (16-bit) per instruction.
-
-	for(int i = 0; i < len; i++) {
-		instr_encode(instructions + i, &buf);
-
-		fwrite(&buf, sizeof(buf), 1, fd);
-	}
-}
-
 /*
  * Main parser function.
  */
@@ -176,23 +170,31 @@ int parse(FILE *source_fd, FILE *dest_fd) {
 
 	int rc;
 	struct lexer lex;
-	struct instr instr[256];
-	int n = 0;
+	struct ast ast;
 
+	ast_init(&ast);
 	lexer_init(&lex, source_fd);
 
+	// Parse and build AST.
 	do {
-		rc = parse_line(&lex, instr + n);
-
-		if (instr[n].opcode != OP_NONE) {
-			n++;
-			if (n >= 256)
-				// TODO: Dynamic allocs :)
-				return asm_error(-1, "Oops, parser ran out of memory.");
-		}
+		rc = parse_line(&lex, &ast);
 	} while(rc >= 0);
 
-	gencode(dest_fd, instr, n);
+	// TODO: Second pass validation
+	// make sure all referenced labels are actually defined.
+
+	// Code generation
+	for(int i = 0; i < ast.instr.size; i += sizeof(struct ast_instr)) {
+		struct ast_instr *instr = ast.instr.base + i;
+		uint8_t code[2] = { 0 };
+
+		codegen_emit(instr, ast.symbols, &code);
+
+		fwrite(&code, sizeof(code), 1, dest_fd);
+	}
+
+	// Cleanup
+	ast_free(&ast);
 
 	return 0;
 }

From 86293537eb84abc2347343ddc523fa3b6c2b1548 Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Mon, 10 Dec 2018 21:47:04 +0100
Subject: [PATCH 14/32] src/as/lexer.c: make sure we store the string if it's a
 label.

---
 src/as/lexer.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/as/lexer.c b/src/as/lexer.c
index 7a1f7aa..b9cb9b6 100644
--- a/src/as/lexer.c
+++ b/src/as/lexer.c
@@ -71,12 +71,11 @@ static int read_number(FILE *fp) {
 	return val;
 }
 
-static int read_string(FILE *fp) {
+static int read_string(FILE *fp, char *buf, size_t len) {
 
 	int c, label_decl = 0, i = 0;
-	char buf[64];
 
-	while((c = fgetc(fp)) != EOF && i < 64) {
+	while((c = fgetc(fp)) != EOF && i < len) {
 
 		if (string(c)) {
 			buf[i++] = c;
@@ -160,8 +159,11 @@ int lexer_get_next(struct lexer *lex) {
 			lex->token.type = TOKEN_NUMBER;
 			lex->token.value.n = read_number(lex->fp);
 		} else if (first_string(ch)) {
+			char buf[32];
 			ungetc(ch, lex->fp);
-			lex->token.type = read_string(lex->fp);
+			lex->token.type = read_string(lex->fp, buf, sizeof(buf));
+			if (lex->token.type == TOKEN_LABEL_DECL || lex->token.type == TOKEN_LABEL)
+				strcpy(lex->token.value.s, buf);
 		} else {
 			fprintf(stderr, "ERROR: Invalid character '%c' on line: %i\n", ch, lex->lineno);
 			return -1;
@@ -202,9 +204,9 @@ void lexer_print_token(struct token *token) {
 		break;
 	case TOKEN_OPCODE_INT : printf(" [OP INT] ");
 		break;
-	case TOKEN_LABEL : printf(" [LABEL] ");
+	case TOKEN_LABEL : printf(" [LABEL \"%s\"] ", token->value.s);
 		break;
-	case TOKEN_LABEL_DECL : printf(" [LABEL DECL] ");
+	case TOKEN_LABEL_DECL : printf(" [LABEL DECL \"%s\"] ", token->value.s);
 		break;
 	case TOKEN_REG : printf(" [REG %i] ", token->value.n);
 		break;

From 2b80662967868d18c45b1fdb90c5b89c888326c6 Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Mon, 10 Dec 2018 21:59:37 +0100
Subject: [PATCH 15/32] src/as/parser.c: store address for label declaration in
 the symbol table.

---
 src/as/parser.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/as/parser.c b/src/as/parser.c
index eca613b..bb8653a 100644
--- a/src/as/parser.c
+++ b/src/as/parser.c
@@ -116,6 +116,16 @@ static int match_typeJ(struct lexer* lex, struct ast *ast) {
 	return 1;
 }
 
+// Match \n*<opcode>
+static int match_label_decl(struct lexer* lex, struct ast *ast) {
+
+	uint16_t location = ast->instr.size / sizeof(struct ast_instr);
+
+	ast_location(ast, lex->token.value.s, location);
+
+	return 1;
+}
+
 #define opcode_guard(op) \
 	if (op_set == 0) { op_set = 1; ast_instr(ast, op); }
 
@@ -154,8 +164,7 @@ static int parse_line(struct lexer* lex, struct ast *ast) {
 	case TOKEN_OPCODE_JMP : opcode_guard(OP_JMP);
 		return match_typeJ(lex, ast);
 	case TOKEN_LABEL_DECL :
-		asm_warn(lex->lineno, "labels are not supported yet. ignoring.");
-		break;
+		return match_label_decl(lex, ast);
 	default:
 		return asm_error(lex->lineno, "Opcode or label expected");
 	}

From 7ec84ad2a144bbb9b3722fbe4e2bc6d28898de93 Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Mon, 10 Dec 2018 22:03:05 +0100
Subject: [PATCH 16/32] src/as/parser.c: J-Type should now accept a label as
 argument.

---
 src/as/parser.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/as/parser.c b/src/as/parser.c
index bb8653a..9f62eef 100644
--- a/src/as/parser.c
+++ b/src/as/parser.c
@@ -108,10 +108,11 @@ static int match_typeI(struct lexer* lex, struct ast *ast) {
 	return 1;
 }
 
-// J-Type (rs : u8, addr : u16)
+// J-Type (addr : string)
 static int match_typeJ(struct lexer* lex, struct ast *ast) {
 
-	match_imm(1, ast);
+	if (match_operand(lex, TOKEN_LABEL, ast) < 0)
+		return asm_error(lex->lineno, "Expected label at argument 1");
 	match_end;
 	return 1;
 }

From aa171ac46cb3204c26317a817c04ce724cdf8a2c Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Mon, 10 Dec 2018 22:04:58 +0100
Subject: [PATCH 17/32] src/as/parser.c: in match_operand() store strings in
 the AST.

---
 src/as/parser.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/as/parser.c b/src/as/parser.c
index 9f62eef..36ca98c 100644
--- a/src/as/parser.c
+++ b/src/as/parser.c
@@ -46,8 +46,10 @@ static int match_operand(struct lexer* lex, enum token_type type, struct ast *as
 
 	if (type == TOKEN_REG) {
 		ast_instr_operand(ast, DATATYPE_REGISTER, lex->token.value.n);
-	} else {
+	} else if (type == TOKEN_NUMBER) {
 		ast_instr_operand(ast, DATATYPE_NUMBER, lex->token.value.n);
+	} else {
+		ast_instr_operand(ast, DATATYPE_STRING, lex->token.value.s);
 	}
 
 	return 0;

From 0916f8bcdcbf5a62287a4e34369e395f1fd5401e Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Mon, 10 Dec 2018 22:55:20 +0100
Subject: [PATCH 18/32] src/as/parser.c: do semantics checks.

---
 src/as/parser.c | 30 ++++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/src/as/parser.c b/src/as/parser.c
index 36ca98c..d39bb59 100644
--- a/src/as/parser.c
+++ b/src/as/parser.c
@@ -175,6 +175,29 @@ static int parse_line(struct lexer* lex, struct ast *ast) {
 	return 0;
 }
 
+// Check the semantics of the program's AST.
+// For now, we only need to check that all
+// referenced labels exist in the symbol table
+static int check_semantics(struct ast* ast) {
+
+	int i;
+
+	// TODO: Need to implement a iterator for vectors.
+	for(i = 0; i < ast->instr.size; i += sizeof(struct ast_instr)) {
+		struct ast_instr *instr = ast->instr.base + i;
+
+		// Only J-Type can have labels.
+		if (instr->opcode == OP_JMP
+			&& instr->operands[0].type == DATATYPE_STRING
+			&& symtab_get(ast->symbols, instr->operands[0].s, NULL) < 0)  {
+
+			return asm_error(0, "Label '%s' is not defined", instr->operands[0].s);
+		}
+	}
+
+	return 0;
+}
+
 /*
  * Main parser function.
  */
@@ -192,8 +215,8 @@ int parse(FILE *source_fd, FILE *dest_fd) {
 		rc = parse_line(&lex, &ast);
 	} while(rc >= 0);
 
-	// TODO: Second pass validation
-	// make sure all referenced labels are actually defined.
+	if (check_semantics(&ast) < 0)
+		goto done;
 
 	// Code generation
 	for(int i = 0; i < ast.instr.size; i += sizeof(struct ast_instr)) {
@@ -206,7 +229,6 @@ int parse(FILE *source_fd, FILE *dest_fd) {
 	}
 
 	// Cleanup
-	ast_free(&ast);
-
+done:	ast_free(&ast);
 	return 0;
 }

From 0f10c9fd381040a96d5e91c95be985a9863fe79e Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Mon, 10 Dec 2018 23:03:30 +0100
Subject: [PATCH 19/32] src/as/parser.c: on parsing error. skip doing semantics
 checks and code gen.

---
 src/as/parser.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/src/as/parser.c b/src/as/parser.c
index d39bb59..7856e72 100644
--- a/src/as/parser.c
+++ b/src/as/parser.c
@@ -52,7 +52,7 @@ static int match_operand(struct lexer* lex, enum token_type type, struct ast *as
 		ast_instr_operand(ast, DATATYPE_STRING, lex->token.value.s);
 	}
 
-	return 0;
+	return 1;
 }
 
 /*
@@ -86,7 +86,7 @@ static int match_typeR(struct lexer* lex, struct ast *ast) {
 	match_reg(3, ast);
 	match_end;
 
-	return 0;
+	return 1;
 }
 
 // RI-Type (rs : u8, r0 : u8, offset : s8)
@@ -144,7 +144,7 @@ static int parse_line(struct lexer* lex, struct ast *ast) {
 
 	// Opcode should come first.
 	switch(lex->token.type) {
-	case TOKEN_EOI: return -1;
+	case TOKEN_EOI: return 0;
 	case TOKEN_EOL: break;
 	case TOKEN_OPCODE_NOOP : ast_instr(ast, OP_NOOP);
 		match_end;
@@ -172,7 +172,7 @@ static int parse_line(struct lexer* lex, struct ast *ast) {
 		return asm_error(lex->lineno, "Opcode or label expected");
 	}
 
-	return 0;
+	return 1;
 }
 
 // Check the semantics of the program's AST.
@@ -203,7 +203,6 @@ static int check_semantics(struct ast* ast) {
  */
 int parse(FILE *source_fd, FILE *dest_fd) {
 
-	int rc;
 	struct lexer lex;
 	struct ast ast;
 
@@ -211,9 +210,13 @@ int parse(FILE *source_fd, FILE *dest_fd) {
 	lexer_init(&lex, source_fd);
 
 	// Parse and build AST.
-	do {
-		rc = parse_line(&lex, &ast);
-	} while(rc >= 0);
+	for(;;) {
+		int rc = parse_line(&lex, &ast);
+		if (rc < 0)
+			goto done;
+		if (rc == 0)
+			break;
+	}
 
 	if (check_semantics(&ast) < 0)
 		goto done;

From 1146b925f52d5ed91d25a7a9c0766b28f13cbc37 Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Mon, 10 Dec 2018 23:11:58 +0100
Subject: [PATCH 20/32] src/as/codegen.c: use symtab to get the address for
 J-Type instruction.

---
 src/as/codegen.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/src/as/codegen.c b/src/as/codegen.c
index 3d8c76c..d08a6c4 100644
--- a/src/as/codegen.c
+++ b/src/as/codegen.c
@@ -27,19 +27,14 @@ void codegen_emit(struct ast_instr* instr, symtab_t* symbols, uint8_t* out) {
 		// No operands, return
 		return;
 
-	// J-Type (We don't have labels yet, so this is just a address)
-	if (instr->operands[0].type == DATATYPE_NUMBER) {
-	//if (instr->operands[0].type == DATATYPE_STRING) {
+	// J-Type
+	if (instr->operands[0].type == DATATYPE_STRING) {
 		uint16_t addr;
 
-		/* if (symtab_get(symbols, instr->operands[0].s, &addr)) {
+		if (symtab_get(symbols, instr->operands[0].s, &addr) >= 0) {
 			out[0] |= (addr >> 8);
 			out[1]  =  addr;
-		}*/
-		addr = instr->operands[0].n;
-
-		out[0] |= (addr >> 8);
-		out[1]  =  addr;
+		}
 	}
 	// R/RI or I
 	else if (instr->operands[0].type == DATATYPE_REGISTER) {

From ad74f2d4760780774a0412b505b90b58e584cd63 Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Mon, 10 Dec 2018 23:12:59 +0100
Subject: [PATCH 21/32] asm/hello_world.as: we can now use labels!

---
 asm/hello_world.as | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/asm/hello_world.as b/asm/hello_world.as
index dc101c1..a146665 100644
--- a/asm/hello_world.as
+++ b/asm/hello_world.as
@@ -29,5 +29,5 @@ ld	$15, $0, 0	; Load memory address stored in R0 into R15
 int     $10, 2		; Print character
 add 	$0, $0, $1	; Add 1 (R1) to counter (R0)
 beq	$15, $2, 1	; Branch to "noop" (skipping next instruction) if R15 = 0 (R2 holds 0)
-jmp     22		; jump back to "_start" label (not implemented atm)
+jmp     _start		; jump back to "_start" label
 noop

From 0a91644879b5d128675d986ae41edf07d56a5a34 Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Mon, 10 Dec 2018 23:17:07 +0100
Subject: [PATCH 22/32] Makefile: call "as" "m16as"

---
 .gitignore | 2 +-
 Makefile   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.gitignore b/.gitignore
index 0983487..03fa769 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,4 @@
 *.o
 *.a
 m16vm
-/as
+m16as
diff --git a/Makefile b/Makefile
index 4360200..b3b1679 100644
--- a/Makefile
+++ b/Makefile
@@ -3,14 +3,14 @@ CC = gcc
 CFLAGS = -g -Ilib/include -DMEM_SIZE=32 -DM16_DEBUG_MEM
 LD = $(CC)
 
-PROGRAMS = m16vm as
+PROGRAMS = m16vm m16as
 
 all: $(PROGRAMS)
 
 m16vm : src/vm.o src/cpu.o src/mm.o src/instr_decode.o src/syscall.o src/program.o
 	$(LD) $(LDFLAGS) -o $@ $^
 
-as : src/as/as.o src/as/parser.o src/as/lexer.o \
+m16as : src/as/as.o src/as/parser.o src/as/lexer.o \
 	src/as/codegen.o src/as/error.o src/as/symtab.o \
 	src/as/ast.o lib/libm16.a
 	$(LD) $(LDFLAGS) -o $@ $^

From 3513662ad829a841ec0a654a3de25ec4f75342d5 Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Tue, 11 Dec 2018 17:53:55 +0100
Subject: [PATCH 23/32] asm: lexer.c: read_string() move all strings into a
 table

---
 src/as/lexer.c | 46 +++++++++++++++++++++++++---------------------
 1 file changed, 25 insertions(+), 21 deletions(-)

diff --git a/src/as/lexer.c b/src/as/lexer.c
index b9cb9b6..9797a05 100644
--- a/src/as/lexer.c
+++ b/src/as/lexer.c
@@ -26,6 +26,25 @@
 #define space(x) ((x) == ' ' || (x) == '\t' || (x) == '\r')
 
 
+struct opcode_ent {
+	char *  name;
+	uint8_t code;
+};
+
+static const struct opcode_ent opcode_table[] = {
+	{ "noop", TOKEN_OPCODE_NOOP },
+	{ "add" , TOKEN_OPCODE_ADD  },
+	{ "movl", TOKEN_OPCODE_MOVL },
+	{ "movh", TOKEN_OPCODE_MOVH },
+	{ "ld"  , TOKEN_OPCODE_LD   },
+	{ "sw"  , TOKEN_OPCODE_SW   },
+	{ "beq" , TOKEN_OPCODE_BEQ  },
+	{ "jmp" , TOKEN_OPCODE_JMP  },
+	{ "jr"  , TOKEN_OPCODE_JR   },
+	{ "int" , TOKEN_OPCODE_INT  },
+	{ NULL  , 0                 },
+};
+
 /**
  * Helper functions
  */
@@ -90,28 +109,13 @@ static int read_string(FILE *fp, char *buf, size_t len) {
 	}
 	buf[i] = '\0';
 
-	if (label_decl) {
+	if (label_decl)
 		return TOKEN_LABEL_DECL;
-	} else if (!strcmp("noop", buf)) {
-		return TOKEN_OPCODE_NOOP;
-	} else if (!strcmp("add", buf)) {
-		return TOKEN_OPCODE_ADD;
-	} else if (!strcmp("movl", buf)) {
-		return TOKEN_OPCODE_MOVL;
-	} else if (!strcmp("movh", buf)) {
-		return TOKEN_OPCODE_MOVH;
-	} else if (!strcmp("ld", buf)) {
-		return TOKEN_OPCODE_LD;
-	} else if (!strcmp("sw", buf)) {
-		return TOKEN_OPCODE_SW;
-	} else if (!strcmp("beq", buf)) {
-		return TOKEN_OPCODE_BEQ;
-	} else if (!strcmp("jmp", buf)) {
-		return TOKEN_OPCODE_JMP;
-	} else if (!strcmp("jr", buf)) {
-		return TOKEN_OPCODE_JR;
-	} else if (!strcmp("int", buf)) {
-		return TOKEN_OPCODE_INT;
+
+	for(i = 0; opcode_table[i].name; i++) {
+
+		if (!strcmp(opcode_table[i].name, buf))
+			return opcode_table[i].code;
 	}
 	return TOKEN_LABEL;
 }

From 1a29b3966370bbbfaa11030a31581397c3774863 Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Fri, 14 Dec 2018 00:17:42 +0100
Subject: [PATCH 24/32] src/as/ast.c: bug in ast_free(). we should advance i by
 sizeof(char**).

---
 src/as/ast.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/as/ast.c b/src/as/ast.c
index 392517e..bfd6983 100644
--- a/src/as/ast.c
+++ b/src/as/ast.c
@@ -17,7 +17,7 @@ void ast_free(struct ast* ast) {
 	int i;
 
 	// Free all label strings
-	for(int i = 0; i < ast->labels.size; i++) {
+	for(int i = 0; i < ast->labels.size; i += sizeof(char**)) {
 		char *ptr = *((char**) ast->labels.base + i);
 		free(ptr);
 	}

From de29a981bdfb26ab77ecf56eb502cdf978da5deb Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Fri, 14 Dec 2018 00:35:09 +0100
Subject: [PATCH 25/32] src/as/lexer.c: implement hexadecimal numbers.

---
 src/as/lexer.c | 56 +++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 51 insertions(+), 5 deletions(-)

diff --git a/src/as/lexer.c b/src/as/lexer.c
index 9797a05..786efa3 100644
--- a/src/as/lexer.c
+++ b/src/as/lexer.c
@@ -70,14 +70,34 @@ static int read_next(struct lexer *lex) {
  	return c;
  }
 
-static int read_number(FILE *fp) {
+static int read_hex(FILE *fp) {
+
+	int c, val = 0;
 
-	int c, neg = 0, val = 0;
 	while((c = fgetc(fp)) != EOF) {
-		if (neg == 0 && c == '-') {
-			neg = 1;
-			continue;
+		char n = 0;
+		if (number(c)) {
+			n = c - '0';
 		}
+		else if (  (c >= 'a' && c <= 'f')
+			|| (c >= 'A' && c <= 'F')) {
+			n = (c % 0x20) + 9;
+		}
+		else {
+			ungetc(c, fp);
+			break;
+		}
+
+		val = (val * 16) + n;
+	}
+	return val;
+}
+
+static int read_dec(FILE *fp, int neg) {
+
+	int c, val = 0;
+
+	while((c = fgetc(fp)) != EOF) {
 		if (!number(c)) {
 			ungetc(c, fp);
 			break;
@@ -90,6 +110,32 @@ static int read_number(FILE *fp) {
 	return val;
 }
 
+static int read_number(FILE *fp) {
+
+	int neg = 0, c = fgetc(fp);
+
+	// Check for '0x'.
+	if (c == '0') {
+		c = fgetc(fp);
+		if (c == 'x') {
+			// We have a hexadecimal number.
+			return read_hex(fp);
+		}
+		ungetc(c, fp);
+		ungetc('0', fp);
+	}
+	// While we are at it. check for a negative sign.
+	else if (c == '-') {
+		neg = 1;
+	}
+	// We got something else. put it back.
+	else {
+		ungetc(c, fp);
+	}
+
+	return read_dec(fp, neg);
+}
+
 static int read_string(FILE *fp, char *buf, size_t len) {
 
 	int c, label_decl = 0, i = 0;

From 2c734dc3005758df2691de6b298ba2c1b7b47d2d Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Fri, 14 Dec 2018 00:35:33 +0100
Subject: [PATCH 26/32] asm/hello_world.as: use some hex!

---
 asm/hello_world.as | 54 +++++++++++++++++++++++-----------------------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/asm/hello_world.as b/asm/hello_world.as
index a146665..f5f5b25 100644
--- a/asm/hello_world.as
+++ b/asm/hello_world.as
@@ -1,33 +1,33 @@
 
 ; -- Store "Hello World" string in memory.
-movl 	$1, 72 ; H
-sw	$0, $1, 0
-movl 	$1, 69 ; E
-sw	$0, $1, 1
-movl 	$1, 76 ; L
-sw	$0, $1, 2
-sw	$0, $1, 3
-movl 	$1, 79 ; O
-sw	$0, $1, 4
-movl 	$1, 32 ; Space
-sw	$0, $1, 5
-movl 	$1, 87 ; W
-sw	$0, $1, 6
-movl 	$1, 79 ; O
-sw	$0, $1, 7
-movl 	$1, 82 ; R
-sw	$0, $1, 8
-movl 	$1, 76 ; L
-sw	$0, $1, 9
-movl 	$1, 68 ; D
-sw	$0, $1, 10
+movl 	$0x1, 72 ; H
+sw	$0x0, $0x1, 0
+movl 	$0x1, 69 ; E
+sw	$0x0, $0x1, 1
+movl 	$0x1, 76 ; L
+sw	$0x0, $0x1, 2
+sw	$0x0, $0x1, 3
+movl 	$0x1, 79 ; O
+sw	$0x0, $0x1, 4
+movl 	$0x1, 32 ; Space
+sw	$0x0, $0x1, 5
+movl 	$0x1, 87 ; W
+sw	$0x0, $0x1, 6
+movl 	$0x1, 79 ; O
+sw	$0x0, $0x1, 7
+movl 	$0x1, 82 ; R
+sw	$0x0, $0x1, 8
+movl 	$0x1, 76 ; L
+sw	$0x0, $0x1, 9
+movl	$0x1, 68 ; D
+sw	$0x0, $0x1, 10
 
 ; -- setup print loop.
-movl 	$1, 1		; Load 1 in R1 (used for increment the counter)
+movl 	$0x1, 1		; Load 1 in R1 (used for increment the counter)
 _start:
-ld	$15, $0, 0	; Load memory address stored in R0 into R15
-int     $10, 2		; Print character
-add 	$0, $0, $1	; Add 1 (R1) to counter (R0)
-beq	$15, $2, 1	; Branch to "noop" (skipping next instruction) if R15 = 0 (R2 holds 0)
-jmp     _start		; jump back to "_start" label
+ld	$0xF, $0x0, 0		; Load memory address stored in R0 into R15
+int	$0xA, 2			; Print character
+add	$0x0, $0x0, $0x1	; Add 1 (R1) to counter (R0)
+beq	$0xF, $0x2, 1		; Branch to "noop" (skipping next instruction) if R15 = 0 (R2 holds 0)
+jmp     _start			; jump back to "_start" label
 noop

From 2c1c8324937a13fca67c3286f86b28685bd11d80 Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Fri, 14 Dec 2018 14:04:52 +0100
Subject: [PATCH 27/32] adding asm/mov_test.as

---
 asm/mov_test.as | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 asm/mov_test.as

diff --git a/asm/mov_test.as b/asm/mov_test.as
new file mode 100644
index 0000000..3b83abe
--- /dev/null
+++ b/asm/mov_test.as
@@ -0,0 +1,15 @@
+
+; mov h/l test.
+; Using 2 instructions to store a 16-bit words is abit tricky to write code for
+
+; Storing 32767 = (2^15) - 1 (highest value in 2's complement 16-bit).
+; MSB (signed flag) = 0, rest 1.
+; H [0111 1111] L [1111 1111]
+movl	$0x0, -1
+movh    $0x0, 127
+
+; Storing -32768 = (2^15) (highest value in 2's complement 16-bit).
+; MSB (signed flag) = 1, rest 0.
+; H [1000 0000] L [0000 0000]
+movl	$0x1, 0
+movh    $0x1, -128

From e75349e7f96a1248268488f9c86c35debc9de2a5 Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Mon, 17 Dec 2018 07:27:52 +0100
Subject: [PATCH 28/32] src/as/lexer.h: in struct token: integer value can max
 be 8 bits wide.

---
 src/as/lexer.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/as/lexer.h b/src/as/lexer.h
index e2a5319..cea04d7 100644
--- a/src/as/lexer.h
+++ b/src/as/lexer.h
@@ -62,8 +62,8 @@ struct token {
 	 * this can be a string or unsigned short
 	 */
 	union {
-		uint16_t n;
-		char	 s[32];
+		int8_t n;
+		char   s[32];
 	} value;
 };
 

From 74ecdfc2ab1c50785bb3897f3af0701ac9771693 Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Mon, 17 Dec 2018 21:34:57 +0100
Subject: [PATCH 29/32] src/as/lexer.c: guard against integer overflow
 (emitting a warning)

---
 src/as/lexer.c | 55 +++++++++++++++++++++++++++++++++++---------------
 1 file changed, 39 insertions(+), 16 deletions(-)

diff --git a/src/as/lexer.c b/src/as/lexer.c
index 786efa3..1bf6f28 100644
--- a/src/as/lexer.c
+++ b/src/as/lexer.c
@@ -70,7 +70,7 @@ static int read_next(struct lexer *lex) {
  	return c;
  }
 
-static int read_hex(FILE *fp) {
+static int read_hex(FILE *fp, int *out) {
 
 	int c, val = 0;
 
@@ -89,11 +89,18 @@ static int read_hex(FILE *fp) {
 		}
 
 		val = (val * 16) + n;
+		if (val > 0xFF)
+			goto overflow;
 	}
-	return val;
+	*out = val;
+	return 0;
+
+overflow:
+	*out = 0xFF;
+	return -1;
 }
 
-static int read_dec(FILE *fp, int neg) {
+static int read_dec(FILE *fp, int neg, int *out) {
 
 	int c, val = 0;
 
@@ -103,14 +110,23 @@ static int read_dec(FILE *fp, int neg) {
 			break;
 		}
 		val = (val * 10) + (c - '0');
+
+		// Cool trick here.
+		// because the range is -128 (0x80) to +127 (0x7F)
+		// We can do 0x80 - 1 if it is NOT a negative number.
+		if (val > (0x80 - !neg))
+			goto overflow;
 	}
 
-	if (neg)
-		return -1 * val;
-	return val;
+	*out = neg ? -1 * val : val;
+	return 0;
+
+overflow:
+	*out = neg ? -1 * 0x80 : 0x7F;
+	return -1;
 }
 
-static int read_number(FILE *fp) {
+static int read_number(FILE *fp, int *out) {
 
 	int neg = 0, c = fgetc(fp);
 
@@ -119,7 +135,7 @@ static int read_number(FILE *fp) {
 		c = fgetc(fp);
 		if (c == 'x') {
 			// We have a hexadecimal number.
-			return read_hex(fp);
+			return read_hex(fp, out);
 		}
 		ungetc(c, fp);
 		ungetc('0', fp);
@@ -133,7 +149,18 @@ static int read_number(FILE *fp) {
 		ungetc(c, fp);
 	}
 
-	return read_dec(fp, neg);
+	return read_dec(fp, neg, out);
+}
+
+static int parse_number(struct lexer *lex) {
+
+	int num;
+
+	if (read_number(lex->fp, &num) < 0)
+		fprintf(stderr, "WARNING: Value truncated on line: %i\n", lex->lineno);
+
+	lex->token.value.n = num;
+	return 0;
 }
 
 static int read_string(FILE *fp, char *buf, size_t len) {
@@ -195,19 +222,15 @@ int lexer_get_next(struct lexer *lex) {
 		break;
 	case '$' :
 		lex->token.type = TOKEN_REG;
-		num = read_number(lex->fp);
-		// Registers is 8-bit only.
-		if (num > 0xF) {
-			fprintf(stderr, "ERROR: Invalid register value '%i' on line: %i\n", num, lex->lineno);
+		if (parse_number(lex) < 0)
 			return -1;
-		}
-		lex->token.value.n = num;
 		break;
 	default:
 		if (first_number(ch)) {
 			ungetc(ch, lex->fp);
 			lex->token.type = TOKEN_NUMBER;
-			lex->token.value.n = read_number(lex->fp);
+			if (parse_number(lex) < 0)
+				return -1;
 		} else if (first_string(ch)) {
 			char buf[32];
 			ungetc(ch, lex->fp);

From 2e66ffb9a532a0b87a1c5509ca742f055178d4f4 Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Mon, 17 Dec 2018 23:19:52 +0100
Subject: [PATCH 30/32] src/as/parser.c: check that numbers are in the allowed
 range.

---
 src/as/parser.c | 37 ++++++++++++++++++++++++++++++++-----
 1 file changed, 32 insertions(+), 5 deletions(-)

diff --git a/src/as/parser.c b/src/as/parser.c
index 7856e72..4481e5a 100644
--- a/src/as/parser.c
+++ b/src/as/parser.c
@@ -30,6 +30,12 @@
  * Helper functions/macros for defining parser rules.
  */
 
+enum number_size {
+	NUMBER_SIZE_U4,
+	NUMBER_SIZE_S4,
+	NUMBER_SIZE_S8,
+};
+
 // match the next token.
 // returns 0 if the token was of the correct type. -1 otherwise
 static int match_type(struct lexer* lex, enum token_type type) {
@@ -38,6 +44,27 @@ static int match_type(struct lexer* lex, enum token_type type) {
 	return lex->token.type == type ? 0 : -1;
 }
 
+static int validate_number(struct lexer* lex, enum number_size size) {
+
+	int8_t n = lex->token.value.n;
+
+	switch(size) {
+	case NUMBER_SIZE_U4 :
+		if (!(n >= 0x0 && n <= 0xF))
+			return asm_error(lex->lineno, "Value out of range %u", (uint8_t) n);
+		break;
+	case NUMBER_SIZE_S4 :
+		if (!(n >= -8 && n < 8))
+			return asm_error(lex->lineno, "Value out of range %i", n);
+		break;
+	case NUMBER_SIZE_S8 :
+		if (!(n >= -128 && n< 128))
+			return asm_error(lex->lineno, "Value out of range %i", n);
+		break;
+	}
+	return 0;
+}
+
 // Same as match_type() but also generates a operand in the AST.
 static int match_operand(struct lexer* lex, enum token_type type, struct ast *ast) {
 
@@ -59,11 +86,11 @@ static int match_operand(struct lexer* lex, enum token_type type, struct ast *as
  * Helper macros for matching tokens.
  */
 #define match_reg(pos, ast) \
-	if (match_operand(lex, TOKEN_REG, ast) < 0) \
+	if (match_operand(lex, TOKEN_REG, ast) < 0 || validate_number(lex, NUMBER_SIZE_U4) < 0) \
 		return asm_error((lex)->lineno, "Expected number at argument %i", pos)
 
-#define match_imm(pos, ast)  \
-	if (match_operand(lex, TOKEN_NUMBER, ast) < 0) \
+#define match_imm(pos, size, ast)  \
+	if (match_operand(lex, TOKEN_NUMBER, ast) < 0 || validate_number(lex, size) < 0) \
 		return asm_error((lex)->lineno, "Expected number at argument %i", pos)
 
 #define match_arg(pos) \
@@ -94,7 +121,7 @@ static int match_typeRI(struct lexer* lex, struct ast *ast) {
 
 	match_reg(1, ast); match_arg(1);
 	match_reg(2, ast); match_arg(2);
-	match_imm(3, ast);
+	match_imm(3, NUMBER_SIZE_S4, ast);
 	match_end;
 
 	return 1;
@@ -104,7 +131,7 @@ static int match_typeRI(struct lexer* lex, struct ast *ast) {
 static int match_typeI(struct lexer* lex, struct ast *ast) {
 
 	match_reg(1, ast); match_arg(1);
-	match_imm(2, ast);
+	match_imm(2, NUMBER_SIZE_S8, ast);
 	match_end;
 
 	return 1;

From 960d6f2e0db0a34b5f9a6922cba12e291f62d75f Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Mon, 17 Dec 2018 23:27:13 +0100
Subject: [PATCH 31/32] src/as/lexer.c: use error.c

---
 src/as/lexer.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/as/lexer.c b/src/as/lexer.c
index 1bf6f28..53bf8b9 100644
--- a/src/as/lexer.c
+++ b/src/as/lexer.c
@@ -1,6 +1,7 @@
 
 #include <stdio.h>
 #include <string.h>
+#include "error.h"
 #include "lexer.h"
 
 /**
@@ -157,7 +158,7 @@ static int parse_number(struct lexer *lex) {
 	int num;
 
 	if (read_number(lex->fp, &num) < 0)
-		fprintf(stderr, "WARNING: Value truncated on line: %i\n", lex->lineno);
+		asm_warn(lex->lineno, "Value truncated to %i", num);
 
 	lex->token.value.n = num;
 	return 0;
@@ -238,8 +239,7 @@ int lexer_get_next(struct lexer *lex) {
 			if (lex->token.type == TOKEN_LABEL_DECL || lex->token.type == TOKEN_LABEL)
 				strcpy(lex->token.value.s, buf);
 		} else {
-			fprintf(stderr, "ERROR: Invalid character '%c' on line: %i\n", ch, lex->lineno);
-			return -1;
+			return asm_error(lex->lineno, "Invalid character '%c'", ch);
 		}
 	}
 

From 993a1cbd74801fa22cc43f74b2e45220dfdefda1 Mon Sep 17 00:00:00 2001
From: Henrik Hautakoski <henrik.hautakoski@gmail.com>
Date: Tue, 18 Dec 2018 09:16:26 +0100
Subject: [PATCH 32/32] src/as/parser.c: fix typos.

---
 src/as/parser.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/as/parser.c b/src/as/parser.c
index 4481e5a..1b4eb6e 100644
--- a/src/as/parser.c
+++ b/src/as/parser.c
@@ -87,7 +87,7 @@ static int match_operand(struct lexer* lex, enum token_type type, struct ast *as
  */
 #define match_reg(pos, ast) \
 	if (match_operand(lex, TOKEN_REG, ast) < 0 || validate_number(lex, NUMBER_SIZE_U4) < 0) \
-		return asm_error((lex)->lineno, "Expected number at argument %i", pos)
+		return asm_error((lex)->lineno, "Expected register at argument %i", pos)
 
 #define match_imm(pos, size, ast)  \
 	if (match_operand(lex, TOKEN_NUMBER, ast) < 0 || validate_number(lex, size) < 0) \
@@ -105,7 +105,7 @@ static int match_operand(struct lexer* lex, enum token_type type, struct ast *as
  * Functions for matching complete instructions.
  */
 
-// R-Type (rs : u8, r0 : u8, r1 : u8)
+// R-Type (rs : u4, r0 : u4, r1 : u4)
 static int match_typeR(struct lexer* lex, struct ast *ast) {
 
 	match_reg(1, ast); match_arg(1);
@@ -116,7 +116,7 @@ static int match_typeR(struct lexer* lex, struct ast *ast) {
 	return 1;
 }
 
-// RI-Type (rs : u8, r0 : u8, offset : s8)
+// RI-Type (rs : u4, r0 : u4, offset : s4)
 static int match_typeRI(struct lexer* lex, struct ast *ast) {
 
 	match_reg(1, ast); match_arg(1);
@@ -127,7 +127,7 @@ static int match_typeRI(struct lexer* lex, struct ast *ast) {
 	return 1;
 }
 
-// I-Type (rs : u8, imm : s8)
+// I-Type (rs : u4, imm : s8)
 static int match_typeI(struct lexer* lex, struct ast *ast) {
 
 	match_reg(1, ast); match_arg(1);
@@ -146,7 +146,6 @@ static int match_typeJ(struct lexer* lex, struct ast *ast) {
 	return 1;
 }
 
-// Match \n*<opcode>
 static int match_label_decl(struct lexer* lex, struct ast *ast) {
 
 	uint16_t location = ast->instr.size / sizeof(struct ast_instr);