From 377c008ebeb5894586397eab9ca7e059bb735b67 Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Sat, 10 Nov 2018 12:05:10 +0100 Subject: [PATCH 01/32] asm: lexer implementation. --- .gitignore | 1 + Makefile | 14 +++- src/as/as.c | 54 ++++++++++++ src/as/lexer.c | 221 +++++++++++++++++++++++++++++++++++++++++++++++++ src/as/lexer.h | 95 +++++++++++++++++++++ 5 files changed, 381 insertions(+), 4 deletions(-) create mode 100644 src/as/as.c create mode 100644 src/as/lexer.c create mode 100644 src/as/lexer.h diff --git a/.gitignore b/.gitignore index 563e7eb..b979155 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ *.o m16vm +/as diff --git a/Makefile b/Makefile index 422bd6f..45518e7 100644 --- a/Makefile +++ b/Makefile @@ -3,13 +3,19 @@ CC = gcc CFLAGS = -Iinclude -DMEM_SIZE=32 -DM16_DEBUG_MEM LD = $(CC) -VM = m16vm +PROGRAMS = m16vm as -$(VM) : src/vm.o src/cpu.o src/mm.o src/instr_decode.o src/syscall.o src/program.o - $(LD) $(LDFLAGS)-o $@ $^ +all: $(PROGRAMS) + +m16vm : src/vm.o src/cpu.o src/mm.o src/instr_decode.o src/syscall.o src/program.o + $(LD) $(LDFLAGS) -o $@ $^ + +as : src/as/as.o src/as/lexer.o + $(LD) $(LDFLAGS) -o $@ $^ clean : $(RM) src/*.o + $(RM) src/as/*.o distclean : clean - $(RM) $(VM) + $(RM) $(PROGRAMS) diff --git a/src/as/as.c b/src/as/as.c new file mode 100644 index 0000000..3e2052e --- /dev/null +++ b/src/as/as.c @@ -0,0 +1,54 @@ +/* as.c + * + * Copyright (C) 2012 Henrik Hautakoski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ +#include +#include +#include "lexer.h" + +int usage(char *program) { + + fprintf(stderr, "Usage: %s \n", program); + return -1; +} + +int main(int argc, char **argv) { + + FILE *fd; + struct lexer lex; + + if (argc < 2) + return usage(argv[0]); + + fd = fopen(argv[1], "r"); + if (fd == NULL) + return -1; + + lexer_init(&lex, fd); + + do { + if (lexer_get_next(&lex) < 0) + break; + + lexer_print_token(&lex.token); + } while(lex.token.type != TOKEN_EOI); + + + fclose(fd); + return 0; +} diff --git a/src/as/lexer.c b/src/as/lexer.c new file mode 100644 index 0000000..7a1f7aa --- /dev/null +++ b/src/as/lexer.c @@ -0,0 +1,221 @@ + +#include +#include +#include "lexer.h" + +/** + * macros for the grammar. + */ + +// Numbers is defined as [0-9] +#define number(x) ((x) >= '0' && (x) <= '9') + +// The first digit can however also contain '-' +#define first_number(x) (number(x) || (x) == '-' ) + +// First character in strings can be [a-z][A-Z] or '_' +#define first_string(x) \ + ( ((x) >= 'a' && (x) <= 'z') \ + || ((x) >= 'A' && (x) <= 'Z') \ + || (x) == '_' ) + +// All characters after can also include numbers or ':' +#define string(x) \ + (first_string(x) || number(x)) + +#define space(x) ((x) == ' ' || (x) == '\t' || (x) == '\r') + + +/** + * Helper functions + */ + +static int read_next(struct lexer *lex) { + + int c, comment = 0; + + while((c = fgetc(lex->fp)) != EOF) { + + if (c == '\n') + break; + + if (comment) + continue; + + if (c == ';') { + comment = 1; + } else if (!space(c)) { + break; + } + } + return c; + } + +static int read_number(FILE *fp) { + + int c, neg = 0, val = 0; + while((c = fgetc(fp)) != EOF) { + if (neg == 0 && c == '-') { + neg = 1; + continue; + } + if (!number(c)) { + ungetc(c, fp); + break; + } + val = (val * 10) + 
(c - '0'); + } + + if (neg) + return -1 * val; + return val; +} + +static int read_string(FILE *fp) { + + int c, label_decl = 0, i = 0; + char buf[64]; + + while((c = fgetc(fp)) != EOF && i < 64) { + + if (string(c)) { + buf[i++] = c; + } else { + if (c == ':') { + label_decl = 1; + } else { + ungetc(c, fp); + } + break; + } + } + buf[i] = '\0'; + + if (label_decl) { + return TOKEN_LABEL_DECL; + } else if (!strcmp("noop", buf)) { + return TOKEN_OPCODE_NOOP; + } else if (!strcmp("add", buf)) { + return TOKEN_OPCODE_ADD; + } else if (!strcmp("movl", buf)) { + return TOKEN_OPCODE_MOVL; + } else if (!strcmp("movh", buf)) { + return TOKEN_OPCODE_MOVH; + } else if (!strcmp("ld", buf)) { + return TOKEN_OPCODE_LD; + } else if (!strcmp("sw", buf)) { + return TOKEN_OPCODE_SW; + } else if (!strcmp("beq", buf)) { + return TOKEN_OPCODE_BEQ; + } else if (!strcmp("jmp", buf)) { + return TOKEN_OPCODE_JMP; + } else if (!strcmp("jr", buf)) { + return TOKEN_OPCODE_JR; + } else if (!strcmp("int", buf)) { + return TOKEN_OPCODE_INT; + } + return TOKEN_LABEL; +} + +/** + * Exposed functions + */ + +void lexer_init(struct lexer *lex, FILE *fp) { + + lex->lineno = 1; + lex->fp = fp; + lex->token.type = TOKEN_EOI; +} + +int lexer_get_next(struct lexer *lex) { + + uint16_t num; + int ch = read_next(lex); + + if (lex->token.type == TOKEN_EOL) + lex->lineno++; + + switch(ch) { + case EOF : lex->token.type = TOKEN_EOI; + break; + case '\n' : + lex->token.type = TOKEN_EOL; + break; + case ',' : lex->token.type = TOKEN_ARG_SEP; + break; + case '$' : + lex->token.type = TOKEN_REG; + num = read_number(lex->fp); + // Register numbers are 4-bit only (0-15). 
+ if (num > 0xF) { + fprintf(stderr, "ERROR: Invalid register value '%i' on line: %i\n", num, lex->lineno); + return -1; + } + lex->token.value.n = num; + break; + default: + if (first_number(ch)) { + ungetc(ch, lex->fp); + lex->token.type = TOKEN_NUMBER; + lex->token.value.n = read_number(lex->fp); + } else if (first_string(ch)) { + ungetc(ch, lex->fp); + lex->token.type = read_string(lex->fp); + } else { + fprintf(stderr, "ERROR: Invalid character '%c' on line: %i\n", ch, lex->lineno); + return -1; + } + } + + lex->token.lineno = lex->lineno; + return 0; +} + +void lexer_print_token(struct token *token) { + + static int lineno = 0; + + if (token->lineno != lineno) { + lineno = token->lineno; + printf("\n%i: ", lineno); + } + + switch(token->type) { + case TOKEN_OPCODE_NOOP : printf(" [OP NOOP] "); + break; + case TOKEN_OPCODE_ADD : printf(" [OP ADD] "); + break; + case TOKEN_OPCODE_MOVL : printf(" [OP MOVL] "); + break; + case TOKEN_OPCODE_MOVH : printf(" [OP MOVH] "); + break; + case TOKEN_OPCODE_LD : printf(" [OP LD] "); + break; + case TOKEN_OPCODE_SW : printf(" [OP SW] "); + break; + case TOKEN_OPCODE_BEQ : printf(" [OP BEQ] "); + break; + case TOKEN_OPCODE_JMP : printf(" [OP JMP] "); + break; + case TOKEN_OPCODE_JR : printf(" [OP JR] "); + break; + case TOKEN_OPCODE_INT : printf(" [OP INT] "); + break; + case TOKEN_LABEL : printf(" [LABEL] "); + break; + case TOKEN_LABEL_DECL : printf(" [LABEL DECL] "); + break; + case TOKEN_REG : printf(" [REG %i] ", token->value.n); + break; + case TOKEN_ARG_SEP : printf(" [SEP] "); + break; + case TOKEN_NUMBER : printf(" [NUM %i] ", token->value.n); + break; + case TOKEN_EOI : printf(" [EOI] "); + break; + case TOKEN_EOL : printf(" [EOL] "); + break; + default: printf(" [U] "); + } +} diff --git a/src/as/lexer.h b/src/as/lexer.h new file mode 100644 index 0000000..e2a5319 --- /dev/null +++ b/src/as/lexer.h @@ -0,0 +1,95 @@ +/* lexer.h + * + * Copyright (C) 2018 Henrik Hautakoski + * + * This program is free software; you 
can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ +#ifndef ASM_LEXER_H +#define ASM_LEXER_H + +#include +#include + +/** + * All token types. + */ +enum token_type { + TOKEN_EOI = -1, + TOKEN_EOL, // Newline + TOKEN_OPCODE_NOOP, + TOKEN_OPCODE_ADD, + TOKEN_OPCODE_MOVL, + TOKEN_OPCODE_MOVH, + TOKEN_OPCODE_LD, + TOKEN_OPCODE_SW, + TOKEN_OPCODE_BEQ, + TOKEN_OPCODE_JMP, + TOKEN_OPCODE_JR, + TOKEN_OPCODE_INT, + TOKEN_LABEL, + TOKEN_LABEL_DECL, + TOKEN_REG, + TOKEN_NUMBER, + TOKEN_ARG_SEP +}; + +/** + * Token structure. + * + * Holds information about a single token. + */ +struct token { + // Line number where the token was extracted from. + uint16_t lineno; + + enum token_type type; + + /* + * Token value, depending on type + * this can be a string or unsigned short + */ + union { + uint16_t n; + char s[32]; + } value; +}; + +/** + * Lexer state + */ +struct lexer { + uint16_t lineno; // current line number + FILE * fp; // File being lexed. + struct token token; // Current token +}; + +/** + * Initialize the lexer with a file pointer to the file + * that should be lexed. + */ +void lexer_init(struct lexer *lex, FILE *fp); + +/** + * Advance the lexer to the next token. + */ +int lexer_get_next(struct lexer *lex); + +/** + * For debugging, prints the token to standard output. 
+ */ +void lexer_print_token(struct token *token); + +#endif /* ASM_LEXER_H */ From d530dfda63ced48402e58849450fa0b04263ae61 Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Sat, 24 Nov 2018 17:45:20 +0100 Subject: [PATCH 02/32] include/instr.h: adding a special OP_NONE opcode. --- include/instr.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/instr.h b/include/instr.h index 4f776e5..4dfe36c 100644 --- a/include/instr.h +++ b/include/instr.h @@ -33,6 +33,7 @@ #define OP_JMP 7 #define OP_JR 8 #define OP_INT 15 +#define OP_NONE 0xFF /* Register type */ struct instr_R { From 95f2e38c2d54c5be37c54386831138e9eded144f Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Sat, 24 Nov 2018 19:57:10 +0100 Subject: [PATCH 03/32] asm: adding error module --- src/as/error.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ src/as/error.h | 27 +++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 src/as/error.c create mode 100644 src/as/error.h diff --git a/src/as/error.c b/src/as/error.c new file mode 100644 index 0000000..704658b --- /dev/null +++ b/src/as/error.c @@ -0,0 +1,46 @@ +/* error.c + * + * Copyright (C) 2018 Henrik Hautakoski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. 
+ */ +#include +#include +#include "error.h" + +void asm_warn(int lineno, const char *fmt, ...) { + + va_list vl; + + fprintf(stderr, "Line %i: Warning: ", lineno); + va_start(vl, fmt); + vfprintf(stderr, fmt, vl); + va_end(vl); + fprintf(stderr, "\n"); +} + +int asm_error(int lineno, const char *fmt, ...) { + + va_list vl; + + fprintf(stderr, "Line %i: Error: ", lineno); + va_start(vl, fmt); + vfprintf(stderr, fmt, vl); + va_end(vl); + fprintf(stderr, "\n"); + + return -1; +} diff --git a/src/as/error.h b/src/as/error.h new file mode 100644 index 0000000..70ebb71 --- /dev/null +++ b/src/as/error.h @@ -0,0 +1,27 @@ +/* error.h + * + * Copyright (C) 2018 Henrik Hautakoski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. 
+ */ +#ifndef ASM_ERROR_H +#define ASM_ERROR_H + +void asm_warn(int lineno, const char *fmt, ...); + +int asm_error(int lineno, const char *fmt, ...); + +#endif /* ASM_ERROR_H */ From 16501e40befcc136cd5e002878c1260ff0aa7efb Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Sat, 24 Nov 2018 19:57:36 +0100 Subject: [PATCH 04/32] asm: adding instr_encode module --- src/as/instr_encode.c | 50 +++++++++++++++++++++++++++++++++++++++++++ src/as/instr_encode.h | 27 +++++++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 src/as/instr_encode.c create mode 100644 src/as/instr_encode.h diff --git a/src/as/instr_encode.c b/src/as/instr_encode.c new file mode 100644 index 0000000..1478928 --- /dev/null +++ b/src/as/instr_encode.c @@ -0,0 +1,50 @@ +/* instr_encode.c + * + * Copyright (C) 2018 Henrik Hautakoski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ +#include +#include +#include "instr_encode.h" + +void instr_encode(struct instr* instr, uint16_t *out) { + + uint8_t *p = (uint8_t *) out; + + *out = 0; + *p = instr->opcode << 4; + + if (instr->opcode == OP_NOOP) { + // Do nothing. 
+ } else if (instr->opcode == OP_JMP) { + *p |= (instr->j.addr >> 8); + *(p+1) = instr->j.addr; + } else { + *p |= instr->r.rs; + + // I-Type + if (instr->opcode == OP_MOVL || instr->opcode == OP_MOVH + || instr->opcode == OP_JR || instr->opcode == OP_INT) { + + *(p+1) = instr->i.imm; + } + // R/RI-Type + else { + *(p+1) = (instr->r.r0 << 4) | instr->r.r1; + } + } +} diff --git a/src/as/instr_encode.h b/src/as/instr_encode.h new file mode 100644 index 0000000..0f01304 --- /dev/null +++ b/src/as/instr_encode.h @@ -0,0 +1,27 @@ +/* instr_encode.h + * + * Copyright (C) 2018 Henrik Hautakoski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. 
+ */ +#ifndef ASM_INSTR_ENCODE_H +#define ASM_INSTR_ENCODE_H + +#include + +void instr_encode(struct instr* instr, uint16_t *out); + +#endif /* ASM_INSTR_ENCODE_H */ From 3528c467ac8f81c23cd3f8944fccc40b9d25a0ee Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Sat, 24 Nov 2018 19:58:13 +0100 Subject: [PATCH 05/32] asm: adding the parser --- Makefile | 2 +- src/as/as.c | 38 ++++++---- src/as/parser.c | 198 ++++++++++++++++++++++++++++++++++++++++++++++++ src/as/parser.h | 33 ++++++++ 4 files changed, 254 insertions(+), 17 deletions(-) create mode 100644 src/as/parser.c create mode 100644 src/as/parser.h diff --git a/Makefile b/Makefile index 45518e7..6fe2b9c 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ all: $(PROGRAMS) m16vm : src/vm.o src/cpu.o src/mm.o src/instr_decode.o src/syscall.o src/program.o $(LD) $(LDFLAGS) -o $@ $^ -as : src/as/as.o src/as/lexer.o +as : src/as/as.o src/as/parser.o src/as/lexer.o src/as/instr_encode.o src/as/error.o $(LD) $(LDFLAGS) -o $@ $^ clean : diff --git a/src/as/as.c b/src/as/as.c index 3e2052e..c7aa24c 100644 --- a/src/as/as.c +++ b/src/as/as.c @@ -19,36 +19,42 @@ */ #include #include -#include "lexer.h" +#include "parser.h" int usage(char *program) { - fprintf(stderr, "Usage: %s \n", program); + fprintf(stderr, "Usage: %s [ 2) { + fd_out = fopen(argv[2], "w"); + if (fd_out == NULL) { + perror("Could not open output file"); + fclose(fd_in); + return -1; + } + } + + parse(fd_in, fd_out); + + fclose(fd_in); + fclose(fd_out); return 0; } diff --git a/src/as/parser.c b/src/as/parser.c new file mode 100644 index 0000000..7b95283 --- /dev/null +++ b/src/as/parser.c @@ -0,0 +1,198 @@ +/* parser.c + * + * Copyright (C) 2012 Henrik Hautakoski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ +#include +#include +#include +#include "error.h" +#include "instr_encode.h" +#include "lexer.h" +#include "parser.h" + +/* + * Helper functions/macros for defining parser rules. + */ + +// match the next token. +// returns 0 if the token was of the correct type. -1 otherwise +static int match_type(struct lexer* lex, enum token_type type) { + + lexer_get_next(lex); + return lex->token.type == type ? 0 : -1; +} + +// Same as match_type() but extracts a number from the token. +static int match_type_num(struct lexer* lex, enum token_type type, + void* out, size_t size) { + + if (match_type(lex, type) < 0) + return -1; + + memcpy(out, &lex->token.value.n, size); + return 0; +} + +/* + * Helper macros for matching tokens. + */ +#define match_reg(pos, out) \ + if (match_type_num(lex, TOKEN_REG, out, sizeof(uint8_t))) \ + return asm_error((lex)->lineno, "Expected number at argument %i", pos) + +#define match_imm(pos, out) \ + if (match_type_num(lex, TOKEN_NUMBER, out, sizeof(int16_t)) < 0) \ + return asm_error((lex)->lineno, "Expected number at argument %i", pos) + +#define match_arg(pos) \ + if (match_type(lex, TOKEN_ARG_SEP) < 0) \ + return asm_error((lex)->lineno, "Expected separator after argument %i", pos) + +#define match_end \ + if (match_type(lex, TOKEN_EOL) < 0) \ + return asm_error(lex->lineno, "Expected newline") + +/* + * Functions for matching complete instructions. 
+ */ + +// R-Type (rs : u8, r0 : u8, r1 : u8) +static int match_typeR(struct instr_R *instr, struct lexer* lex) { + + match_reg(1, &instr->rs); match_arg(1); + match_reg(2, &instr->r0); match_arg(2); + match_reg(3, &instr->r1); + match_end; + + return 0; +} + +// RI-Type (rs : u8, r0 : u8, offset : s8) +static int match_typeRI(struct instr_RI *instr, struct lexer* lex) { + + match_reg(1, &instr->rs); match_arg(1); + match_reg(2, &instr->r0); match_arg(2); + match_imm(3, &instr->offset); + match_end; + + return 1; +} + +// I-Type (rs : u8, imm : s8) +static int match_typeI(struct instr_I *instr, struct lexer* lex) { + + match_reg(1, &instr->rs); match_arg(1); + match_imm(2, &instr->imm); + match_end; + + return 1; +} + +// J-Type (addr : u16) +static int match_typeJ(struct instr_J *instr, struct lexer* lex) { + + match_imm(1, &instr->addr); + match_end; + return 1; +} + +#define opcode_guard(op, v) \ + if (op == OP_NONE) op = v + +/* + * Parse a single line. + */ +static int parse_line(struct lexer* lex, struct instr *instr) { + + instr->opcode = OP_NONE; + + if (lexer_get_next(lex) < 0) + return -1; + + // Opcode should come first. 
+ switch(lex->token.type) { + case TOKEN_EOI: return -1; + case TOKEN_EOL: break; + case TOKEN_OPCODE_NOOP : instr->opcode = OP_NOOP; + match_end; + break; + // Type-R + case TOKEN_OPCODE_ADD : opcode_guard(instr->opcode, OP_ADD); + return match_typeR(&instr->r, lex); + // Type-I + case TOKEN_OPCODE_MOVL : opcode_guard(instr->opcode, OP_MOVL); + case TOKEN_OPCODE_MOVH : opcode_guard(instr->opcode, OP_MOVH); + case TOKEN_OPCODE_JR : opcode_guard(instr->opcode, OP_JR); + case TOKEN_OPCODE_INT : opcode_guard(instr->opcode, OP_INT); + return match_typeI(&instr->i, lex); + // Type-RI + case TOKEN_OPCODE_LD : opcode_guard(instr->opcode, OP_LW); + case TOKEN_OPCODE_SW : opcode_guard(instr->opcode, OP_SW); + case TOKEN_OPCODE_BEQ : opcode_guard(instr->opcode, OP_BEQ); + return match_typeRI(&instr->ri, lex); + // Type-J + case TOKEN_OPCODE_JMP : opcode_guard(instr->opcode, OP_JMP); + return match_typeJ(&instr->j, lex); + case TOKEN_LABEL_DECL : + asm_warn(lex->lineno, "labels are not supported yet. ignoring."); + break; + default: + return asm_error(lex->lineno, "Opcode or label expected"); + } + + return 0; +} + +static int gencode(FILE *fd, struct instr *instructions, int len) { + + uint16_t buf; // 2-bytes (16-bit) per instruction. + + for(int i = 0; i < len; i++) { + instr_encode(instructions + i, &buf); + + fwrite(&buf, sizeof(buf), 1, fd); + } +} + +/* + * Main parser function. 
+ */ +int parse(FILE *source_fd, FILE *dest_fd) { + + int rc; + struct lexer lex; + struct instr instr[256]; + int n = 0; + + lexer_init(&lex, source_fd); + + do { + rc = parse_line(&lex, instr + n); + + if (instr[n].opcode != OP_NONE) { + n++; + if (n >= 256) + // TODO: Dynamic allocs :) + return asm_error(-1, "Oops, parser ran out of memory."); + } + } while(rc >= 0); + + gencode(dest_fd, instr, n); + + return 0; +} diff --git a/src/as/parser.h b/src/as/parser.h new file mode 100644 index 0000000..29f2fe3 --- /dev/null +++ b/src/as/parser.h @@ -0,0 +1,33 @@ +/* parser.h + * + * Copyright (C) 2012 Henrik Hautakoski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. 
+ */ +#ifndef ASM_PARSER_H +#define ASM_PARSER_H + +#include +#include + +struct parse_state { + struct instr *instr_ptr; + unsigned int num; +}; + +int parse(FILE *source_fd, FILE *dest_fd); + +#endif /* ASM_PARSER_H */ From 0f41fc0870e4e9c5e366a71184cdb04afd282927 Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Sat, 24 Nov 2018 20:01:40 +0100 Subject: [PATCH 06/32] asm: adding example files --- asm/hello_world.as | 33 +++++++++++++++++++++++++++++++++ asm/test.as | 15 +++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 asm/hello_world.as create mode 100644 asm/test.as diff --git a/asm/hello_world.as b/asm/hello_world.as new file mode 100644 index 0000000..dc101c1 --- /dev/null +++ b/asm/hello_world.as @@ -0,0 +1,33 @@ + +; -- Store "Hello World" string in memory. +movl $1, 72 ; H +sw $0, $1, 0 +movl $1, 69 ; E +sw $0, $1, 1 +movl $1, 76 ; L +sw $0, $1, 2 +sw $0, $1, 3 +movl $1, 79 ; O +sw $0, $1, 4 +movl $1, 32 ; Space +sw $0, $1, 5 +movl $1, 87 ; W +sw $0, $1, 6 +movl $1, 79 ; O +sw $0, $1, 7 +movl $1, 82 ; R +sw $0, $1, 8 +movl $1, 76 ; L +sw $0, $1, 9 +movl $1, 68 ; D +sw $0, $1, 10 + +; -- setup print loop. +movl $1, 1 ; Load 1 in R1 (used for increment the counter) +_start: +ld $15, $0, 0 ; Load memory address stored in R0 into R15 +int $10, 2 ; Print character +add $0, $0, $1 ; Add 1 (R1) to counter (R0) +beq $15, $2, 1 ; Branch to "noop" (skipping next instruction) if R15 = 0 (R2 holds 0) +jmp 22 ; jump back to "_start" label (not implemented atm) +noop diff --git a/asm/test.as b/asm/test.as new file mode 100644 index 0000000..0ff196c --- /dev/null +++ b/asm/test.as @@ -0,0 +1,15 @@ +; NOTE: This is just to check the syntax. 
+; No logic behind any of the instructions (will prob crash) +movl $0, 3 +movl $5, 6500 +; Comment +noop ; Comment +start0: add $3, $0, $1 +sw $2, $3, $2 +ld $5, $0, $3 +beq $0, $1, 1 +beq $0, $1, -25 +beq $0, $1, -4000 +jr $5, 0 +int $0, 1 +jmp 5 From adbe0006f50c1f4a5c1d7444ab44baf29ee64b1e Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Wed, 28 Nov 2018 19:41:09 +0100 Subject: [PATCH 07/32] asm: adding symbol table datastructure. --- src/as/symtab.c | 121 ++++++++++++++++++++++++++++++++++++++++++++++++ src/as/symtab.h | 37 +++++++++++++++ 2 files changed, 158 insertions(+) create mode 100644 src/as/symtab.c create mode 100644 src/as/symtab.h diff --git a/src/as/symtab.c b/src/as/symtab.c new file mode 100644 index 0000000..9ac9867 --- /dev/null +++ b/src/as/symtab.c @@ -0,0 +1,121 @@ +/* symtab.c + * + * Copyright (C) 2018 Henrik Hautakoski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ +#include +#include +#include "symtab.h" + +// TODO: Right now, the symtab is implemented as a simple dynamic growing array. +// This is fine for now, but for lookup speed. A hashtable is more efficient. + +// How much memory that should be allocated each time. +// Note: this should be defined as number of `struct symbol` objects, not bytes. 
+#define BLOCK_SIZE 32 + +// Helper macro to calculate the number of bytes the table needs. +#define __M_SIZE(t) \ + ((t)->size * sizeof(struct symbol)) + +struct symbol { + // The label associated with the location. + char label[SYMTAB_LABEL_MAXLEN]; + + // Location (program address) + uint16_t loc; +}; + +// Symbol table structure. +struct symtab { + // Array of symbols. + struct symbol *data; + + // Number of symbols in the array. + size_t num; + + // Total number of symbols the array can hold. + size_t size; +}; + +symtab_t* symtab_init() { + + struct symtab *table = malloc(sizeof(struct symtab)); + + table->num = 0; + table->size = BLOCK_SIZE; + table->data = malloc(__M_SIZE(table)); + + return table; +} + +void symtab_free(symtab_t *table) { + + if (!table) + return; + + if (table->data) + free(table->data); + + memset(table, 0, sizeof(struct symtab)); + free(table); +} + +int symtab_set(symtab_t *table, const char *label, uint16_t loc) { + + // Check if it exists first. + int index = symtab_get(table, label, NULL); + + // Entry did not exist. Insert it. + if (index < 0) { + struct symbol *sym; + + // We have one more. + table->num += 1; + + // Make sure we resize the memory. + if (table->num > table->size) { + table->size += BLOCK_SIZE; + table->data = realloc(table->data, __M_SIZE(table)); + } + + // Insert the symbol at the end. + sym = table->data + (table->num - 1); + strncpy(sym->label, label, SYMTAB_LABEL_MAXLEN); + sym->loc = loc; + + return 0; + } + + // Label already exists; not inserted. Return as error. + return -1; +} + +int symtab_get(symtab_t *table, const char *label, uint16_t *loc) { + + // Linear search here for simplicity. 
+ for(size_t i = 0; i < table->num; i++) { + struct symbol *sym = table->data + i; + + if (!strncmp(sym->label, label, SYMTAB_LABEL_MAXLEN)) { + + if (loc) *loc = sym->loc; + return i; + } + } + return -1; +} diff --git a/src/as/symtab.h b/src/as/symtab.h new file mode 100644 index 0000000..cd1cd68 --- /dev/null +++ b/src/as/symtab.h @@ -0,0 +1,37 @@ +/* symtab.h + * + * Copyright (C) 2018 Henrik Hautakoski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. 
+ */ +#ifndef ASM_SYMTAB_H +#define ASM_SYMTAB_H + +#include + +#define SYMTAB_LABEL_MAXLEN 32 + +typedef struct symtab symtab_t; + +symtab_t* symtab_init(); + +void symtab_free(symtab_t *table); + +int symtab_set(symtab_t *table, const char *label, uint16_t loc); + +int symtab_get(symtab_t *table, const char *label, uint16_t *loc); + +#endif /* ASM_SYMTAB_H */ From a305c34b3946d460ad27e27b8b775c054c8cd541 Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Sun, 9 Dec 2018 18:31:35 +0100 Subject: [PATCH 08/32] lib: adding vector module --- lib/include/vector.h | 41 +++++++++++++++++++++++++++++++++ lib/src/vector.c | 54 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 lib/include/vector.h create mode 100644 lib/src/vector.c diff --git a/lib/include/vector.h b/lib/include/vector.h new file mode 100644 index 0000000..cf2d2b9 --- /dev/null +++ b/lib/include/vector.h @@ -0,0 +1,41 @@ +/* vector.h + * + * Copyright (C) 2018 Henrik Hautakoski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. 
+ */ +#ifndef VECTOR_H +#define VECTOR_H + +#include +#include + +struct vector { + size_t blk_sz; + size_t alloc; + size_t size; + void *base; +}; + +#define VECTOR_INIT(block_size) { block_size, 0, 0, NULL } + +void vector_init(struct vector* v, size_t block_size); + +void vector_destory(struct vector* v); + +void vector_append(struct vector* v, const void *ptr, size_t n); + +#endif /* VECTOR_H */ diff --git a/lib/src/vector.c b/lib/src/vector.c new file mode 100644 index 0000000..b49e020 --- /dev/null +++ b/lib/src/vector.c @@ -0,0 +1,54 @@ +/* vector.c + * + * Copyright (C) 2018 Henrik Hautakoski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ +#include +#include +#include + +static void ensure_size(struct vector* v, size_t n) { + + size_t old_alloc = v->alloc; + while(v->alloc < v->size + n) + v->alloc += v->blk_sz; + + if (old_alloc != v->alloc) + v->base = realloc(v->base, v->alloc); +} + +void vector_init(struct vector* v, size_t block_size) { + + v->blk_sz = block_size > 0 ? 
block_size : 64; + v->alloc = 0; + v->size = 0; + v->base = NULL; +} + +void vector_destory(struct vector *v) { + + if (v->base) + free(v->base); +} + +void vector_append(struct vector *v, const void *ptr, size_t n) { + + ensure_size(v, n); + + memcpy(v->base + v->size, ptr, n); + v->size += n; +} From 3122da08c916a077cfd713aadb2e81b7b27c355e Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Sun, 9 Dec 2018 18:32:46 +0100 Subject: [PATCH 09/32] move include/ to lib/include --- Makefile | 2 +- {include => lib/include}/instr.h | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename {include => lib/include}/instr.h (100%) diff --git a/Makefile b/Makefile index 6fe2b9c..c7a46a6 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ CC = gcc -CFLAGS = -Iinclude -DMEM_SIZE=32 -DM16_DEBUG_MEM +CFLAGS = -g -Ilib/include -DMEM_SIZE=32 -DM16_DEBUG_MEM LD = $(CC) PROGRAMS = m16vm as diff --git a/include/instr.h b/lib/include/instr.h similarity index 100% rename from include/instr.h rename to lib/include/instr.h From aca8d7937e1ebb3655f5b85865f1f0bb602e605b Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Sun, 9 Dec 2018 19:27:26 +0100 Subject: [PATCH 10/32] asm: adding AST --- src/as/ast.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/as/ast.h | 81 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 176 insertions(+) create mode 100644 src/as/ast.c create mode 100644 src/as/ast.h diff --git a/src/as/ast.c b/src/as/ast.c new file mode 100644 index 0000000..392517e --- /dev/null +++ b/src/as/ast.c @@ -0,0 +1,95 @@ + +#include +#include +#include +#include +#include "ast.h" + +void ast_init(struct ast* ast) { + + vector_init(&ast->instr, 16 * sizeof(struct ast)); + vector_init(&ast->labels, 16 * sizeof(char**)); + ast->symbols = symtab_init(); +} + +void ast_free(struct ast* ast) { + + int i; + + // Free all label strings + for(int i = 0; i < ast->labels.size; i++) { + char *ptr = *((char**) ast->labels.base + i); + free(ptr); + } + 
vector_destory(&ast->labels); + vector_destory(&ast->instr); + symtab_free(ast->symbols); +} + +void ast_instr(struct ast* ast, uint8_t opcode) { + + struct ast_instr instr = { 0 }; + + instr.opcode = opcode; + vector_append(&ast->instr, &instr, sizeof(struct ast_instr)); +} + +void ast_instr_operand(struct ast* ast, enum ast_datatype type, void* value) { + + struct ast_instr *instr; + + if (ast->instr.size < 1) + return; + + // Fetch latest instruction. + instr = ast->instr.base + (ast->instr.size - sizeof(struct ast_instr)); + + if (instr->n_operands < 3) { + struct ast_instr_operand *n = instr->operands + instr->n_operands; + + n->type = type; + if (n->type == DATATYPE_STRING) { + n->s = strdup(value); + vector_append(&ast->labels, &n->s, sizeof(char**)); + } else { + n->r = ((intptr_t) value) & 0xFF; + } + + instr->n_operands++; + } +} + +void ast_location(struct ast* ast, const char *label, uint16_t loc) { + + symtab_set(ast->symbols, label, loc); +} + +void ast_print(struct ast* ast) { + + printf("{\n"); + + for(int i = 0; i < ast->instr.size; i += sizeof(struct ast_instr)) { + struct ast_instr *instr = ast->instr.base + i; + + printf("\topcode: %u", instr->opcode); + for(int j = 0; j < instr->n_operands; j++) { + struct ast_instr_operand *op = instr->operands + j; + printf(", operand%i: ", j); + switch(op->type) { + case DATATYPE_REGISTER : + printf("%u (reg)", op->r); + break; + case DATATYPE_NUMBER : + printf("%i (num)", op->n); + break; + case DATATYPE_STRING : + printf("%s (string)", op->s); + break; + } + } + printf("\n"); + } + + printf("}\n"); + +} diff --git a/src/as/ast.h b/src/as/ast.h new file mode 100644 index 0000000..6c23334 --- /dev/null +++ b/src/as/ast.h @@ -0,0 +1,81 @@ +/* ast.h + * + * Copyright (C) 2018 Henrik Hautakoski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or 
+ * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ +#ifndef ASM_AST_H +#define ASM_AST_H + +#include +#include "symtab.h" + +// All datatypes in the language. +enum ast_datatype { + DATATYPE_NONE = 0, // Not an actual type. + DATATYPE_REGISTER, // 4 bit unsigned. + DATATYPE_NUMBER, // 8 bit signed. + DATATYPE_STRING +}; + +// Struct to represent a single operand to a instruction. +struct ast_instr_operand { + enum ast_datatype type; + union { + uint8_t r; + int8_t n; + uint16_t addr; + const char *s; + }; +}; + +// Struct to represent a single instruction. +struct ast_instr { + uint8_t opcode; + uint8_t n_operands; + struct ast_instr_operand operands[3]; +}; + +// Structure to represent a program. +struct ast { + + // List of instructions. + struct vector instr; + + // List for storing label pointers (so we can free) + struct vector labels; + + // Symbol table with label - location mapping. + symtab_t *symbols; +}; + +void ast_init(struct ast* ast); + +void ast_free(struct ast* ast); + +// Add an instruction +void ast_instr(struct ast* ast, uint8_t opcode); + +// Add an operand to the latest instruction. 
+//void ast_instr_operand(struct ast* ast, const struct ast_instr_operand* op); +void ast_instr_operand(struct ast* ast, enum ast_datatype type, void* value); + +void ast_location(struct ast* ast, const char *label, uint16_t loc); + +// For debugging :) +void ast_print(struct ast* ast); + +#endif /* ASM_AST_H */ From f2d54204ca9022158984bc2021913de370b62e02 Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Sun, 9 Dec 2018 19:50:04 +0100 Subject: [PATCH 11/32] Makefile: add rule for building libm16 --- .gitignore | 1 + Makefile | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/.gitignore b/.gitignore index b979155..0983487 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ *.o +*.a m16vm /as diff --git a/Makefile b/Makefile index c7a46a6..a66306f 100644 --- a/Makefile +++ b/Makefile @@ -13,9 +13,14 @@ m16vm : src/vm.o src/cpu.o src/mm.o src/instr_decode.o src/syscall.o src/program as : src/as/as.o src/as/parser.o src/as/lexer.o src/as/instr_encode.o src/as/error.o $(LD) $(LDFLAGS) -o $@ $^ +lib/libm16.a : lib/src/vector.o + $(AR) rcs $@ $^ + clean : $(RM) src/*.o $(RM) src/as/*.o + $(RM) lib/*.o + $(RM) lib/*.a distclean : clean $(RM) $(PROGRAMS) From 96220537de10d424efe1ce214d8b66257b63498e Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Mon, 10 Dec 2018 10:06:12 +0100 Subject: [PATCH 12/32] asm: rename instr_encode to codegen and do code generation from AST. 
--- src/as/codegen.c | 61 ++++++++++++++++++++++++++++ src/as/{instr_encode.h => codegen.h} | 12 +++--- src/as/instr_encode.c | 50 ----------------------- 3 files changed, 67 insertions(+), 56 deletions(-) create mode 100644 src/as/codegen.c rename src/as/{instr_encode.h => codegen.h} (82%) delete mode 100644 src/as/instr_encode.c diff --git a/src/as/codegen.c b/src/as/codegen.c new file mode 100644 index 0000000..3d8c76c --- /dev/null +++ b/src/as/codegen.c @@ -0,0 +1,61 @@ +/* codegen.c + * + * Copyright (C) 2018 Henrik Hautakoski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. 
+ */ +#include "codegen.h" + +void codegen_emit(struct ast_instr* instr, symtab_t* symbols, uint8_t* out) { + + out[0] = instr->opcode << 4; + + if (instr->n_operands < 1) + // No operands, return + return; + + // J-Type (We don't have labels yet, so this is just a address) + if (instr->operands[0].type == DATATYPE_NUMBER) { + //if (instr->operands[0].type == DATATYPE_STRING) { + uint16_t addr; + + /* if (symtab_get(symbols, instr->operands[0].s, &addr)) { + out[0] |= (addr >> 8); + out[1] = addr; + }*/ + addr = instr->operands[0].n; + + out[0] |= (addr >> 8); + out[1] = addr; + } + // R/RI or I + else if (instr->operands[0].type == DATATYPE_REGISTER) { + + out[0] |= instr->operands[0].r & 0xF; + + // I + if (instr->operands[1].type == DATATYPE_NUMBER) { + + out[1] = instr->operands[1].n; + } + // R/RI-Type + else if (instr->operands[1].type == DATATYPE_REGISTER && + (instr->operands[2].type == DATATYPE_REGISTER || instr->operands[2].type == DATATYPE_NUMBER)) { + + out[1] = (instr->operands[1].r << 4) | (instr->operands[2].r & 0xF); + } + } +} diff --git a/src/as/instr_encode.h b/src/as/codegen.h similarity index 82% rename from src/as/instr_encode.h rename to src/as/codegen.h index 0f01304..8964321 100644 --- a/src/as/instr_encode.h +++ b/src/as/codegen.h @@ -1,4 +1,4 @@ -/* instr_encode.h +/* codegen.h * * Copyright (C) 2018 Henrik Hautakoski * @@ -17,11 +17,11 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. 
*/ -#ifndef ASM_INSTR_ENCODE_H -#define ASM_INSTR_ENCODE_H +#ifndef ASM_CODEGEN_H +#define ASM_CODEGEN_H -#include +#include "ast.h" -void instr_encode(struct instr* instr, uint16_t *out); +void codegen_emit(struct ast_instr* ast, symtab_t* symbols, uint8_t* out); -#endif /* ASM_INSTR_ENCODE_H */ +#endif /* ASM_CODEGEN_H */ diff --git a/src/as/instr_encode.c b/src/as/instr_encode.c deleted file mode 100644 index 1478928..0000000 --- a/src/as/instr_encode.c +++ /dev/null @@ -1,50 +0,0 @@ -/* instr_encode.c - * - * Copyright (C) 2018 Henrik Hautakoski - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - * MA 02110-1301, USA. - */ -#include -#include -#include "instr_encode.h" - -void instr_encode(struct instr* instr, uint16_t *out) { - - uint8_t *p = (uint8_t *) out; - - *out = 0; - *p = instr->opcode << 4; - - if (instr->opcode == OP_NOOP) { - // Do nothing. 
- } else if (instr->opcode == OP_JMP) { - *p |= (instr->j.addr >> 8); - *(p+1) = instr->j.addr; - } else { - *p |= instr->r.rs; - - // I-Type - if (instr->opcode == OP_MOVL || instr->opcode == OP_MOVH - || instr->opcode == OP_JR || instr->opcode == OP_INT) { - - *(p+1) = instr->i.imm; - } - // R/RI-Type - else { - *(p+1) = (instr->r.r0 << 4) | instr->r.r1; - } - } -} From 7646d63736fee3f541c80234bbbc388871ca9220 Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Mon, 10 Dec 2018 10:09:31 +0100 Subject: [PATCH 13/32] asm: build AST in parser. --- Makefile | 4 +- src/as/parser.c | 126 ++++++++++++++++++++++++------------------------ 2 files changed, 67 insertions(+), 63 deletions(-) diff --git a/Makefile b/Makefile index a66306f..4360200 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,9 @@ all: $(PROGRAMS) m16vm : src/vm.o src/cpu.o src/mm.o src/instr_decode.o src/syscall.o src/program.o $(LD) $(LDFLAGS) -o $@ $^ -as : src/as/as.o src/as/parser.o src/as/lexer.o src/as/instr_encode.o src/as/error.o +as : src/as/as.o src/as/parser.o src/as/lexer.o \ + src/as/codegen.o src/as/error.o src/as/symtab.o \ + src/as/ast.o lib/libm16.a $(LD) $(LDFLAGS) -o $@ $^ lib/libm16.a : lib/src/vector.o diff --git a/src/as/parser.c b/src/as/parser.c index 7b95283..eca613b 100644 --- a/src/as/parser.c +++ b/src/as/parser.c @@ -21,7 +21,8 @@ #include #include #include "error.h" -#include "instr_encode.h" +#include "codegen.h" +#include "ast.h" #include "lexer.h" #include "parser.h" @@ -37,26 +38,30 @@ static int match_type(struct lexer* lex, enum token_type type) { return lex->token.type == type ? 0 : -1; } -// Same as match_type() but extracts a number from the token. -static int match_type_num(struct lexer* lex, enum token_type type, - void* out, size_t size) { +// Same as match_type() but also generates a operand in the AST. 
+static int match_operand(struct lexer* lex, enum token_type type, struct ast *ast) { if (match_type(lex, type) < 0) return -1; - memcpy(out, &lex->token.value.n, size); + if (type == TOKEN_REG) { + ast_instr_operand(ast, DATATYPE_REGISTER, lex->token.value.n); + } else { + ast_instr_operand(ast, DATATYPE_NUMBER, lex->token.value.n); + } + return 0; } /* * Helper macros for matching tokens. */ -#define match_reg(pos, out) \ - if (match_type_num(lex, TOKEN_REG, out, sizeof(uint8_t))) \ +#define match_reg(pos, ast) \ + if (match_operand(lex, TOKEN_REG, ast) < 0) \ return asm_error((lex)->lineno, "Expected number at argument %i", pos) -#define match_imm(pos, out) \ - if (match_type_num(lex, TOKEN_NUMBER, out, sizeof(int16_t)) < 0) \ +#define match_imm(pos, ast) \ + if (match_operand(lex, TOKEN_NUMBER, ast) < 0) \ return asm_error((lex)->lineno, "Expected number at argument %i", pos) #define match_arg(pos) \ @@ -72,54 +77,54 @@ static int match_type_num(struct lexer* lex, enum token_type type, */ // R-Type (rs : u8, r0 : u8, r1 : u8) -static int match_typeR(struct instr_R *instr, struct lexer* lex) { +static int match_typeR(struct lexer* lex, struct ast *ast) { - match_reg(1, &instr->rs); match_arg(1); - match_reg(2, &instr->r0); match_arg(2); - match_reg(3, &instr->r1); + match_reg(1, ast); match_arg(1); + match_reg(2, ast); match_arg(2); + match_reg(3, ast); match_end; return 0; } // RI-Type (rs : u8, r0 : u8, offset : s8) -static int match_typeRI(struct instr_RI *instr, struct lexer* lex) { +static int match_typeRI(struct lexer* lex, struct ast *ast) { - match_reg(1, &instr->rs); match_arg(1); - match_reg(2, &instr->r0); match_arg(2); - match_imm(3, &instr->offset); + match_reg(1, ast); match_arg(1); + match_reg(2, ast); match_arg(2); + match_imm(3, ast); match_end; return 1; } // I-Type (rs : u8, imm : s8) -static int match_typeI(struct instr_I *instr, struct lexer* lex) { +static int match_typeI(struct lexer* lex, struct ast *ast) { - match_reg(1, &instr->rs); 
match_arg(1); - match_imm(2, &instr->imm); + match_reg(1, ast); match_arg(1); + match_imm(2, ast); match_end; return 1; } // J-Type (rs : u8, addr : u16) -static int match_typeJ(struct instr_J *instr, struct lexer* lex) { +static int match_typeJ(struct lexer* lex, struct ast *ast) { - match_imm(1, &instr->addr); + match_imm(1, ast); match_end; return 1; } -#define opcode_guard(op, v) \ - if (op == OP_NONE) op = v +#define opcode_guard(op) \ + if (op_set == 0) { op_set = 1; ast_instr(ast, op); } /* * Parse a single line. */ -static int parse_line(struct lexer* lex, struct instr *instr) { +static int parse_line(struct lexer* lex, struct ast *ast) { - instr->opcode = OP_NONE; + int op_set = 0; if (lexer_get_next(lex) < 0) return -1; @@ -128,26 +133,26 @@ static int parse_line(struct lexer* lex, struct instr *instr) { switch(lex->token.type) { case TOKEN_EOI: return -1; case TOKEN_EOL: break; - case TOKEN_OPCODE_NOOP : instr->opcode = OP_NOOP; + case TOKEN_OPCODE_NOOP : ast_instr(ast, OP_NOOP); match_end; break; // Type-R - case TOKEN_OPCODE_ADD : opcode_guard(instr->opcode, OP_ADD); - return match_typeR(&instr->r, lex); + case TOKEN_OPCODE_ADD : opcode_guard(OP_ADD); + return match_typeR(lex, ast); // Type-I - case TOKEN_OPCODE_MOVL : opcode_guard(instr->opcode, OP_MOVL); - case TOKEN_OPCODE_MOVH : opcode_guard(instr->opcode, OP_MOVH); - case TOKEN_OPCODE_JR : opcode_guard(instr->opcode, OP_JR); - case TOKEN_OPCODE_INT : opcode_guard(instr->opcode, OP_INT); - return match_typeI(&instr->i, lex); + case TOKEN_OPCODE_MOVL : opcode_guard(OP_MOVL); + case TOKEN_OPCODE_MOVH : opcode_guard(OP_MOVH); + case TOKEN_OPCODE_JR : opcode_guard(OP_JR); + case TOKEN_OPCODE_INT : opcode_guard(OP_INT); + return match_typeI(lex, ast); // Type-RI - case TOKEN_OPCODE_LD : opcode_guard(instr->opcode, OP_LW); - case TOKEN_OPCODE_SW : opcode_guard(instr->opcode, OP_SW); - case TOKEN_OPCODE_BEQ : opcode_guard(instr->opcode, OP_BEQ); - return match_typeRI(&instr->ri, lex); + case 
TOKEN_OPCODE_LD : opcode_guard(OP_LW); + case TOKEN_OPCODE_SW : opcode_guard(OP_SW); + case TOKEN_OPCODE_BEQ : opcode_guard(OP_BEQ); + return match_typeRI(lex, ast); // Type-J - case TOKEN_OPCODE_JMP : opcode_guard(instr->opcode, OP_JMP); - return match_typeJ(&instr->j, lex); + case TOKEN_OPCODE_JMP : opcode_guard(OP_JMP); + return match_typeJ(lex, ast); case TOKEN_LABEL_DECL : asm_warn(lex->lineno, "labels are not supported yet. ignoring."); break; @@ -158,17 +163,6 @@ static int parse_line(struct lexer* lex, struct instr *instr) { return 0; } -static int gencode(FILE *fd, struct instr *instructions, int len) { - - uint16_t buf; // 2-bytes (16-bit) per instruction. - - for(int i = 0; i < len; i++) { - instr_encode(instructions + i, &buf); - - fwrite(&buf, sizeof(buf), 1, fd); - } -} - /* * Main parser function. */ @@ -176,23 +170,31 @@ int parse(FILE *source_fd, FILE *dest_fd) { int rc; struct lexer lex; - struct instr instr[256]; - int n = 0; + struct ast ast; + ast_init(&ast); lexer_init(&lex, source_fd); + // Parse and build AST. do { - rc = parse_line(&lex, instr + n); - - if (instr[n].opcode != OP_NONE) { - n++; - if (n >= 256) - // TODO: Dynamic allocs :) - return asm_error(-1, "Oops, parser ran out of memory."); - } + rc = parse_line(&lex, &ast); } while(rc >= 0); - gencode(dest_fd, instr, n); + // TODO: Second pass validation + // make sure all referenced labels are actually defined. + + // Code generation + for(int i = 0; i < ast.instr.size; i += sizeof(struct ast_instr)) { + struct ast_instr *instr = ast.instr.base + i; + uint8_t code[2] = { 0 }; + + codegen_emit(instr, ast.symbols, &code); + + fwrite(&code, sizeof(code), 1, dest_fd); + } + + // Cleanup + ast_free(&ast); return 0; } From 86293537eb84abc2347343ddc523fa3b6c2b1548 Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Mon, 10 Dec 2018 21:47:04 +0100 Subject: [PATCH 14/32] src/as/lexer.c: make sure we store the string if it's a label. 
--- src/as/lexer.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/as/lexer.c b/src/as/lexer.c index 7a1f7aa..b9cb9b6 100644 --- a/src/as/lexer.c +++ b/src/as/lexer.c @@ -71,12 +71,11 @@ static int read_number(FILE *fp) { return val; } -static int read_string(FILE *fp) { +static int read_string(FILE *fp, char *buf, size_t len) { int c, label_decl = 0, i = 0; - char buf[64]; - while((c = fgetc(fp)) != EOF && i < 64) { + while((c = fgetc(fp)) != EOF && i < len) { if (string(c)) { buf[i++] = c; @@ -160,8 +159,11 @@ int lexer_get_next(struct lexer *lex) { lex->token.type = TOKEN_NUMBER; lex->token.value.n = read_number(lex->fp); } else if (first_string(ch)) { + char buf[32]; ungetc(ch, lex->fp); - lex->token.type = read_string(lex->fp); + lex->token.type = read_string(lex->fp, buf, sizeof(buf)); + if (lex->token.type == TOKEN_LABEL_DECL || lex->token.type == TOKEN_LABEL) + strcpy(lex->token.value.s, buf); } else { fprintf(stderr, "ERROR: Invalid character '%c' on line: %i\n", ch, lex->lineno); return -1; @@ -202,9 +204,9 @@ void lexer_print_token(struct token *token) { break; case TOKEN_OPCODE_INT : printf(" [OP INT] "); break; - case TOKEN_LABEL : printf(" [LABEL] "); + case TOKEN_LABEL : printf(" [LABEL \"%s\"] ", token->value.s); break; - case TOKEN_LABEL_DECL : printf(" [LABEL DECL] "); + case TOKEN_LABEL_DECL : printf(" [LABEL DECL \"%s\"] ", token->value.s); break; case TOKEN_REG : printf(" [REG %i] ", token->value.n); break; From 2b80662967868d18c45b1fdb90c5b89c888326c6 Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Mon, 10 Dec 2018 21:59:37 +0100 Subject: [PATCH 15/32] src/as/parser.c: store address for label declaration in the symbol table. 
--- src/as/parser.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/as/parser.c b/src/as/parser.c index eca613b..bb8653a 100644 --- a/src/as/parser.c +++ b/src/as/parser.c @@ -116,6 +116,16 @@ static int match_typeJ(struct lexer* lex, struct ast *ast) { return 1; } +// Match \n* +static int match_label_decl(struct lexer* lex, struct ast *ast) { + + uint16_t location = ast->instr.size / sizeof(struct ast_instr); + + ast_location(ast, lex->token.value.s, location); + + return 1; +} + #define opcode_guard(op) \ if (op_set == 0) { op_set = 1; ast_instr(ast, op); } @@ -154,8 +164,7 @@ static int parse_line(struct lexer* lex, struct ast *ast) { case TOKEN_OPCODE_JMP : opcode_guard(OP_JMP); return match_typeJ(lex, ast); case TOKEN_LABEL_DECL : - asm_warn(lex->lineno, "labels are not supported yet. ignoring."); - break; + return match_label_decl(lex, ast); default: return asm_error(lex->lineno, "Opcode or label expected"); } From 7ec84ad2a144bbb9b3722fbe4e2bc6d28898de93 Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Mon, 10 Dec 2018 22:03:05 +0100 Subject: [PATCH 16/32] src/as/parser.c: J-Type should now accept a label as argument. --- src/as/parser.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/as/parser.c b/src/as/parser.c index bb8653a..9f62eef 100644 --- a/src/as/parser.c +++ b/src/as/parser.c @@ -108,10 +108,11 @@ static int match_typeI(struct lexer* lex, struct ast *ast) { return 1; } -// J-Type (rs : u8, addr : u16) +// J-Type (addr : string) static int match_typeJ(struct lexer* lex, struct ast *ast) { - match_imm(1, ast); + if (match_operand(lex, TOKEN_LABEL, ast) < 0) + return asm_error(lex->lineno, "Expected label at argument 1"); match_end; return 1; } From aa171ac46cb3204c26317a817c04ce724cdf8a2c Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Mon, 10 Dec 2018 22:04:58 +0100 Subject: [PATCH 17/32] src/as/parser.c: in match_operand() store strings in the AST. 
--- src/as/parser.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/as/parser.c b/src/as/parser.c index 9f62eef..36ca98c 100644 --- a/src/as/parser.c +++ b/src/as/parser.c @@ -46,8 +46,10 @@ static int match_operand(struct lexer* lex, enum token_type type, struct ast *as if (type == TOKEN_REG) { ast_instr_operand(ast, DATATYPE_REGISTER, lex->token.value.n); - } else { + } else if (type == TOKEN_NUMBER) { ast_instr_operand(ast, DATATYPE_NUMBER, lex->token.value.n); + } else { + ast_instr_operand(ast, DATATYPE_STRING, lex->token.value.s); } return 0; From 0916f8bcdcbf5a62287a4e34369e395f1fd5401e Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Mon, 10 Dec 2018 22:55:20 +0100 Subject: [PATCH 18/32] src/as/parser.c: do semantics checks. --- src/as/parser.c | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/src/as/parser.c b/src/as/parser.c index 36ca98c..d39bb59 100644 --- a/src/as/parser.c +++ b/src/as/parser.c @@ -175,6 +175,29 @@ static int parse_line(struct lexer* lex, struct ast *ast) { return 0; } +// Check the semantics of the program's AST. +// For now, we only need to check that all +// referenced labels exist in the symbol table +static int check_semantics(struct ast* ast) { + + int i; + + // TODO: Need to implement a iterator for vectors. + for(i = 0; i < ast->instr.size; i += sizeof(struct ast_instr)) { + struct ast_instr *instr = ast->instr.base + i; + + // Only J-Type can have labels. + if (instr->opcode == OP_JMP + && instr->operands[0].type == DATATYPE_STRING + && symtab_get(ast->symbols, instr->operands[0].s, NULL) < 0) { + + return asm_error(0, "Label '%s' is not defined", instr->operands[0].s); + } + } + + return 0; +} + /* * Main parser function. */ @@ -192,8 +215,8 @@ int parse(FILE *source_fd, FILE *dest_fd) { rc = parse_line(&lex, &ast); } while(rc >= 0); - // TODO: Second pass validation - // make sure all referenced labels are actually defined. 
+ if (check_semantics(&ast) < 0) + goto done; // Code generation for(int i = 0; i < ast.instr.size; i += sizeof(struct ast_instr)) { @@ -206,7 +229,6 @@ int parse(FILE *source_fd, FILE *dest_fd) { } // Cleanup - ast_free(&ast); - +done: ast_free(&ast); return 0; } From 0f10c9fd381040a96d5e91c95be985a9863fe79e Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Mon, 10 Dec 2018 23:03:30 +0100 Subject: [PATCH 19/32] src/as/parser.c: on parsing error. skip doing semantics checks and code gen. --- src/as/parser.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/as/parser.c b/src/as/parser.c index d39bb59..7856e72 100644 --- a/src/as/parser.c +++ b/src/as/parser.c @@ -52,7 +52,7 @@ static int match_operand(struct lexer* lex, enum token_type type, struct ast *as ast_instr_operand(ast, DATATYPE_STRING, lex->token.value.s); } - return 0; + return 1; } /* @@ -86,7 +86,7 @@ static int match_typeR(struct lexer* lex, struct ast *ast) { match_reg(3, ast); match_end; - return 0; + return 1; } // RI-Type (rs : u8, r0 : u8, offset : s8) @@ -144,7 +144,7 @@ static int parse_line(struct lexer* lex, struct ast *ast) { // Opcode should come first. switch(lex->token.type) { - case TOKEN_EOI: return -1; + case TOKEN_EOI: return 0; case TOKEN_EOL: break; case TOKEN_OPCODE_NOOP : ast_instr(ast, OP_NOOP); match_end; @@ -172,7 +172,7 @@ static int parse_line(struct lexer* lex, struct ast *ast) { return asm_error(lex->lineno, "Opcode or label expected"); } - return 0; + return 1; } // Check the semantics of the program's AST. @@ -203,7 +203,6 @@ static int check_semantics(struct ast* ast) { */ int parse(FILE *source_fd, FILE *dest_fd) { - int rc; struct lexer lex; struct ast ast; @@ -211,9 +210,13 @@ int parse(FILE *source_fd, FILE *dest_fd) { lexer_init(&lex, source_fd); // Parse and build AST. 
- do { - rc = parse_line(&lex, &ast); - } while(rc >= 0); + for(;;) { + int rc = parse_line(&lex, &ast); + if (rc < 0) + goto done; + if (rc == 0) + break; + } if (check_semantics(&ast) < 0) goto done; From 1146b925f52d5ed91d25a7a9c0766b28f13cbc37 Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Mon, 10 Dec 2018 23:11:58 +0100 Subject: [PATCH 20/32] src/as/codegen.c: use symtab to get the address for J-Type instruction. --- src/as/codegen.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/as/codegen.c b/src/as/codegen.c index 3d8c76c..d08a6c4 100644 --- a/src/as/codegen.c +++ b/src/as/codegen.c @@ -27,19 +27,14 @@ void codegen_emit(struct ast_instr* instr, symtab_t* symbols, uint8_t* out) { // No operands, return return; - // J-Type (We don't have labels yet, so this is just a address) - if (instr->operands[0].type == DATATYPE_NUMBER) { - //if (instr->operands[0].type == DATATYPE_STRING) { + // J-Type + if (instr->operands[0].type == DATATYPE_STRING) { uint16_t addr; - /* if (symtab_get(symbols, instr->operands[0].s, &addr)) { + if (symtab_get(symbols, instr->operands[0].s, &addr) >= 0) { out[0] |= (addr >> 8); out[1] = addr; - }*/ - addr = instr->operands[0].n; - - out[0] |= (addr >> 8); - out[1] = addr; + } } // R/RI or I else if (instr->operands[0].type == DATATYPE_REGISTER) { From ad74f2d4760780774a0412b505b90b58e584cd63 Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Mon, 10 Dec 2018 23:12:59 +0100 Subject: [PATCH 21/32] asm/hello_world.as: we can now use labels! 
--- asm/hello_world.as | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asm/hello_world.as b/asm/hello_world.as index dc101c1..a146665 100644 --- a/asm/hello_world.as +++ b/asm/hello_world.as @@ -29,5 +29,5 @@ ld $15, $0, 0 ; Load memory address stored in R0 into R15 int $10, 2 ; Print character add $0, $0, $1 ; Add 1 (R1) to counter (R0) beq $15, $2, 1 ; Branch to "noop" (skipping next instruction) if R15 = 0 (R2 holds 0) -jmp 22 ; jump back to "_start" label (not implemented atm) +jmp _start ; jump back to "_start" label noop From 0a91644879b5d128675d986ae41edf07d56a5a34 Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Mon, 10 Dec 2018 23:17:07 +0100 Subject: [PATCH 22/32] Makefile: call "as" "m16as" --- .gitignore | 2 +- Makefile | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 0983487..03fa769 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ *.o *.a m16vm -/as +m16as diff --git a/Makefile b/Makefile index 4360200..b3b1679 100644 --- a/Makefile +++ b/Makefile @@ -3,14 +3,14 @@ CC = gcc CFLAGS = -g -Ilib/include -DMEM_SIZE=32 -DM16_DEBUG_MEM LD = $(CC) -PROGRAMS = m16vm as +PROGRAMS = m16vm m16as all: $(PROGRAMS) m16vm : src/vm.o src/cpu.o src/mm.o src/instr_decode.o src/syscall.o src/program.o $(LD) $(LDFLAGS) -o $@ $^ -as : src/as/as.o src/as/parser.o src/as/lexer.o \ +m16as : src/as/as.o src/as/parser.o src/as/lexer.o \ src/as/codegen.o src/as/error.o src/as/symtab.o \ src/as/ast.o lib/libm16.a $(LD) $(LDFLAGS) -o $@ $^ From 3513662ad829a841ec0a654a3de25ec4f75342d5 Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Tue, 11 Dec 2018 17:53:55 +0100 Subject: [PATCH 23/32] asm: lexer.c: read_string() move all strings into a table --- src/as/lexer.c | 46 +++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/src/as/lexer.c b/src/as/lexer.c index b9cb9b6..9797a05 100644 --- a/src/as/lexer.c +++ b/src/as/lexer.c @@ -26,6 +26,25 
@@ #define space(x) ((x) == ' ' || (x) == '\t' || (x) == '\r') +struct opcode_ent { + char * name; + uint8_t code; +}; + +static const struct opcode_ent opcode_table[] = { + { "noop", TOKEN_OPCODE_NOOP }, + { "add" , TOKEN_OPCODE_ADD }, + { "movl", TOKEN_OPCODE_MOVL }, + { "movh", TOKEN_OPCODE_MOVH }, + { "ld" , TOKEN_OPCODE_LD }, + { "sw" , TOKEN_OPCODE_SW }, + { "beq" , TOKEN_OPCODE_BEQ }, + { "jmp" , TOKEN_OPCODE_JMP }, + { "jr" , TOKEN_OPCODE_JR }, + { "int" , TOKEN_OPCODE_INT }, + { NULL , 0 }, +}; + /** * Helper functions */ @@ -90,28 +109,13 @@ static int read_string(FILE *fp, char *buf, size_t len) { } buf[i] = '\0'; - if (label_decl) { + if (label_decl) return TOKEN_LABEL_DECL; - } else if (!strcmp("noop", buf)) { - return TOKEN_OPCODE_NOOP; - } else if (!strcmp("add", buf)) { - return TOKEN_OPCODE_ADD; - } else if (!strcmp("movl", buf)) { - return TOKEN_OPCODE_MOVL; - } else if (!strcmp("movh", buf)) { - return TOKEN_OPCODE_MOVH; - } else if (!strcmp("ld", buf)) { - return TOKEN_OPCODE_LD; - } else if (!strcmp("sw", buf)) { - return TOKEN_OPCODE_SW; - } else if (!strcmp("beq", buf)) { - return TOKEN_OPCODE_BEQ; - } else if (!strcmp("jmp", buf)) { - return TOKEN_OPCODE_JMP; - } else if (!strcmp("jr", buf)) { - return TOKEN_OPCODE_JR; - } else if (!strcmp("int", buf)) { - return TOKEN_OPCODE_INT; + + for(i = 0; opcode_table[i].name; i++) { + + if (!strcmp(opcode_table[i].name, buf)) + return opcode_table[i].code; } return TOKEN_LABEL; } From 1a29b3966370bbbfaa11030a31581397c3774863 Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Fri, 14 Dec 2018 00:17:42 +0100 Subject: [PATCH 24/32] src/as/ast.c: bug in ast_free(). we should advance i by sizeof(char**). 
--- src/as/ast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/as/ast.c b/src/as/ast.c index 392517e..bfd6983 100644 --- a/src/as/ast.c +++ b/src/as/ast.c @@ -17,7 +17,7 @@ void ast_free(struct ast* ast) { int i; // Free all label strings - for(int i = 0; i < ast->labels.size; i++) { + for(int i = 0; i < ast->labels.size; i += sizeof(char**)) { char *ptr = *((char**) ast->labels.base + i); free(ptr); } From de29a981bdfb26ab77ecf56eb502cdf978da5deb Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Fri, 14 Dec 2018 00:35:09 +0100 Subject: [PATCH 25/32] src/as/lexer.c: implement hexadecimal numbers. --- src/as/lexer.c | 56 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 5 deletions(-) diff --git a/src/as/lexer.c b/src/as/lexer.c index 9797a05..786efa3 100644 --- a/src/as/lexer.c +++ b/src/as/lexer.c @@ -70,14 +70,34 @@ static int read_next(struct lexer *lex) { return c; } -static int read_number(FILE *fp) { +static int read_hex(FILE *fp) { + + int c, val = 0; - int c, neg = 0, val = 0; while((c = fgetc(fp)) != EOF) { - if (neg == 0 && c == '-') { - neg = 1; - continue; + char n = 0; + if (number(c)) { + n = c - '0'; } + else if ( (c >= 'a' && c <= 'f') + || (c >= 'A' && c <= 'F')) { + n = (c % 0x20) + 9; + } + else { + ungetc(c, fp); + break; + } + + val = (val * 16) + n; + } + return val; +} + +static int read_dec(FILE *fp, int neg) { + + int c, val = 0; + + while((c = fgetc(fp)) != EOF) { if (!number(c)) { ungetc(c, fp); break; @@ -90,6 +110,32 @@ static int read_number(FILE *fp) { return val; } +static int read_number(FILE *fp) { + + int neg = 0, c = fgetc(fp); + + // Check for '0x'. + if (c == '0') { + c = fgetc(fp); + if (c == 'x') { + // We have a hexadecimal number. + return read_hex(fp); + } + ungetc(c, fp); + ungetc('0', fp); + } + // While we are at it. check for a negative sign. + else if (c == '-') { + neg = 1; + } + // We got something else. put it back. 
+ else { + ungetc(c, fp); + } + + return read_dec(fp, neg); +} + static int read_string(FILE *fp, char *buf, size_t len) { int c, label_decl = 0, i = 0; From 2c734dc3005758df2691de6b298ba2c1b7b47d2d Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Fri, 14 Dec 2018 00:35:33 +0100 Subject: [PATCH 26/32] asm/hello_world.as: use some hex! --- asm/hello_world.as | 54 +++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/asm/hello_world.as b/asm/hello_world.as index a146665..f5f5b25 100644 --- a/asm/hello_world.as +++ b/asm/hello_world.as @@ -1,33 +1,33 @@ ; -- Store "Hello World" string in memory. -movl $1, 72 ; H -sw $0, $1, 0 -movl $1, 69 ; E -sw $0, $1, 1 -movl $1, 76 ; L -sw $0, $1, 2 -sw $0, $1, 3 -movl $1, 79 ; O -sw $0, $1, 4 -movl $1, 32 ; Space -sw $0, $1, 5 -movl $1, 87 ; W -sw $0, $1, 6 -movl $1, 79 ; O -sw $0, $1, 7 -movl $1, 82 ; R -sw $0, $1, 8 -movl $1, 76 ; L -sw $0, $1, 9 -movl $1, 68 ; D -sw $0, $1, 10 +movl $0x1, 72 ; H +sw $0x0, $0x1, 0 +movl $0x1, 69 ; E +sw $0x0, $0x1, 1 +movl $0x1, 76 ; L +sw $0x0, $0x1, 2 +sw $0x0, $0x1, 3 +movl $0x1, 79 ; O +sw $0x0, $0x1, 4 +movl $0x1, 32 ; Space +sw $0x0, $0x1, 5 +movl $0x1, 87 ; W +sw $0x0, $0x1, 6 +movl $0x1, 79 ; O +sw $0x0, $0x1, 7 +movl $0x1, 82 ; R +sw $0x0, $0x1, 8 +movl $0x1, 76 ; L +sw $0x0, $0x1, 9 +movl $0x1, 68 ; D +sw $0x0, $0x1, 10 ; -- setup print loop. 
-movl $1, 1 ; Load 1 in R1 (used for increment the counter) +movl $0x1, 1 ; Load 1 in R1 (used for increment the counter) _start: -ld $15, $0, 0 ; Load memory address stored in R0 into R15 -int $10, 2 ; Print character -add $0, $0, $1 ; Add 1 (R1) to counter (R0) -beq $15, $2, 1 ; Branch to "noop" (skipping next instruction) if R15 = 0 (R2 holds 0) -jmp _start ; jump back to "_start" label +ld $0xF, $0x0, 0 ; Load memory address stored in R0 into R15 +int $0xA, 2 ; Print character +add $0x0, $0x0, $0x1 ; Add 1 (R1) to counter (R0) +beq $0xF, $0x2, 1 ; Branch to "noop" (skipping next instruction) if R15 = 0 (R2 holds 0) +jmp _start ; jump back to "_start" label noop From 2c1c8324937a13fca67c3286f86b28685bd11d80 Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Fri, 14 Dec 2018 14:04:52 +0100 Subject: [PATCH 27/32] adding asm/mov_test.as --- asm/mov_test.as | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 asm/mov_test.as diff --git a/asm/mov_test.as b/asm/mov_test.as new file mode 100644 index 0000000..3b83abe --- /dev/null +++ b/asm/mov_test.as @@ -0,0 +1,15 @@ + +; mov h/l test. +; Using 2 instructions to store a 16-bit word is a bit tricky to write code for + +; Storing 32767 = (2^15) - 1 (highest value in 2's complement 16-bit). +; MSB (signed flag) = 0, rest 1. +; H [0111 1111] L [1111 1111] +movl $0x0, -1 +movh $0x0, 127 + +; Storing -32768 = -(2^15) (lowest value in 2's complement 16-bit). +; MSB (signed flag) = 1, rest 0. +; H [1000 0000] L [0000 0000] +movl $0x1, 0 +movh $0x1, -128 From e75349e7f96a1248268488f9c86c35debc9de2a5 Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Mon, 17 Dec 2018 07:27:52 +0100 Subject: [PATCH 28/32] src/as/lexer.h: in struct token: integer value can max be 8 bits wide. 
--- src/as/lexer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/as/lexer.h b/src/as/lexer.h index e2a5319..cea04d7 100644 --- a/src/as/lexer.h +++ b/src/as/lexer.h @@ -62,8 +62,8 @@ struct token { * this can be a string or unsigned short */ union { - uint16_t n; - char s[32]; + int8_t n; + char s[32]; } value; }; From 74ecdfc2ab1c50785bb3897f3af0701ac9771693 Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Mon, 17 Dec 2018 21:34:57 +0100 Subject: [PATCH 29/32] src/as/lexer.c: guard against integer overflow (emitting a warning) --- src/as/lexer.c | 55 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 16 deletions(-) diff --git a/src/as/lexer.c b/src/as/lexer.c index 786efa3..1bf6f28 100644 --- a/src/as/lexer.c +++ b/src/as/lexer.c @@ -70,7 +70,7 @@ static int read_next(struct lexer *lex) { return c; } -static int read_hex(FILE *fp) { +static int read_hex(FILE *fp, int *out) { int c, val = 0; @@ -89,11 +89,18 @@ static int read_hex(FILE *fp) { } val = (val * 16) + n; + if (val > 0xFF) + goto overflow; } - return val; + *out = val; + return 0; + +overflow: + *out = 0xFF; + return -1; } -static int read_dec(FILE *fp, int neg) { +static int read_dec(FILE *fp, int neg, int *out) { int c, val = 0; @@ -103,14 +110,23 @@ static int read_dec(FILE *fp, int neg) { break; } val = (val * 10) + (c - '0'); + + // Cool trick here. + // because the range is -128 (0x80) to +127 (0x7F) + // We can do 0x80 - 1 if it is NOT a negative number. + if (val > (0x80 - !neg)) + goto overflow; } - if (neg) - return -1 * val; - return val; + *out = neg ? -1 * val : val; + return 0; + +overflow: + *out = neg ? -1 * 0x80 : 0x7F; + return -1; } -static int read_number(FILE *fp) { +static int read_number(FILE *fp, int *out) { int neg = 0, c = fgetc(fp); @@ -119,7 +135,7 @@ static int read_number(FILE *fp) { c = fgetc(fp); if (c == 'x') { // We have a hexadecimal number. 
- return read_hex(fp); + return read_hex(fp, out); } ungetc(c, fp); ungetc('0', fp); @@ -133,7 +149,18 @@ static int read_number(FILE *fp) { ungetc(c, fp); } - return read_dec(fp, neg); + return read_dec(fp, neg, out); +} + +static int parse_number(struct lexer *lex) { + + int num; + + if (read_number(lex->fp, &num) < 0) + fprintf(stderr, "WARNING: Value truncated on line: %i\n", lex->lineno); + + lex->token.value.n = num; + return 0; } static int read_string(FILE *fp, char *buf, size_t len) { @@ -195,19 +222,15 @@ int lexer_get_next(struct lexer *lex) { break; case '$' : lex->token.type = TOKEN_REG; - num = read_number(lex->fp); - // Registers is 8-bit only. - if (num > 0xF) { - fprintf(stderr, "ERROR: Invalid register value '%i' on line: %i\n", num, lex->lineno); + if (parse_number(lex) < 0) return -1; - } - lex->token.value.n = num; break; default: if (first_number(ch)) { ungetc(ch, lex->fp); lex->token.type = TOKEN_NUMBER; - lex->token.value.n = read_number(lex->fp); + if (parse_number(lex) < 0) + return -1; } else if (first_string(ch)) { char buf[32]; ungetc(ch, lex->fp); From 2e66ffb9a532a0b87a1c5509ca742f055178d4f4 Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Mon, 17 Dec 2018 23:19:52 +0100 Subject: [PATCH 30/32] src/as/parser.c: check that numbers are in the allowed range. --- src/as/parser.c | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/src/as/parser.c b/src/as/parser.c index 7856e72..4481e5a 100644 --- a/src/as/parser.c +++ b/src/as/parser.c @@ -30,6 +30,12 @@ * Helper functions/macros for defining parser rules. */ +enum number_size { + NUMBER_SIZE_U4, + NUMBER_SIZE_S4, + NUMBER_SIZE_S8, +}; + // match the next token. // returns 0 if the token was of the correct type. -1 otherwise static int match_type(struct lexer* lex, enum token_type type) { @@ -38,6 +44,27 @@ static int match_type(struct lexer* lex, enum token_type type) { return lex->token.type == type ? 
0 : -1; } +static int validate_number(struct lexer* lex, enum number_size size) { + + int8_t n = lex->token.value.n; + + switch(size) { + case NUMBER_SIZE_U4 : + if (!(n >= 0x0 && n <= 0xF)) + return asm_error(lex->lineno, "Value out of range %u", (uint8_t) n); + break; + case NUMBER_SIZE_S4 : + if (!(n >= -8 && n < 8)) + return asm_error(lex->lineno, "Value out of range %i", n); + break; + case NUMBER_SIZE_S8 : + if (!(n >= -128 && n< 128)) + return asm_error(lex->lineno, "Value out of range %i", n); + break; + } + return 0; +} + // Same as match_type() but also generates a operand in the AST. static int match_operand(struct lexer* lex, enum token_type type, struct ast *ast) { @@ -59,11 +86,11 @@ static int match_operand(struct lexer* lex, enum token_type type, struct ast *as * Helper macros for matching tokens. */ #define match_reg(pos, ast) \ - if (match_operand(lex, TOKEN_REG, ast) < 0) \ + if (match_operand(lex, TOKEN_REG, ast) < 0 || validate_number(lex, NUMBER_SIZE_U4) < 0) \ return asm_error((lex)->lineno, "Expected number at argument %i", pos) -#define match_imm(pos, ast) \ - if (match_operand(lex, TOKEN_NUMBER, ast) < 0) \ +#define match_imm(pos, size, ast) \ + if (match_operand(lex, TOKEN_NUMBER, ast) < 0 || validate_number(lex, size) < 0) \ return asm_error((lex)->lineno, "Expected number at argument %i", pos) #define match_arg(pos) \ @@ -94,7 +121,7 @@ static int match_typeRI(struct lexer* lex, struct ast *ast) { match_reg(1, ast); match_arg(1); match_reg(2, ast); match_arg(2); - match_imm(3, ast); + match_imm(3, NUMBER_SIZE_S4, ast); match_end; return 1; @@ -104,7 +131,7 @@ static int match_typeRI(struct lexer* lex, struct ast *ast) { static int match_typeI(struct lexer* lex, struct ast *ast) { match_reg(1, ast); match_arg(1); - match_imm(2, ast); + match_imm(2, NUMBER_SIZE_S8, ast); match_end; return 1; From 960d6f2e0db0a34b5f9a6922cba12e291f62d75f Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Mon, 17 Dec 2018 23:27:13 +0100 Subject: [PATCH 
31/32] src/as/lexer.c: use error.c --- src/as/lexer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/as/lexer.c b/src/as/lexer.c index 1bf6f28..53bf8b9 100644 --- a/src/as/lexer.c +++ b/src/as/lexer.c @@ -1,6 +1,7 @@ #include #include +#include "error.h" #include "lexer.h" /** @@ -157,7 +158,7 @@ static int parse_number(struct lexer *lex) { int num; if (read_number(lex->fp, &num) < 0) - fprintf(stderr, "WARNING: Value truncated on line: %i\n", lex->lineno); + asm_warn(lex->lineno, "Value truncated to %i", num); lex->token.value.n = num; return 0; @@ -238,8 +239,7 @@ int lexer_get_next(struct lexer *lex) { if (lex->token.type == TOKEN_LABEL_DECL || lex->token.type == TOKEN_LABEL) strcpy(lex->token.value.s, buf); } else { - fprintf(stderr, "ERROR: Invalid character '%c' on line: %i\n", ch, lex->lineno); - return -1; + return asm_error(lex->lineno, "Invalid character '%c'", ch); } } From 993a1cbd74801fa22cc43f74b2e45220dfdefda1 Mon Sep 17 00:00:00 2001 From: Henrik Hautakoski Date: Tue, 18 Dec 2018 09:16:26 +0100 Subject: [PATCH 32/32] src/as/parser.c: fix typos. --- src/as/parser.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/as/parser.c b/src/as/parser.c index 4481e5a..1b4eb6e 100644 --- a/src/as/parser.c +++ b/src/as/parser.c @@ -87,7 +87,7 @@ static int match_operand(struct lexer* lex, enum token_type type, struct ast *as */ #define match_reg(pos, ast) \ if (match_operand(lex, TOKEN_REG, ast) < 0 || validate_number(lex, NUMBER_SIZE_U4) < 0) \ - return asm_error((lex)->lineno, "Expected number at argument %i", pos) + return asm_error((lex)->lineno, "Expected register at argument %i", pos) #define match_imm(pos, size, ast) \ if (match_operand(lex, TOKEN_NUMBER, ast) < 0 || validate_number(lex, size) < 0) \ @@ -105,7 +105,7 @@ static int match_operand(struct lexer* lex, enum token_type type, struct ast *as * Functions for matching complete instructions. 
*/ -// R-Type (rs : u8, r0 : u8, r1 : u8) +// R-Type (rs : u4, r0 : u4, r1 : u4) static int match_typeR(struct lexer* lex, struct ast *ast) { match_reg(1, ast); match_arg(1); @@ -116,7 +116,7 @@ static int match_typeR(struct lexer* lex, struct ast *ast) { return 1; } -// RI-Type (rs : u8, r0 : u8, offset : s8) +// RI-Type (rs : u4, r0 : u4, offset : s4) static int match_typeRI(struct lexer* lex, struct ast *ast) { match_reg(1, ast); match_arg(1); @@ -127,7 +127,7 @@ static int match_typeRI(struct lexer* lex, struct ast *ast) { return 1; } -// I-Type (rs : u8, imm : s8) +// I-Type (rs : u4, imm : s8) static int match_typeI(struct lexer* lex, struct ast *ast) { match_reg(1, ast); match_arg(1); @@ -146,7 +146,6 @@ static int match_typeJ(struct lexer* lex, struct ast *ast) { return 1; } -// Match \n* static int match_label_decl(struct lexer* lex, struct ast *ast) { uint16_t location = ast->instr.size / sizeof(struct ast_instr);