diff --git a/.gitignore b/.gitignore index 563e7eb..03fa769 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ *.o +*.a m16vm +m16as diff --git a/Makefile b/Makefile index 713fac7..87e0b0c 100644 --- a/Makefile +++ b/Makefile @@ -17,16 +17,29 @@ # Outputs the instructions executed in a human-readable format. # CC = gcc -CFLAGS = -Iinclude -DMEM_SIZE=32 -DM16_DEBUG_MEM +CFLAGS = -g -Ilib/include -DMEM_SIZE=32 -DM16_DEBUG_MEM LD = $(CC) -VM = m16vm +PROGRAMS = m16vm m16as -$(VM) : src/vm.o src/cpu.o src/mm.o src/instr_decode.o src/syscall.o src/program.o - $(LD) $(LDFLAGS)-o $@ $^ +all: $(PROGRAMS) + +m16vm : src/vm.o src/cpu.o src/mm.o src/instr_decode.o src/syscall.o src/program.o + $(LD) $(LDFLAGS) -o $@ $^ + +m16as : src/as/as.o src/as/parser.o src/as/lexer.o \ + src/as/codegen.o src/as/error.o src/as/symtab.o \ + src/as/ast.o lib/libm16.a + $(LD) $(LDFLAGS) -o $@ $^ + +lib/libm16.a : lib/src/vector.o + $(AR) rcs $@ $^ clean : $(RM) src/*.o + $(RM) src/as/*.o + $(RM) lib/*.o + $(RM) lib/*.a distclean : clean - $(RM) $(VM) + $(RM) $(PROGRAMS) diff --git a/asm/hello_world.as b/asm/hello_world.as new file mode 100644 index 0000000..f5f5b25 --- /dev/null +++ b/asm/hello_world.as @@ -0,0 +1,33 @@ + +; -- Store "Hello World" string in memory. +movl $0x1, 72 ; H +sw $0x0, $0x1, 0 +movl $0x1, 69 ; E +sw $0x0, $0x1, 1 +movl $0x1, 76 ; L +sw $0x0, $0x1, 2 +sw $0x0, $0x1, 3 +movl $0x1, 79 ; O +sw $0x0, $0x1, 4 +movl $0x1, 32 ; Space +sw $0x0, $0x1, 5 +movl $0x1, 87 ; W +sw $0x0, $0x1, 6 +movl $0x1, 79 ; O +sw $0x0, $0x1, 7 +movl $0x1, 82 ; R +sw $0x0, $0x1, 8 +movl $0x1, 76 ; L +sw $0x0, $0x1, 9 +movl $0x1, 68 ; D +sw $0x0, $0x1, 10 + +; -- setup print loop. 
+movl $0x1, 1 ; Load 1 in R1 (used for increment the counter) +_start: +ld $0xF, $0x0, 0 ; Load memory address stored in R0 into R15 +int $0xA, 2 ; Print character +add $0x0, $0x0, $0x1 ; Add 1 (R1) to counter (R0) +beq $0xF, $0x2, 1 ; Branch to "noop" (skipping next instruction) if R15 = 0 (R2 holds 0) +jmp _start ; jump back to "_start" label +noop diff --git a/asm/mov_test.as b/asm/mov_test.as new file mode 100644 index 0000000..3b83abe --- /dev/null +++ b/asm/mov_test.as @@ -0,0 +1,15 @@ + +; mov h/l test. +; Using 2 instructions to store a 16-bit words is abit tricky to write code for + +; Storing 32767 = (2^15) - 1 (highest value in 2's complement 16-bit). +; MSB (signed flag) = 0, rest 1. +; H [0111 1111] L [1111 1111] +movl $0x0, -1 +movh $0x0, 127 + +; Storing -32768 = (2^15) (highest value in 2's complement 16-bit). +; MSB (signed flag) = 1, rest 0. +; H [1000 0000] L [0000 0000] +movl $0x1, 0 +movh $0x1, -128 diff --git a/asm/test.as b/asm/test.as new file mode 100644 index 0000000..0ff196c --- /dev/null +++ b/asm/test.as @@ -0,0 +1,15 @@ +; NOTE: This is just to check the syntax. 
+; No logic behind any of the instructions (will prob crash) +movl $0, 3 +movl $5, 6500 +; Comment +noop ; Comment +start0: add $3, $0, $1 +sw $2, $3, $2 +ld $5, $0, $3 +beq $0, $1, 1 +beq $0, $1, -25 +beq $0, $1, -4000 +jr $5, 0 +int $0, 1 +jmp 5 diff --git a/include/instr.h b/lib/include/instr.h similarity index 98% rename from include/instr.h rename to lib/include/instr.h index 4f776e5..4dfe36c 100644 --- a/include/instr.h +++ b/lib/include/instr.h @@ -33,6 +33,7 @@ #define OP_JMP 7 #define OP_JR 8 #define OP_INT 15 +#define OP_NONE 0xFF /* Register type */ struct instr_R { diff --git a/lib/include/vector.h b/lib/include/vector.h new file mode 100644 index 0000000..cf2d2b9 --- /dev/null +++ b/lib/include/vector.h @@ -0,0 +1,41 @@ +/* vector.h + * + * Copyright (C) 2018 Henrik Hautakoski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. 
+ */
+#ifndef VECTOR_H
+#define VECTOR_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+struct vector {
+	size_t blk_sz;	/* growth step, in bytes */
+	size_t alloc;	/* bytes allocated at base */
+	size_t size;	/* bytes in use */
+	void *base;	/* storage, NULL while nothing is allocated */
+};
+
+#define VECTOR_INIT(block_size) { block_size, 0, 0, NULL }
+
+void vector_init(struct vector* v, size_t block_size);
+
+void vector_destory(struct vector* v);
+
+void vector_append(struct vector* v, const void *ptr, size_t n);
+
+#endif /* VECTOR_H */
diff --git a/lib/src/vector.c b/lib/src/vector.c
new file mode 100644
index 0000000..b49e020
--- /dev/null
+++ b/lib/src/vector.c
@@ -0,0 +1,88 @@
+/* vector.c
+ *
+ * Copyright (C) 2018 Henrik Hautakoski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <vector.h>
+
+/* Growth step used when a vector carries a block size of 0. */
+#define VECTOR_DEFAULT_BLOCK 64
+
+/*
+ * Grow the buffer, in whole blocks, until `n` more bytes fit after the
+ * current contents. Running out of memory is fatal: the vector API has
+ * no error return, so the process exits instead of writing through NULL.
+ */
+static void ensure_size(struct vector *v, size_t n) {
+
+	size_t alloc = v->alloc;
+	void *base;
+
+	/* VECTOR_INIT(0) leaves blk_sz at 0; the loop below would never end. */
+	if (v->blk_sz == 0)
+		v->blk_sz = VECTOR_DEFAULT_BLOCK;
+
+	while (alloc < v->size + n)
+		alloc += v->blk_sz;
+
+	if (alloc == v->alloc)
+		return;
+
+	/* Only replace v->base once realloc() has succeeded. */
+	base = realloc(v->base, alloc);
+	if (base == NULL) {
+		perror("vector: realloc");
+		exit(EXIT_FAILURE);
+	}
+
+	v->base = base;
+	v->alloc = alloc;
+}
+
+/* Set up an empty vector; a block_size of 0 selects the default step. */
+void vector_init(struct vector *v, size_t block_size) {
+
+	v->blk_sz = block_size > 0 ? block_size : VECTOR_DEFAULT_BLOCK;
+	v->alloc = 0;
+	v->size = 0;
+	v->base = NULL;
+}
+
+/* Release the storage and leave the vector empty and reusable. */
+void vector_destory(struct vector *v) {
+
+	free(v->base);
+	v->base = NULL;
+	v->alloc = 0;
+	v->size = 0;
+}
+
+/* Append a copy of the `n` bytes at `ptr` to the end of the vector. */
+void vector_append(struct vector *v, const void *ptr, size_t n) {
+
+	if (n == 0)
+		return;
+
+	ensure_size(v, n);
+
+	/* Arithmetic on void * is a GNU extension; go through char *. */
+	memcpy((char *) v->base + v->size, ptr, n);
+	v->size += n;
+}
diff --git a/src/as/as.c b/src/as/as.c
new file mode 100644
index 0000000..c7aa24c
--- /dev/null
+++ b/src/as/as.c
@@ -0,0 +1,60 @@
+/* as.c
+ *
+ * Copyright (C) 2012 Henrik Hautakoski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+#include
+#include
+#include "parser.h"
+
+int usage(char *program) {
+
+	fprintf(stderr, "Usage: %s [ 2) {
+		fd_out = fopen(argv[2], "w");
+		if (fd_out == NULL) {
+			perror("Could not open output file");
+			fclose(fd_in);
+			return -1;
+		}
+	}
+
+	parse(fd_in, fd_out);
+
+	fclose(fd_in);
+	fclose(fd_out);
+	return 0;
+}
diff --git a/src/as/ast.c b/src/as/ast.c
new file mode 100644
index 0000000..bfd6983
--- /dev/null
+++ b/src/as/ast.c
@@ -0,0 +1,122 @@
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "ast.h"
+
+// Number of elements each AST vector grows by.
+#define AST_GROW 16
+
+// Set up an empty program: no instructions, no labels, fresh symbol table.
+void ast_init(struct ast* ast) {
+
+	vector_init(&ast->instr, AST_GROW * sizeof(struct ast_instr));
+	vector_init(&ast->labels, AST_GROW * sizeof(char *));
+	ast->symbols = symtab_init();
+}
+
+// Release everything owned by the AST, including the duplicated label strings.
+void ast_free(struct ast* ast) {
+
+	// ast->labels is a packed array of char *; its size is counted in bytes.
+	char **labels = ast->labels.base;
+	size_t count = ast->labels.size / sizeof(char *);
+
+	for (size_t i = 0; i < count; i++)
+		free(labels[i]);
+
+	vector_destory(&ast->labels);
+	vector_destory(&ast->instr);
+	symtab_free(ast->symbols);
+}
+
+// Append a new instruction with the given opcode and no operands.
+void ast_instr(struct ast* ast, uint8_t opcode) {
+
+	struct ast_instr instr = { 0 };
+
+	instr.opcode = opcode;
+	vector_append(&ast->instr, &instr, sizeof(instr));
+}
+
+// Add an operand to the most recently appended instruction.
+// For DATATYPE_STRING, `value` points to a NUL-terminated string that is
+// copied; for every other type the operand value is carried in the pointer
+// itself and only its low 8 bits are kept. Operands beyond the third, or
+// operands added before any instruction exists, are ignored.
+void ast_instr_operand(struct ast* ast, enum ast_datatype type, void* value) {
+
+	struct ast_instr *instr;
+	struct ast_instr_operand *op;
+
+	if (ast->instr.size < sizeof(struct ast_instr))
+		return;
+
+	// Fetch latest instruction.
+	instr = (struct ast_instr *) ((char *) ast->instr.base
+		+ ast->instr.size - sizeof(struct ast_instr));
+
+	if (instr->n_operands >= sizeof(instr->operands) / sizeof(instr->operands[0]))
+		return;
+
+	op = instr->operands + instr->n_operands;
+	op->type = type;
+
+	if (type == DATATYPE_STRING) {
+		char *copy = strdup(value);
+
+		if (copy == NULL) {
+			perror("ast: strdup");
+			exit(EXIT_FAILURE);
+		}
+		op->s = copy;
+		// Remember the copy so ast_free() can release it.
+		vector_append(&ast->labels, &copy, sizeof(copy));
+	} else {
+		op->r = ((intptr_t) value) & 0xFF;
+	}
+
+	instr->n_operands++;
+}
+
+// Record that `label` refers to program location `loc`.
+void ast_location(struct ast* ast, const char *label, uint16_t loc) {
+
+	symtab_set(ast->symbols, label, loc);
+}
+
+// Dump every instruction and its operands to standard output.
+void ast_print(struct ast* ast) {
+
+	const struct ast_instr *instr = ast->instr.base;
+	size_t count = ast->instr.size / sizeof(*instr);
+
+	printf("{\n");
+
+	for (size_t i = 0; i < count; i++, instr++) {
+
+		printf("\topcode: %u", instr->opcode);
+		for (int j = 0; j < instr->n_operands; j++) {
+			const struct ast_instr_operand *op = instr->operands + j;
+
+			printf(", operand%i: ", j);
+			switch (op->type) {
+			case DATATYPE_REGISTER :
+				printf("%u (reg)", op->r);
+				break;
+			case DATATYPE_NUMBER :
+				printf("%i (num)", op->n);
+				break;
+			case DATATYPE_STRING :
+				printf("%s (string)", op->s);
+				break;
+			default:
+				break;
+			}
+		}
+		printf("\n");
+	}
+
+	printf("}\n");
+}
diff --git a/src/as/ast.h b/src/as/ast.h
new file mode 100644
index 0000000..6c23334
--- /dev/null
+++ b/src/as/ast.h
@@ -0,0 +1,81 @@
+/* ast.h
+ *
+ * Copyright (C) 2018 Henrik Hautakoski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ +#ifndef ASM_AST_H +#define ASM_AST_H + +#include +#include "symtab.h" + +// All datatypes in the language. +enum ast_datatype { + DATATYPE_NONE = 0, // Not an actual type. + DATATYPE_REGISTER, // 4 bit unsigned. + DATATYPE_NUMBER, // 8 bit signed. + DATATYPE_STRING +}; + +// Struct to represent a single operand to a instruction. +struct ast_instr_operand { + enum ast_datatype type; + union { + uint8_t r; + int8_t n; + uint16_t addr; + const char *s; + }; +}; + +// Struct to represent a single instruction. +struct ast_instr { + uint8_t opcode; + uint8_t n_operands; + struct ast_instr_operand operands[3]; +}; + +// Structure to represent a program. +struct ast { + + // List of instructions. + struct vector instr; + + // List for storing label pointers (so we can free) + struct vector labels; + + // Symbol table with label - location mapping. + symtab_t *symbols; +}; + +void ast_init(struct ast* ast); + +void ast_free(struct ast* ast); + +// Add an instruction +void ast_instr(struct ast* ast, uint8_t opcode); + +// Add an operand to the latest instruction. 
+//void ast_instr_operand(struct ast* ast, const struct ast_instr_operand* op); +void ast_instr_operand(struct ast* ast, enum ast_datatype type, void* value); + +void ast_location(struct ast* ast, const char *label, uint16_t loc); + +// For debugging :) +void ast_print(struct ast* ast); + +#endif /* ASM_AST_H */ diff --git a/src/as/codegen.c b/src/as/codegen.c new file mode 100644 index 0000000..d08a6c4 --- /dev/null +++ b/src/as/codegen.c @@ -0,0 +1,56 @@ +/* codegen.c + * + * Copyright (C) 2018 Henrik Hautakoski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. 
+ */
+#include "codegen.h"
+
+void codegen_emit(struct ast_instr* instr, symtab_t* symbols, uint8_t* out) {
+	// Encodes instr into out[0] (opcode in the high nibble) and out[1].
+	out[0] = (uint8_t) (instr->opcode << 4);
+	out[1] = 0;
+	if (instr->n_operands < 1)
+		// No operands, return
+		return;
+
+	// J-Type: the address is cut to 12 bits so it cannot spill into the opcode.
+	if (instr->operands[0].type == DATATYPE_STRING) {
+		uint16_t addr;
+		// An unresolved label leaves the address field zero.
+		if (symtab_get(symbols, instr->operands[0].s, &addr) >= 0) {
+			out[0] |= (addr >> 8) & 0xF;
+			out[1] = addr & 0xFF;
+		}
+	}
+	// R/RI or I
+	else if (instr->operands[0].type == DATATYPE_REGISTER) {
+
+		out[0] |= instr->operands[0].r & 0xF;
+
+		// I
+		if (instr->operands[1].type == DATATYPE_NUMBER) {
+
+			out[1] = (uint8_t) instr->operands[1].n;
+		}
+		// R/RI-Type
+		else if (instr->operands[1].type == DATATYPE_REGISTER &&
+			(instr->operands[2].type == DATATYPE_REGISTER || instr->operands[2].type == DATATYPE_NUMBER)) {
+
+			out[1] = (uint8_t) ((instr->operands[1].r << 4) | (instr->operands[2].r & 0xF));
+		}
+	}
+}
diff --git a/src/as/codegen.h b/src/as/codegen.h
new file mode 100644
index 0000000..8964321
--- /dev/null
+++ b/src/as/codegen.h
@@ -0,0 +1,27 @@
+/* codegen.h
+ *
+ * Copyright (C) 2018 Henrik Hautakoski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */ +#ifndef ASM_CODEGEN_H +#define ASM_CODEGEN_H + +#include "ast.h" + +void codegen_emit(struct ast_instr* ast, symtab_t* symbols, uint8_t* out); + +#endif /* ASM_CODEGEN_H */ diff --git a/src/as/error.c b/src/as/error.c new file mode 100644 index 0000000..704658b --- /dev/null +++ b/src/as/error.c @@ -0,0 +1,46 @@ +/* error.c + * + * Copyright (C) 2018 Henrik Hautakoski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ +#include +#include +#include "error.h" + +void asm_warn(int lineno, const char *fmt, ...) { + + va_list vl; + + fprintf(stderr, "Line %i: Warning: ", lineno); + va_start(vl, fmt); + vfprintf(stderr, fmt, vl); + va_end(vl); + fprintf(stderr, "\n"); +} + +int asm_error(int lineno, const char *fmt, ...) 
{ + + va_list vl; + + fprintf(stderr, "Line %i: Error: ", lineno); + va_start(vl, fmt); + vfprintf(stderr, fmt, vl); + va_end(vl); + fprintf(stderr, "\n"); + + return -1; +} diff --git a/src/as/error.h b/src/as/error.h new file mode 100644 index 0000000..70ebb71 --- /dev/null +++ b/src/as/error.h @@ -0,0 +1,27 @@ +/* error.h + * + * Copyright (C) 2018 Henrik Hautakoski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ +#ifndef ASM_ERROR_H +#define ASM_ERROR_H + +void asm_warn(int lineno, const char *fmt, ...); + +int asm_error(int lineno, const char *fmt, ...); + +#endif /* ASM_ERROR_H */ diff --git a/src/as/lexer.c b/src/as/lexer.c new file mode 100644 index 0000000..53bf8b9 --- /dev/null +++ b/src/as/lexer.c @@ -0,0 +1,296 @@ + +#include +#include +#include "error.h" +#include "lexer.h" + +/** + * macros for the grammar. 
+ */
+
+// Digits are [0-9]
+#define number(x) ((x) >= '0' && (x) <= '9')
+
+// A number may also start with '-'
+#define first_number(x) (number(x) || (x) == '-' )
+
+// First character in strings can be [a-z][A-Z] or '_'
+#define first_string(x) \
+	( ((x) >= 'a' && (x) <= 'z') \
+	|| ((x) >= 'A' && (x) <= 'Z') \
+	|| (x) == '_' )
+
+// All characters after the first can also be digits
+#define string(x) \
+	(first_string(x) || number(x))
+
+#define space(x) ((x) == ' ' || (x) == '\t' || (x) == '\r')
+
+// Maps an opcode mnemonic to its token type.
+struct opcode_ent {
+	const char * name;
+	uint8_t code;
+};
+
+static const struct opcode_ent opcode_table[] = {
+	{ "noop", TOKEN_OPCODE_NOOP },
+	{ "add" , TOKEN_OPCODE_ADD },
+	{ "movl", TOKEN_OPCODE_MOVL },
+	{ "movh", TOKEN_OPCODE_MOVH },
+	{ "ld" , TOKEN_OPCODE_LD },
+	{ "sw" , TOKEN_OPCODE_SW },
+	{ "beq" , TOKEN_OPCODE_BEQ },
+	{ "jmp" , TOKEN_OPCODE_JMP },
+	{ "jr" , TOKEN_OPCODE_JR },
+	{ "int" , TOKEN_OPCODE_INT },
+	{ NULL , 0 },
+};
+
+/**
+ * Helper functions
+ */
+
+static int read_next(struct lexer *lex) {
+	// Skips blanks and ';' comments; returns the next significant character, '\n' or EOF.
+	int c, comment = 0;
+
+	while((c = fgetc(lex->fp)) != EOF) {
+
+		if (c == '\n')
+			break;
+
+		if (comment)
+			continue;
+
+		if (c == ';') {
+			comment = 1;
+		} else if (!space(c)) {
+			break;
+		}
+	}
+	return c;
+}
+
+static int read_hex(FILE *fp, int *out) {
+	// Parses hex digits into *out. Past 0xFF it stops, sets *out = 0xFF and returns -1.
+	int c, val = 0;
+
+	while((c = fgetc(fp)) != EOF) {
+		int n = 0;
+		if (number(c)) {
+			n = c - '0';
+		}
+		else if ( (c >= 'a' && c <= 'f')
+			|| (c >= 'A' && c <= 'F')) {
+			n = (c % 0x20) + 9;
+		}
+		else {
+			ungetc(c, fp);
+			break;
+		}
+
+		val = (val * 16) + n;
+		if (val > 0xFF)
+			goto overflow;
+	}
+	*out = val;
+	return 0;
+
+overflow:
+	*out = 0xFF;
+	return -1;
+}
+
+static int read_dec(FILE *fp, int neg, int *out) {
+	// Parses decimal digits into *out, clamped to -128..127 (returns -1 when clamped).
+	int c, val = 0;
+
+	while((c = fgetc(fp)) != EOF) {
+		if (!number(c)) {
+			ungetc(c, fp);
+			break;
+		}
+		val = (val * 10) + (c - '0');
+
+		// Cool trick here.
+		// because the range is -128 (0x80) to +127 (0x7F)
+		// We can do 0x80 - 1 if it is NOT a negative number.
+		if (val > (0x80 - !neg))
+			goto overflow;
+	}
+
+	*out = neg ? -1 * val : val;
+	return 0;
+
+overflow:
+	*out = neg ? -1 * 0x80 : 0x7F;
+	return -1;
+}
+
+static int read_number(FILE *fp, int *out) {
+	// Reads a "0x" hex number, or an optionally negative decimal number, into *out.
+	int neg = 0, c = fgetc(fp);
+
+	// Check for '0x'.
+	if (c == '0') {
+		c = fgetc(fp);
+		if (c == 'x') {
+			// We have a hexadecimal number.
+			return read_hex(fp, out);
+		}
+		// A leading '0' adds nothing to the value: put back only the lookahead (C guarantees one pushback).
+		ungetc(c, fp);
+	}
+	// While we are at it. check for a negative sign.
+	else if (c == '-') {
+		neg = 1;
+	}
+	// We got something else. put it back.
+	else {
+		ungetc(c, fp);
+	}
+
+	return read_dec(fp, neg, out);
+}
+
+static int parse_number(struct lexer *lex) {
+	// Stores the next number in the current token; an out-of-range value only warns.
+	int num;
+
+	if (read_number(lex->fp, &num) < 0)
+		asm_warn(lex->lineno, "Value truncated to %i", num);
+
+	lex->token.value.n = (int8_t) num;
+	return 0;
+}
+
+static int read_string(FILE *fp, char *buf, size_t len) {
+	// Reads an identifier into buf (at most len - 1 chars) and returns its token type.
+	int c, label_decl = 0;
+	size_t i = 0;
+	while((c = fgetc(fp)) != EOF) {
+		if (!string(c)) {
+			if (c == ':')
+				label_decl = 1;
+			else
+				ungetc(c, fp);
+			break;
+		}
+		// Keep room for the terminator; characters that do not fit
+		// are consumed but dropped (the name is truncated).
+		if (i + 1 < len)
+			buf[i++] = (char) c;
+	}
+	buf[i] = '\0';
+
+	if (label_decl)
+		return TOKEN_LABEL_DECL;
+
+	for(i = 0; opcode_table[i].name; i++) {
+
+		if (!strcmp(opcode_table[i].name, buf))
+			return opcode_table[i].code;
+	}
+	return TOKEN_LABEL;
+}
+
+/**
+ * Exposed functions
+ */
+
+void lexer_init(struct lexer *lex, FILE *fp) {
+	// Starts lexing `fp` at line 1.
+	lex->lineno = 1;
+	lex->fp = fp;
+	lex->token.type = TOKEN_EOI;
+}
+
+int lexer_get_next(struct lexer *lex) {
+	// Reads the next token into lex->token; returns -1 on an invalid character.
+	// The line counter is bumped lazily, when the token after an EOL is read.
+	int ch = read_next(lex);
+
+	if (lex->token.type == TOKEN_EOL)
+		lex->lineno++;
+
+	switch(ch) {
+	case EOF : lex->token.type = TOKEN_EOI;
+		break;
+	case '\n' :
+		lex->token.type = TOKEN_EOL;
+		break;
+	case ',' : lex->token.type = TOKEN_ARG_SEP;
+		break;
+	case '$' :
+		lex->token.type = TOKEN_REG;
+		if (parse_number(lex) < 0)
+			return -1;
+		break;
+	default:
+		if (first_number(ch)) {
+			ungetc(ch, lex->fp);
+			lex->token.type = TOKEN_NUMBER;
+			if (parse_number(lex) < 0)
+				return -1;
+		} else if (first_string(ch)) {
+			char buf[32];
+			ungetc(ch, lex->fp);
+			lex->token.type = read_string(lex->fp, buf, sizeof(buf));
+			if (lex->token.type == TOKEN_LABEL_DECL || lex->token.type == TOKEN_LABEL)
+				strcpy(lex->token.value.s, buf);
+		} else {
+			return asm_error(lex->lineno, "Invalid character '%c'", ch);
+		}
+	}
+
+	lex->token.lineno = lex->lineno;
+	return 0;
+}
+
+void lexer_print_token(struct token *token) {
+	// Prints tokens grouped per source line; remembers the last line number printed.
+	static int lineno = 0;
+
+	if (token->lineno != lineno) {
+		lineno = token->lineno;
+		printf("\n%i: ", lineno);
+	}
+
+	switch(token->type) {
+	case TOKEN_OPCODE_NOOP : printf(" [OP NOOP] ");
+		break;
+	case TOKEN_OPCODE_ADD : printf(" [OP ADD] ");
+		break;
+	case TOKEN_OPCODE_MOVL : printf(" [OP MOVL] ");
+		break;
+	case TOKEN_OPCODE_MOVH : printf(" [OP MOVH] ");
+		break;
+	case TOKEN_OPCODE_LD : printf(" [OP LD] ");
+		break;
+	case TOKEN_OPCODE_SW : printf(" [OP SW] ");
+		break;
+	case TOKEN_OPCODE_BEQ : printf(" [OP BEQ] ");
+		break;
+	case TOKEN_OPCODE_JMP : printf(" [OP JMP] ");
+		break;
+	case TOKEN_OPCODE_JR : printf(" [OP JR] ");
+		break;
+	case TOKEN_OPCODE_INT : printf(" [OP INT] ");
+		break;
+	case TOKEN_LABEL : printf(" [LABEL \"%s\"] ", token->value.s);
+		break;
+	case TOKEN_LABEL_DECL : printf(" [LABEL DECL \"%s\"] ", token->value.s);
+		break;
+	case TOKEN_REG : printf(" [REG %i] ", token->value.n);
+		break;
+	case TOKEN_ARG_SEP : printf(" [SEP] ");
+		break;
+	case TOKEN_NUMBER : printf(" [NUM %i] ", token->value.n);
+		break;
+	case TOKEN_EOI : printf(" [EOI] ");
+		break;
+	case TOKEN_EOL : printf(" [EOL] ");
+		break;
+	default: printf(" [U] ");
+	}
+}
diff --git a/src/as/lexer.h b/src/as/lexer.h
new file mode 100644
index 0000000..cea04d7
--- /dev/null
+++ b/src/as/lexer.h
@@ -0,0 +1,95 @@
+/* lexer.h
+ *
+ * Copyright (C) 2018 Henrik Hautakoski
+ *
+ * This program is free software; you can redistribute it
and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ +#ifndef ASM_LEXER_H +#define ASM_LEXER_H + +#include +#include + +/** + * All token types. + */ +enum token_type { + TOKEN_EOI = -1, + TOKEN_EOL, // Newline + TOKEN_OPCODE_NOOP, + TOKEN_OPCODE_ADD, + TOKEN_OPCODE_MOVL, + TOKEN_OPCODE_MOVH, + TOKEN_OPCODE_LD, + TOKEN_OPCODE_SW, + TOKEN_OPCODE_BEQ, + TOKEN_OPCODE_JMP, + TOKEN_OPCODE_JR, + TOKEN_OPCODE_INT, + TOKEN_LABEL, + TOKEN_LABEL_DECL, + TOKEN_REG, + TOKEN_NUMBER, + TOKEN_ARG_SEP +}; + +/** + * Token structure. + * + * Holds information about a single token. + */ +struct token { + // Line number where the token was extracted from. + uint16_t lineno; + + enum token_type type; + + /* + * Token value, depending on type + * this can be a string or unsigned short + */ + union { + int8_t n; + char s[32]; + } value; +}; + +/** + * Lexer state + */ +struct lexer { + uint16_t lineno; // current line number + FILE * fp; // File being lexed. + struct token token; // Current token +}; + +/** + * Initialize the lexer with a file pointer to the file + * that should be lexed. + */ +void lexer_init(struct lexer *lex, FILE *fp); + +/** + * Advance the lexer to the next token. + */ +int lexer_get_next(struct lexer *lex); + +/** + * For debugging, prints the token to standard output. 
+ */
+void lexer_print_token(struct token *token);
+
+#endif /* ASM_LEXER_H */
diff --git a/src/as/parser.c b/src/as/parser.c
new file mode 100644
index 0000000..1b4eb6e
--- /dev/null
+++ b/src/as/parser.c
@@ -0,0 +1,302 @@
+/* parser.c
+ *
+ * Copyright (C) 2012 Henrik Hautakoski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+#include <stdint.h>
+#include <stdio.h>
+#include <instr.h>
+#include "error.h"
+#include "codegen.h"
+#include "ast.h"
+#include "lexer.h"
+#include "parser.h"
+
+/*
+ * Helper functions/macros for defining parser rules.
+ */
+
+enum number_size {
+	NUMBER_SIZE_U4,
+	NUMBER_SIZE_S4,
+	NUMBER_SIZE_S8,
+};
+
+// Match the next token.
+// Returns 0 if the token was of the correct type, -1 otherwise
+// (including when the lexer rejected the input).
+static int match_type(struct lexer* lex, enum token_type type) {
+
+	if (lexer_get_next(lex) < 0)
+		return -1;
+	return lex->token.type == type ? 0 : -1;
+}
+
+// Match the end of a statement: a newline, or the end of the input so
+// that the last line of a file does not need a trailing newline.
+static int match_eol(struct lexer* lex) {
+
+	if (lexer_get_next(lex) < 0)
+		return -1;
+	if (lex->token.type == TOKEN_EOL || lex->token.type == TOKEN_EOI)
+		return 0;
+	return -1;
+}
+
+// Check that the number held by the current token fits the given field.
+// Returns 0 if it does; reports an error and returns -1 otherwise.
+static int validate_number(struct lexer* lex, enum number_size size) {
+
+	int n = lex->token.value.n;
+
+	switch(size) {
+	case NUMBER_SIZE_U4 :
+		if (n < 0x0 || n > 0xF)
+			return asm_error(lex->lineno, "Value out of range %u", (unsigned) (uint8_t) n);
+		break;
+	case NUMBER_SIZE_S4 :
+		if (n < -8 || n > 7)
+			return asm_error(lex->lineno, "Value out of range %i", n);
+		break;
+	case NUMBER_SIZE_S8 :
+		if (n < -128 || n > 127)
+			return asm_error(lex->lineno, "Value out of range %i", n);
+		break;
+	}
+	return 0;
+}
+
+// Same as match_type() but also generates a operand in the AST.
+// Returns 1 on a match, -1 otherwise.
+static int match_operand(struct lexer* lex, enum token_type type, struct ast *ast) {
+
+	if (match_type(lex, type) < 0)
+		return -1;
+
+	// Numeric values travel inside the pointer argument itself.
+	if (type == TOKEN_REG) {
+		ast_instr_operand(ast, DATATYPE_REGISTER, (void *) (intptr_t) lex->token.value.n);
+	} else if (type == TOKEN_NUMBER) {
+		ast_instr_operand(ast, DATATYPE_NUMBER, (void *) (intptr_t) lex->token.value.n);
+	} else {
+		ast_instr_operand(ast, DATATYPE_STRING, lex->token.value.s);
+	}
+
+	return 1;
+}
+
+/*
+ * Helper macros for matching tokens.
+ * They expect a `struct lexer *lex` in scope and return from the
+ * enclosing function with an error when the match fails.
+ */
+#define match_reg(pos, ast) do { \
+	if (match_operand(lex, TOKEN_REG, ast) < 0 || validate_number(lex, NUMBER_SIZE_U4) < 0) \
+		return asm_error((lex)->lineno, "Expected register at argument %i", pos); \
+} while (0)
+
+#define match_imm(pos, size, ast) do { \
+	if (match_operand(lex, TOKEN_NUMBER, ast) < 0 || validate_number(lex, size) < 0) \
+		return asm_error((lex)->lineno, "Expected number at argument %i", pos); \
+} while (0)
+
+#define match_arg(pos) do { \
+	if (match_type(lex, TOKEN_ARG_SEP) < 0) \
+		return asm_error((lex)->lineno, "Expected separator after argument %i", pos); \
+} while (0)
+
+#define match_end do { \
+	if (match_eol(lex) < 0) \
+		return asm_error(lex->lineno, "Expected newline"); \
+} while (0)
+
+/*
+ * Functions for matching complete instructions.
+ * Each returns 1 on success and -1 after reporting an error.
+ */
+
+// R-Type (rs : u4, r0 : u4, r1 : u4)
+static int match_typeR(struct lexer* lex, struct ast *ast) {
+
+	match_reg(1, ast); match_arg(1);
+	match_reg(2, ast); match_arg(2);
+	match_reg(3, ast);
+	match_end;
+
+	return 1;
+}
+
+// RI-Type (rs : u4, r0 : u4, offset : s4)
+static int match_typeRI(struct lexer* lex, struct ast *ast) {
+
+	match_reg(1, ast); match_arg(1);
+	match_reg(2, ast); match_arg(2);
+	match_imm(3, NUMBER_SIZE_S4, ast);
+	match_end;
+
+	return 1;
+}
+
+// I-Type (rs : u4, imm : s8)
+static int match_typeI(struct lexer* lex, struct ast *ast) {
+
+	match_reg(1, ast); match_arg(1);
+	match_imm(2, NUMBER_SIZE_S8, ast);
+	match_end;
+
+	return 1;
+}
+
+// J-Type (addr : string)
+static int match_typeJ(struct lexer* lex, struct ast *ast) {
+
+	if (match_operand(lex, TOKEN_LABEL, ast) < 0)
+		return asm_error(lex->lineno, "Expected label at argument 1");
+	match_end;
+	return 1;
+}
+
+// Bind the label in the current token to the index of the next instruction.
+static int match_label_decl(struct lexer* lex, struct ast *ast) {
+
+	uint16_t location = ast->instr.size / sizeof(struct ast_instr);
+
+	ast_location(ast, lex->token.value.s, location);
+
+	return 1;
+}
+
+// Add the instruction for `op` unless an earlier case label already did.
+// This lets the cases of one instruction type fall through to a shared tail.
+#define opcode_guard(op) \
+	if (op_set == 0) { op_set = 1; ast_instr(ast, op); }
+
+/*
+ * Parse a single line.
+ * Returns 1 when a line was consumed, 0 at end of input, -1 on error.
+ */
+static int parse_line(struct lexer* lex, struct ast *ast) {
+
+	int op_set = 0;
+
+	if (lexer_get_next(lex) < 0)
+		return -1;
+
+	// Opcode should come first.
+	switch(lex->token.type) {
+	case TOKEN_EOI: return 0;
+	case TOKEN_EOL: break;
+	case TOKEN_OPCODE_NOOP : ast_instr(ast, OP_NOOP);
+		match_end;
+		break;
+	// Type-R
+	case TOKEN_OPCODE_ADD : opcode_guard(OP_ADD);
+		return match_typeR(lex, ast);
+	// Type-I (the cases fall through on purpose, see opcode_guard)
+	case TOKEN_OPCODE_MOVL : opcode_guard(OP_MOVL);
+	case TOKEN_OPCODE_MOVH : opcode_guard(OP_MOVH);
+	case TOKEN_OPCODE_JR : opcode_guard(OP_JR);
+	case TOKEN_OPCODE_INT : opcode_guard(OP_INT);
+		return match_typeI(lex, ast);
+	// Type-RI (the cases fall through on purpose, see opcode_guard)
+	case TOKEN_OPCODE_LD : opcode_guard(OP_LW);
+	case TOKEN_OPCODE_SW : opcode_guard(OP_SW);
+	case TOKEN_OPCODE_BEQ : opcode_guard(OP_BEQ);
+		return match_typeRI(lex, ast);
+	// Type-J
+	case TOKEN_OPCODE_JMP : opcode_guard(OP_JMP);
+		return match_typeJ(lex, ast);
+	case TOKEN_LABEL_DECL :
+		return match_label_decl(lex, ast);
+	default:
+		return asm_error(lex->lineno, "Opcode or label expected");
+	}
+
+	return 1;
+}
+
+// Check the semantics of the program's AST.
+// For now, we only need to check that all
+// referenced labels exist in the symbol table.
+// Returns 0 if the program is fine, -1 after reporting an error.
+static int check_semantics(struct ast* ast) {
+
+	const struct ast_instr *instr = ast->instr.base;
+	size_t count = ast->instr.size / sizeof(*instr);
+
+	for (size_t i = 0; i < count; i++) {
+		// Only J-Type can have labels.
+		if (instr[i].opcode == OP_JMP
+			&& instr[i].operands[0].type == DATATYPE_STRING
+			&& symtab_get(ast->symbols, instr[i].operands[0].s, NULL) < 0) {
+
+			return asm_error(0, "Label '%s' is not defined", instr[i].operands[0].s);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Main parser function.
+ *
+ * Assembles the program read from source_fd and writes the machine code
+ * to dest_fd. Returns 0 on success, -1 if parsing, label resolution or
+ * writing the output failed.
+ */
+int parse(FILE *source_fd, FILE *dest_fd) {
+
+	struct lexer lex;
+	struct ast ast;
+	struct ast_instr *instr;
+	size_t count;
+	int rc;
+
+	ast_init(&ast);
+	lexer_init(&lex, source_fd);
+
+	// Parse and build AST.
+	do {
+		rc = parse_line(&lex, &ast);
+	} while (rc > 0);
+
+	if (rc < 0)
+		goto done;
+
+	rc = check_semantics(&ast);
+	if (rc < 0)
+		goto done;
+
+	// Code generation
+	instr = ast.instr.base;
+	count = ast.instr.size / sizeof(*instr);
+	for (size_t i = 0; i < count; i++) {
+		uint8_t code[2] = { 0 };
+
+		codegen_emit(&instr[i], ast.symbols, code);
+
+		if (fwrite(code, sizeof(code), 1, dest_fd) != 1) {
+			rc = asm_error(0, "Could not write output");
+			goto done;
+		}
+	}
+
+	// Cleanup
+done:
+	ast_free(&ast);
+	return rc;
+}
diff --git a/src/as/parser.h b/src/as/parser.h
new file mode 100644
index 0000000..29f2fe3
--- /dev/null
+++ b/src/as/parser.h
@@ -0,0 +1,33 @@
+/* parser.h
+ *
+ * Copyright (C) 2012 Henrik Hautakoski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */ +#ifndef ASM_PARSER_H +#define ASM_PARSER_H + +#include +#include + +struct parse_state { + struct instr *instr_ptr; + unsigned int num; +}; + +int parse(FILE *source_fd, FILE *dest_fd); + +#endif /* ASM_PARSER_H */ diff --git a/src/as/symtab.c b/src/as/symtab.c new file mode 100644 index 0000000..9ac9867 --- /dev/null +++ b/src/as/symtab.c @@ -0,0 +1,121 @@ +/* symtab.c + * + * Copyright (C) 2018 Henrik Hautakoski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ +#include +#include +#include "symtab.h" + +// TODO: Right now, the symtab is implemented as a simple dynamic growing array. +// This is fine for now, but for lookup speed. A hashtable is more efficient. + +// How much memory that should be allocated each time. +// Note: this should be defined as number of `struct symbol` objects, not bytes. +#define BLOCK_SIZE 32 + +// Helper macro to calculate number of bytes the table need. +#define __M_SIZE(t) \ + ((t)->size * sizeof(struct symbol)) + +struct symbol { + // The label associated with the location. + char label[SYMTAB_LABEL_MAXLEN]; + + // Location (program adress) + uint16_t loc; +}; + +// Symbol table structure. +struct symtab { + // Array of symbols. + struct symbol *data; + + // Number of symbols in the array. 
+ size_t num; + + // Total number of symbols the array can hold. + size_t size; +}; + +symtab_t* symtab_init() { + + struct symtab *table = malloc(sizeof(struct symtab)); + + table->num = 0; + table->size = BLOCK_SIZE; + table->data = malloc(__M_SIZE(table)); + + return table; +} + +void symtab_free(symtab_t *table) { + + if (!table) + return; + + if (table->data) + free(table->data); + + memset(table, 0, sizeof(struct symtab)); + free(table); +} + +int symtab_set(symtab_t *table, const char *label, uint16_t loc) { + + // Check if it exist first. + int index = symtab_get(table, label, NULL); + + // Entry did not exists. Insert + if (index < 0) { + struct symbol *sym; + + // We have one more. + table->num += 1; + + // Make sure we resize the memory. + if (table->num > table->size) { + table->size += BLOCK_SIZE; + table->data = realloc(table->data, __M_SIZE(table)); + } + + // insert the symbol at the end. + sym = table->data + (table->num - 1); + strncpy(sym->label, label, SYMTAB_LABEL_MAXLEN); + sym->loc = loc; + + return 0; + } + + // Could not insert. return as error. + return -1; +} + +int symtab_get(symtab_t *table, const char *label, uint16_t *loc) { + + // Linear search here for simplicity. + for(size_t i = 0; i < table->num; i++) { + struct symbol *sym = table->data + i; + + if (!strncmp(sym->label, label, SYMTAB_LABEL_MAXLEN)) { + + if (loc) *loc = sym->loc; + return i; + } + } + return -1; +} diff --git a/src/as/symtab.h b/src/as/symtab.h new file mode 100644 index 0000000..cd1cd68 --- /dev/null +++ b/src/as/symtab.h @@ -0,0 +1,37 @@ +/* symtab.h + * + * Copyright (C) 2018 Henrik Hautakoski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
/* symtab.h - license header (continued)
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 * MA 02110-1301, USA.
 */
#ifndef ASM_SYMTAB_H
#define ASM_SYMTAB_H

#include <stdint.h>

/*
 * Width of the label field of a symbol. Only the first SYMTAB_LABEL_MAXLEN
 * characters of a label are stored and compared.
 */
#define SYMTAB_LABEL_MAXLEN 32

/* Opaque symbol table mapping labels to 16-bit program locations. */
typedef struct symtab symtab_t;

/*
 * Create a new, empty symbol table. Release it with symtab_free().
 */
symtab_t* symtab_init(void);

/*
 * Release a table and everything it owns. Passing NULL is a no-op.
 */
void symtab_free(symtab_t *table);

/*
 * Insert `label` with location `loc`.
 *
 * Returns 0 when the symbol was inserted and -1 when it could not be
 * inserted, for example because the label is already defined (existing
 * entries are never overwritten).
 */
int symtab_set(symtab_t *table, const char *label, uint16_t loc);

/*
 * Look up `label`.
 *
 * Returns the index of the entry, or -1 if the label is not defined. When
 * `loc` is not NULL and the label is found, its location is stored in
 * `*loc`; pass NULL to only test for existence.
 */
int symtab_get(symtab_t *table, const char *label, uint16_t *loc);

#endif /* ASM_SYMTAB_H */