asm: lexer implementation.

2026-06-18 04:00:02 +02:00 · 2018-11-10 12:05:10 +01:00 · 2018-11-10 12:05:10 +01:00 · 377c008ebe
commit 377c008ebe
parent 5bf70ad664
5 changed files with 381 additions and 4 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,2 +1,3 @@
 *.o
 m16vm
 /as
--- a/14
+++ b/14
@ -3,13 +3,19 @@ CC = gcc
 CFLAGS = -Iinclude -DMEM_SIZE=32 -DM16_DEBUG_MEM
 LD = $(CC)
-VM = m16vm
+PROGRAMS = m16vm as
-$(VM) : src/vm.o src/cpu.o src/mm.o src/instr_decode.o src/syscall.o src/program.o
+all: $(PROGRAMS)
-	$(LD) $(LDFLAGS)-o $@ $^
+
 m16vm : src/vm.o src/cpu.o src/mm.o src/instr_decode.o src/syscall.o src/program.o
 	$(LD) $(LDFLAGS) -o $@ $^
 as : src/as/as.o src/as/lexer.o
 	$(LD) $(LDFLAGS) -o $@ $^
 clean :
 	$(RM) src/*.o
 	$(RM) src/as/*.o
 distclean : clean
-	$(RM) $(VM)
+	$(RM) $(PROGRAMS)
--- a/src/as/as.c
+++ b/src/as/as.c
@ -0,0 +1,54 @@
 /* as.c
 *
 *   Copyright (C) 2012   Henrik Hautakoski <henrik@fiktivkod.org>
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program; if not, write to the Free Software
 *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 *   MA 02110-1301, USA.
 */
 #include <stdio.h>
 #include <stdlib.h>
 #include "lexer.h"
 /**
  * Print a one-line usage message to stderr.
  *
  * program: the name the tool was invoked as (argv[0]).
  *
  * Always returns -1, so main can write `return usage(argv[0]);`.
  */
 int usage(char *program) {
 	// The single argument is opened with fopen() by main, so it is a file path.
 	fprintf(stderr, "Usage: %s <file>\n", program);
 	return -1;
 }
 /**
  * Entry point: tokenizes the file named by argv[1] and prints every
  * token to standard output until the end-of-input token is reached.
  *
  * Returns 0 when the whole file was lexed, and -1 when no file argument
  * was given, the file could not be opened, or the lexer reported an error.
  */
 int main(int argc, char **argv) {
 	FILE *fd;
 	struct lexer lex;
 	int status = 0;
 	if (argc < 2)
 		return usage(argv[0]);
 	fd = fopen(argv[1], "r");
 	if (fd == NULL) {
 		// Tell the user why nothing happened instead of exiting silently.
 		perror(argv[1]);
 		return -1;
 	}
 	lexer_init(&lex, fd);
 	do {
 		if (lexer_get_next(&lex) < 0) {
 			// The lexer already printed the reason; make the exit status reflect it.
 			status = -1;
 			break;
 		}
 		lexer_print_token(&lex.token);
 	} while(lex.token.type != TOKEN_EOI);
 	fclose(fd);
 	return status;
 }
--- a/src/as/lexer.c
+++ b/src/as/lexer.c
@ -0,0 +1,221 @@
 #include <stdio.h>
 #include <string.h>
 #include "lexer.h"
 /**
 * macros for the grammar.
 */
 // A decimal digit: [0-9]
 #define number(x) ('0' <= (x) && (x) <= '9')
 // A number may start with a digit or with a leading '-'
 #define first_number(x) ((x) == '-' || number(x))
 // An identifier starts with a letter [a-zA-Z] or '_'
 #define first_string(x)			\
 	(   (x) == '_'			\
 	|| ('a' <= (x) && (x) <= 'z')	\
 	|| ('A' <= (x) && (x) <= 'Z')	)
 // Every following identifier character may also be a digit
 #define string(x) \
 	(number(x) || first_string(x))
 // Blank characters; the newline is deliberately not one of them
 #define space(x) ((x) == '\t' || (x) == '\r' || (x) == ' ')
 /**
 * Helper functions
 */
 /**
  * Skip blanks and a ';' comment, then return the next significant
  * character read from lex->fp.
  *
  * Returns the first character that is not a blank and not part of a
  * comment, '\n' when the end of the line is reached (a newline also ends
  * a comment), or EOF when the input is exhausted.
  */
 static int read_next(struct lexer *lex) {
 	int in_comment = 0;
 	for (;;) {
 		int ch = fgetc(lex->fp);
 		// End of input and end of line are always reported, even inside a comment.
 		if (ch == EOF || ch == '\n')
 			return ch;
 		if (in_comment)
 			continue;
 		if (ch == ';')
 			in_comment = 1;
 		else if (!space(ch))
 			return ch;
 	}
 }
 /**
  * Parse a decimal integer from fp: an optional leading '-' followed by
  * any number of digits.
  *
  * Reading stops at the first character that cannot continue the number;
  * that character is pushed back so the caller sees it next. A '-' is
  * only accepted as the very first character, so "12-3" yields 12 and
  * leaves "-3" in the stream.
  *
  * Returns the parsed value (0 when no digit was read). The magnitude is
  * clamped to 0xFFFF.
  */
 static int read_number(FILE *fp) {
 	// Token values are stored in a 16-bit field, so a larger magnitude can
 	// not be represented anyway; clamping keeps the int accumulator from
 	// overflowing on very long digit sequences.
 	enum { VALUE_CAP = 0xFFFF };
 	int c, neg = 0, started = 0, val = 0;
 	while((c = fgetc(fp)) != EOF) {
 		if (!started && c == '-') {
 			neg = 1;
 			started = 1;
 			continue;
 		}
 		if (c < '0' || c > '9') {
 			ungetc(c, fp);
 			break;
 		}
 		started = 1;
 		if (val < VALUE_CAP)
 			val = (val * 10) + (c - '0');
 		if (val > VALUE_CAP)
 			val = VALUE_CAP;
 	}
 	return neg ? -val : val;
 }
 /**
  * Read an identifier from fp and classify it.
  *
  * Characters are consumed while they are valid identifier characters.
  * A ':' directly after the identifier is consumed and marks a label
  * declaration; any other terminating character is pushed back.
  *
  * Returns TOKEN_LABEL_DECL for "name:", the matching TOKEN_OPCODE_*
  * value when the identifier is an opcode mnemonic, and TOKEN_LABEL
  * otherwise.
  */
 static int read_string(FILE *fp) {
 	static const struct {
 		const char *name;
 		int type;
 	} opcodes[] = {
 		{ "noop", TOKEN_OPCODE_NOOP },
 		{ "add",  TOKEN_OPCODE_ADD  },
 		{ "movl", TOKEN_OPCODE_MOVL },
 		{ "movh", TOKEN_OPCODE_MOVH },
 		{ "ld",   TOKEN_OPCODE_LD   },
 		{ "sw",   TOKEN_OPCODE_SW   },
 		{ "beq",  TOKEN_OPCODE_BEQ  },
 		{ "jmp",  TOKEN_OPCODE_JMP  },
 		{ "jr",   TOKEN_OPCODE_JR   },
 		{ "int",  TOKEN_OPCODE_INT  },
 	};
 	char buf[64];
 	size_t len = 0, k;
 	int c, label_decl = 0;
 	while((c = fgetc(fp)) != EOF) {
 		if (!string(c)) {
 			if (c == ':')
 				label_decl = 1;
 			else
 				ungetc(c, fp);
 			break;
 		}
 		// Keep room for the terminator. Characters beyond the buffer are
 		// still consumed so an overlong identifier stays a single token;
 		// the text is only compared against the short mnemonics above, so
 		// dropping the tail cannot change the classification.
 		if (len < sizeof(buf) - 1)
 			buf[len++] = (char)c;
 	}
 	buf[len] = '\0';
 	if (label_decl)
 		return TOKEN_LABEL_DECL;
 	for (k = 0; k < sizeof(opcodes) / sizeof(opcodes[0]); k++) {
 		if (!strcmp(opcodes[k].name, buf))
 			return opcodes[k].type;
 	}
 	return TOKEN_LABEL;
 }
 /**
 * Exposed functions
 */
 /**
  * Prepare `lex` for reading tokens from `fp`.
  *
  * Line counting starts at 1 and the current token type is TOKEN_EOI
  * until the first call to lexer_get_next().
  */
 void lexer_init(struct lexer *lex, FILE *fp) {
 	lex->fp = fp;
 	lex->token.type = TOKEN_EOI;
 	lex->lineno = 1;
 }
 /**
  * Read the next token from lex->fp into lex->token.
  *
  * The line counter is advanced when the previously produced token was an
  * end-of-line, so each token (the end-of-line token included) carries the
  * number of the line it was read from.
  *
  * Returns 0 on success. Returns -1, after printing a message to stderr,
  * when an invalid character or an out-of-range register number is found;
  * lex->token.lineno is not updated in that case.
  */
 int lexer_get_next(struct lexer *lex) {
 	int num;
 	int ch = read_next(lex);
 	if (lex->token.type == TOKEN_EOL)
 		lex->lineno++;
 	switch(ch) {
 	case EOF :
 		lex->token.type = TOKEN_EOI;
 		break;
 	case '\n' :
 		lex->token.type = TOKEN_EOL;
 		break;
 	case ',' :
 		lex->token.type = TOKEN_ARG_SEP;
 		break;
 	case '$' :
 		lex->token.type = TOKEN_REG;
 		// Keep the full int result for the check: narrowing to 16 bits
 		// first would let a value such as 65536 wrap to 0 and be accepted.
 		num = read_number(lex->fp);
 		// Valid register numbers are 0..15.
 		if (num < 0 || num > 0xF) {
 			fprintf(stderr, "ERROR: Invalid register value '%i' on line: %i\n", num, lex->lineno);
 			return -1;
 		}
 		lex->token.value.n = num;
 		break;
 	default:
 		if (first_number(ch)) {
 			// read_number() parses the whole literal, so hand the first character back.
 			ungetc(ch, lex->fp);
 			lex->token.type = TOKEN_NUMBER;
 			lex->token.value.n = read_number(lex->fp);
 		} else if (first_string(ch)) {
 			ungetc(ch, lex->fp);
 			lex->token.type = read_string(lex->fp);
 		} else {
 			fprintf(stderr, "ERROR: Invalid character '%c' on line: %i\n", ch, lex->lineno);
 			return -1;
 		}
 	}
 	lex->token.lineno = lex->lineno;
 	return 0;
 }
 /**
  * Debug helper: write a bracketed description of `token` to stdout.
  *
  * The line number of the most recently printed token is remembered
  * between calls; when a token from a different line arrives, a newline
  * and a "<line>: " prefix are printed first.
  */
 void lexer_print_token(struct token *token) {
 	static int lineno = 0;
 	const char *text;
 	if (token->lineno != lineno) {
 		lineno = token->lineno;
 		printf("\n%i: ", lineno);
 	}
 	switch(token->type) {
 	// Tokens that carry a numeric value are formatted right away.
 	case TOKEN_REG :
 		printf(" [REG %i] ", token->value.n);
 		return;
 	case TOKEN_NUMBER :
 		printf(" [NUM %i] ", token->value.n);
 		return;
 	// Everything else maps to a fixed string.
 	case TOKEN_OPCODE_NOOP : text = " [OP NOOP] "; break;
 	case TOKEN_OPCODE_ADD :  text = " [OP ADD] ";  break;
 	case TOKEN_OPCODE_MOVL : text = " [OP MOVL] "; break;
 	case TOKEN_OPCODE_MOVH : text = " [OP MOVH] "; break;
 	case TOKEN_OPCODE_LD :   text = " [OP LD] ";   break;
 	case TOKEN_OPCODE_SW :   text = " [OP SW] ";   break;
 	case TOKEN_OPCODE_BEQ :  text = " [OP BEQ] ";  break;
 	case TOKEN_OPCODE_JMP :  text = " [OP JMP] ";  break;
 	case TOKEN_OPCODE_JR :   text = " [OP JR] ";   break;
 	case TOKEN_OPCODE_INT :  text = " [OP INT] ";  break;
 	case TOKEN_LABEL :       text = " [LABEL] ";   break;
 	case TOKEN_LABEL_DECL :  text = " [LABEL DECL] "; break;
 	case TOKEN_ARG_SEP :     text = " [SEP] ";     break;
 	case TOKEN_EOI :         text = " [EOI] ";     break;
 	case TOKEN_EOL :         text = " [EOL] ";     break;
 	default:                 text = " [U] ";       break;
 	}
 	fputs(text, stdout);
 }
--- a/src/as/lexer.h
+++ b/src/as/lexer.h
@ -0,0 +1,95 @@
 /* lexer.h
 *
 *   Copyright (C) 2018   Henrik Hautakoski <henrik@fiktivkod.org>
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program; if not, write to the Free Software
 *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 *   MA 02110-1301, USA.
 */
 #ifndef ASM_LEXER_H
 #define ASM_LEXER_H
 #include <stdlib.h>
 #include <stdint.h>
 /**
 * All token types.
 */
 enum token_type {
 	TOKEN_EOI = -1,		// End of input
 	TOKEN_EOL = 0,		// Newline
 	// Opcode mnemonics
 	TOKEN_OPCODE_NOOP,
 	TOKEN_OPCODE_ADD,
 	TOKEN_OPCODE_MOVL,
 	TOKEN_OPCODE_MOVH,
 	TOKEN_OPCODE_LD,
 	TOKEN_OPCODE_SW,
 	TOKEN_OPCODE_BEQ,
 	TOKEN_OPCODE_JMP,
 	TOKEN_OPCODE_JR,
 	TOKEN_OPCODE_INT,
 	TOKEN_LABEL,		// Identifier that is not an opcode
 	TOKEN_LABEL_DECL,	// Identifier directly followed by ':'
 	TOKEN_REG,		// '$' followed by a register number
 	TOKEN_NUMBER,		// Numeric literal
 	TOKEN_ARG_SEP		// ','
 };
 /**
 * Token structure.
 *
 * Holds information about a single token.
 */
 struct token {
 	// Line number where the token was extracted from.
 	uint16_t 	lineno;
 	// Kind of token; one of enum token_type.
 	enum token_type	type;
 	/*
 	 * Token value; which member is meaningful depends on type.
 	 * It can be a string or an unsigned 16-bit number.
 	 */
 	union {
 		// Register number (TOKEN_REG) or literal value (TOKEN_NUMBER).
 		uint16_t n;
 		// NOTE(review): presumably meant for label names; confirm which
 		// code fills it in and whether 32 bytes is the intended limit.
 		char	 s[32];
 	} value;
 };
 /**
 * Lexer state
 */
 // NOTE(review): FILE is declared by <stdio.h>, which this header does not
 // include itself; a file including this header must include <stdio.h> first.
 struct lexer {
 	uint16_t 	lineno;		// current line number
 	FILE *		fp;		// File being lexed.
 	struct token 	token;		// Current token
 };
 /**
 * Initialize the lexer with a file pointer to the file
 * that should be lexed.
 *
 * The line counter starts at 1 and the current token is set to
 * TOKEN_EOI until lexer_get_next() is called.
 */
 void lexer_init(struct lexer *lex, FILE *fp);
 /**
 * Advance the lexer to the next token.
 *
 * On success the new token is available in lex->token and 0 is
 * returned; at the end of the input its type is TOKEN_EOI.
 * On an invalid character or an invalid register number a message
 * is printed to stderr and -1 is returned.
 */
 int lexer_get_next(struct lexer *lex);
 /**
 * For debugging, prints the token to standard output.
 *
 * A newline and a "<line>: " prefix are printed first whenever the
 * token's line number differs from that of the previously printed token.
 */
 void lexer_print_token(struct token *token);
 #endif /* ASM_LEXER_H */