1
0
Fork 0
mirror of https://github.com/pnx/m16vm synced 2026-06-16 03:44:55 +02:00

asm: lexer implementation.

This commit is contained in:
Henrik Hautakoski 2018-11-10 12:05:10 +01:00
parent 5bf70ad664
commit 377c008ebe
No known key found for this signature in database
GPG key ID: 839F3A7EAFAEAFAA
5 changed files with 381 additions and 4 deletions

54
src/as/as.c Normal file
View file

@ -0,0 +1,54 @@
/* as.c
*
* Copyright (C) 2012 Henrik Hautakoski <henrik@fiktivkod.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include <stdio.h>
#include <stdlib.h>
#include "lexer.h"
/**
 * Print a one-line usage summary to standard error.
 *
 * program: name the tool was started as (normally argv[0]). A NULL
 *          pointer is tolerated and replaced by "as", because argv[0]
 *          is not guaranteed to be set by the caller of the process.
 *
 * Always returns -1 so callers can write `return usage(...)`.
 */
int usage(char *program) {
    /* The single argument is opened as a file by main(), so say so. */
    fprintf(stderr, "Usage: %s <file>\n", program ? program : "as");
    return -1;
}
/**
 * Entry point: tokenize the file named by argv[1] and dump every token
 * to standard output with lexer_print_token().
 *
 * Returns EXIT_SUCCESS when the input was lexed up to end of input.
 * Returns EXIT_FAILURE when no file argument was given, when the file
 * cannot be opened (the reason is reported with perror), or when the
 * lexer reports an error.
 */
int main(int argc, char **argv) {
    FILE *fd;
    struct lexer lex;
    int status = EXIT_SUCCESS;

    if (argc < 2) {
        usage(argv[0]);
        return EXIT_FAILURE;
    }

    fd = fopen(argv[1], "r");
    if (fd == NULL) {
        /* Tell the user why nothing happened instead of exiting silently. */
        perror(argv[1]);
        return EXIT_FAILURE;
    }

    lexer_init(&lex, fd);

    do {
        if (lexer_get_next(&lex) < 0) {
            /* The lexer already printed a diagnostic; reflect it in the exit status. */
            status = EXIT_FAILURE;
            break;
        }
        lexer_print_token(&lex.token);
    } while (lex.token.type != TOKEN_EOI);

    fclose(fd);
    return status;
}

221
src/as/lexer.c Normal file
View file

@ -0,0 +1,221 @@
#include <stdio.h>
#include <string.h>
#include "lexer.h"
/**
 * Character-class macros for the grammar.
 *
 * Each macro may evaluate its argument more than once, so only pass
 * expressions without side effects.
 */

// Decimal digit: [0-9]
#define number(x) ('0' <= (x) && (x) <= '9')

// First character of a number: a digit or a leading '-'
#define first_number(x) ((x) == '-' || number(x))

// First character of an identifier: [a-z], [A-Z] or '_'
#define first_string(x) \
    ( (x) == '_' \
    || ('a' <= (x) && (x) <= 'z') \
    || ('A' <= (x) && (x) <= 'Z') )

// Any later identifier character: the above plus digits.
// A ':' is not matched here; read_string() checks for it separately.
#define string(x) \
    (number(x) || first_string(x))

// Blanks skipped between tokens; '\n' is not included.
#define space(x) ((x) == '\t' || (x) == '\r' || (x) == ' ')
/**
* Helper functions
*/
/*
 * Fetch the next significant character from the lexer's input.
 *
 * Blanks (see space()) are skipped, and everything from a ';' up to the
 * end of the line is treated as a comment and skipped too.
 *
 * Returns the first non-blank character outside a comment, '\n' when a
 * line ends (including a line that ends in a comment), or EOF.
 */
static int read_next(struct lexer *lex) {
    int in_comment = 0;

    for (;;) {
        int ch = fgetc(lex->fp);

        /* Line end and end of input are always reported to the caller. */
        if (ch == EOF || ch == '\n')
            return ch;

        if (in_comment)
            continue;

        if (ch == ';')
            in_comment = 1;
        else if (!space(ch))
            return ch;
    }
}
/*
 * Read an optionally negative decimal integer from `fp`.
 *
 * A '-' is accepted only as the very first character; a '-' that shows
 * up after digits ends the number and stays in the stream. Reading
 * stops at the first non-digit, which is pushed back with ungetc().
 *
 * The magnitude saturates at 0xFFFF (the largest value the 16-bit token
 * value field can hold) so that an overlong literal can neither overflow
 * the signed accumulator nor wrap around into a small value.
 *
 * Returns the parsed value, or 0 when no digits were present (a lone
 * leading '-' is consumed in that case).
 */
static int read_number(FILE *fp) {
    enum { NUMBER_LIMIT = 0xFFFF };
    int c, neg = 0, val = 0;

    /* The sign is only meaningful in the leading position. */
    c = fgetc(fp);
    if (c == '-')
        neg = 1;
    else if (c != EOF)
        ungetc(c, fp);

    while ((c = fgetc(fp)) != EOF) {
        int digit = c - '0';

        if (digit < 0 || digit > 9) {
            ungetc(c, fp);
            break;
        }

        /* Check before multiplying so the accumulator never overflows. */
        if (val > (NUMBER_LIMIT - digit) / 10)
            val = NUMBER_LIMIT;
        else
            val = (val * 10) + digit;
    }

    return neg ? -val : val;
}
/*
 * Read an identifier from `fp` and classify it.
 *
 * Characters are consumed while they match string(). If the identifier
 * is directly followed by ':' the colon is consumed and the result is
 * TOKEN_LABEL_DECL; any other terminating character is pushed back.
 *
 * At most sizeof(buf) - 1 characters are stored. The rest of an overlong
 * identifier is still consumed (so it remains a single token) but is not
 * kept, which keeps the terminating '\0' inside the buffer.
 *
 * Returns TOKEN_LABEL_DECL, one of the TOKEN_OPCODE_* values when the
 * text is a known mnemonic, or TOKEN_LABEL otherwise.
 */
static int read_string(FILE *fp) {
    static const struct {
        const char *name;
        int type;
    } opcodes[] = {
        { "noop", TOKEN_OPCODE_NOOP },
        { "add",  TOKEN_OPCODE_ADD },
        { "movl", TOKEN_OPCODE_MOVL },
        { "movh", TOKEN_OPCODE_MOVH },
        { "ld",   TOKEN_OPCODE_LD },
        { "sw",   TOKEN_OPCODE_SW },
        { "beq",  TOKEN_OPCODE_BEQ },
        { "jmp",  TOKEN_OPCODE_JMP },
        { "jr",   TOKEN_OPCODE_JR },
        { "int",  TOKEN_OPCODE_INT }
    };
    char buf[64];
    size_t len = 0, k;
    int c, label_decl = 0;

    while ((c = fgetc(fp)) != EOF) {
        if (!string(c)) {
            if (c == ':')
                label_decl = 1;
            else
                ungetc(c, fp);
            break;
        }
        /* Leave room for the terminator; drop what does not fit. */
        if (len < sizeof buf - 1)
            buf[len++] = (char) c;
    }
    buf[len] = '\0';

    /* A trailing ':' wins over the mnemonic lookup. */
    if (label_decl)
        return TOKEN_LABEL_DECL;

    for (k = 0; k < sizeof opcodes / sizeof opcodes[0]; k++) {
        if (strcmp(opcodes[k].name, buf) == 0)
            return opcodes[k].type;
    }
    return TOKEN_LABEL;
}
/**
* Exposed functions
*/
/*
 * Bind `lex` to the stream `fp` and reset its position: line counting
 * starts at 1 and the current token type is set to TOKEN_EOI.
 */
void lexer_init(struct lexer *lex, FILE *fp) {
    lex->fp = fp;
    lex->token.type = TOKEN_EOI;
    lex->lineno = 1;
}
int lexer_get_next(struct lexer *lex) {
uint16_t num;
int ch = read_next(lex);
if (lex->token.type == TOKEN_EOL)
lex->lineno++;
switch(ch) {
case EOF : lex->token.type = TOKEN_EOI;
break;
case '\n' :
lex->token.type = TOKEN_EOL;
break;
case ',' : lex->token.type = TOKEN_ARG_SEP;
break;
case '$' :
lex->token.type = TOKEN_REG;
num = read_number(lex->fp);
// Registers is 8-bit only.
if (num > 0xF) {
fprintf(stderr, "ERROR: Invalid register value '%i' on line: %i\n", num, lex->lineno);
return -1;
}
lex->token.value.n = num;
break;
default:
if (first_number(ch)) {
ungetc(ch, lex->fp);
lex->token.type = TOKEN_NUMBER;
lex->token.value.n = read_number(lex->fp);
} else if (first_string(ch)) {
ungetc(ch, lex->fp);
lex->token.type = read_string(lex->fp);
} else {
fprintf(stderr, "ERROR: Invalid character '%c' on line: %i\n", ch, lex->lineno);
return -1;
}
}
lex->token.lineno = lex->lineno;
return 0;
}
/*
 * Debug helper: write a bracketed description of `token` to stdout.
 *
 * A "\n<line>: " prefix is printed whenever the token's line number
 * differs from that of the previously printed token. The last line
 * number is remembered in a function-local static that starts at 0.
 */
void lexer_print_token(struct token *token) {
    static int lineno = 0;
    const char *text;

    if (token->lineno != lineno) {
        lineno = token->lineno;
        printf("\n%i: ", lineno);
    }

    /* Tokens that carry a value are printed directly; the rest map to fixed text. */
    switch(token->type) {
    case TOKEN_REG :
        printf(" [REG %i] ", token->value.n);
        return;
    case TOKEN_NUMBER :
        printf(" [NUM %i] ", token->value.n);
        return;
    case TOKEN_OPCODE_NOOP : text = " [OP NOOP] "; break;
    case TOKEN_OPCODE_ADD  : text = " [OP ADD] "; break;
    case TOKEN_OPCODE_MOVL : text = " [OP MOVL] "; break;
    case TOKEN_OPCODE_MOVH : text = " [OP MOVH] "; break;
    case TOKEN_OPCODE_LD   : text = " [OP LD] "; break;
    case TOKEN_OPCODE_SW   : text = " [OP SW] "; break;
    case TOKEN_OPCODE_BEQ  : text = " [OP BEQ] "; break;
    case TOKEN_OPCODE_JMP  : text = " [OP JMP] "; break;
    case TOKEN_OPCODE_JR   : text = " [OP JR] "; break;
    case TOKEN_OPCODE_INT  : text = " [OP INT] "; break;
    case TOKEN_LABEL       : text = " [LABEL] "; break;
    case TOKEN_LABEL_DECL  : text = " [LABEL DECL] "; break;
    case TOKEN_ARG_SEP     : text = " [SEP] "; break;
    case TOKEN_EOI         : text = " [EOI] "; break;
    case TOKEN_EOL         : text = " [EOL] "; break;
    default                : text = " [U] ";
    }

    fputs(text, stdout);
}

95
src/as/lexer.h Normal file
View file

@ -0,0 +1,95 @@
/* lexer.h
*
* Copyright (C) 2018 Henrik Hautakoski <henrik@fiktivkod.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#ifndef ASM_LEXER_H
#define ASM_LEXER_H
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
/**
 * Every kind of token the lexer can produce.
 *
 * The numeric values are spelled out so they stay fixed if members
 * are ever reordered.
 */
enum token_type {
    /* Structural tokens */
    TOKEN_EOI         = -1, // End of input
    TOKEN_EOL         = 0,  // Newline

    /* Instruction mnemonics */
    TOKEN_OPCODE_NOOP = 1,
    TOKEN_OPCODE_ADD  = 2,
    TOKEN_OPCODE_MOVL = 3,
    TOKEN_OPCODE_MOVH = 4,
    TOKEN_OPCODE_LD   = 5,
    TOKEN_OPCODE_SW   = 6,
    TOKEN_OPCODE_BEQ  = 7,
    TOKEN_OPCODE_JMP  = 8,
    TOKEN_OPCODE_JR   = 9,
    TOKEN_OPCODE_INT  = 10,

    /* Operands and punctuation */
    TOKEN_LABEL       = 11, // Identifier that is not a mnemonic
    TOKEN_LABEL_DECL  = 12, // Identifier directly followed by ':'
    TOKEN_REG         = 13, // '$' followed by a register number
    TOKEN_NUMBER      = 14, // Decimal literal
    TOKEN_ARG_SEP     = 15  // ','
};
/**
* Token structure.
*
* Holds the information the lexer records about a single token:
* the line it was found on, its type and, for some types, a value.
*/
struct token {
// Line number the token was read from (copied from the lexer state).
uint16_t lineno;
// Kind of token; see enum token_type.
enum token_type type;
/*
* Token value; which member is meaningful depends on `type`.
*
* n: set by the lexer for TOKEN_REG (register number) and
*    TOKEN_NUMBER (literal value). It is unsigned, so a negative
*    literal is converted to uint16_t when it is stored.
* s: string storage; the lexer code in lexer.c does not write it.
*/
union {
uint16_t n;
char s[32];
} value;
};
/**
* Lexer state.
*
* Set up with lexer_init() and advanced with lexer_get_next().
* The lexer only reads from `fp`; it does not open or close it.
*/
struct lexer {
uint16_t lineno; // Current line number; lexer_init() sets it to 1.
FILE * fp; // File being lexed.
struct token token; // Most recent token produced by lexer_get_next().
};
/**
* Initialize the lexer with a file pointer to the file
* that should be lexed.
*
* Sets the line number to 1 and the current token type to TOKEN_EOI.
*/
void lexer_init(struct lexer *lex, FILE *fp);
/**
* Advance the lexer to the next token.
*
* On success the token is stored in lex->token and 0 is returned.
* On invalid input an error message is written to stderr and -1
* is returned.
*/
int lexer_get_next(struct lexer *lex);
/**
* For debugging, prints the token to standard output.
*
* A new "<line>: " prefix is started whenever the token's line number
* differs from that of the previously printed token.
*/
void lexer_print_token(struct token *token);
#endif /* ASM_LEXER_H */