1
0
Fork 0
mirror of https://github.com/pnx/m16vm synced 2026-06-16 03:44:55 +02:00
m16vm/as/lexer.c

314 lines
6.3 KiB
C

/* lexer.c
*
* Copyright (C) 2018-2019 Henrik Hautakoski <henrik@fiktivkod.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/
#include <stdio.h>
#include <string.h>
#include "asm_error.h"
#include "lexer.h"
/**
* macros for the grammar.
*/
// Numbers is defined as [0-9]
#define number(x) ((x) >= '0' && (x) <= '9')
// The first digit can however also contain '-'
#define first_number(x) (number(x) || (x) == '-' )
// First character in strings can be [a-z][A-Z] or '_'
#define first_string(x) \
( ((x) >= 'a' && (x) <= 'z') \
|| ((x) >= 'A' && (x) <= 'Z') \
|| (x) == '_' )
// All characters after can also include numbers or ':'
#define string(x) \
(first_string(x) || number(x))
#define space(x) ((x) == ' ' || (x) == '\t' || (x) == '\r')
struct opcode_ent {
char * name;
uint8_t code;
};
static const struct opcode_ent opcode_table[] = {
{ "noop", TOKEN_OPCODE_NOOP },
{ "add" , TOKEN_OPCODE_ADD },
{ "movl", TOKEN_OPCODE_MOVL },
{ "movh", TOKEN_OPCODE_MOVH },
{ "ld" , TOKEN_OPCODE_LD },
{ "sw" , TOKEN_OPCODE_SW },
{ "beq" , TOKEN_OPCODE_BEQ },
{ "jmp" , TOKEN_OPCODE_JMP },
{ "jr" , TOKEN_OPCODE_JR },
{ "int" , TOKEN_OPCODE_INT },
{ NULL , 0 },
};
/**
* Helper functions
*/
static int read_next(struct lexer *lex) {
int c, comment = 0;
while((c = fgetc(lex->fp)) != EOF) {
if (c == '\n')
break;
if (comment)
continue;
if (c == ';') {
comment = 1;
} else if (!space(c)) {
break;
}
}
return c;
}
static int read_hex(FILE *fp, int *out) {
int c, val = 0;
while((c = fgetc(fp)) != EOF) {
char n = 0;
if (number(c)) {
n = c - '0';
}
else if ( (c >= 'a' && c <= 'f')
|| (c >= 'A' && c <= 'F')) {
n = (c % 0x20) + 9;
}
else {
ungetc(c, fp);
break;
}
val = (val * 16) + n;
if (val > 0xFF)
goto overflow;
}
*out = val;
return 0;
overflow:
*out = 0xFF;
return -1;
}
static int read_dec(FILE *fp, int neg, int *out) {
int c, val = 0;
while((c = fgetc(fp)) != EOF) {
if (!number(c)) {
ungetc(c, fp);
break;
}
val = (val * 10) + (c - '0');
// Cool trick here.
// because the range is -128 (0x80) to +127 (0x7F)
// We can do 0x80 - 1 if it is NOT a negative number.
if (val > (0x80 - !neg))
goto overflow;
}
*out = neg ? -1 * val : val;
return 0;
overflow:
*out = neg ? -1 * 0x80 : 0x7F;
return -1;
}
static int read_number(FILE *fp, int *out) {
int neg = 0, c = fgetc(fp);
// Check for '0x'.
if (c == '0') {
c = fgetc(fp);
if (c == 'x') {
// We have a hexadecimal number.
return read_hex(fp, out);
}
ungetc(c, fp);
ungetc('0', fp);
}
// While we are at it. check for a negative sign.
else if (c == '-') {
neg = 1;
}
// We got something else. put it back.
else {
ungetc(c, fp);
}
return read_dec(fp, neg, out);
}
static int parse_number(struct lexer *lex) {
int num;
if (read_number(lex->fp, &num) < 0)
asm_warn(lex->lineno, "Value truncated to %i", num);
lex->token.value.n = num;
return 0;
}
static int read_string(FILE *fp, char *buf, size_t len) {
int c, label_decl = 0, i = 0;
while((c = fgetc(fp)) != EOF && i < len) {
if (string(c)) {
buf[i++] = c;
} else {
if (c == ':') {
label_decl = 1;
} else {
ungetc(c, fp);
}
break;
}
}
buf[i] = '\0';
if (label_decl)
return TOKEN_LABEL_DECL;
for(i = 0; opcode_table[i].name; i++) {
if (!strcmp(opcode_table[i].name, buf))
return opcode_table[i].code;
}
return TOKEN_LABEL;
}
/**
* Exposed functions
*/
void lexer_init(struct lexer *lex, FILE *fp) {
lex->lineno = 1;
lex->fp = fp;
lex->token.type = TOKEN_EOI;
}
int lexer_get_next(struct lexer *lex) {
uint16_t num;
int ch = read_next(lex);
if (lex->token.type == TOKEN_EOL)
lex->lineno++;
switch(ch) {
case EOF : lex->token.type = TOKEN_EOI;
break;
case '\n' :
lex->token.type = TOKEN_EOL;
break;
case ',' : lex->token.type = TOKEN_ARG_SEP;
break;
case '$' :
lex->token.type = TOKEN_REG;
if (parse_number(lex) < 0)
return -1;
break;
default:
if (first_number(ch)) {
ungetc(ch, lex->fp);
lex->token.type = TOKEN_NUMBER;
if (parse_number(lex) < 0)
return -1;
} else if (first_string(ch)) {
char buf[32];
ungetc(ch, lex->fp);
lex->token.type = read_string(lex->fp, buf, sizeof(buf));
if (lex->token.type == TOKEN_LABEL_DECL || lex->token.type == TOKEN_LABEL)
strcpy(lex->token.value.s, buf);
} else {
return asm_error(lex->lineno, "Invalid character '%c'", ch);
}
}
lex->token.lineno = lex->lineno;
return 0;
}
void lexer_print_token(struct token *token) {
static int lineno = 0;
if (token->lineno != lineno) {
lineno = token->lineno;
printf("\n%i: ", lineno);
}
switch(token->type) {
case TOKEN_OPCODE_NOOP : printf(" [OP NOOP] ");
break;
case TOKEN_OPCODE_ADD : printf(" [OP ADD] ");
break;
case TOKEN_OPCODE_MOVL : printf(" [OP MOVL] ");
break;
case TOKEN_OPCODE_MOVH : printf(" [OP MOVH] ");
break;
case TOKEN_OPCODE_LD : printf(" [OP LD] ");
break;
case TOKEN_OPCODE_SW : printf(" [OP SW] ");
break;
case TOKEN_OPCODE_BEQ : printf(" [OP BEQ] ");
break;
case TOKEN_OPCODE_JMP : printf(" [OP JMP] ");
break;
case TOKEN_OPCODE_JR : printf(" [OP JR] ");
break;
case TOKEN_OPCODE_INT : printf(" [OP INT] ");
break;
case TOKEN_LABEL : printf(" [LABEL \"%s\"] ", token->value.s);
break;
case TOKEN_LABEL_DECL : printf(" [LABEL DECL \"%s\"] ", token->value.s);
break;
case TOKEN_REG : printf(" [REG %i] ", token->value.n);
break;
case TOKEN_ARG_SEP : printf(" [SEP] ");
break;
case TOKEN_NUMBER : printf(" [NUM %i] ", token->value.n);
break;
case TOKEN_EOI : printf(" [EOI] ");
break;
case TOKEN_EOL : printf(" [EOL] ");
break;
default: printf(" [U] ");
}
}