diff options
-rw-r--r-- | Makefile | 51 | ||||
-rw-r--r-- | ast.c | 251 | ||||
-rw-r--r-- | ast.h | 43 | ||||
-rw-r--r-- | driver.c | 30 | ||||
-rw-r--r-- | example.imp | 1 | ||||
-rw-r--r-- | lexer.l | 47 | ||||
-rw-r--r-- | parser.y | 103 |
7 files changed, 526 insertions, 0 deletions
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..33bf25d --- /dev/null +++ b/Makefile @@ -0,0 +1,51 @@ +CC := gcc +CFLAGS := -Wall -g -I. +BISON := bison +FLEX := flex + +BUILD_DIR := build + +PARSER_Y := parser.y +LEXER_L := lexer.l +DRIVER_C := driver.c +AST_C := ast.c +AST_H := ast.h + +PARSER_C := $(BUILD_DIR)/parser.tab.c +PARSER_H := $(BUILD_DIR)/parser.tab.h +LEXER_C := $(BUILD_DIR)/lex.yy.c +OBJS := $(BUILD_DIR)/parser.tab.o $(BUILD_DIR)/lex.yy.o $(BUILD_DIR)/driver.o $(BUILD_DIR)/ast.o + +TARGET := $(BUILD_DIR)/imp + +.PHONY: all clean + +all: $(TARGET) + +$(TARGET): $(OBJS) + $(CC) $(CFLAGS) -o $@ $^ + +$(BUILD_DIR): + mkdir -p $(BUILD_DIR) + +$(PARSER_C) $(PARSER_H): $(PARSER_Y) | $(BUILD_DIR) + $(BISON) -d --defines=$(PARSER_H) -o $(PARSER_C) $(PARSER_Y) + +$(LEXER_C): $(LEXER_L) $(PARSER_H) | $(BUILD_DIR) + $(FLEX) -o $@ $< + +$(BUILD_DIR)/imp_parser.tab.o: $(PARSER_C) $(PARSER_H) + $(CC) $(CFLAGS) -c $(PARSER_C) -o $@ + +$(BUILD_DIR)/lex.yy.o: $(LEXER_C) $(PARSER_H) + $(CC) $(CFLAGS) -c $(LEXER_C) -o $@ + +$(BUILD_DIR)/driver.o: $(DRIVER_C) $(PARSER_H) $(AST_H) | $(BUILD_DIR) + $(CC) $(CFLAGS) -c $(DRIVER_C) -o $@ + +$(BUILD_DIR)/ast.o: $(AST_C) $(AST_H) | $(BUILD_DIR) + $(CC) $(CFLAGS) -c $(AST_C) -o $@ + +clean: + rm -rf $(BUILD_DIR) + @@ -0,0 +1,251 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "ast.h" + +static ASTNode *new_node(NodeType type) { + ASTNode *n = malloc(sizeof(ASTNode)); + if (!n) { + fprintf(stderr, "Out of memory\n"); + exit(EXIT_FAILURE); + } + n->type = type; + return n; +} + +ASTNode *ast_skip(void) { + return new_node(NT_SKIP); +} + +ASTNode *ast_assign(ASTNode *var, ASTNode *aexp) { + ASTNode *n = new_node(NT_ASSIGN); + n->u.assign.var = var; + n->u.assign.aexp = aexp; + return n; +} + +ASTNode *ast_seq(ASTNode *s1, ASTNode *s2) { + ASTNode *n = new_node(NT_SEQ); + n->u.seq.s1 = s1; + n->u.seq.s2 = s2; + return n; +} + +ASTNode *ast_if(ASTNode *bexp, ASTNode *s1, ASTNode *s2) { + ASTNode *n = new_node(NT_IF); + n->u.cond.bexp = bexp; + n->u.cond.s1 = s1; + n->u.cond.s2 = s2; + return n; +} + +ASTNode *ast_while(ASTNode *bexp, ASTNode *s) { + ASTNode *n = new_node(NT_WHILE); + n->u.cond.bexp = bexp; + n->u.cond.s1 = s; + return n; +} + +ASTNode *ast_int(int val) { + ASTNode *n = new_node(NT_INT); + n->u.val = val; + return n; +} + +ASTNode *ast_var(char *var) { + ASTNode *n = new_node(NT_VAR); + n->u.var = strdup(var); + return n; +} + +ASTNode *ast_aop(AOp op, ASTNode *aexp1, ASTNode *aexp2) { + ASTNode *n = new_node(NT_AOP); + n->u.bin.exp1 = aexp1; + n->u.bin.exp2 = aexp2; + n->u.bin.op.aop = op; + return n; +} + +ASTNode *ast_bop(BOp op, ASTNode *bexp1, ASTNode *bexp2) { + ASTNode *n = new_node(NT_BOP); + n->u.bin.exp1 = bexp1; + n->u.bin.exp2 = bexp2; + n->u.bin.op.bop = op; + return n; +} + +ASTNode *ast_not(ASTNode *bexp) { + ASTNode *n = new_node(NT_NOT); + n->u.bexp = bexp; + return n; +} + +ASTNode *ast_rop(ROp op, ASTNode *aexp1, ASTNode *aexp2) { + ASTNode *n = new_node(NT_ROP); + n->u.bin.exp1 = aexp1; + n->u.bin.exp2 = aexp2; + n->u.bin.op.rop = op; + return n; +} + +void free_ast(ASTNode *n) { + if (!n) return; + switch (n->type) { + case NT_SKIP: + break; + case NT_ASSIGN: + free_ast(n->u.assign.var); + free_ast(n->u.assign.aexp); + break; + case NT_SEQ: + free_ast(n->u.seq.s1); + free_ast(n->u.seq.s2); + break; + case NT_IF: + free_ast(n->u.cond.bexp); + free_ast(n->u.cond.s1); + free_ast(n->u.cond.s2); + break; + case NT_WHILE: + free_ast(n->u.cond.bexp); + free_ast(n->u.cond.s1); + break; + case NT_INT: + break; + case NT_VAR: + free(n->u.var); + break; + case NT_AOP: + case NT_BOP: + case NT_ROP: + free_ast(n->u.bin.exp1); + free_ast(n->u.bin.exp2); + break; + case NT_NOT: + free_ast(n->u.bexp); + break; + } + free(n); +} + +typedef struct Env { + char *var; + int val; + struct Env *next; +} Env; + +static Env *env = NULL; + +static int env_lookup(const char *var) { + for (Env *e = env; e; e = e->next) { + if (strcmp(e->var, var) == 0) + return e->val; + } + fprintf(stderr, "Undefined variable %s\n", var); + exit(EXIT_FAILURE); +} + +static void env_update(const char *var, int val) { + for (Env *e = env; e; e = e->next) { + if (strcmp(e->var, var) == 0) { + e->val = val; + return; + } + } + Env *e = malloc(sizeof(Env)); + e->var = strdup(var); + e->val = val; + e->next = env; + env = e; +} + +void env_print(void) { + Env *current_env = env; + while (current_env) { + printf("%s = %d\n", current_env->var, current_env->val); + current_env = current_env->next; + } +} + + +static int eval_aexpr(ASTNode *n) { + switch (n->type) { + case NT_INT: return n->u.val; + case NT_VAR: return env_lookup(n->u.var); + case NT_AOP: { + int aexp1 = eval_aexpr(n->u.bin.exp1); + int aexp2 = eval_aexpr(n->u.bin.exp2); + switch (n->u.bin.op.aop) { + case AOP_ADD: return aexp1 + aexp2; + case AOP_SUB: return aexp1 - aexp2; + case AOP_MUL: return aexp1 * aexp2; + } + } + default: + fprintf(stderr, "Bad aexpr node %d\n", n->type); + exit(EXIT_FAILURE); + } +} + +static int eval_bexpr(ASTNode *n) { + switch (n->type) { + case NT_BOP: { + int bexp1 = eval_bexpr(n->u.bin.exp1); + int bexp2 = eval_bexpr(n->u.bin.exp2); + switch (n->u.bin.op.bop) { + case BOP_AND: return bexp1 && bexp2; + case BOP_OR: return bexp1 || bexp2; + } + } + case NT_NOT: + return !eval_bexpr(n->u.bexp); + case NT_ROP: { + int aexp1 = eval_aexpr(n->u.bin.exp1); + int aexp2 = eval_aexpr(n->u.bin.exp2); + switch (n->u.bin.op.rop) { + case ROP_EQ: return aexp1 == aexp2; + case ROP_NE: return aexp1 != aexp2; + case ROP_LT: return aexp1 < aexp2; + case ROP_LE: return aexp1 <= aexp2; + case ROP_GT: return aexp1 > aexp2; + case ROP_GE: return aexp1 >= aexp2; + } + } + default: + fprintf(stderr, "Bad bexpr node %d\n", n->type); + exit(EXIT_FAILURE); + } +} + +void exec_stmt(ASTNode *n) { + while (n) { + switch (n->type) { + case NT_SKIP: + return; + case NT_ASSIGN: { + char *var = n->u.assign.var->u.var; + int val = eval_aexpr(n->u.assign.aexp); + env_update(var, val); + return; + } + case NT_SEQ: + exec_stmt(n->u.seq.s1); + n = n->u.seq.s2; + continue; + case NT_IF: + if (eval_bexpr(n->u.cond.bexp)) + exec_stmt(n->u.cond.s1); + else + exec_stmt(n->u.cond.s2); + return; + case NT_WHILE: + while (eval_bexpr(n->u.cond.bexp)) { + exec_stmt(n->u.cond.s1); + } + return; + default: + fprintf(stderr, "Bad stmt node %d\n", n->type); + exit(EXIT_FAILURE); + } + } +} @@ -0,0 +1,43 @@ +#ifndef AST_H +#define AST_H + +typedef enum { + NT_SKIP, NT_ASSIGN, NT_SEQ, NT_IF, NT_WHILE, + NT_INT, NT_VAR, NT_AOP, NT_BOP, NT_NOT, NT_ROP +} NodeType; + +typedef enum { AOP_ADD, AOP_SUB, AOP_MUL } AOp; +typedef enum { BOP_AND, BOP_OR } BOp; +typedef enum { ROP_EQ, ROP_NE, ROP_LT, ROP_LE, ROP_GT, ROP_GE } ROp; + +typedef struct ASTNode { + NodeType type; + union { + int val; + char *var; + struct { struct ASTNode *var, *aexp; } assign; + struct { struct ASTNode *exp1, *exp2; union { AOp aop; BOp bop; ROp rop; } op;} bin; + struct ASTNode *bexp; + struct { struct ASTNode *bexp, *s1, *s2; } cond; + struct { struct ASTNode *s1, *s2; } seq; + } u; +} ASTNode; + +ASTNode *ast_skip(void); +ASTNode *ast_assign(ASTNode *var, ASTNode *aexp); +ASTNode *ast_seq(ASTNode *s1, ASTNode *s2); +ASTNode *ast_if(ASTNode *bexp, ASTNode *s1, ASTNode *s2); +ASTNode *ast_while(ASTNode *bexp, ASTNode *s); +ASTNode *ast_int(int val); +ASTNode *ast_var(char *var); +ASTNode *ast_aop(AOp, ASTNode *aexp1, ASTNode *aexp2); +ASTNode *ast_bop(BOp, ASTNode *bexp1, ASTNode *bexp2); +ASTNode *ast_not(ASTNode *bexp); +ASTNode *ast_rop(ROp, ASTNode *aexp1, ASTNode *aexp2); + +void exec_stmt(ASTNode *n); +void free_ast(ASTNode *n); +void env_print(void); + +#endif + diff --git a/driver.c b/driver.c new file mode 100644 index 0000000..d89c412 --- /dev/null +++ b/driver.c @@ -0,0 +1,30 @@ +#include <stdio.h> +#include <stdlib.h> +#include "ast.h" + +extern FILE *yyin; +extern int yyparse(void); +extern ASTNode *ast_root; + +int main(int argc, char **argv) { + if (argc > 1) { + yyin = fopen(argv[1], "r"); + if (!yyin) { + perror(argv[1]); + return EXIT_FAILURE; + } + } else { + yyin = stdin; + } + + if (yyparse() != 0) { + fprintf(stderr, "Parsing failed.\n"); + return EXIT_FAILURE; + } + + exec_stmt(ast_root); + env_print(); + free_ast(ast_root); + + return EXIT_SUCCESS; +} diff --git a/example.imp b/example.imp new file mode 100644 index 0000000..d87c891 --- /dev/null +++ b/example.imp @@ -0,0 +1 @@ +(x := 1; while x < 10 do x := (x * 2) end) @@ -0,0 +1,47 @@ +%option noyywrap yylineno + +%{ +#include "parser.tab.h" +%} + +DIGIT [0-9] +IDENT [A-Za-z][A-Za-z0-9]* +WS [ \t\r\n]+ + +%% + +"skip" { return TOKEN_SKIP; } +"if" { return TOKEN_IF; } +"then" { return TOKEN_THEN; } +"else" { return TOKEN_ELSE; } +"end" { return TOKEN_END; } +"while" { return TOKEN_WHILE; } +"do" { return TOKEN_DO; } + +"(" { return TOKEN_LEFT_PARENTHESIS; } +")" { return TOKEN_RIGHT_PARENTHESIS; } +";" { return TOKEN_SEMICOLON; } +":=" { return TOKEN_ASSIGN; } + +"+" { return TOKEN_PLUS; } +"-" { return TOKEN_MINUS; } +"*" { return TOKEN_MULTIPLY; } + +"or" { return TOKEN_OR; } +"and" { return TOKEN_AND; } +"not" { return TOKEN_NOT; } + +"=" { return TOKEN_EQUALS; } +"#" { return TOKEN_NOT_EQUALS; } +"<=" { return TOKEN_LESS_EQUAL; } +"<" { return TOKEN_LESS_THAN; } +">=" { return TOKEN_GREATER_EQUAL; } +">" { return TOKEN_GREATER_THAN; } + +{DIGIT}+ { yylval.num = atoi(yytext); return TOKEN_NUMERAL; } +{IDENT} { yylval.id = strdup(yytext); return TOKEN_IDENTIFIER; } + +{WS} { } +. { fprintf(stderr, "Unknown char: %s\n", yytext); } + +%% diff --git a/parser.y b/parser.y new file mode 100644 index 0000000..5974b96 --- /dev/null +++ b/parser.y @@ -0,0 +1,103 @@ +%{ +#include <stdio.h> +#include "ast.h" + +extern char *yytext; +extern int yylineno; +extern int yylex(); +void yyerror(const char *s) { fprintf(stderr, "Parse error at token \"%s\", line %d: %s\n", yytext, yylineno, s); } +ASTNode *ast_root; +%} + + +%union { + int num; + char *id; + int op; + struct ASTNode *node; +} + +%start program + +%token <id> TOKEN_IDENTIFIER +%token <num> TOKEN_NUMERAL +%token TOKEN_ASSIGN +%token TOKEN_LEFT_PARENTHESIS TOKEN_RIGHT_PARENTHESIS +%token TOKEN_SEMICOLON +%token TOKEN_SKIP +%token TOKEN_IF TOKEN_THEN TOKEN_ELSE TOKEN_END TOKEN_WHILE TOKEN_DO +%token TOKEN_PLUS TOKEN_MINUS TOKEN_MULTIPLY +%token TOKEN_NOT TOKEN_OR TOKEN_AND +%token TOKEN_EQUALS TOKEN_NOT_EQUALS TOKEN_LESS_THAN TOKEN_LESS_EQUAL TOKEN_GREATER_THAN TOKEN_GREATER_EQUAL + +%type <node> program statement variable arithmetic_expression boolean_expression +%type <op> arithmetic_operation boolean_operation relational_operation + + +%% + +program : statement + { ast_root = $1; } + ; + +statement : TOKEN_SKIP + { $$ = ast_skip(); } + | variable TOKEN_ASSIGN arithmetic_expression + { $$ = ast_assign($1, $3); } + | TOKEN_LEFT_PARENTHESIS statement TOKEN_SEMICOLON statement TOKEN_RIGHT_PARENTHESIS + { $$ = ast_seq($2, $4); } + | TOKEN_IF boolean_expression TOKEN_THEN statement TOKEN_ELSE statement TOKEN_END + { $$ = ast_if($2, $4, $6); } + | TOKEN_WHILE boolean_expression TOKEN_DO statement TOKEN_END + { $$ = ast_while($2, $4); } + ; + +variable : TOKEN_IDENTIFIER + { $$ = ast_var($1); } + ; + +arithmetic_expression : TOKEN_LEFT_PARENTHESIS arithmetic_expression arithmetic_operation arithmetic_expression TOKEN_RIGHT_PARENTHESIS + { $$ = ast_aop($3, $2, $4); } + | variable + { $$ = $1; } + | TOKEN_NUMERAL + { $$ = ast_int($1); } + ; + +arithmetic_operation : TOKEN_PLUS + { $$ = AOP_ADD; } + | TOKEN_MINUS + { $$ = AOP_SUB; } + | TOKEN_MULTIPLY + { $$ = AOP_MUL; } + ; + +boolean_expression : TOKEN_LEFT_PARENTHESIS boolean_expression boolean_operation boolean_expression TOKEN_RIGHT_PARENTHESIS + { $$ = ast_bop($3, $2, $4); } + | TOKEN_NOT boolean_expression + { $$ = ast_not($2); } + | arithmetic_expression relational_operation arithmetic_expression + { $$ = ast_rop($2, $1, $3); } + ; + +boolean_operation : TOKEN_OR + { $$ = BOP_OR; } + | TOKEN_AND + { $$ = BOP_AND; } + ; + +relational_operation : TOKEN_EQUALS + { $$ = ROP_EQ; } + | TOKEN_NOT_EQUALS + { $$ = ROP_NE; } + | TOKEN_LESS_THAN + { $$ = ROP_LT; } + | TOKEN_LESS_EQUAL + { $$ = ROP_LE; } + | TOKEN_GREATER_THAN + { $$ = ROP_GT; } + | TOKEN_GREATER_EQUAL + { $$ = ROP_GE; } + ; + +%% |