aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFlavian Kaufmann <flavian@flaviankaufmann.ch>2025-05-20 07:10:19 +0200
committerFlavian Kaufmann <flavian@flaviankaufmann.ch>2025-05-20 07:10:19 +0200
commit98be4ba8e6050f33fd47786de066b165f74a115d (patch)
treefc33633698c1941a51241ba06eff4a135572e161
downloadimp-98be4ba8e6050f33fd47786de066b165f74a115d.tar.gz
imp-98be4ba8e6050f33fd47786de066b165f74a115d.zip
initial
-rw-r--r--Makefile51
-rw-r--r--ast.c251
-rw-r--r--ast.h43
-rw-r--r--driver.c30
-rw-r--r--example.imp1
-rw-r--r--lexer.l47
-rw-r--r--parser.y103
7 files changed, 526 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..33bf25d
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,51 @@
+CC := gcc
+CFLAGS := -Wall -g -I.
+BISON := bison
+FLEX := flex
+
+BUILD_DIR := build
+
+PARSER_Y := parser.y
+LEXER_L := lexer.l
+DRIVER_C := driver.c
+AST_C := ast.c
+AST_H := ast.h
+
+PARSER_C := $(BUILD_DIR)/parser.tab.c
+PARSER_H := $(BUILD_DIR)/parser.tab.h
+LEXER_C := $(BUILD_DIR)/lex.yy.c
+OBJS := $(BUILD_DIR)/parser.tab.o $(BUILD_DIR)/lex.yy.o $(BUILD_DIR)/driver.o $(BUILD_DIR)/ast.o
+
+TARGET := $(BUILD_DIR)/imp
+
+.PHONY: all clean
+
+all: $(TARGET)
+
+$(TARGET): $(OBJS)
+ $(CC) $(CFLAGS) -o $@ $^
+
+$(BUILD_DIR):
+ mkdir -p $(BUILD_DIR)
+
+$(PARSER_C) $(PARSER_H): $(PARSER_Y) | $(BUILD_DIR)
+ $(BISON) -d --defines=$(PARSER_H) -o $(PARSER_C) $(PARSER_Y)
+
+$(LEXER_C): $(LEXER_L) $(PARSER_H) | $(BUILD_DIR)
+ $(FLEX) -o $@ $<
+
+$(BUILD_DIR)/imp_parser.tab.o: $(PARSER_C) $(PARSER_H)
+ $(CC) $(CFLAGS) -c $(PARSER_C) -o $@
+
+$(BUILD_DIR)/lex.yy.o: $(LEXER_C) $(PARSER_H)
+ $(CC) $(CFLAGS) -c $(LEXER_C) -o $@
+
+$(BUILD_DIR)/driver.o: $(DRIVER_C) $(PARSER_H) $(AST_H) | $(BUILD_DIR)
+ $(CC) $(CFLAGS) -c $(DRIVER_C) -o $@
+
+$(BUILD_DIR)/ast.o: $(AST_C) $(AST_H) | $(BUILD_DIR)
+ $(CC) $(CFLAGS) -c $(AST_C) -o $@
+
+clean:
+ rm -rf $(BUILD_DIR)
+
diff --git a/ast.c b/ast.c
new file mode 100644
index 0000000..ab7e1e0
--- /dev/null
+++ b/ast.c
@@ -0,0 +1,251 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "ast.h"
+
+static ASTNode *new_node(NodeType type) {
+ ASTNode *n = malloc(sizeof(ASTNode));
+ if (!n) {
+ fprintf(stderr, "Out of memory\n");
+ exit(EXIT_FAILURE);
+ }
+ n->type = type;
+ return n;
+}
+
+ASTNode *ast_skip(void) {
+ return new_node(NT_SKIP);
+}
+
+ASTNode *ast_assign(ASTNode *var, ASTNode *aexp) {
+ ASTNode *n = new_node(NT_ASSIGN);
+ n->u.assign.var = var;
+ n->u.assign.aexp = aexp;
+ return n;
+}
+
+ASTNode *ast_seq(ASTNode *s1, ASTNode *s2) {
+ ASTNode *n = new_node(NT_SEQ);
+ n->u.seq.s1 = s1;
+ n->u.seq.s2 = s2;
+ return n;
+}
+
+ASTNode *ast_if(ASTNode *bexp, ASTNode *s1, ASTNode *s2) {
+ ASTNode *n = new_node(NT_IF);
+ n->u.cond.bexp = bexp;
+ n->u.cond.s1 = s1;
+ n->u.cond.s2 = s2;
+ return n;
+}
+
+ASTNode *ast_while(ASTNode *bexp, ASTNode *s) {
+ ASTNode *n = new_node(NT_WHILE);
+ n->u.cond.bexp = bexp;
+ n->u.cond.s1 = s;
+ return n;
+}
+
+ASTNode *ast_int(int val) {
+ ASTNode *n = new_node(NT_INT);
+ n->u.val = val;
+ return n;
+}
+
+ASTNode *ast_var(char *var) {
+ ASTNode *n = new_node(NT_VAR);
+ n->u.var = strdup(var);
+ return n;
+}
+
+ASTNode *ast_aop(AOp op, ASTNode *aexp1, ASTNode *aexp2) {
+ ASTNode *n = new_node(NT_AOP);
+ n->u.bin.exp1 = aexp1;
+ n->u.bin.exp2 = aexp2;
+ n->u.bin.op.aop = op;
+ return n;
+}
+
+ASTNode *ast_bop(BOp op, ASTNode *bexp1, ASTNode *bexp2) {
+ ASTNode *n = new_node(NT_BOP);
+ n->u.bin.exp1 = bexp1;
+ n->u.bin.exp2 = bexp2;
+ n->u.bin.op.bop = op;
+ return n;
+}
+
+ASTNode *ast_not(ASTNode *bexp) {
+ ASTNode *n = new_node(NT_NOT);
+ n->u.bexp = bexp;
+ return n;
+}
+
+ASTNode *ast_rop(ROp op, ASTNode *aexp1, ASTNode *aexp2) {
+ ASTNode *n = new_node(NT_ROP);
+ n->u.bin.exp1 = aexp1;
+ n->u.bin.exp2 = aexp2;
+ n->u.bin.op.rop = op;
+ return n;
+}
+
+void free_ast(ASTNode *n) {
+ if (!n) return;
+ switch (n->type) {
+ case NT_SKIP:
+ break;
+ case NT_ASSIGN:
+ free_ast(n->u.assign.var);
+ free_ast(n->u.assign.aexp);
+ break;
+ case NT_SEQ:
+ free_ast(n->u.seq.s1);
+ free_ast(n->u.seq.s2);
+ break;
+ case NT_IF:
+ free_ast(n->u.cond.bexp);
+ free_ast(n->u.cond.s1);
+ free_ast(n->u.cond.s2);
+ break;
+ case NT_WHILE:
+ free_ast(n->u.cond.bexp);
+ free_ast(n->u.cond.s1);
+ break;
+ case NT_INT:
+ break;
+ case NT_VAR:
+ free(n->u.var);
+ break;
+ case NT_AOP:
+ case NT_BOP:
+ case NT_ROP:
+ free_ast(n->u.bin.exp1);
+ free_ast(n->u.bin.exp2);
+ break;
+ case NT_NOT:
+ free_ast(n->u.bexp);
+ break;
+ }
+ free(n);
+}
+
+typedef struct Env {
+ char *var;
+ int val;
+ struct Env *next;
+} Env;
+
+static Env *env = NULL;
+
+static int env_lookup(const char *var) {
+ for (Env *e = env; e; e = e->next) {
+ if (strcmp(e->var, var) == 0)
+ return e->val;
+ }
+ fprintf(stderr, "Undefined variable %s\n", var);
+ exit(EXIT_FAILURE);
+}
+
+static void env_update(const char *var, int val) {
+ for (Env *e = env; e; e = e->next) {
+ if (strcmp(e->var, var) == 0) {
+ e->val = val;
+ return;
+ }
+ }
+ Env *e = malloc(sizeof(Env));
+ e->var = strdup(var);
+ e->val = val;
+ e->next = env;
+ env = e;
+}
+
+void env_print(void) {
+ Env *current_env = env;
+ while (current_env) {
+ printf("%s = %d\n", current_env->var, current_env->val);
+ current_env = current_env->next;
+ }
+}
+
+
+static int eval_aexpr(ASTNode *n) {
+ switch (n->type) {
+ case NT_INT: return n->u.val;
+ case NT_VAR: return env_lookup(n->u.var);
+ case NT_AOP: {
+ int aexp1 = eval_aexpr(n->u.bin.exp1);
+ int aexp2 = eval_aexpr(n->u.bin.exp2);
+ switch (n->u.bin.op.aop) {
+ case AOP_ADD: return aexp1 + aexp2;
+ case AOP_SUB: return aexp1 - aexp2;
+ case AOP_MUL: return aexp1 * aexp2;
+ }
+ }
+ default:
+ fprintf(stderr, "Bad aexpr node %d\n", n->type);
+ exit(EXIT_FAILURE);
+ }
+}
+
+static int eval_bexpr(ASTNode *n) {
+ switch (n->type) {
+ case NT_BOP: {
+ int bexp1 = eval_bexpr(n->u.bin.exp1);
+ int bexp2 = eval_bexpr(n->u.bin.exp2);
+ switch (n->u.bin.op.bop) {
+ case BOP_AND: return bexp1 && bexp2;
+ case BOP_OR: return bexp1 || bexp2;
+ }
+ }
+ case NT_NOT:
+ return !eval_bexpr(n->u.bexp);
+ case NT_ROP: {
+ int aexp1 = eval_aexpr(n->u.bin.exp1);
+ int aexp2 = eval_aexpr(n->u.bin.exp2);
+ switch (n->u.bin.op.rop) {
+ case ROP_EQ: return aexp1 == aexp2;
+ case ROP_NE: return aexp1 != aexp2;
+ case ROP_LT: return aexp1 < aexp2;
+ case ROP_LE: return aexp1 <= aexp2;
+ case ROP_GT: return aexp1 > aexp2;
+ case ROP_GE: return aexp1 >= aexp2;
+ }
+ }
+ default:
+ fprintf(stderr, "Bad bexpr node %d\n", n->type);
+ exit(EXIT_FAILURE);
+ }
+}
+
+void exec_stmt(ASTNode *n) {
+ while (n) {
+ switch (n->type) {
+ case NT_SKIP:
+ return;
+ case NT_ASSIGN: {
+ char *var = n->u.assign.var->u.var;
+ int val = eval_aexpr(n->u.assign.aexp);
+ env_update(var, val);
+ return;
+ }
+ case NT_SEQ:
+ exec_stmt(n->u.seq.s1);
+ n = n->u.seq.s2;
+ continue;
+ case NT_IF:
+ if (eval_bexpr(n->u.cond.bexp))
+ exec_stmt(n->u.cond.s1);
+ else
+ exec_stmt(n->u.cond.s2);
+ return;
+ case NT_WHILE:
+ while (eval_bexpr(n->u.cond.bexp)) {
+ exec_stmt(n->u.cond.s1);
+ }
+ return;
+ default:
+ fprintf(stderr, "Bad stmt node %d\n", n->type);
+ exit(EXIT_FAILURE);
+ }
+ }
+}
diff --git a/ast.h b/ast.h
new file mode 100644
index 0000000..67d6228
--- /dev/null
+++ b/ast.h
@@ -0,0 +1,43 @@
+#ifndef AST_H
+#define AST_H
+
+typedef enum {
+ NT_SKIP, NT_ASSIGN, NT_SEQ, NT_IF, NT_WHILE,
+ NT_INT, NT_VAR, NT_AOP, NT_BOP, NT_NOT, NT_ROP
+} NodeType;
+
+typedef enum { AOP_ADD, AOP_SUB, AOP_MUL } AOp;
+typedef enum { BOP_AND, BOP_OR } BOp;
+typedef enum { ROP_EQ, ROP_NE, ROP_LT, ROP_LE, ROP_GT, ROP_GE } ROp;
+
+typedef struct ASTNode {
+ NodeType type;
+ union {
+ int val;
+ char *var;
+ struct { struct ASTNode *var, *aexp; } assign;
+ struct { struct ASTNode *exp1, *exp2; union { AOp aop; BOp bop; ROp rop; } op;} bin;
+ struct ASTNode *bexp;
+ struct { struct ASTNode *bexp, *s1, *s2; } cond;
+ struct { struct ASTNode *s1, *s2; } seq;
+ } u;
+} ASTNode;
+
+ASTNode *ast_skip(void);
+ASTNode *ast_assign(ASTNode *var, ASTNode *aexp);
+ASTNode *ast_seq(ASTNode *s1, ASTNode *s2);
+ASTNode *ast_if(ASTNode *bexp, ASTNode *s1, ASTNode *s2);
+ASTNode *ast_while(ASTNode *bexp, ASTNode *s);
+ASTNode *ast_int(int val);
+ASTNode *ast_var(char *var);
+ASTNode *ast_aop(AOp, ASTNode *aexp1, ASTNode *aexp2);
+ASTNode *ast_bop(BOp, ASTNode *bexp1, ASTNode *bexp2);
+ASTNode *ast_not(ASTNode *bexp);
+ASTNode *ast_rop(ROp, ASTNode *aexp1, ASTNode *aexp2);
+
+void exec_stmt(ASTNode *n);
+void free_ast(ASTNode *n);
+void env_print(void);
+
+#endif
+
diff --git a/driver.c b/driver.c
new file mode 100644
index 0000000..d89c412
--- /dev/null
+++ b/driver.c
@@ -0,0 +1,30 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "ast.h"
+
+extern FILE *yyin;
+extern int yyparse(void);
+extern ASTNode *ast_root;
+
+int main(int argc, char **argv) {
+ if (argc > 1) {
+ yyin = fopen(argv[1], "r");
+ if (!yyin) {
+ perror(argv[1]);
+ return EXIT_FAILURE;
+ }
+ } else {
+ yyin = stdin;
+ }
+
+ if (yyparse() != 0) {
+ fprintf(stderr, "Parsing failed.\n");
+ return EXIT_FAILURE;
+ }
+
+ exec_stmt(ast_root);
+ env_print();
+ free_ast(ast_root);
+
+ return EXIT_SUCCESS;
+}
diff --git a/example.imp b/example.imp
new file mode 100644
index 0000000..d87c891
--- /dev/null
+++ b/example.imp
@@ -0,0 +1 @@
+(x := 1; while x < 10 do x := (x * 2) end)
diff --git a/lexer.l b/lexer.l
new file mode 100644
index 0000000..4895b24
--- /dev/null
+++ b/lexer.l
@@ -0,0 +1,47 @@
+%option noyywrap yylineno
+
+%{
+#include "parser.tab.h"
+%}
+
+DIGIT [0-9]
+IDENT [A-Za-z][A-Za-z0-9]*
+WS [ \t\r\n]+
+
+%%
+
+"skip" { return TOKEN_SKIP; }
+"if" { return TOKEN_IF; }
+"then" { return TOKEN_THEN; }
+"else" { return TOKEN_ELSE; }
+"end" { return TOKEN_END; }
+"while" { return TOKEN_WHILE; }
+"do" { return TOKEN_DO; }
+
+"(" { return TOKEN_LEFT_PARENTHESIS; }
+")" { return TOKEN_RIGHT_PARENTHESIS; }
+";" { return TOKEN_SEMICOLON; }
+":=" { return TOKEN_ASSIGN; }
+
+"+" { return TOKEN_PLUS; }
+"-" { return TOKEN_MINUS; }
+"*" { return TOKEN_MULTIPLY; }
+
+"or" { return TOKEN_OR; }
+"and" { return TOKEN_AND; }
+"not" { return TOKEN_NOT; }
+
+"=" { return TOKEN_EQUALS; }
+"#" { return TOKEN_NOT_EQUALS; }
+"<=" { return TOKEN_LESS_EQUAL; }
+"<" { return TOKEN_LESS_THAN; }
+">=" { return TOKEN_GREATER_EQUAL; }
+">" { return TOKEN_GREATER_THAN; }
+
+{DIGIT}+ { yylval.num = atoi(yytext); return TOKEN_NUMERAL; }
+{IDENT} { yylval.id = strdup(yytext); return TOKEN_IDENTIFIER; }
+
+{WS} { }
+. { fprintf(stderr, "Unknown char: %s\n", yytext); }
+
+%%
diff --git a/parser.y b/parser.y
new file mode 100644
index 0000000..5974b96
--- /dev/null
+++ b/parser.y
@@ -0,0 +1,103 @@
+%{
+#include <stdio.h>
+#include "ast.h"
+
+extern char *yytext;
+extern int yylineno;
+extern int yylex();
+void yyerror(const char *s) { fprintf(stderr, "Parse error at token \"%s\", line %d: %s\n", yytext, yylineno, s); }
+ASTNode *ast_root;
+%}
+
+
+%union {
+ int num;
+ char *id;
+ int op;
+ struct ASTNode *node;
+}
+
+%start program
+
+%token <id> TOKEN_IDENTIFIER
+%token <num> TOKEN_NUMERAL
+%token TOKEN_ASSIGN
+%token TOKEN_LEFT_PARENTHESIS TOKEN_RIGHT_PARENTHESIS
+%token TOKEN_SEMICOLON
+%token TOKEN_SKIP
+%token TOKEN_IF TOKEN_THEN TOKEN_ELSE TOKEN_END TOKEN_WHILE TOKEN_DO
+%token TOKEN_PLUS TOKEN_MINUS TOKEN_MULTIPLY
+%token TOKEN_NOT TOKEN_OR TOKEN_AND
+%token TOKEN_EQUALS TOKEN_NOT_EQUALS TOKEN_LESS_THAN TOKEN_LESS_EQUAL TOKEN_GREATER_THAN TOKEN_GREATER_EQUAL
+
+%type <node> program statement variable arithmetic_expression boolean_expression
+%type <op> arithmetic_operation boolean_operation relational_operation
+
+
+%%
+
+program : statement
+ { ast_root = $1; }
+ ;
+
+statement : TOKEN_SKIP
+ { $$ = ast_skip(); }
+ | variable TOKEN_ASSIGN arithmetic_expression
+ { $$ = ast_assign($1, $3); }
+ | TOKEN_LEFT_PARENTHESIS statement TOKEN_SEMICOLON statement TOKEN_RIGHT_PARENTHESIS
+ { $$ = ast_seq($2, $4); }
+ | TOKEN_IF boolean_expression TOKEN_THEN statement TOKEN_ELSE statement TOKEN_END
+ { $$ = ast_if($2, $4, $6); }
+ | TOKEN_WHILE boolean_expression TOKEN_DO statement TOKEN_END
+ { $$ = ast_while($2, $4); }
+ ;
+
+variable : TOKEN_IDENTIFIER
+ { $$ = ast_var($1); }
+ ;
+
+arithmetic_expression : TOKEN_LEFT_PARENTHESIS arithmetic_expression arithmetic_operation arithmetic_expression TOKEN_RIGHT_PARENTHESIS
+ { $$ = ast_aop($3, $2, $4); }
+ | variable
+ { $$ = $1; }
+ | TOKEN_NUMERAL
+ { $$ = ast_int($1); }
+ ;
+
+arithmetic_operation : TOKEN_PLUS
+ { $$ = AOP_ADD; }
+ | TOKEN_MINUS
+ { $$ = AOP_SUB; }
+ | TOKEN_MULTIPLY
+ { $$ = AOP_MUL; }
+ ;
+
+boolean_expression : TOKEN_LEFT_PARENTHESIS boolean_expression boolean_operation boolean_expression TOKEN_RIGHT_PARENTHESIS
+ { $$ = ast_bop($3, $2, $4); }
+ | TOKEN_NOT boolean_expression
+ { $$ = ast_not($2); }
+ | arithmetic_expression relational_operation arithmetic_expression
+ { $$ = ast_rop($2, $1, $3); }
+ ;
+
+boolean_operation : TOKEN_OR
+ { $$ = BOP_OR; }
+ | TOKEN_AND
+ { $$ = BOP_AND; }
+ ;
+
+relational_operation : TOKEN_EQUALS
+ { $$ = ROP_EQ; }
+ | TOKEN_NOT_EQUALS
+ { $$ = ROP_NE; }
+ | TOKEN_LESS_THAN
+ { $$ = ROP_LT; }
+ | TOKEN_LESS_EQUAL
+ { $$ = ROP_LE; }
+ | TOKEN_GREATER_THAN
+ { $$ = ROP_GT; }
+ | TOKEN_GREATER_EQUAL
+ { $$ = ROP_GE; }
+ ;
+
+%%