mips-cc

A little C compiler
git clone git@git.mpah.dev/mips-cc.git
Log | Files | Refs | README

commit ae81e24f4065f5fdd05679e50e8d2e8f98402fcd
Author: Martin Hart <114212671+m4rtinh4rt@users.noreply.github.com>
Date:   Thu, 17 Oct 2024 22:16:56 +0200

Initial commit

Diffstat:
AMakefile | 7+++++++
AREADME.md | 1+
Aast.ml | 157+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Abaselib.ml | 144+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Acompiler.ml | 141+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alexer.mll | 85+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amips.ml | 126+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aparser.mly | 201+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asemantics.ml | 124+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asimplifier.ml | 84+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest.ml | 31+++++++++++++++++++++++++++++++
Atests/0.test | 1+
Atests/1.test | 1+
Atests/10.test | 7+++++++
Atests/11.test | 10++++++++++
Atests/12.test | 11+++++++++++
Atests/13.test | 12++++++++++++
Atests/14.test | 11+++++++++++
Atests/15.test | 11+++++++++++
Atests/16.test | 15+++++++++++++++
Atests/17.test | 12++++++++++++
Atests/18.test | 10++++++++++
Atests/19.test | 13+++++++++++++
Atests/2.test | 1+
Atests/3.test | 1+
Atests/4.test | 2++
Atests/5.test | 3+++
Atests/6.test | 3+++
Atests/7.test | 3+++
Atests/8.test | 5+++++
Atests/9.test | 4++++
31 files changed, 1237 insertions(+), 0 deletions(-)

diff --git a/Makefile b/Makefile @@ -0,0 +1,7 @@ +all: + @ocamlbuild -use-menhir test.byte + +clean: + @ocamlbuild -clean + +.PHONY: all clean diff --git a/README.md b/README.md @@ -0,0 +1 @@ +# mips-cc diff --git a/ast.ml b/ast.ml @@ -0,0 +1,157 @@ +type type_t = + | Int_t + | Float_t + | Char_t + | Str_t + | Void_t + | Bool_t + | Func_t of type_t * type_t list + +let rec string_of_type_t t = + match t with + | Int_t -> "int" + | Float_t -> "float" + | Str_t -> "string" + | Char_t -> "char" + | Void_t -> "void" + | Bool_t -> "bool" + | Func_t (r, a) -> + (if (List.length a) > 1 then "(" else "") + ^ (String.concat ", " (List.map string_of_type_t a)) + ^ (if (List.length a) > 1 then ")" else "") + ^ " -> " ^ (string_of_type_t r) + +module Syntax = struct + type ident = string + type expr = + | Void of { pos: Lexing.position } + | Bool of { value: bool + ; pos: Lexing.position } + | Int of { value: int + ; pos: Lexing.position } + | Float of { value: float + ; pos: Lexing.position } + | Char of { value: string + ; pos: Lexing.position } + | Str of { value: string + ; pos: Lexing.position } + | Var of { name: ident + ; pos: Lexing.position } + | Call of { func: ident + ; args: expr list + ; pos: Lexing.position } + + type instr = + | Assign of { var: ident + ; expr: expr + ; pos: Lexing.position } + | Return of { expr: expr + ; pos: Lexing.position } + | Decl of { var: ident + ; type_: type_t + ; pos: Lexing.position } + | Expr of { expr: expr + ; pos: Lexing.position } + + | Cond of { expr: expr + ; b1: block + ; b2: block + ; pos: Lexing.position } + + | Loop of { expr: expr + ; b: block + ; pos: Lexing.position } + + and block = instr list + + type def = + Func of { ftype: type_t + ; name: ident + ; args: (type_t * ident) list + ; body: block + ; pos: Lexing.position } + and prog = def list +end + +module IR = struct + type ident = string + type expr = + | Void + | Int of int + | Float of float + | Bool of bool + | Char of string + | Str of string + | Var of ident + | Call of ident * expr list + type instr = + | Assign of ident * expr + | Return of expr + | Decl of ident + | Expr of expr + | Cond of expr * block * block + | Loop of expr * block + and block = instr list + + type def = + Func of ident * ident list * block + and prog = def list + + let string_of_ir ast = + let rec fmt_e = function + | Void -> "Void" + | Bool b -> "Bool " ^ (string_of_bool b) + | Int n -> "Int " ^ (string_of_int n) + | Float n -> "Float " ^ (string_of_float n) + | Char c -> "Char " ^ c + | Str s -> "Str \"" ^ s ^ "\"" + | Var v -> "Var \"" ^ v ^ "\"" + | Call (f, a) -> "Call (\"" ^ f ^ "\", [ " + ^ (String.concat " ; " (List.map fmt_e a)) + ^ " ])" + and fmt_i = function + | Loop (e, b) -> "Loop (" ^ (fmt_e e) ^ ", " ^ (fmt_b b) ^ ")" + | Cond (e, b1, b2) -> "Cond (" ^ (fmt_e e) ^ ", " ^ (fmt_b b1) ^ ", " ^ (fmt_b b2) ^ ")" + | Decl v -> "Decl \"" ^ v ^ "\"" + | Assign (v, e) -> "Assign (\"" ^ v ^ "\", " ^ (fmt_e e) ^ ")" + | Return e -> "Return (" ^ (fmt_e e) ^ ")" + | Expr e -> "Expr (" ^ (fmt_e e) ^ ")" + + and fmt_b b = "\t[ " ^ (String.concat "\n\t; " (List.map fmt_i b)) ^ " ]" + and fmt_arg a = "\"" ^ a ^ "\"" + and fmt_d = function + | Func (name, args, b) -> "[ Func (\"" ^ name ^ "\", " ^ "[ " ^ + (String.concat "; " (List.map fmt_arg args)) ^ " ],\n" ^ + fmt_b b ^ ")" ^ "]" + and fmt_p p = "[\n " ^ (String.concat "\n " (List.map fmt_d p)) ^ "\n]" + in fmt_p ast +end + +module IR2 = struct + type ident = string + type expr = + | Nil + | Bool of bool + | Int of int + | Float of float + | Char of char + | Data of string + | Var of ident + | Call of ident * expr list +(* + type lvalue = + | LVar of ident + | LAddr of expr +*) + type instr = + | Decl of ident + | Return of expr + | Expr of expr + | Assign of expr * expr + | Cond of expr * block * block + | Loop of expr * block + and block = instr list + type def = + | Func of ident * ident list * block + type prog = def list +end diff --git a/baselib.ml b/baselib.ml @@ -0,0 +1,144 @@ +open Ast +open Mips + +module Env = Map.Make(String) + +let _types_ = + Env.of_seq + (List.to_seq + [ "_add", Func_t (Int_t, [ Int_t ; Int_t ]) + ; "_sub", Func_t (Int_t, [ Int_t ; Int_t ]) + ; "_mul", Func_t (Int_t, [ Int_t ; Int_t ]) + ; "_div", Func_t (Int_t, [ Int_t ; Int_t ]) + ; "_mod", Func_t (Int_t, [ Int_t ; Int_t ]) + ; "_puti", Func_t (Void_t, [ Int_t ]) + ; "_puts", Func_t (Void_t, [ Str_t ]) + ; "_geti", Func_t (Int_t, [ ]) + ; "_eq", Func_t (Bool_t, [ Int_t ; Int_t ]) + ; "_neq", Func_t (Bool_t, [ Int_t ; Int_t ]) + ; "_ge", Func_t (Bool_t, [ Int_t ; Int_t ]) + ; "_gt", Func_t (Bool_t, [ Int_t ; Int_t ]) + ; "_le", Func_t (Bool_t, [ Int_t ; Int_t ]) + ; "_lt", Func_t (Bool_t, [ Int_t ; Int_t ]) + ; "_xor", Func_t (Bool_t, [ Int_t ; Int_t ]) + ; "_and", Func_t (Bool_t, [ Int_t ; Int_t ]) + ; "_or", Func_t (Bool_t, [ Int_t ; Int_t ]) + ; "_not", Func_t (Bool_t, [ Int_t ]) + ; "_rev", Func_t (Int_t, [ Int_t ]) + ]) + +let builtins = + [ Label "_add" + ; Lw (T0, Mem (SP, 0)) + ; Lw (T1, Mem (SP, 4)) + ; Add (V0, T0, T1) + ; Jr RA + + ; Label "_sub" + ; Lw (T0, Mem (SP, 0)) + ; Lw (T1, Mem (SP, 4)) + ; Sub (V0, T0, T1) + ; Jr RA + + ; Label "_mul" + ; Lw (T0, Mem (SP, 0)) + ; Lw (T1, Mem (SP, 4)) + ; Mul (V0, T0, T1) + ; Jr RA + + ; Label "_div" + ; Lw (T0, Mem (SP, 0)) + ; Lw (T1, Mem (SP, 4)) + ; Div (V0, T0, T1) + ; Jr RA + + ; Label "_mod" + ; Lw (T0, Mem (SP, 0)) + ; Lw (T1, Mem (SP, 4)) + ; Div (V0, T0, T1) + ; Mfhi V0 + ; Jr RA + + ; Label "_puti" + ; Lw (A0, Mem (SP, 0)) + ; Li (V0, Syscall.print_int) + ; Syscall + ; Jr RA + + ; Label "_geti" + ; Lw (A0, Mem (SP, 0)) + ; Li (V0, Syscall.read_int) + ; Syscall + ; Jr RA + + ; Label "_puts" + ; Lw (A0, Mem (SP, 0)) + ; Li (V0, Syscall.print_str) + ; Syscall + ; Jr RA + + ; Label "_eq" + ; Lw (T0, Mem (SP, 0)) + ; Lw (T1, Mem (SP, 4)) + ; Seq (V0, T0, T1) + ; Jr RA + + ; Label "_neq" + ; Lw (T0, Mem (SP, 0)) + ; Lw (T1, Mem (SP, 4)) + ; Sne (V0, T0, T1) + ; Jr RA + + ; Label "_ge" + ; Lw (T0, Mem (SP, 0)) + ; Lw (T1, Mem (SP, 4)) + ; Sge (V0, T0, T1) + ; Jr RA + + ; Label "_gt" + ; Lw (T0, Mem (SP, 0)) + ; Lw (T1, Mem (SP, 4)) + ; Sgt (V0, T0, T1) + ; Jr RA + + ; Label "_le" + ; Lw (T0, Mem (SP, 0)) + ; Lw (T1, Mem (SP, 4)) + ; Sle (V0, T0, T1) + ; Jr RA + + ; Label "_lt" + ; Lw (T0, Mem (SP, 0)) + ; Lw (T1, Mem (SP, 4)) + ; Slt (V0, T0, T1) + ; Jr RA + + ; Label "_xor" + ; Lw (T0, Mem (SP, 0)) + ; Lw (T1, Mem (SP, 4)) + ; Xor (V0, T0, T1) + ; Jr RA + + ; Label "_or" + ; Lw (T0, Mem (SP, 0)) + ; Lw (T1, Mem (SP, 4)) + ; Or (V0, T0, T1) + ; Jr RA + + ; Label "_and" + ; Lw (T0, Mem (SP, 0)) + ; Lw (T1, Mem (SP, 4)) + ; And (V0, T0, T1) + ; Jr RA + + ; Label "_rev" + ; Lw (T0, Mem (SP, 0)) + ; Sub (V0, T0, Zero) + ; Jr RA + + ; Label "_not" + ; Lw (T0, Mem (SP, 0)) + ; Seq (V0, T0, Zero) + ; Jr RA + + ] diff --git a/compiler.ml b/compiler.ml @@ -0,0 +1,141 @@ +open Mips +open Ast.IR2 + +module Env = Map.Make(String) + +type cinfo = { code: Mips.instr list + ; env: Mips.loc Env.t + ; fpo: int + ; counter: int + ; return: string } + +let rec compile_expr e env = + match e with +(* | Value v -> compile_value v *) + | Data l -> [ La (V0, Lbl (l)) ] + | Bool b -> [ Li (V0, if b then 1 else 0) ] + | Nil -> [ Li (V0, 0) ] + | Int n -> [ Li (V0, n) ] + | Float f -> [ Lis (F0, f) ] + | Char c -> [ Li (V0, (int_of_char c)) ] + | Var v -> [ Lw (V0, Env.find v env) ] + | Call (f, args) -> + let ca = List.map (fun a -> + compile_expr a env + @ [ Addi (SP, SP, -4) + ; Sw (V0, Mem (SP, 0)) ]) + args in + List.flatten ca + @ [ Jal f + ; Addi (SP, SP, 4 * (List.length args)) ] + +let rec compile_instr i info = + match i with + | Decl v -> + { info with + env = Env.add v (Mem (FP, -info.fpo)) info.env + ; fpo = info.fpo + 4 } + | Return e -> + { info with + code = info.code + @ compile_expr e info.env + @ [ B info.return ] } + | Expr e -> + { info with + code = info.code + @ compile_expr e info.env } + + | Assign (lv, e) -> + { info with + code = (info.code + @ compile_expr e info.env + @ + match lv with + | Var v -> [ Sw (V0, Env.find v info.env) ]) + } + +(* + TODO + | Assign (lv, e) -> { info with + code = info.code + @ compile_expr e info.env + @ (match lv with + | LVar v -> [ Sw (V0, Env.find v info.env) ] + | LAddr a -> [] + @ [ Addi (SP, SP, -4) + ; Sw (V0, Mem (SP, 0)) ] + @ compile_expr a info.env + @ [ Lw (T0, Mem (SP, 0)) + ; Addi (SP, SP, 4) + ; Sw (T0, Mem (V0, 0)) ]) } +*) + | Loop (c, t) -> + let uniq = string_of_int info.counter in + let ct = compile_block t { info with code = [] + ; counter = info.counter + 1 } in + { info with + code = info.code + @ [ Label ("startloop" ^ uniq) ] + @ compile_expr c info.env + @ [ Beqz (V0, "endloop" ^ uniq) ] + @ ct.code + @ [ B ("startloop" ^ uniq) ] + @ [ Label ("endloop" ^ uniq) ] + ; counter = ct.counter } + + | Cond (c, t, e) -> + let uniq = string_of_int info.counter in + let ct = compile_block t { info with code = [] + ; counter = info.counter + 1 } in + let ce = compile_block e { info with code = [] + ; counter = ct.counter } in + { info with + code = info.code + @ compile_expr c info.env + @ [ Beqz (V0, "else" ^ uniq) ] + @ ct.code + @ [ B ("endif" ^ uniq) + ; Label ("else" ^ uniq) ] + @ ce.code + @ [ Label ("endif" ^ uniq) ] + ; counter = ce.counter } + +and compile_block b info = + match b with + | [] -> info + | i :: r -> + compile_block r (compile_instr i info) + +let compile_def (Func (name, args, b)) counter = + let cb = compile_block b + { code = [] + ; env = List.fold_left + (fun e (i, a) -> Env.add a (Mem (FP, 4 * i)) e) + Env.empty (List.mapi (fun i a -> i + 1, a) args) + ; fpo = 8 + ; counter = counter + 1 + ; return = "ret" ^ (string_of_int counter) } + in cb.counter, + [] + @ [ Label name + ; Addi (SP, SP, -cb.fpo) + ; Sw (RA, Mem (SP, cb.fpo - 4)) + ; Sw (FP, Mem (SP, cb.fpo - 8)) + ; Addi (FP, SP, cb.fpo - 4) ] + @ cb.code + @ [ Label cb.return + ; Addi (SP, SP, cb.fpo) + ; Lw (RA, Mem (FP, 0)) + ; Lw (FP, Mem (FP, -4)) + ; Jr (RA) ] + +let rec compile_prog p counter = + match p with + | [] -> [] + | d :: r -> + let new_counter, cd = compile_def d counter in + cd @ (compile_prog r new_counter) + +let compile (code, data) = + { text = Baselib.builtins @ compile_prog code 0 + ; data = List.map (fun (l, s) -> (l, Asciiz s)) data } diff --git a/lexer.mll b/lexer.mll @@ -0,0 +1,85 @@ +{ + open Lexing + open Parser + + exception Error of char + exception StrEndError of string + + let keyword_table = Hashtbl.create 2 + let _ = + List.iter (fun (kwd, tok) -> Hashtbl.add keyword_table kwd tok) + [ "int", Lint_k; + "char", Lchar_k; + "float", Lfloat_k; + "void", Lvoid_k; + "bool", Lbool_k ] +} + +let alpha = ['a'-'z' 'A'-'Z'] +let num = ['0'-'9'] +let numf = (num+ '.' num*) +let character = "'"(_ | '\\'['a' 'b' 't' 'n' 'v' 'f' 'r' '0' '\\' '''])"'" +let ident = alpha (alpha | num | '-' | '_')* + +rule token = parse + | eof { Lend } + | [ ' ' '\t' ] { token lexbuf } + | '\n' { Lexing.new_line lexbuf; token lexbuf } + | "//" { comment lexbuf } + | "#" { comment lexbuf } (* Ignore preprocessor *) + | "/*" { multi_line_comment lexbuf } + | ";" { Lsc } + | "return" { Lreturn } + | "if" { Lif } + | "else" { Lelse } + | "while" { Lwhile } + | "true" as b { Ltrue (bool_of_string b)} + | "false" as b { Lfalse (bool_of_string b) } + | num+ as n { Lint(int_of_string n) } + | numf+ as n { Lfloat(float_of_string n) } + | character as c { Lchar(c) } + | '"' { Lstr (String.concat "" (str lexbuf))} + | '+' { Ladd } + | '-' { Lsub } + | '*' { Lmul } + | '%' { Lmod } + | '/' { Ldiv } + | '=' { Lassign } + | '(' { Lopar } + | ')' { Lcpar } + | '{' { Lobra } + | '}' { Lcbra } + | ',' { Lcom } + | "==" { Leq } + | "!=" { Lneq } + | ">" { Lgt } + | ">=" { Lge } + | "<" { Llt } + | "<=" { Lle } + | '|' { Lor } + | '^' { Lxor } + | '&' { Land } + | '!' { Lex } + | "puts" { Lputs } + | "puti" { Lputi } + | "geti" { Lgeti } + | ident * as id { try + Hashtbl.find keyword_table id + with Not_found -> Lvar id } + | _ as c { raise (Error c) } + +and comment = parse + | eof { Lend } + | '\n' { Lexing.new_line lexbuf; token lexbuf } + | _ { comment lexbuf } + +and multi_line_comment = parse + | eof { Lend } + | '\n' { Lexing.new_line lexbuf; multi_line_comment lexbuf } + | "*/" { token lexbuf } + | _ { multi_line_comment lexbuf } + +and str = parse + | eof { raise (StrEndError "error eof inside string") } + | '"' { [] } + | _ as c { (String.make 1 c) :: (str lexbuf) } diff --git a/mips.ml b/mips.ml @@ -0,0 +1,126 @@ +type reg = + | Zero + | SP + | RA + | FP + | V0 + | A0 + | A1 + | T0 + | T1 + | F0 + +type label = string +type loc = + | Lbl of label + | Mem of reg * int + +type instr = + | Label of label + | Li of reg * int + | Lis of reg * float + | La of reg * loc + | Sw of reg * loc + | Lw of reg * loc + | Sb of reg * loc + | Lb of reg * loc + | Move of reg * reg + | Addi of reg * reg * int + | Adds of reg * reg * reg + | Add of reg * reg * reg + | Sub of reg * reg * reg + | Mul of reg * reg * reg + | Div of reg * reg * reg + | Syscall + | B of label + | Beqz of reg * label + | Jal of label + | Jr of reg + | Mfhi of reg + | Mflo of reg + | Seq of reg * reg * reg + | Sne of reg * reg * reg + | Sge of reg * reg * reg + | Sgt of reg * reg * reg + | Sle of reg * reg * reg + | Slt of reg * reg * reg + | Xor of reg * reg * reg + | Or of reg * reg * reg + | And of reg * reg * reg + +type directive = + | Asciiz of string + +type decl = label * directive + +type asm = { text: instr list ; data: decl list } + +module Syscall = struct + let print_int = 1 + let print_str = 4 + let read_int = 5 + let read_str = 8 + let sbrk = 9 +end + +let ps = Printf.sprintf + +let fmt_reg = function + | Zero -> "$zero" + | SP -> "$sp" + | FP -> "$fp" + | RA -> "$ra" + | V0 -> "$v0" + | A0 -> "$a0" + | A1 -> "$a1" + | T0 -> "$t0" + | T1 -> "$t1" + | F0 -> "$f0" + +let fmt_loc = function + | Lbl (l) -> l + | Mem (r, o) -> ps "%d(%s)" o (fmt_reg r) + +let fmt_instr = function + | Label (l) -> ps "%s:" l + | Li (r, i) -> ps " li %s, %d" (fmt_reg r) i + | Lis (r, f) -> ps " li.s %s, %f" (fmt_reg r) f + | La (r, a) -> ps " la %s, %s" (fmt_reg r) (fmt_loc a) + | Sw (r, a) -> ps " sw %s, %s" (fmt_reg r) (fmt_loc a) + | Lw (r, a) -> ps " lw %s, %s" (fmt_reg r) (fmt_loc a) + | Sb (r, a) -> ps " sb %s, %s" (fmt_reg r) (fmt_loc a) + | Lb (r, a) -> ps " lb %s, %s" (fmt_reg r) (fmt_loc a) + | Move (rd, rs) -> ps " move %s, %s" (fmt_reg rd) (fmt_reg rs) + | Addi (rd, rs, i) -> ps " addi %s, %s, %d" (fmt_reg rd) (fmt_reg rs) i + | Add (rd, rs, rt) -> ps " add %s, %s, %s" (fmt_reg rd) (fmt_reg rs) (fmt_reg rt) + | Sub (rd, rs, rt) -> ps " sub %s, %s, %s" (fmt_reg rd) (fmt_reg rt) (fmt_reg rs) + | Adds (rd, rs, rt) -> ps " add.s %s, %s, %s" (fmt_reg rd) (fmt_reg rs) (fmt_reg rt) + | Mul (rd, rs, rt) -> ps " mul %s, %s, %s" (fmt_reg rd) (fmt_reg rs) (fmt_reg rt) + | Div (rd, rs, rt) -> ps " div %s, %s, %s" (fmt_reg rd) (fmt_reg rt) (fmt_reg rs) + | Syscall -> ps " syscall" + | B (l) -> ps " b %s" l + | Beqz (r, l) -> ps " beqz %s, %s" (fmt_reg r) l + | Jal (l) -> ps " jal %s" l + | Jr (r) -> ps " jr %s" (fmt_reg r) + | Mfhi (r) -> ps " mfhi %s" (fmt_reg r) + | Mflo (r) -> ps " mflo %s" (fmt_reg r) + | Seq (rd, rs, rt) -> ps " seq %s, %s, %s" (fmt_reg rd) (fmt_reg rs) (fmt_reg rt) + | Sne (rd, rs, rt) -> ps " sne %s, %s, %s" (fmt_reg rd) (fmt_reg rs) (fmt_reg rt) + | Sge (rd, rt, rs) -> ps " sge %s, %s, %s" (fmt_reg rd) (fmt_reg rs) (fmt_reg rt) + | Sgt (rd, rt, rs) -> ps " sgt %s, %s, %s" (fmt_reg rd) (fmt_reg rs) (fmt_reg rt) + | Sle (rd, rt, rs) -> ps " sle %s, %s, %s" (fmt_reg rd) (fmt_reg rs) (fmt_reg rt) + | Slt (rd, rt, rs) -> ps " slt %s, %s, %s" (fmt_reg rd) (fmt_reg rs) (fmt_reg rt) + | Xor (rd, rs, rt) -> ps " xor %s, %s, %s" (fmt_reg rd) (fmt_reg rs) (fmt_reg rt) + | Or (rd, rs, rt) -> ps " or %s, %s, %s" (fmt_reg rd) (fmt_reg rs) (fmt_reg rt) + | And (rd, rs, rt) -> ps " and %s, %s, %s" (fmt_reg rd) (fmt_reg rs) (fmt_reg rt) + + + +let fmt_dir = function + | Asciiz (s) -> ps ".asciiz \"%s\"" s + +let print_asm oc asm = + Printf.fprintf oc ".text\n.globl main\n" ; + List.iter (fun i -> Printf.fprintf oc "%s\n" (fmt_instr i)) asm.text ; + Printf.fprintf oc "\n.data\n" ; + List.iter (fun (l, d) -> Printf.fprintf oc "%s: %s\n" l (fmt_dir d)) asm.data diff --git a/parser.mly b/parser.mly @@ -0,0 +1,201 @@ +%{ + open Ast + open Ast.Syntax +%} + +%token <int> Lint +%token <float> Lfloat +%token <bool> Ltrue Lfalse +%token <string> Lchar +%token <string> Lstr +%token <string> Lvar +%token Ladd Lsub Lmul Ldiv Lmod Lopar Lcpar Lobra Lcbra Lcom Lputs Lputi Lgeti +%token Lreturn Lassign Lsc Lend Leq Lneq Lif Lelse Lwhile +%token Lint_k Lfloat_k Lbool_k Lchar_k Lvoid_k +%token Lge Lgt Lle Llt Lor Land Lxor Lex + +%left Ladd Lsub +%left Lmul Ldiv + +%start prog + +%type <Ast.Syntax.prog> prog + +%% + +prog: +| d = def; p = prog { + d :: p +} +| Lend {[]} + +def: +| t = type_; n = Lvar; Lopar; a = args; Lcpar; Lobra; b = block; Lcbra { + Func { ftype = t + ; name = n + ; args = a + ; body = b + ; pos = $startpos(t) } +} +| t = type_; n = Lvar; Lopar; no_arg; Lcpar; Lobra; b = block; Lcbra { + Func { ftype = t + ; name = n + ; args = [] + ; body = b + ; pos = $startpos(t) } +} + +arg: +| t = type_; n = Lvar { + t, n +} + +args: +| {[]} +| a = arg { a :: [] } +| a = arg; Lcom; r = args { a :: r } + +par: +| {[]} +| e = expr { + e :: [] +} +| e = expr; Lcom; p = par { + e :: p +} + +block: +| {[]} +| Lreturn; e = expr; Lsc { + Return { expr = e; pos = $startpos($1) } + :: [] +} +| v = Lvar; Lassign; e = expr; Lsc; b = block { + Assign { var = v; expr = e; pos = $startpos($2) } + :: b +} +| t = type_; v = Lvar; Lsc; b = block { + Decl { var = v; type_ = t; pos = $startpos(t) } + :: b +} +| t = type_; v = Lvar; Lassign; e = expr; Lsc; b = block { + Decl { var = v; type_ = t; pos = $startpos(t) } + :: Assign { var = v; expr = e; pos = $startpos($3) } + :: b +} +| e = expr; Lsc; b = block { + Expr { expr = e; pos = $startpos(e) } + :: b +} +| Lif; e = expr; Lobra; b1 = block; Lcbra; b = block { + Cond { expr = e; b1 = b1; b2 = []; pos = $startpos($1) } + :: b +} +| Lif; e = expr; Lobra; b1 = block; Lcbra; Lelse; Lobra; b2 = block; Lcbra; b = block { + Cond { expr = e; b1 = b1; b2 = b2; pos = $startpos($1) } + :: b +} +| Lif; e = expr; Lobra; b1 = block; Lcbra; Lelse; b2 = block; b = block; { + Cond { expr = e; b1 = b1; b2 = b2; pos = $startpos($1) } + :: b +} +| Lwhile; e = expr; Lobra; b1 = block; Lcbra; b = block { + Loop { expr = e; b = b1; pos = $startpos($1) } + :: b +} + +expr: +| Lopar; e = expr; Lcpar { e } +| b = Ltrue { + Bool { value = b; pos = $startpos(b)} +} +| b = Lfalse { + Bool { value = b; pos = $startpos(b)} +} +| n = Lint { + Int { value = n; pos = $startpos(n)} +} +| n = Lfloat { + Float { value = n; pos = $startpos(n)} +} +| c = Lchar { + Char { value = c; pos = $startpos(c)} +} +| s = Lstr { + Str { value = s; pos = $startpos(s)} +} +| a = expr; Lmul; b = expr { + Call { func = "_mul"; args = [a; b]; pos = $startpos($2)} +} +| a = expr; Ladd; b = expr { + Call { func = "_add"; args = [a; b]; pos = $startpos($2)} +} +| a = expr; Lsub; b = expr { + Call { func = "_sub"; args = [a; b]; pos = $startpos($2)} +} +| a = expr; Ldiv; b = expr { + Call { func = "_div"; args = [a; b]; pos = $startpos($2)} +} +| a = expr; Lmod; b = expr { + Call { func = "_mod"; args = [a; b]; pos = $startpos($2)} +} +| a = expr; Lge ; b = expr { + Call { func = "_ge"; args = [a; b]; pos = $startpos($2)} +} +| a = expr; Lgt ; b = expr { + Call { func = "_gt"; args = [a; b]; pos = $startpos($2)} +} +| a = expr; Lle ; b = expr { + Call { func = "_le"; args = [a; b]; pos = $startpos($2)} +} +| a = expr; Llt ; b = expr { + Call { func = "_lt"; args = [a; b]; pos = $startpos($2)} +} +| a = expr; Leq; b = expr { + Call { func = "_eq"; args = [a; b]; pos = $startpos($2)} +} +| a = expr; Lneq; b = expr { + Call { func = "_neq"; args = [a; b]; pos = $startpos($2)} +} +| a = expr; Lor ; b = expr { + Call { func = "_or"; args = [a; b]; pos = $startpos($2)} +} +| a = expr; Land ; b = expr { + Call { func = "_and"; args = [a; b]; pos = $startpos($2)} +} +| a = expr; Lxor ; b = expr { + Call { func = "_xor"; args = [a; b]; pos = $startpos($2)} +} +| Lputs; Lopar; a = expr; Lcpar { + Call { func = "_puts"; args = [a]; pos = $startpos($1)} +} +| Lputi; Lopar; a = expr; Lcpar { + Call { func = "_puti"; args = [a]; pos = $startpos($1)} +} +| Lgeti; Lopar; Lcpar { + Call { func = "_geti"; args = []; pos = $startpos($1)} +} +| v = Lvar { + Var { name = v; pos = $startpos(v)} +} +| v = Lvar; Lopar; a = par; Lcpar; { + Call { func = v + ; args = a + ; pos = $startpos(v)} +} +| Lsub; e = expr { + Call { func = "_rev"; args = [e]; pos = $startpos($1)} +} +| Lex; e = expr { + Call { func = "_not"; args = [e]; pos = $startpos($1)} +} + +type_: +| Lint_k { Int_t } +| Lbool_k { Bool_t } +| Lfloat_k { Float_t } +| Lchar_k { Char_t } +| Lvoid_k { Void_t } + +no_arg: +| Lvoid_k { Void_t } diff --git a/semantics.ml b/semantics.ml @@ -0,0 +1,124 @@ +open Ast +open Ast.IR +open Baselib + +exception Error of string * Lexing.position + +let print_env key v = + print_endline (key ^ " " ^ (string_of_type_t v)) + +let expr_pos expr = + match expr with + | Syntax.Void v -> v.pos + | Syntax.Int n -> n.pos + | Syntax.Float n -> n.pos + | Syntax.Bool b -> b.pos + | Syntax.Char c -> c.pos + | Syntax.Str s -> s.pos + | Syntax.Var v -> v.pos + | Syntax.Call c -> c.pos + +let errt expected given pos = + raise (Error (Printf.sprintf "expected %s but given %s" + (string_of_type_t expected) + (string_of_type_t given), + pos)) + +let rec analyze_expr expr env = + match expr with + | Syntax.Void v -> Void, Void_t + | Syntax.Int n -> Int n.value, Int_t + | Syntax.Float n -> Float n.value, Float_t + | Syntax.Bool b -> Bool b.value, Bool_t + | Syntax.Char c -> Char c.value, Char_t + | Syntax.Str s -> Str s.value, Str_t + | Syntax.Var v -> + if Env.mem v.name env then + Var v.name, Env.find v.name env + else + raise (Error (Printf.sprintf "unbound variable '%s'" v.name, + v.pos)) + | Syntax.Call c -> + match Env.find_opt c.func env with + | Some (Func_t (rt, at)) -> + if List.length at != List.length c.args then + raise (Error (Printf.sprintf "expected %d arguments but given %d" + (List.length at) (List.length c.args), c.pos)) ; + let args = List.map2 (fun eat a -> let aa, at = analyze_expr a env + in if at = eat then aa + else errt eat at (expr_pos a)) + at c.args in + Call (c.func, args), rt + | Some _ -> raise (Error (Printf.sprintf "'%s' is not a function" c.func, + c.pos)) + | None -> raise (Error (Printf.sprintf "undefined function '%s'" c.func, + c.pos)) + +let rec analyze_instr instr env = + match instr with + | Syntax.Assign a -> + let ae, et = analyze_expr a.expr env in + let t = match Env.find_opt a.var env with + | Some t -> t + | None -> raise (Error (Printf.sprintf "var %s does not exist" a.var, a.pos)) in + if (string_of_type_t t) = (string_of_type_t et) then + Assign (a.var, ae), Env.add a.var et env + else + raise (Error (Printf.sprintf "var %s is of type %s" a.var (string_of_type_t t), a.pos)) + | Syntax.Return r -> + let ae, _ = analyze_expr r.expr env in + Return ae, env + | Syntax.Decl d -> + if Env.mem d.var env then + raise (Error (Printf.sprintf "var %s already declared" d.var, d.pos)) + else + Decl d.var, Env.add d.var d.type_ env + | Syntax.Expr e -> + let ae, t = analyze_expr e.expr env in + Expr ae, env + | Syntax.Cond e -> + let ae, t = analyze_expr e.expr env in + let b1, nenv = analyze_block e.b1 env in + let b2, nenv' = analyze_block e.b2 nenv in + Cond (ae, b1, b2), nenv' + | Syntax.Loop l -> + let ae, t = analyze_expr l.expr env in + let b1, nenv = analyze_block l.b env in + Loop (ae, b1), nenv + +and analyze_block block env = + match block with + | [] -> [], env + | instr :: rest -> + let ai, new_env = analyze_instr instr env in + let ab, nenv = analyze_block rest new_env in + ai :: ab, nenv + +let rec push_arg_in_env arg env = + match arg with + | [] -> env + | (type_, ident) :: cdr -> + push_arg_in_env cdr (Env.add ident type_ env) + +let analyze_def def env = + match def with + | Syntax.Func f -> + if Env.mem f.name env then + raise (Error (Printf.sprintf "func %s already declared" f.name, f.pos)) + else + let types, names = List.split f.args in + let l, nenv = analyze_block f.body (push_arg_in_env f.args env) in + Func (f.name, names, l), Env.add f.name (Func_t (f.ftype, types)) env (* env is the initial, nenv is the func scope *) + +let rec analyze_prog prog env = + match prog with + | [] -> [], env + | d :: r -> + let f, nenv = analyze_def d env in + let l, nenv' = analyze_prog r nenv in + f :: l, nenv' + +let analyze parsed = + let l, env = analyze_prog parsed Baselib._types_ in +(* Env.iter print_env env; *) + l diff --git a/simplifier.ml b/simplifier.ml @@ -0,0 +1,84 @@ +open Ast + +module Env = Map.Make(String) + +let collect_constant_strings code = + let counter = ref 0 in + let env = ref Env.empty in + let rec ccs_expr = function + | IR.Bool b -> IR2.Bool b, [] + | IR.Void -> IR2.Nil, [] + | IR.Int n -> IR2.Int n, [] + | IR.Char c -> IR2.Char c.[0], [] + | IR.Float f -> IR2.Float f, [] + | IR.Str s -> + (match Env.find_opt s !env with + | Some l -> + IR2.Data l, [] + | None -> + incr counter ; + let l = "str" ^ (string_of_int !counter) in + env := Env.add s l !env ; + IR2.Data l, [ (l, s) ]) + | IR.Var v -> + IR2.Var v, [] + | IR.Call (f, args) -> + let args2 = List.map ccs_expr args in + let ccs = List.flatten (List.map (fun (_, s) -> s) args2) in + IR2.Call (f, List.map (fun (e, _) -> e) args2), ccs + in + (* let ccs_lvalue = function *) + (* | IR.LVar v -> *) + (* IR2.LVar v, [] *) + (* | IR.LAddr a -> *) + (* let a2, ccs = ccs_expr a in *) + (* IR2.LAddr a2, ccs *) + (* in *) + let rec ccs_instr = function + | IR.Decl v -> + IR2.Decl v, [] + | IR.Return e -> + let e2, ccs = ccs_expr e in + IR2.Return e2, ccs + | IR.Expr e -> + let e2, ccs = ccs_expr e in + IR2.Expr e2, ccs + + | IR.Assign (lv, e) -> + let lv2, ccs_lv = ccs_expr (IR.Var lv) in + let e2, ccs_e = ccs_expr e in + IR2.Assign (lv2, e2), List.flatten [ ccs_lv; ccs_e ] + + | IR.Cond (t, y, n) -> + let t2, ccs_t = ccs_expr t in + let y2, ccs_y = ccs_block y in + let n2, ccs_n = ccs_block n in + IR2.Cond (t2, y2, n2), List.flatten [ ccs_t ; ccs_y ; ccs_n ] + + | IR.Loop (t, y) -> + let t2, ccs_t = ccs_expr t in + let y2, ccs_y = ccs_block y in + IR2.Loop (t2, y2), List.flatten [ ccs_t ; ccs_y ] + + and ccs_block = function + | [] -> [], [] + | i :: r -> + let i2, ccs_i = ccs_instr i in + let r2, ccs_r = ccs_block r in + i2 :: r2, List.flatten [ ccs_i ; ccs_r ] + in + let ccs_def = function + | IR.Func (name, args, body) -> + let body2, ccs = ccs_block body in + IR2.Func (name, args, body2), ccs + in + let rec ccs_prog = function + | [] -> [], [] + | d :: r -> + let d2, ccs_d = ccs_def d in + let r2, ccs_r = ccs_prog r in + d2 :: r2, List.flatten [ ccs_d ; ccs_r ] + in ccs_prog code + +let simplify code = + collect_constant_strings code diff --git a/test.ml b/test.ml @@ -0,0 +1,31 @@ +open Lexing +open Ast +open Ast.IR2 + +let err msg pos = + Printf.eprintf "Error on line %d col %d: %s.\n" + pos.pos_lnum (pos.pos_cnum - pos.pos_bol) msg ; + exit 1 + +let () = + if (Array.length Sys.argv) != 2 then begin + Printf.eprintf "Usage: %s <file>\n" Sys.argv.(0) ; + exit 1 + end; + let f = open_in Sys.argv.(1) in + let buf = Lexing.from_channel f in + try + let parsed = Parser.prog Lexer.token buf in + close_in f ; + let ast = Semantics.analyze parsed in +(* print_endline (IR.string_of_ir ast); *) + let simplified = Simplifier.simplify ast in + let compiled = Compiler.compile simplified in + Mips.print_asm Stdlib.stdout compiled + with + | Lexer.Error c -> + err (Printf.sprintf "unrecognized char '%c'" c) (Lexing.lexeme_start_p buf) + | Parser.Error -> + err "syntax error" (Lexing.lexeme_start_p buf) + | Semantics.Error (msg, pos) -> + err msg pos diff --git a/tests/0.test b/tests/0.test @@ -0,0 +1 @@ +// fichier vide diff --git a/tests/1.test b/tests/1.test @@ -0,0 +1 @@ +return 1312; diff --git a/tests/10.test b/tests/10.test @@ -0,0 +1,7 @@ +// Hello, world! + +/* This + * is a + * multiline + * comment + */ diff --git a/tests/11.test b/tests/11.test @@ -0,0 +1,10 @@ +char a = 'A'; +a = 'B'; + +char b; +b = 'a'; + +int c; +c = 1 + 10; + +return a; diff --git a/tests/12.test b/tests/12.test @@ -0,0 +1,11 @@ +int add(int a, int b) +{ + int a = 21; + return a; +} + +int main(int argc, char argv) +{ + int a = 42; + return a; +} diff --git a/tests/13.test b/tests/13.test @@ -0,0 +1,12 @@ +int main() +{ + int c; + int d; + return c + d; +} + +int add(int a, int b) +{ + int f = 42; + return d; +} diff --git a/tests/14.test b/tests/14.test @@ -0,0 +1,11 @@ +int main() +{ + int c = 1; + return 0; +} + +int add(int a, int b) +{ + /* int d = c; // should fail */ + return a + b; +} diff --git a/tests/15.test b/tests/15.test @@ -0,0 +1,11 @@ +int add(int a, int b) +{ + int z; + return a + b; +} + +int main(int argc, char argv) +{ + int z = 12; + return z; +} diff --git a/tests/16.test b/tests/16.test @@ -0,0 +1,15 @@ +int foo(int a, int b) +{ + return a + b; +} + +int main() +{ + int a = 1; + int b = 2; + foo(a, b); + + int e = foo(a,b); + + return 0; +} diff --git a/tests/17.test b/tests/17.test @@ -0,0 +1,12 @@ +int add(int a, int b) +{ + return a + b; +} + +int main() +{ + int res = add(21, 21); + int res2 = add(res, add(res, res)); + + return res; +} diff --git a/tests/18.test b/tests/18.test @@ -0,0 +1,10 @@ +void hello() +{ + puts("Hello, wolrd!"); +} + +int main() +{ + hello(); + return 0; +} diff --git a/tests/19.test b/tests/19.test @@ -0,0 +1,13 @@ +int add_int(int a, int b) +{ + return a + b; +} + +int main() +{ + int a = 21; + int b = 21; + + puti(add_int(a, b)); + return 0; +} diff --git a/tests/2.test b/tests/2.test @@ -0,0 +1 @@ +return 21 * 2; diff --git a/tests/3.test b/tests/3.test @@ -0,0 +1 @@ +x = 42; diff --git a/tests/4.test b/tests/4.test @@ -0,0 +1,2 @@ +x = 42; +return x; diff --git a/tests/5.test b/tests/5.test @@ -0,0 +1,3 @@ +x = 500 + 150 + 6; +y = x * 2; +return y; diff --git a/tests/6.test b/tests/6.test @@ -0,0 +1,3 @@ +x = 2 * (2 + 6); +y = x * 2; +return y; diff --git a/tests/7.test b/tests/7.test @@ -0,0 +1,3 @@ +x = 2.0 * (2.2 + 6.1) + 2.1; +y = x * 2; +return y; diff --git a/tests/8.test b/tests/8.test @@ -0,0 +1,5 @@ +w = '\t'; +x = 'a'; +y = '\\'; +z = '\''; +return x; diff --git a/tests/9.test b/tests/9.test @@ -0,0 +1,4 @@ +a = "Hello, world!"; +b = ""; +c = "\nfoo\tbar"; +return a;