commit ae81e24f4065f5fdd05679e50e8d2e8f98402fcd
Author: Martin Hart <114212671+m4rtinh4rt@users.noreply.github.com>
Date: Thu, 17 Oct 2024 22:16:56 +0200
Initial commit
Diffstat:
31 files changed, 1237 insertions(+), 0 deletions(-)
diff --git a/Makefile b/Makefile
@@ -0,0 +1,7 @@
+all:
+ @ocamlbuild -use-menhir test.byte
+
+clean:
+ @ocamlbuild -clean
+
+.PHONY: all clean
diff --git a/README.md b/README.md
@@ -0,0 +1 @@
+# mips-cc
diff --git a/ast.ml b/ast.ml
@@ -0,0 +1,157 @@
+type type_t =
+ | Int_t
+ | Float_t
+ | Char_t
+ | Str_t
+ | Void_t
+ | Bool_t
+ | Func_t of type_t * type_t list
+
+let rec string_of_type_t t =
+ match t with
+ | Int_t -> "int"
+ | Float_t -> "float"
+ | Str_t -> "string"
+ | Char_t -> "char"
+ | Void_t -> "void"
+ | Bool_t -> "bool"
+ | Func_t (r, a) ->
+ (if (List.length a) > 1 then "(" else "")
+ ^ (String.concat ", " (List.map string_of_type_t a))
+ ^ (if (List.length a) > 1 then ")" else "")
+ ^ " -> " ^ (string_of_type_t r)
+
+module Syntax = struct
+ type ident = string
+ type expr =
+ | Void of { pos: Lexing.position }
+ | Bool of { value: bool
+ ; pos: Lexing.position }
+ | Int of { value: int
+ ; pos: Lexing.position }
+ | Float of { value: float
+ ; pos: Lexing.position }
+ | Char of { value: string
+ ; pos: Lexing.position }
+ | Str of { value: string
+ ; pos: Lexing.position }
+ | Var of { name: ident
+ ; pos: Lexing.position }
+ | Call of { func: ident
+ ; args: expr list
+ ; pos: Lexing.position }
+
+ type instr =
+ | Assign of { var: ident
+ ; expr: expr
+ ; pos: Lexing.position }
+ | Return of { expr: expr
+ ; pos: Lexing.position }
+ | Decl of { var: ident
+ ; type_: type_t
+ ; pos: Lexing.position }
+ | Expr of { expr: expr
+ ; pos: Lexing.position }
+
+ | Cond of { expr: expr
+ ; b1: block
+ ; b2: block
+ ; pos: Lexing.position }
+
+ | Loop of { expr: expr
+ ; b: block
+ ; pos: Lexing.position }
+
+ and block = instr list
+
+ type def =
+ Func of { ftype: type_t
+ ; name: ident
+ ; args: (type_t * ident) list
+ ; body: block
+ ; pos: Lexing.position }
+ and prog = def list
+end
+
+module IR = struct
+ type ident = string
+ type expr =
+ | Void
+ | Int of int
+ | Float of float
+ | Bool of bool
+ | Char of string
+ | Str of string
+ | Var of ident
+ | Call of ident * expr list
+ type instr =
+ | Assign of ident * expr
+ | Return of expr
+ | Decl of ident
+ | Expr of expr
+ | Cond of expr * block * block
+ | Loop of expr * block
+ and block = instr list
+
+ type def =
+ Func of ident * ident list * block
+ and prog = def list
+
+ let string_of_ir ast =
+ let rec fmt_e = function
+ | Void -> "Void"
+ | Bool b -> "Bool " ^ (string_of_bool b)
+ | Int n -> "Int " ^ (string_of_int n)
+ | Float n -> "Float " ^ (string_of_float n)
+ | Char c -> "Char " ^ c
+ | Str s -> "Str \"" ^ s ^ "\""
+ | Var v -> "Var \"" ^ v ^ "\""
+ | Call (f, a) -> "Call (\"" ^ f ^ "\", [ "
+ ^ (String.concat " ; " (List.map fmt_e a))
+ ^ " ])"
+ and fmt_i = function
+ | Loop (e, b) -> "Loop (" ^ (fmt_e e) ^ ", " ^ (fmt_b b) ^ ")"
+ | Cond (e, b1, b2) -> "Cond (" ^ (fmt_e e) ^ ", " ^ (fmt_b b1) ^ ", " ^ (fmt_b b2) ^ ")"
+ | Decl v -> "Decl \"" ^ v ^ "\""
+ | Assign (v, e) -> "Assign (\"" ^ v ^ "\", " ^ (fmt_e e) ^ ")"
+ | Return e -> "Return (" ^ (fmt_e e) ^ ")"
+ | Expr e -> "Expr (" ^ (fmt_e e) ^ ")"
+
+ and fmt_b b = "\t[ " ^ (String.concat "\n\t; " (List.map fmt_i b)) ^ " ]"
+ and fmt_arg a = "\"" ^ a ^ "\""
+ and fmt_d = function
+ | Func (name, args, b) -> "[ Func (\"" ^ name ^ "\", " ^ "[ " ^
+ (String.concat "; " (List.map fmt_arg args)) ^ " ],\n" ^
+ fmt_b b ^ ")" ^ "]"
+ and fmt_p p = "[\n " ^ (String.concat "\n " (List.map fmt_d p)) ^ "\n]"
+ in fmt_p ast
+end
+
+module IR2 = struct
+ type ident = string
+ type expr =
+ | Nil
+ | Bool of bool
+ | Int of int
+ | Float of float
+ | Char of char
+ | Data of string
+ | Var of ident
+ | Call of ident * expr list
+(*
+ type lvalue =
+ | LVar of ident
+ | LAddr of expr
+*)
+ type instr =
+ | Decl of ident
+ | Return of expr
+ | Expr of expr
+ | Assign of expr * expr
+ | Cond of expr * block * block
+ | Loop of expr * block
+ and block = instr list
+ type def =
+ | Func of ident * ident list * block
+ type prog = def list
+end
diff --git a/baselib.ml b/baselib.ml
@@ -0,0 +1,144 @@
+open Ast
+open Mips
+
+module Env = Map.Make(String)
+
+let _types_ =
+ Env.of_seq
+ (List.to_seq
+ [ "_add", Func_t (Int_t, [ Int_t ; Int_t ])
+ ; "_sub", Func_t (Int_t, [ Int_t ; Int_t ])
+ ; "_mul", Func_t (Int_t, [ Int_t ; Int_t ])
+ ; "_div", Func_t (Int_t, [ Int_t ; Int_t ])
+ ; "_mod", Func_t (Int_t, [ Int_t ; Int_t ])
+ ; "_puti", Func_t (Void_t, [ Int_t ])
+ ; "_puts", Func_t (Void_t, [ Str_t ])
+ ; "_geti", Func_t (Int_t, [ ])
+ ; "_eq", Func_t (Bool_t, [ Int_t ; Int_t ])
+ ; "_neq", Func_t (Bool_t, [ Int_t ; Int_t ])
+ ; "_ge", Func_t (Bool_t, [ Int_t ; Int_t ])
+ ; "_gt", Func_t (Bool_t, [ Int_t ; Int_t ])
+ ; "_le", Func_t (Bool_t, [ Int_t ; Int_t ])
+ ; "_lt", Func_t (Bool_t, [ Int_t ; Int_t ])
+ ; "_xor", Func_t (Bool_t, [ Int_t ; Int_t ])
+ ; "_and", Func_t (Bool_t, [ Int_t ; Int_t ])
+ ; "_or", Func_t (Bool_t, [ Int_t ; Int_t ])
+ ; "_not", Func_t (Bool_t, [ Int_t ])
+ ; "_rev", Func_t (Int_t, [ Int_t ])
+ ])
+
+let builtins =
+ [ Label "_add"
+ ; Lw (T0, Mem (SP, 0))
+ ; Lw (T1, Mem (SP, 4))
+ ; Add (V0, T0, T1)
+ ; Jr RA
+
+ ; Label "_sub"
+ ; Lw (T0, Mem (SP, 0))
+ ; Lw (T1, Mem (SP, 4))
+ ; Sub (V0, T0, T1)
+ ; Jr RA
+
+ ; Label "_mul"
+ ; Lw (T0, Mem (SP, 0))
+ ; Lw (T1, Mem (SP, 4))
+ ; Mul (V0, T0, T1)
+ ; Jr RA
+
+ ; Label "_div"
+ ; Lw (T0, Mem (SP, 0))
+ ; Lw (T1, Mem (SP, 4))
+ ; Div (V0, T0, T1)
+ ; Jr RA
+
+ ; Label "_mod"
+ ; Lw (T0, Mem (SP, 0))
+ ; Lw (T1, Mem (SP, 4))
+ ; Div (V0, T0, T1)
+ ; Mfhi V0
+ ; Jr RA
+
+ ; Label "_puti"
+ ; Lw (A0, Mem (SP, 0))
+ ; Li (V0, Syscall.print_int)
+ ; Syscall
+ ; Jr RA
+
+ ; Label "_geti"
+ ; Lw (A0, Mem (SP, 0))
+ ; Li (V0, Syscall.read_int)
+ ; Syscall
+ ; Jr RA
+
+ ; Label "_puts"
+ ; Lw (A0, Mem (SP, 0))
+ ; Li (V0, Syscall.print_str)
+ ; Syscall
+ ; Jr RA
+
+ ; Label "_eq"
+ ; Lw (T0, Mem (SP, 0))
+ ; Lw (T1, Mem (SP, 4))
+ ; Seq (V0, T0, T1)
+ ; Jr RA
+
+ ; Label "_neq"
+ ; Lw (T0, Mem (SP, 0))
+ ; Lw (T1, Mem (SP, 4))
+ ; Sne (V0, T0, T1)
+ ; Jr RA
+
+ ; Label "_ge"
+ ; Lw (T0, Mem (SP, 0))
+ ; Lw (T1, Mem (SP, 4))
+ ; Sge (V0, T0, T1)
+ ; Jr RA
+
+ ; Label "_gt"
+ ; Lw (T0, Mem (SP, 0))
+ ; Lw (T1, Mem (SP, 4))
+ ; Sgt (V0, T0, T1)
+ ; Jr RA
+
+ ; Label "_le"
+ ; Lw (T0, Mem (SP, 0))
+ ; Lw (T1, Mem (SP, 4))
+ ; Sle (V0, T0, T1)
+ ; Jr RA
+
+ ; Label "_lt"
+ ; Lw (T0, Mem (SP, 0))
+ ; Lw (T1, Mem (SP, 4))
+ ; Slt (V0, T0, T1)
+ ; Jr RA
+
+ ; Label "_xor"
+ ; Lw (T0, Mem (SP, 0))
+ ; Lw (T1, Mem (SP, 4))
+ ; Xor (V0, T0, T1)
+ ; Jr RA
+
+ ; Label "_or"
+ ; Lw (T0, Mem (SP, 0))
+ ; Lw (T1, Mem (SP, 4))
+ ; Or (V0, T0, T1)
+ ; Jr RA
+
+ ; Label "_and"
+ ; Lw (T0, Mem (SP, 0))
+ ; Lw (T1, Mem (SP, 4))
+ ; And (V0, T0, T1)
+ ; Jr RA
+
+ ; Label "_rev"
+ ; Lw (T0, Mem (SP, 0))
+ ; Sub (V0, T0, Zero)
+ ; Jr RA
+
+ ; Label "_not"
+ ; Lw (T0, Mem (SP, 0))
+ ; Seq (V0, T0, Zero)
+ ; Jr RA
+
+ ]
diff --git a/compiler.ml b/compiler.ml
@@ -0,0 +1,141 @@
+open Mips
+open Ast.IR2
+
+module Env = Map.Make(String)
+
+type cinfo = { code: Mips.instr list
+ ; env: Mips.loc Env.t
+ ; fpo: int
+ ; counter: int
+ ; return: string }
+
+let rec compile_expr e env =
+ match e with
+(* | Value v -> compile_value v *)
+ | Data l -> [ La (V0, Lbl (l)) ]
+ | Bool b -> [ Li (V0, if b then 1 else 0) ]
+ | Nil -> [ Li (V0, 0) ]
+ | Int n -> [ Li (V0, n) ]
+ | Float f -> [ Lis (F0, f) ]
+ | Char c -> [ Li (V0, (int_of_char c)) ]
+ | Var v -> [ Lw (V0, Env.find v env) ]
+ | Call (f, args) ->
+ let ca = List.map (fun a ->
+ compile_expr a env
+ @ [ Addi (SP, SP, -4)
+ ; Sw (V0, Mem (SP, 0)) ])
+ args in
+ List.flatten ca
+ @ [ Jal f
+ ; Addi (SP, SP, 4 * (List.length args)) ]
+
+let rec compile_instr i info =
+ match i with
+ | Decl v ->
+ { info with
+ env = Env.add v (Mem (FP, -info.fpo)) info.env
+ ; fpo = info.fpo + 4 }
+ | Return e ->
+ { info with
+ code = info.code
+ @ compile_expr e info.env
+ @ [ B info.return ] }
+ | Expr e ->
+ { info with
+ code = info.code
+ @ compile_expr e info.env }
+
+ | Assign (lv, e) ->
+ { info with
+ code = (info.code
+ @ compile_expr e info.env
+ @
+ match lv with
+ | Var v -> [ Sw (V0, Env.find v info.env) ])
+ }
+
+(*
+ TODO
+ | Assign (lv, e) -> { info with
+ code = info.code
+ @ compile_expr e info.env
+ @ (match lv with
+ | LVar v -> [ Sw (V0, Env.find v info.env) ]
+ | LAddr a -> []
+ @ [ Addi (SP, SP, -4)
+ ; Sw (V0, Mem (SP, 0)) ]
+ @ compile_expr a info.env
+ @ [ Lw (T0, Mem (SP, 0))
+ ; Addi (SP, SP, 4)
+ ; Sw (T0, Mem (V0, 0)) ]) }
+*)
+ | Loop (c, t) ->
+ let uniq = string_of_int info.counter in
+ let ct = compile_block t { info with code = []
+ ; counter = info.counter + 1 } in
+ { info with
+ code = info.code
+ @ [ Label ("startloop" ^ uniq) ]
+ @ compile_expr c info.env
+ @ [ Beqz (V0, "endloop" ^ uniq) ]
+ @ ct.code
+ @ [ B ("startloop" ^ uniq) ]
+ @ [ Label ("endloop" ^ uniq) ]
+ ; counter = ct.counter }
+
+ | Cond (c, t, e) ->
+ let uniq = string_of_int info.counter in
+ let ct = compile_block t { info with code = []
+ ; counter = info.counter + 1 } in
+ let ce = compile_block e { info with code = []
+ ; counter = ct.counter } in
+ { info with
+ code = info.code
+ @ compile_expr c info.env
+ @ [ Beqz (V0, "else" ^ uniq) ]
+ @ ct.code
+ @ [ B ("endif" ^ uniq)
+ ; Label ("else" ^ uniq) ]
+ @ ce.code
+ @ [ Label ("endif" ^ uniq) ]
+ ; counter = ce.counter }
+
+and compile_block b info =
+ match b with
+ | [] -> info
+ | i :: r ->
+ compile_block r (compile_instr i info)
+
+let compile_def (Func (name, args, b)) counter =
+ let cb = compile_block b
+ { code = []
+ ; env = List.fold_left
+ (fun e (i, a) -> Env.add a (Mem (FP, 4 * i)) e)
+ Env.empty (List.mapi (fun i a -> i + 1, a) args)
+ ; fpo = 8
+ ; counter = counter + 1
+ ; return = "ret" ^ (string_of_int counter) }
+ in cb.counter,
+ []
+ @ [ Label name
+ ; Addi (SP, SP, -cb.fpo)
+ ; Sw (RA, Mem (SP, cb.fpo - 4))
+ ; Sw (FP, Mem (SP, cb.fpo - 8))
+ ; Addi (FP, SP, cb.fpo - 4) ]
+ @ cb.code
+ @ [ Label cb.return
+ ; Addi (SP, SP, cb.fpo)
+ ; Lw (RA, Mem (FP, 0))
+ ; Lw (FP, Mem (FP, -4))
+ ; Jr (RA) ]
+
+let rec compile_prog p counter =
+ match p with
+ | [] -> []
+ | d :: r ->
+ let new_counter, cd = compile_def d counter in
+ cd @ (compile_prog r new_counter)
+
+let compile (code, data) =
+ { text = Baselib.builtins @ compile_prog code 0
+ ; data = List.map (fun (l, s) -> (l, Asciiz s)) data }
diff --git a/lexer.mll b/lexer.mll
@@ -0,0 +1,85 @@
+{
+ open Lexing
+ open Parser
+
+ exception Error of char
+ exception StrEndError of string
+
+ let keyword_table = Hashtbl.create 2
+ let _ =
+ List.iter (fun (kwd, tok) -> Hashtbl.add keyword_table kwd tok)
+ [ "int", Lint_k;
+ "char", Lchar_k;
+ "float", Lfloat_k;
+ "void", Lvoid_k;
+ "bool", Lbool_k ]
+}
+
+let alpha = ['a'-'z' 'A'-'Z']
+let num = ['0'-'9']
+let numf = (num+ '.' num*)
+let character = "'"(_ | '\\'['a' 'b' 't' 'n' 'v' 'f' 'r' '0' '\\' '''])"'"
+let ident = alpha (alpha | num | '-' | '_')*
+
+rule token = parse
+ | eof { Lend }
+ | [ ' ' '\t' ] { token lexbuf }
+ | '\n' { Lexing.new_line lexbuf; token lexbuf }
+ | "//" { comment lexbuf }
+ | "#" { comment lexbuf } (* Ignore preprocessor *)
+ | "/*" { multi_line_comment lexbuf }
+ | ";" { Lsc }
+ | "return" { Lreturn }
+ | "if" { Lif }
+ | "else" { Lelse }
+ | "while" { Lwhile }
+ | "true" as b { Ltrue (bool_of_string b)}
+ | "false" as b { Lfalse (bool_of_string b) }
+ | num+ as n { Lint(int_of_string n) }
+ | numf+ as n { Lfloat(float_of_string n) }
+ | character as c { Lchar(c) }
+ | '"' { Lstr (String.concat "" (str lexbuf))}
+ | '+' { Ladd }
+ | '-' { Lsub }
+ | '*' { Lmul }
+ | '%' { Lmod }
+ | '/' { Ldiv }
+ | '=' { Lassign }
+ | '(' { Lopar }
+ | ')' { Lcpar }
+ | '{' { Lobra }
+ | '}' { Lcbra }
+ | ',' { Lcom }
+ | "==" { Leq }
+ | "!=" { Lneq }
+ | ">" { Lgt }
+ | ">=" { Lge }
+ | "<" { Llt }
+ | "<=" { Lle }
+ | '|' { Lor }
+ | '^' { Lxor }
+ | '&' { Land }
+ | '!' { Lex }
+ | "puts" { Lputs }
+ | "puti" { Lputi }
+ | "geti" { Lgeti }
+ | ident * as id { try
+ Hashtbl.find keyword_table id
+ with Not_found -> Lvar id }
+ | _ as c { raise (Error c) }
+
+and comment = parse
+ | eof { Lend }
+ | '\n' { Lexing.new_line lexbuf; token lexbuf }
+ | _ { comment lexbuf }
+
+and multi_line_comment = parse
+ | eof { Lend }
+ | '\n' { Lexing.new_line lexbuf; multi_line_comment lexbuf }
+ | "*/" { token lexbuf }
+ | _ { multi_line_comment lexbuf }
+
+and str = parse
+ | eof { raise (StrEndError "error eof inside string") }
+ | '"' { [] }
+ | _ as c { (String.make 1 c) :: (str lexbuf) }
diff --git a/mips.ml b/mips.ml
@@ -0,0 +1,126 @@
+type reg =
+ | Zero
+ | SP
+ | RA
+ | FP
+ | V0
+ | A0
+ | A1
+ | T0
+ | T1
+ | F0
+
+type label = string
+type loc =
+ | Lbl of label
+ | Mem of reg * int
+
+type instr =
+ | Label of label
+ | Li of reg * int
+ | Lis of reg * float
+ | La of reg * loc
+ | Sw of reg * loc
+ | Lw of reg * loc
+ | Sb of reg * loc
+ | Lb of reg * loc
+ | Move of reg * reg
+ | Addi of reg * reg * int
+ | Adds of reg * reg * reg
+ | Add of reg * reg * reg
+ | Sub of reg * reg * reg
+ | Mul of reg * reg * reg
+ | Div of reg * reg * reg
+ | Syscall
+ | B of label
+ | Beqz of reg * label
+ | Jal of label
+ | Jr of reg
+ | Mfhi of reg
+ | Mflo of reg
+ | Seq of reg * reg * reg
+ | Sne of reg * reg * reg
+ | Sge of reg * reg * reg
+ | Sgt of reg * reg * reg
+ | Sle of reg * reg * reg
+ | Slt of reg * reg * reg
+ | Xor of reg * reg * reg
+ | Or of reg * reg * reg
+ | And of reg * reg * reg
+
+type directive =
+ | Asciiz of string
+
+type decl = label * directive
+
+type asm = { text: instr list ; data: decl list }
+
+module Syscall = struct
+ let print_int = 1
+ let print_str = 4
+ let read_int = 5
+ let read_str = 8
+ let sbrk = 9
+end
+
+let ps = Printf.sprintf
+
+let fmt_reg = function
+ | Zero -> "$zero"
+ | SP -> "$sp"
+ | FP -> "$fp"
+ | RA -> "$ra"
+ | V0 -> "$v0"
+ | A0 -> "$a0"
+ | A1 -> "$a1"
+ | T0 -> "$t0"
+ | T1 -> "$t1"
+ | F0 -> "$f0"
+
+let fmt_loc = function
+ | Lbl (l) -> l
+ | Mem (r, o) -> ps "%d(%s)" o (fmt_reg r)
+
+let fmt_instr = function
+ | Label (l) -> ps "%s:" l
+ | Li (r, i) -> ps " li %s, %d" (fmt_reg r) i
+ | Lis (r, f) -> ps " li.s %s, %f" (fmt_reg r) f
+ | La (r, a) -> ps " la %s, %s" (fmt_reg r) (fmt_loc a)
+ | Sw (r, a) -> ps " sw %s, %s" (fmt_reg r) (fmt_loc a)
+ | Lw (r, a) -> ps " lw %s, %s" (fmt_reg r) (fmt_loc a)
+ | Sb (r, a) -> ps " sb %s, %s" (fmt_reg r) (fmt_loc a)
+ | Lb (r, a) -> ps " lb %s, %s" (fmt_reg r) (fmt_loc a)
+ | Move (rd, rs) -> ps " move %s, %s" (fmt_reg rd) (fmt_reg rs)
+ | Addi (rd, rs, i) -> ps " addi %s, %s, %d" (fmt_reg rd) (fmt_reg rs) i
+ | Add (rd, rs, rt) -> ps " add %s, %s, %s" (fmt_reg rd) (fmt_reg rs) (fmt_reg rt)
+ | Sub (rd, rs, rt) -> ps " sub %s, %s, %s" (fmt_reg rd) (fmt_reg rt) (fmt_reg rs)
+ | Adds (rd, rs, rt) -> ps " add.s %s, %s, %s" (fmt_reg rd) (fmt_reg rs) (fmt_reg rt)
+ | Mul (rd, rs, rt) -> ps " mul %s, %s, %s" (fmt_reg rd) (fmt_reg rs) (fmt_reg rt)
+ | Div (rd, rs, rt) -> ps " div %s, %s, %s" (fmt_reg rd) (fmt_reg rt) (fmt_reg rs)
+ | Syscall -> ps " syscall"
+ | B (l) -> ps " b %s" l
+ | Beqz (r, l) -> ps " beqz %s, %s" (fmt_reg r) l
+ | Jal (l) -> ps " jal %s" l
+ | Jr (r) -> ps " jr %s" (fmt_reg r)
+ | Mfhi (r) -> ps " mfhi %s" (fmt_reg r)
+ | Mflo (r) -> ps " mflo %s" (fmt_reg r)
+ | Seq (rd, rs, rt) -> ps " seq %s, %s, %s" (fmt_reg rd) (fmt_reg rs) (fmt_reg rt)
+ | Sne (rd, rs, rt) -> ps " sne %s, %s, %s" (fmt_reg rd) (fmt_reg rs) (fmt_reg rt)
+ | Sge (rd, rt, rs) -> ps " sge %s, %s, %s" (fmt_reg rd) (fmt_reg rs) (fmt_reg rt)
+ | Sgt (rd, rt, rs) -> ps " sgt %s, %s, %s" (fmt_reg rd) (fmt_reg rs) (fmt_reg rt)
+ | Sle (rd, rt, rs) -> ps " sle %s, %s, %s" (fmt_reg rd) (fmt_reg rs) (fmt_reg rt)
+ | Slt (rd, rt, rs) -> ps " slt %s, %s, %s" (fmt_reg rd) (fmt_reg rs) (fmt_reg rt)
+ | Xor (rd, rs, rt) -> ps " xor %s, %s, %s" (fmt_reg rd) (fmt_reg rs) (fmt_reg rt)
+ | Or (rd, rs, rt) -> ps " or %s, %s, %s" (fmt_reg rd) (fmt_reg rs) (fmt_reg rt)
+ | And (rd, rs, rt) -> ps " and %s, %s, %s" (fmt_reg rd) (fmt_reg rs) (fmt_reg rt)
+
+
+
+let fmt_dir = function
+ | Asciiz (s) -> ps ".asciiz \"%s\"" s
+
+let print_asm oc asm =
+ Printf.fprintf oc ".text\n.globl main\n" ;
+ List.iter (fun i -> Printf.fprintf oc "%s\n" (fmt_instr i)) asm.text ;
+ Printf.fprintf oc "\n.data\n" ;
+ List.iter (fun (l, d) -> Printf.fprintf oc "%s: %s\n" l (fmt_dir d)) asm.data
diff --git a/parser.mly b/parser.mly
@@ -0,0 +1,201 @@
+%{
+ open Ast
+ open Ast.Syntax
+%}
+
+%token <int> Lint
+%token <float> Lfloat
+%token <bool> Ltrue Lfalse
+%token <string> Lchar
+%token <string> Lstr
+%token <string> Lvar
+%token Ladd Lsub Lmul Ldiv Lmod Lopar Lcpar Lobra Lcbra Lcom Lputs Lputi Lgeti
+%token Lreturn Lassign Lsc Lend Leq Lneq Lif Lelse Lwhile
+%token Lint_k Lfloat_k Lbool_k Lchar_k Lvoid_k
+%token Lge Lgt Lle Llt Lor Land Lxor Lex
+
+%left Ladd Lsub
+%left Lmul Ldiv
+
+%start prog
+
+%type <Ast.Syntax.prog> prog
+
+%%
+
+prog:
+| d = def; p = prog {
+ d :: p
+}
+| Lend {[]}
+
+def:
+| t = type_; n = Lvar; Lopar; a = args; Lcpar; Lobra; b = block; Lcbra {
+ Func { ftype = t
+ ; name = n
+ ; args = a
+ ; body = b
+ ; pos = $startpos(t) }
+}
+| t = type_; n = Lvar; Lopar; no_arg; Lcpar; Lobra; b = block; Lcbra {
+ Func { ftype = t
+ ; name = n
+ ; args = []
+ ; body = b
+ ; pos = $startpos(t) }
+}
+
+arg:
+| t = type_; n = Lvar {
+ t, n
+}
+
+args:
+| {[]}
+| a = arg { a :: [] }
+| a = arg; Lcom; r = args { a :: r }
+
+par:
+| {[]}
+| e = expr {
+ e :: []
+}
+| e = expr; Lcom; p = par {
+ e :: p
+}
+
+block:
+| {[]}
+| Lreturn; e = expr; Lsc {
+ Return { expr = e; pos = $startpos($1) }
+ :: []
+}
+| v = Lvar; Lassign; e = expr; Lsc; b = block {
+ Assign { var = v; expr = e; pos = $startpos($2) }
+ :: b
+}
+| t = type_; v = Lvar; Lsc; b = block {
+ Decl { var = v; type_ = t; pos = $startpos(t) }
+ :: b
+}
+| t = type_; v = Lvar; Lassign; e = expr; Lsc; b = block {
+ Decl { var = v; type_ = t; pos = $startpos(t) }
+ :: Assign { var = v; expr = e; pos = $startpos($3) }
+ :: b
+}
+| e = expr; Lsc; b = block {
+ Expr { expr = e; pos = $startpos(e) }
+ :: b
+}
+| Lif; e = expr; Lobra; b1 = block; Lcbra; b = block {
+ Cond { expr = e; b1 = b1; b2 = []; pos = $startpos($1) }
+ :: b
+}
+| Lif; e = expr; Lobra; b1 = block; Lcbra; Lelse; Lobra; b2 = block; Lcbra; b = block {
+ Cond { expr = e; b1 = b1; b2 = b2; pos = $startpos($1) }
+ :: b
+}
+| Lif; e = expr; Lobra; b1 = block; Lcbra; Lelse; b2 = block; b = block; {
+ Cond { expr = e; b1 = b1; b2 = b2; pos = $startpos($1) }
+ :: b
+}
+| Lwhile; e = expr; Lobra; b1 = block; Lcbra; b = block {
+ Loop { expr = e; b = b1; pos = $startpos($1) }
+ :: b
+}
+
+expr:
+| Lopar; e = expr; Lcpar { e }
+| b = Ltrue {
+ Bool { value = b; pos = $startpos(b)}
+}
+| b = Lfalse {
+ Bool { value = b; pos = $startpos(b)}
+}
+| n = Lint {
+ Int { value = n; pos = $startpos(n)}
+}
+| n = Lfloat {
+ Float { value = n; pos = $startpos(n)}
+}
+| c = Lchar {
+ Char { value = c; pos = $startpos(c)}
+}
+| s = Lstr {
+ Str { value = s; pos = $startpos(s)}
+}
+| a = expr; Lmul; b = expr {
+ Call { func = "_mul"; args = [a; b]; pos = $startpos($2)}
+}
+| a = expr; Ladd; b = expr {
+ Call { func = "_add"; args = [a; b]; pos = $startpos($2)}
+}
+| a = expr; Lsub; b = expr {
+ Call { func = "_sub"; args = [a; b]; pos = $startpos($2)}
+}
+| a = expr; Ldiv; b = expr {
+ Call { func = "_div"; args = [a; b]; pos = $startpos($2)}
+}
+| a = expr; Lmod; b = expr {
+ Call { func = "_mod"; args = [a; b]; pos = $startpos($2)}
+}
+| a = expr; Lge ; b = expr {
+ Call { func = "_ge"; args = [a; b]; pos = $startpos($2)}
+}
+| a = expr; Lgt ; b = expr {
+ Call { func = "_gt"; args = [a; b]; pos = $startpos($2)}
+}
+| a = expr; Lle ; b = expr {
+ Call { func = "_le"; args = [a; b]; pos = $startpos($2)}
+}
+| a = expr; Llt ; b = expr {
+ Call { func = "_lt"; args = [a; b]; pos = $startpos($2)}
+}
+| a = expr; Leq; b = expr {
+ Call { func = "_eq"; args = [a; b]; pos = $startpos($2)}
+}
+| a = expr; Lneq; b = expr {
+ Call { func = "_neq"; args = [a; b]; pos = $startpos($2)}
+}
+| a = expr; Lor ; b = expr {
+ Call { func = "_or"; args = [a; b]; pos = $startpos($2)}
+}
+| a = expr; Land ; b = expr {
+ Call { func = "_and"; args = [a; b]; pos = $startpos($2)}
+}
+| a = expr; Lxor ; b = expr {
+ Call { func = "_xor"; args = [a; b]; pos = $startpos($2)}
+}
+| Lputs; Lopar; a = expr; Lcpar {
+ Call { func = "_puts"; args = [a]; pos = $startpos($1)}
+}
+| Lputi; Lopar; a = expr; Lcpar {
+ Call { func = "_puti"; args = [a]; pos = $startpos($1)}
+}
+| Lgeti; Lopar; Lcpar {
+ Call { func = "_geti"; args = []; pos = $startpos($1)}
+}
+| v = Lvar {
+ Var { name = v; pos = $startpos(v)}
+}
+| v = Lvar; Lopar; a = par; Lcpar; {
+ Call { func = v
+ ; args = a
+ ; pos = $startpos(v)}
+}
+| Lsub; e = expr {
+ Call { func = "_rev"; args = [e]; pos = $startpos($1)}
+}
+| Lex; e = expr {
+ Call { func = "_not"; args = [e]; pos = $startpos($1)}
+}
+
+type_:
+| Lint_k { Int_t }
+| Lbool_k { Bool_t }
+| Lfloat_k { Float_t }
+| Lchar_k { Char_t }
+| Lvoid_k { Void_t }
+
+no_arg:
+| Lvoid_k { Void_t }
diff --git a/semantics.ml b/semantics.ml
@@ -0,0 +1,124 @@
+open Ast
+open Ast.IR
+open Baselib
+
+exception Error of string * Lexing.position
+
+let print_env key v =
+ print_endline (key ^ " " ^ (string_of_type_t v))
+
+let expr_pos expr =
+ match expr with
+ | Syntax.Void v -> v.pos
+ | Syntax.Int n -> n.pos
+ | Syntax.Float n -> n.pos
+ | Syntax.Bool b -> b.pos
+ | Syntax.Char c -> c.pos
+ | Syntax.Str s -> s.pos
+ | Syntax.Var v -> v.pos
+ | Syntax.Call c -> c.pos
+
+let errt expected given pos =
+ raise (Error (Printf.sprintf "expected %s but given %s"
+ (string_of_type_t expected)
+ (string_of_type_t given),
+ pos))
+
+let rec analyze_expr expr env =
+ match expr with
+ | Syntax.Void v -> Void, Void_t
+ | Syntax.Int n -> Int n.value, Int_t
+ | Syntax.Float n -> Float n.value, Float_t
+ | Syntax.Bool b -> Bool b.value, Bool_t
+ | Syntax.Char c -> Char c.value, Char_t
+ | Syntax.Str s -> Str s.value, Str_t
+ | Syntax.Var v ->
+ if Env.mem v.name env then
+ Var v.name, Env.find v.name env
+ else
+ raise (Error (Printf.sprintf "unbound variable '%s'" v.name,
+ v.pos))
+ | Syntax.Call c ->
+ match Env.find_opt c.func env with
+ | Some (Func_t (rt, at)) ->
+ if List.length at != List.length c.args then
+ raise (Error (Printf.sprintf "expected %d arguments but given %d"
+ (List.length at) (List.length c.args), c.pos)) ;
+ let args = List.map2 (fun eat a -> let aa, at = analyze_expr a env
+ in if at = eat then aa
+ else errt eat at (expr_pos a))
+ at c.args in
+ Call (c.func, args), rt
+ | Some _ -> raise (Error (Printf.sprintf "'%s' is not a function" c.func,
+ c.pos))
+ | None -> raise (Error (Printf.sprintf "undefined function '%s'" c.func,
+ c.pos))
+
+let rec analyze_instr instr env =
+ match instr with
+ | Syntax.Assign a ->
+ let ae, et = analyze_expr a.expr env in
+ let t = match Env.find_opt a.var env with
+ | Some t -> t
+ | None -> raise (Error (Printf.sprintf "var %s does not exist" a.var, a.pos)) in
+ if (string_of_type_t t) = (string_of_type_t et) then
+ Assign (a.var, ae), Env.add a.var et env
+ else
+ raise (Error (Printf.sprintf "var %s is of type %s" a.var (string_of_type_t t), a.pos))
+ | Syntax.Return r ->
+ let ae, _ = analyze_expr r.expr env in
+ Return ae, env
+ | Syntax.Decl d ->
+ if Env.mem d.var env then
+ raise (Error (Printf.sprintf "var %s already declared" d.var, d.pos))
+ else
+ Decl d.var, Env.add d.var d.type_ env
+ | Syntax.Expr e ->
+ let ae, t = analyze_expr e.expr env in
+ Expr ae, env
+ | Syntax.Cond e ->
+ let ae, t = analyze_expr e.expr env in
+ let b1, nenv = analyze_block e.b1 env in
+ let b2, nenv' = analyze_block e.b2 nenv in
+ Cond (ae, b1, b2), nenv'
+ | Syntax.Loop l ->
+ let ae, t = analyze_expr l.expr env in
+ let b1, nenv = analyze_block l.b env in
+ Loop (ae, b1), nenv
+
+and analyze_block block env =
+ match block with
+ | [] -> [], env
+ | instr :: rest ->
+ let ai, new_env = analyze_instr instr env in
+ let ab, nenv = analyze_block rest new_env in
+ ai :: ab, nenv
+
+let rec push_arg_in_env arg env =
+ match arg with
+ | [] -> env
+ | (type_, ident) :: cdr ->
+ push_arg_in_env cdr (Env.add ident type_ env)
+
+let analyze_def def env =
+ match def with
+ | Syntax.Func f ->
+ if Env.mem f.name env then
+ raise (Error (Printf.sprintf "func %s already declared" f.name, f.pos))
+ else
+ let types, names = List.split f.args in
+ let l, nenv = analyze_block f.body (push_arg_in_env f.args env) in
+ Func (f.name, names, l), Env.add f.name (Func_t (f.ftype, types)) env (* env is the initial, nenv is the func scope *)
+
+let rec analyze_prog prog env =
+ match prog with
+ | [] -> [], env
+ | d :: r ->
+ let f, nenv = analyze_def d env in
+ let l, nenv' = analyze_prog r nenv in
+ f :: l, nenv'
+
+let analyze parsed =
+ let l, env = analyze_prog parsed Baselib._types_ in
+(* Env.iter print_env env; *)
+ l
diff --git a/simplifier.ml b/simplifier.ml
@@ -0,0 +1,84 @@
+open Ast
+
+module Env = Map.Make(String)
+
+let collect_constant_strings code =
+ let counter = ref 0 in
+ let env = ref Env.empty in
+ let rec ccs_expr = function
+ | IR.Bool b -> IR2.Bool b, []
+ | IR.Void -> IR2.Nil, []
+ | IR.Int n -> IR2.Int n, []
+ | IR.Char c -> IR2.Char c.[0], []
+ | IR.Float f -> IR2.Float f, []
+ | IR.Str s ->
+ (match Env.find_opt s !env with
+ | Some l ->
+ IR2.Data l, []
+ | None ->
+ incr counter ;
+ let l = "str" ^ (string_of_int !counter) in
+ env := Env.add s l !env ;
+ IR2.Data l, [ (l, s) ])
+ | IR.Var v ->
+ IR2.Var v, []
+ | IR.Call (f, args) ->
+ let args2 = List.map ccs_expr args in
+ let ccs = List.flatten (List.map (fun (_, s) -> s) args2) in
+ IR2.Call (f, List.map (fun (e, _) -> e) args2), ccs
+ in
+ (* let ccs_lvalue = function *)
+ (* | IR.LVar v -> *)
+ (* IR2.LVar v, [] *)
+ (* | IR.LAddr a -> *)
+ (* let a2, ccs = ccs_expr a in *)
+ (* IR2.LAddr a2, ccs *)
+ (* in *)
+ let rec ccs_instr = function
+ | IR.Decl v ->
+ IR2.Decl v, []
+ | IR.Return e ->
+ let e2, ccs = ccs_expr e in
+ IR2.Return e2, ccs
+ | IR.Expr e ->
+ let e2, ccs = ccs_expr e in
+ IR2.Expr e2, ccs
+
+ | IR.Assign (lv, e) ->
+ let lv2, ccs_lv = ccs_expr (IR.Var lv) in
+ let e2, ccs_e = ccs_expr e in
+ IR2.Assign (lv2, e2), List.flatten [ ccs_lv; ccs_e ]
+
+ | IR.Cond (t, y, n) ->
+ let t2, ccs_t = ccs_expr t in
+ let y2, ccs_y = ccs_block y in
+ let n2, ccs_n = ccs_block n in
+ IR2.Cond (t2, y2, n2), List.flatten [ ccs_t ; ccs_y ; ccs_n ]
+
+ | IR.Loop (t, y) ->
+ let t2, ccs_t = ccs_expr t in
+ let y2, ccs_y = ccs_block y in
+ IR2.Loop (t2, y2), List.flatten [ ccs_t ; ccs_y ]
+
+ and ccs_block = function
+ | [] -> [], []
+ | i :: r ->
+ let i2, ccs_i = ccs_instr i in
+ let r2, ccs_r = ccs_block r in
+ i2 :: r2, List.flatten [ ccs_i ; ccs_r ]
+ in
+ let ccs_def = function
+ | IR.Func (name, args, body) ->
+ let body2, ccs = ccs_block body in
+ IR2.Func (name, args, body2), ccs
+ in
+ let rec ccs_prog = function
+ | [] -> [], []
+ | d :: r ->
+ let d2, ccs_d = ccs_def d in
+ let r2, ccs_r = ccs_prog r in
+ d2 :: r2, List.flatten [ ccs_d ; ccs_r ]
+ in ccs_prog code
+
+let simplify code =
+ collect_constant_strings code
diff --git a/test.ml b/test.ml
@@ -0,0 +1,31 @@
+open Lexing
+open Ast
+open Ast.IR2
+
+let err msg pos =
+ Printf.eprintf "Error on line %d col %d: %s.\n"
+ pos.pos_lnum (pos.pos_cnum - pos.pos_bol) msg ;
+ exit 1
+
+let () =
+ if (Array.length Sys.argv) != 2 then begin
+ Printf.eprintf "Usage: %s <file>\n" Sys.argv.(0) ;
+ exit 1
+ end;
+ let f = open_in Sys.argv.(1) in
+ let buf = Lexing.from_channel f in
+ try
+ let parsed = Parser.prog Lexer.token buf in
+ close_in f ;
+ let ast = Semantics.analyze parsed in
+(* print_endline (IR.string_of_ir ast); *)
+ let simplified = Simplifier.simplify ast in
+ let compiled = Compiler.compile simplified in
+ Mips.print_asm Stdlib.stdout compiled
+ with
+ | Lexer.Error c ->
+ err (Printf.sprintf "unrecognized char '%c'" c) (Lexing.lexeme_start_p buf)
+ | Parser.Error ->
+ err "syntax error" (Lexing.lexeme_start_p buf)
+ | Semantics.Error (msg, pos) ->
+ err msg pos
diff --git a/tests/0.test b/tests/0.test
@@ -0,0 +1 @@
+// fichier vide
diff --git a/tests/1.test b/tests/1.test
@@ -0,0 +1 @@
+return 1312;
diff --git a/tests/10.test b/tests/10.test
@@ -0,0 +1,7 @@
+// Hello, world!
+
+/* This
+ * is a
+ * multiline
+ * comment
+ */
diff --git a/tests/11.test b/tests/11.test
@@ -0,0 +1,10 @@
+char a = 'A';
+a = 'B';
+
+char b;
+b = 'a';
+
+int c;
+c = 1 + 10;
+
+return a;
diff --git a/tests/12.test b/tests/12.test
@@ -0,0 +1,11 @@
+int add(int a, int b)
+{
+ int a = 21;
+ return a;
+}
+
+int main(int argc, char argv)
+{
+ int a = 42;
+ return a;
+}
diff --git a/tests/13.test b/tests/13.test
@@ -0,0 +1,12 @@
+int main()
+{
+ int c;
+ int d;
+ return c + d;
+}
+
+int add(int a, int b)
+{
+ int f = 42;
+ return d;
+}
diff --git a/tests/14.test b/tests/14.test
@@ -0,0 +1,11 @@
+int main()
+{
+ int c = 1;
+ return 0;
+}
+
+int add(int a, int b)
+{
+ /* int d = c; // should fail */
+ return a + b;
+}
diff --git a/tests/15.test b/tests/15.test
@@ -0,0 +1,11 @@
+int add(int a, int b)
+{
+ int z;
+ return a + b;
+}
+
+int main(int argc, char argv)
+{
+ int z = 12;
+ return z;
+}
diff --git a/tests/16.test b/tests/16.test
@@ -0,0 +1,15 @@
+int foo(int a, int b)
+{
+ return a + b;
+}
+
+int main()
+{
+ int a = 1;
+ int b = 2;
+ foo(a, b);
+
+ int e = foo(a,b);
+
+ return 0;
+}
diff --git a/tests/17.test b/tests/17.test
@@ -0,0 +1,12 @@
+int add(int a, int b)
+{
+ return a + b;
+}
+
+int main()
+{
+ int res = add(21, 21);
+ int res2 = add(res, add(res, res));
+
+ return res;
+}
diff --git a/tests/18.test b/tests/18.test
@@ -0,0 +1,10 @@
+void hello()
+{
+ puts("Hello, wolrd!");
+}
+
+int main()
+{
+ hello();
+ return 0;
+}
diff --git a/tests/19.test b/tests/19.test
@@ -0,0 +1,13 @@
+int add_int(int a, int b)
+{
+ return a + b;
+}
+
+int main()
+{
+ int a = 21;
+ int b = 21;
+
+ puti(add_int(a, b));
+ return 0;
+}
diff --git a/tests/2.test b/tests/2.test
@@ -0,0 +1 @@
+return 21 * 2;
diff --git a/tests/3.test b/tests/3.test
@@ -0,0 +1 @@
+x = 42;
diff --git a/tests/4.test b/tests/4.test
@@ -0,0 +1,2 @@
+x = 42;
+return x;
diff --git a/tests/5.test b/tests/5.test
@@ -0,0 +1,3 @@
+x = 500 + 150 + 6;
+y = x * 2;
+return y;
diff --git a/tests/6.test b/tests/6.test
@@ -0,0 +1,3 @@
+x = 2 * (2 + 6);
+y = x * 2;
+return y;
diff --git a/tests/7.test b/tests/7.test
@@ -0,0 +1,3 @@
+x = 2.0 * (2.2 + 6.1) + 2.1;
+y = x * 2;
+return y;
diff --git a/tests/8.test b/tests/8.test
@@ -0,0 +1,5 @@
+w = '\t';
+x = 'a';
+y = '\\';
+z = '\'';
+return x;
diff --git a/tests/9.test b/tests/9.test
@@ -0,0 +1,4 @@
+a = "Hello, world!";
+b = "";
+c = "\nfoo\tbar";
+return a;