cse322, programming languages and compilers 1 7/12/2015 lecture #8, april 26, 2007 using comments...

47
Cse322, Programming Languages and Compilers 1 03/21/22 Lecture #8, April 26, 2007 Using comments Example output from Phase1 Language Semantics (Operational, Axiomatic, Denotational) An interpreter for IR1

Post on 22-Dec-2015

213 views

Category:

Documents


0 download

TRANSCRIPT

Cse322, Programming Languages and Compilers

104/19/23

Lecture #8, April 26, 2007•Using comments•Example output from Phase1•Language Semantics (Operational, Axiomatic, Denotational)•An interpreter for IR1

Cse322, Programming Languages and Compilers

204/19/23

Assignments

• Project #1 due Thursday, May 3, 2007

• Recall Midterm Exam on Tuesday May 1, 2007.

– In class, 1.5 hours

– Next class meeting

– two days before Project 1 is due.

Cse322, Programming Languages and Compilers

304/19/23

Using COMMENTS

and STMT

= MOVE of EXP * EXP

| JUMP of LABEL

| CJUMP of ProgramTypes.RELOP * EXP * EXP * LABEL

| LABEL of LABEL

| CALLST of EXP * EXP list

| RETURN of EXP

| STMTlist of STMT list

| COMMENT of (STMT * string);

Cse322, Programming Languages and Compilers

404/19/23

Printing commentsand sSTMTS [] = "\n" | sSTMTS (COMMENT(LABEL n,s) :: (x :: xs)) = (fill (sSTMT (LABEL n)) 6)^ (fill (sSTMT x) 32 ^ " % "^s^"\n")^ (sSTMTS xs) | sSTMTS (COMMENT(LABEL n,s) :: []) = (fill (sSTMT (LABEL n)) 6)^ (fill " " 32^ " % "^s^"\n") | sSTMTS (COMMENT(x,s) :: xs) = (fill " " 6)^ (fill (sSTMT x) 32 ^ " % "^s^"\n")^ (sSTMTS xs) | sSTMTS (LABEL n :: (x :: xs)) = (fill (sSTMT (LABEL n)) 6)^ (sSTMT x^"\n")^ (sSTMTS xs) | sSTMTS (x::xs) = (" "^(sSTMT x)^"\n")^ (sSTMTS xs)

Cse322, Programming Languages and Compilers

504/19/23

x || (y && (not z))- test ex3;{L0: if MEM(v1) == 1 GOTO L1 % Entry: x JUMP L4L4: if MEM(v2) == 1 GOTO L5 % Entry: y && (!z) JUMP L2L5: if MEM(param1) == 1 GOTO L2 % Entry: !z JUMP L1L1: t0 := 1 % True: x || (y && (!z)) JUMP L3L2: t0 := 0 % False: x || (y && (!z))L3: % Exit: x || (y && (!z))

}[t0]

Cse322, Programming Languages and Compilers

604/19/23

val it = ESEQ ([COMMENT (LABEL 0,"Entry: x"), CJUMP (EQ,MEM (VAR 1),CONST ("1",Bool),1), JUMP 4,COMMENT (LABEL 4,"Entry: y && (!z)"), CJUMP (EQ,MEM (VAR 2),CONST ("1",Bool),5),JUMP 2, COMMENT (LABEL 5,"Entry: !z"), CJUMP (EQ,MEM (PARAM 1),CONST ("1",Bool),2),JUMP 1, COMMENT (LABEL 1,"True: x || (y && (!z))"), MOVE (TEMP 0,CONST ("1",Bool)),JUMP 3, COMMENT (LABEL 2,"False: x || (y && (!z))"), MOVE (TEMP 0,CONST ("0",Bool)), COMMENT (LABEL 3,"Exit: x || (y && (!z))")],TEMP 0) :

EXP

Cse322, Programming Languages and Compilers

704/19/23

Using the comment when generating code

fun label n comment = COMMENT(LABEL n,comment)

fun pass1S env stmt =case stmt of If(test,tstmt,estmt) => let val startL = newLabel() val thenL = newLabel() val elseL = newLabel() in (label startL ("Test of if "^(showExp test)) ::

shortcut env test thenL elseL) @ (label thenL ("Then of if "^ (showExp test)) :: pass1S env tstmt) @ (label elseL ("Else of if "^ (showExp test)):: pass1S env estmt) end

Cse322, Programming Languages and Compilers

804/19/23

Example Project output• The project should translate a ProgramTypes.Program

to ( IR1.FUNC list )

• You can use the function Driver.test to build a translator driver.

• Driver.test: string -> ClassDec list * TypeChecker.Env• And the IR1.sFUNC to print the results for inspection

fun translate filename =

case Driver.test filename of

(typedCList,typecheckEnv) =>

let val funclist =

Phase1.pass1P []

(Program typedCList)

in List.app IR1.sFUNC funclist end;

Cse322, Programming Languages and Compilers

904/19/23

Test file 1 in miniJava

class test01 {

public static void main(String[] a) {

System.out.println("TEST01");

System.out.println(123);

System.out.println();

System.out.println(true);

}

} test01_main(String (Param 0)) ; prString("TEST01") prInt(123) prString("") prInt(true)

Cse322, Programming Languages and Compilers

1004/19/23

Test file #2class test02 { public static void main(String[] a) { boolean b; int i; double d; b = true; i = 1 + 1; d = 3.0 * 4.0; System.out.println(b); System.out.println(i); System.out.println(d); }}

test02_main(String (Param 0)) boolean (Var 0), int (Var 1), real (Var 2); MEM(v1) := true MEM(v2) := 1 + 1 MEM(v3) := 3.0 * 4.0 prInt(MEM(v1)) prInt(MEM(v2)) prInt(MEM(v3))

Cse322, Programming Languages and Compilers

1104/19/23

Test file #2 results as dataval it =

[FUNC

("test02_main",[ObjType "String"],

[BasicType Bool,BasicType Int,BasicType Real],

[MOVE (MEM (VAR 1),CONST ("true",Bool)),

MOVE (MEM (VAR 2),

BINOP (ADD,CONST ("1",Int),CONST ("1",Int))),

MOVE (MEM (VAR 3),

BINOP (MUL,CONST ("3.0",Real),

CONST ("4.0",Real))),

CALLST (NAME "prInt",[MEM (VAR 1)]),

CALLST (NAME "prInt",[MEM (VAR 2)]),

CALLST (NAME "prInt",[MEM (VAR 3)])])]

: IR1.FUNC list

Cse322, Programming Languages and Compilers

1204/19/23

Test file # 3

class test03 {

public static void main(String[] a) {

boolean b = (1>2) || (3<4) && !false;

int i = 2 + 2 * 4 - 9 / 3;

double d = (0.1 + 1.9) * (6.1 - 4.0);

System.out.println(b);

System.out.println(i);

System.out.println(d);

}

}

Cse322, Programming Languages and Compilers

1304/19/23

Test file #3 translated and pretty printed

test03_main(String (Param 0)) boolean (Var 0), int (Var 1), real (Var 2); v1 := {L0: if 1 > 2 GOTO L1 % Entry: 1 > 2 JUMP L4L4: if 3 < 4 GOTO L5 % Entry: (3 < 4) && (!false) JUMP L2L5: if false == 1 GOTO L2 % Entry: !false JUMP L1L1: t8 := 1 % True: (1 > 2) || ((3 < 4) && (!false)) JUMP L3L2: t8 := 0 % False: (1 > 2) || ((3 < 4) && (!false))L3: % Exit: (1 > 2) || ((3 < 4) && (!false))

} [t8] v2 := 2 + 2 * 4 - 9 / 3 v3 := 0.1 + 1.9 * 6.1 - 4.0 prInt(MEM(v1)) prInt(MEM(v2)) prInt(MEM(v3))

red text is the statement part

of an ESEQ

Cse322, Programming Languages and Compilers

1404/19/23

Test file #3 as dataval it = [FUNC ("test03_main",[ObjType "String"], [BasicType Bool,BasicType Int,BasicType Real], [MOVE (VAR 1, ESEQ ([COMMENT (LABEL 0,"Entry: 1 > 2"), CJUMP (GT,CONST ("1",Int),CONST ("2",Int),1),JUMP 4, COMMENT (LABEL 4,"Entry: (3 < 4) && (!false)"), CJUMP (LT,CONST ("3",Int),CONST ("4",Int),5),JUMP 2, COMMENT (LABEL 5,"Entry: !false"), CJUMP (EQ,CONST ("false",Bool),CONST ("1",Bool),2),JUMP 1, COMMENT (LABEL 1,"True: (1 > 2) || ((3 < 4) && (!false))"), MOVE (TEMP 8,CONST ("1",Bool)),JUMP 3, COMMENT (LABEL 2,"False: (1 > 2) || ((3 < 4) && (!false))"), MOVE (TEMP 8,CONST ("0",Bool)), COMMENT (LABEL 3,"Exit: (1 > 2) || ((3 < 4) && (!false))")], TEMP 8)), MOVE (VAR 2, BINOP (SUB, BINOP (ADD,CONST ("2",Int), BINOP (MUL,CONST ("2",Int),CONST ("4",Int))), BINOP (DIV,CONST ("9",Int),CONST ("3",Int)))), MOVE (VAR 3, BINOP (MUL,BINOP (ADD,CONST ("0.1",Real),CONST ("1.9",Real)), BINOP (SUB,CONST ("6.1",Real),CONST ("4.0",Real)))), CALLST (NAME "prInt",[MEM (VAR 1)]), CALLST (NAME "prInt",[MEM (VAR 2)]), CALLST (NAME "prInt",[MEM (VAR 3)])])] : IR1.FUNC list-

Cse322, Programming Languages and Compilers

1504/19/23

Class fields (instance var) initialization

• When and how are instance variables initialized?

class test03 {

int x = 5;

int y = x + 3;

public int test(int param1, int param2) {

System.out.println(param1 - param2 + y);

}

}

When a new object is allocated.

Cse322, Programming Languages and Compilers

1604/19/23

Two strategies

1. When translating new, inline the code for each instance variable

2. When translating classes, generate a class initialization function. When translating a new exp call the initialization function.

Cse322, Programming Languages and Compilers

1704/19/23

• Translating test03

• Two funcs are generated.

Func(“test03_test”,[param1,param2],[], … )Func(“test03_init”,[self],[], {self.x := 5; self.y := self.x + 3;} )

new test03

translates to an IR1 statement list like

ptr := malloc (test03_size)callST(NAME “test03_init”,[ptr])

class test03 {int x = 5;int y = x + 3;public int test(int param1, int param2) { System.out.println(param1 - param2 + y);}}

Cse322, Programming Languages and Compilers

1804/19/23

Semantics of languages• Semantics = “Meaning”• Programming language semantics describe

behavior of a language (rather than its syntax).• All Languages have informal semantics:

– e.g., “This expression is evaluated by evaluating the argument expressions left-to-right to obtain actual parameter values, and then executing the function procedure specified by ID with its formal parameters bound to the actual parameter values.”

– Usually in English; imprecise; assumes implicit knowledge.

• Idea of formal semantics:– Describe behavior in terms of a formalism.– To be useful, formalism should be simpler and/or better-understood

than original language.Possible formalisms include:

» logic» mathematical theory» abstract machines

Cse322, Programming Languages and Compilers

1904/19/23

Why bother with formal semantics?

• Want a precise description of language behavior that can be used by programmer and implementer.

• Formal semantics gives a machine-independent reference for correctness of implementations.

• Can be used to prove properties of languages.– E.g., Security property: a well-typed program cannot “dump core” at

runtime.

• May improve language design by encouraging “cleaner” semantics (much as BNF aided language syntax design).

Cse322, Programming Languages and Compilers

2004/19/23

Kinds of semantics

• Three traditional rough categories:– Operational Semantics

» Describe behavior in terms of an operational model, such as an abstract machine with a specified instruction set.

– Axiomatic Semantics

» Describe behavior using a logical system containing specified axioms and rules of inference.

– Denotational Semantics

» Describe behavior by giving each language phrase a meaning (“denotation” ) in some mathematical model.

• None of these approaches is entirely satisfactory (esp. compared to BNF approach to syntax).

• No one “best” approach -- different forms may be useful for different purposes.

Cse322, Programming Languages and Compilers

2104/19/23

Syntax and Semantics

• All these kinds of semantics are structured around language syntax.

• Useful formalisms try to be compositional: the meaning of the whole is based on the meaning of the parts:

– semantics specifies meaning of primitive elements of the language (AST leaves)

– and of combining elements in the language (AST internal nodes)

• Semantics can be described or computed by defining an attribute grammar over the language.

Cse322, Programming Languages and Compilers

2204/19/23

Operational Semantics• Define behavior of language constructs by describing

how they affect the state of an abstract machine.

• Abstract machine generally defined by a finite state and a set of legal state transitions (instructions).

– Like a real machine, only simpler.

• Semantics is specified by giving a translation from the source language to the instruction set of the abstract machine (a compiler!)

• Machine can be high-level (complicated states and instructions) or low-level (simple states and instructions).

– The lower the machine’s level, the more is explained by the semantics, but the more complicated they get.

– Note similarity to choice of intermediate code level.

Cse322, Programming Languages and Compilers

2304/19/23

Interpreters

• An interpreter is a simple operational semantics

• Interpreters are easy to write• They act as reference implementations.• If there is ever any question, run the

interpreter and find out the answer.

Cse322, Programming Languages and Compilers

2404/19/23

Axiomatic Semantics

• Describe language in terms of assertions about how statements affect predicates on program variables.

• The assertion– {P} S {Q}

says that if P is true before the execution of S, then Q will be true after the execution of S.

• Examples:

{y = 3} x := y + 1 {x = 4}

{y = 0 ^ x = c} while x > 0 do

y := y + 1;

x := x - 1

end {x = 0 ^ y = c}

Cse322, Programming Languages and Compilers

2504/19/23

Axioms and rules of inference• Axioms are simple assertions guaranteed to be

true in the language, e.g.:– {P[y/x]} x := y {P}– where P[y/x] means P with every instance of x replaced by y.

• Rules of inference are rules for deriving a true assertion from other true assertions, e.g.:

$$\frac{\{P\}\; S\; \{Q\} \qquad \{Q\}\; T\; \{R\}}{\{P\}\; S;T\; \{R\}}$$

$$\frac{\{P \wedge B\}\; S\; \{P\}}{\{P\}\; \mathbf{while}\; B\; \mathbf{do}\; S\; \{P \wedge \neg B\}}$$

Cse322, Programming Languages and Compilers

2604/19/23

Uses for Axiomatic Semantics• May be used for proving programs “correct”.

– I.e., given the axioms and rules of inference of the language, show that a given assertion about a given program is true.

– Example: Prove

{k > 0} Prog {sum = 1 + 2 + ... + k}

– where Prog is

i := k; sum := k;

while i > 1 do

i := i - 1;

sum := sum + i

end;

Can be done by repeated application of axioms and rules.

• Axiomatic methods become unwieldy in presence of side effects and aliasing (multiple names for one storage location).

• For handling real programs, automated “proof assistant” is essential.

Cse322, Programming Languages and Compilers

2704/19/23

Denotational Semantics

• Program statements and expressions denote mathematical functions between abstract semantic domains.

– In particular, the program as a whole denotes a function from some domain of inputs to some domain of answers.

• Semantics are specified as a set of denotation functions mapping pieces of program syntax to suitable mathematical functions.

– Functions are attached to corresponding grammatical constructs using synthesized attribute grammars.

• Proper definition of semantic domains is complicated subject -- we’ll ignore.

• Common notation: λx.e is an anonymous function with argument x and body e.

λx.x+1 λy.if y < 0 then -y else y

Cse322, Programming Languages and Compilers

2804/19/23

Denotational Semantics and Straight-line programs

• Semantic domains:– V = Int (values)– Ide (identifiers)– S = Ide -> V (stores)– Exp = S -> V (expressions)– Stm = S -> S (statements)

• Denotation functions (from syntactic class to semantic domain):– I: ID -> Ide– N: NUM -> V– E: exp -> Exp– S: stm -> Stm

• Auxiliary functions:– plus: V x V -> V– update: (S x Ide x V) -> S

Cse322, Programming Languages and Compilers

2904/19/23

Example denotational definitionstm -> ID := exp

S[stm] = λs.update(s,I[ID],E[exp]s)stm -> stm1; stm2

S[stm] = λs.S[stm2](S[stm1]s)exp -> NUM

E[exp] = λs.N[NUM]exp -> ID

E[exp] = λs.s(I[ID])exp -> exp1 + exp2

E[exp] = λs.plus(E[exp1]s,E[exp2]s)exp -> (exp1)

E[exp] = E[exp1]

N[NUM] = NUM.num

I[ID] = ID.ident

Cse322, Programming Languages and Compilers

3004/19/23

Facts about stores and updates

• Definition of update:update = λ(s,id,v). λid1.if id = id1 then v else s id1

• Fact A: For any s0,x,i:(update(s0,x,i)) x

= (λid1.if x = id1 then i else s0 id1) x= (if x = x then i else s0 x)

= i

• Fact B: For any s0,x,i,j :update(update(s0,x,i),x,j) =

update(s0,x,j)

Cse322, Programming Languages and Compilers

3104/19/23

Review questions 1

• Discuss the reason for machine-independent intermediate code representations.

• Registers are a resource that needs managing. Discuss the relevant issues.

• Discuss two possible ways a variable could be aliased.

• When emitting 3-address code for a commutative operator like plus, does the order in which we emit the code (left operand first vs. right operand first) matter?

• Write and use anonymous functions while using the List library.

• Discuss possible ways of representing booleans in intermediate code translation.

Cse322, Programming Languages and Compilers

3204/19/23

Review questions 2• Write and use a short circuit evaluation

intermediate code translator.• Perform array address calculations for general

arrays. What things must be considered?• Discuss and be able to use different string

representations.• Discuss problems and strategies to overcome

each in structure layout.• Discuss representation of union types. Give

two examples of union-types in a language you know.

• Discuss problems when generating code for control statements like if, while, do etc.

• Discuss strategies for translating case statements. What interactions with the IR need to be considered?

Cse322, Programming Languages and Compilers

3304/19/23

Review questions 3

• Discuss possible layout of memory structures in object-oriented languages.

• Discuss possible layout of memory structures in statically scoped languages like Pascal and ML.

• What translation problems are we abstracting in the use of IR1?

• How do we handle the initialization of variables in the IR1 translation process? Discuss both instance variables and local variables.

• Name three kinds of semantics, and describe a possible use for each one.

• Discuss reasons why a reference-interpreter might be defined.

Cse322, Programming Languages and Compilers

3404/19/23

An Interpreter for IR1

• We will write an interpreter for IR1

• Because we write in ML, a mostly functional language, our interpreter is similar in parts to

– denotational (it has domains and denotations) and

– operational semantics (it has an abstract machine).

Cse322, Programming Languages and Compilers

3504/19/23

The abstract machinetype env = { mem : int Array.array , temps : int Array.array , names : (string * VAL ref) list , paramReg: int , varReg: int , stackPtr : int , heapPtr : int , goto : LABEL -> STMT list }

Memory, a register file, statically known method and library functions, several registers, and a set of known labels.

Cse322, Programming Languages and Compilers

3604/19/23

ML info

• In ML a record is a labeled product type. type env =

{ mem : int Array.array

, temps : int Array.array

, names : (string * VAL ref) list

, paramReg: int

, varReg: int

, stackPtr : int

, heapPtr : int

, goto : LABEL -> STMT list }

The labels can be used as functions to select pieces of the record.

For example (#mem r) selects the mem field from the record r.

Cse322, Programming Languages and Compilers

3704/19/23

Denotations

• When we compute, we expect three kinds of values.

– integers (represents int and bool and addresses )

– functions (methods and library functions)

– strings (as arguments to print)

datatype VAL

= VNUM of int

| VFUN of (int -> VAL list -> VAL)

| VSTR of string

Cse322, Programming Languages and Compilers

3804/19/23

Meaning functions• Each syntactic class is given meaning by a

function– EXP– STMT– FUNC

evalEXP: env -> EXP -> VAL

evalSTMTs: env -> STMT list -> VAL

evalFUNC : int array -> (string * VAL ref) list -> int array -> FUNC -> int -> 'a list -> VAL

Cse322, Programming Languages and Compilers

3904/19/23

Memory

• We view memory as a block (array) of contiguous storage.

• We break it into two parts– The stack

– The heap

• They both grow towards each other.

stackPtrheapPtr

low addresses high addresses

Cse322, Programming Languages and Compilers

4004/19/23

Evaluating expressionsfun evalEXP (r:env) e = case e of

BINOP(bop,e1,e2) =>

(case (evalEXP r e1,evalEXP r e2) of

(VNUM n, VNUM m) =>

VNUM(ProgramTypes.evalBINOP bop n m)

| _ => raise (EvalError "Non-number in BINOP"))

| RELOP(rop,e1,e2) =>

(case (evalEXP r e1,evalEXP r e2) of

(VNUM n, VNUM m) =>

VNUM(bool2int(ProgramTypes.evalRELOP rop n m))

| _ => raise (EvalError "Non-number in RELOP"))

| CALL(func,args) =>

fromFun (evalEXP r func)

(#stackPtr r) (map (evalEXP r) args)

Cse322, Programming Languages and Compilers

4104/19/23

Evaluating Operatorsfun evalBINOP binop x y = case binop of ADD => x+y| SUB => x-y| MUL => x*y| DIV => x div y(* Here, we assume positive represents true and zero represents false. *)

| AND => x*y (* n*n > 0 and n*0, 0*0, 0*n = 0 (for n>0) *)| OR => x+y (* 0+0 = 0 and n+n, n+0, 0+n > 0 (for n>0) *)

fun bool2int true = 1 | bool2int false = 0

fun evalRELOP relop x y = case relop of EQ => (x = y)| NE => (x <> y)| LT => (x < y)| LE => (x <= y)| GT => (x > y)| GE => (x >= y)

Cse322, Programming Languages and Compilers

4204/19/23

more expressions| MEM(addr) => VNUM (Array.sub(#mem r, fromNum(evalEXP r addr)))

| NAME(name) => (case List.find (fn (x,v) => name=x) (#names r) of SOME (_,v) => !v | NONE => raise (EvalError ("bad name :" ^ name)))

| TEMP(i) => VNUM (Array.sub(#temps r, i))

| PARAM(i)=> VNUM (Array.sub(#mem r, #paramReg r + i))

| VAR(i) => VNUM (Array.sub(#mem r, #varReg r + i))

Cse322, Programming Languages and Compilers

4304/19/23

even more| MEMBER(obj,i) =>

VNUM (Array.sub(#mem r,

i + fromNum (evalEXP r obj)))

| CONST(str,ProgramTypes.Int) =>

VNUM(valOf(Int.fromString str))

| CONST("0",ProgramTypes.Bool) => VNUM 0

| CONST("1",ProgramTypes.Bool) => VNUM 1

| STRING(str) => VSTR str

| ESEQ(stmts,result) =>

(evalSTMTs (newGoto r stmts) stmts;

evalEXP r result)

Cse322, Programming Languages and Compilers

4404/19/23

and evalSTMTs (r:env) stmts = case stmts of [] => raise (EvalError "empty statement list") | (stmt::rest) => (case stmt of MOVE(dst,src) => (Array.update ( #mem r , fromNum (evalEXP r dst) , fromNum (evalEXP r src) ) ; evalSTMTs r rest ) | LABEL(label) => evalSTMTs r rest | CALLST(func,args) => ( fromFun (evalEXP r func) (#stackPtr r) (map (evalEXP r) args) ; evalSTMTs r rest ) | STMTlist(stmts) => evalSTMTs r (stmts @ rest)

Cse322, Programming Languages and Compilers

4504/19/23

| JUMP(label) =>

evalSTMTs r (#goto r label)

| CJUMP(relop,e1,e2,label) =>

if ProgramTypes.evalRELOP relop

(fromNum (evalEXP r e1))

(fromNum (evalEXP r e2))

then evalSTMTs r (#goto r label)

else evalSTMTs r rest

| RETURN(e) => evalEXP r e

| COMMENT(x,message) => evalSTMTs r (x::rest)

Cse322, Programming Languages and Compilers

4604/19/23

Jumpsfun scan (LABEL thislab :: stmts) lab = if thislab = lab then stmts else scan stmts lab | scan (STMTlist ys :: stmts) lab = scan (ys @ stmts) lab | scan (COMMENT(s,mess) :: stmts) lab = scan (s::stmts) lab | scan (stmt::stmts) lab = scan stmts lab | scan [] lab = raise (EvalError "bad label")

fun newGoto ({mem =a, temps = t, names=b, paramReg=c, varReg = e, stackPtr = x, heapPtr = h, goto = f}) stmts = {mem =a, temps =t, names=b, paramReg =c, varReg = e, stackPtr = x, heapPtr = h, goto = scan stmts};

Cse322, Programming Languages and Compilers

4704/19/23

Next time

• Interpreting FUNC objects• Dealing with Library functions• Managing the stack and the heap.