359 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Factor
		
	
	
		
		
			
		
	
	
			359 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Factor
		
	
	
|  | ! Copyright (C) 2009 Slava Pestov. | ||
|  | ! See http://factorcode.org/license.txt for BSD license. | ||
|  | USING: slides help.markup math math.private kernel sequences | ||
|  | slots.private ;
 | ||
|  | IN: jvm-summit-talk | ||
|  | 
 | ||
! Slide deck for the talk: a literal array of { $slide "title" body... }
! elements. Each body item is either a plain string or a nested
! help-markup element ($code, $list, $table, $link, $snippet, ...).
! The word jvm-summit-talk hands this array to slides-window.
CONSTANT: jvm-summit-slides
{
    { $slide "Factor language implementation"
        "Goals: expressiveness, metaprogramming, performance"
        "We want a language for anything from scripting DSLs to high-performance numerics"
        "I assume you know a bit about compiler implementation: parser -> frontend -> optimizer -> codegen"
        { "This is " { $strong "not" } " a talk about the Factor language" }
        { "Go to " { $url "http://factorcode.org" } " to learn the language" }
    }
    { $slide "Why are dynamic languages slow?"
        "Branching and indirection!"
        "Runtime type checks and dispatch"
        "Integer overflow checks"
        "Boxed integers and floats"
        "Lots of allocation of temporary objects"
    }
    { $slide "Interactive development"
        "Code can be reloaded at any time"
        "Class hierarchy might change"
        "Slots may be added and removed"
        "Functions might be redefined"
    }
    { $slide "Factor's solution"
        "Factor implements most of the library in Factor"
        "Library contains very generic, high-level code"
        "Always compiles to native code"
        "Compiler removes unused generality from high-level code"
        "Inlining, specialization, partial evaluation"
        "And deoptimize when assumptions change"
    }
    { $slide "Introduction: SSA form"
        "Every identifier only has one global definition"
        {
            "Not SSA:"
            { $code
                "x = 1"
                "y = 2"
                "x = x + y"
                "if(z < 0)"
                "    t = x + y"
                "else"
                "    t = x - y"
                "print(t)"
            }
        }
    }
    { $slide "Introduction: SSA form"
        "Rename re-definitions and subsequent usages"
        {
            "Still not SSA:"
            { $code
                "x = 1"
                "y = 2"
                "x1 = x + y"
                "if(z < 0)"
                "    t = x1 + y"
                "else"
                "    t = x1 - y"
                "print(t)"
            }
        }
    }
    { $slide "Introduction: SSA form"
        "Introduce “φ functions” at control-flow merge points"
        {
            "This is SSA:"
            { $code
                "x = 1"
                "y = 2"
                "x1 = x + y"
                "if(z < 0)"
                "    t1 = x1 + y"
                "else"
                "    t2 = x1 - y"
                "t3 = φ(t1,t2)"
                "print(t3)"
            }
        }
    }
    { $slide "Why SSA form?"
        {
            "Def-use chains:"
            { $list
                "Defs-of: instructions that define a value"
                "Uses-of: instructions that use a value"
            }
            "With SSA, defs-of has exactly one element"
        }
    }
    { $slide "Def-use chains"
        "Simpler def-use makes analysis more accurate."
        {
            "Non-SSA example:"
            ! Both branches define and use the same name `s`; the
            ! renamed s1/s2 only appear in the SSA version on the next slide.
            { $code
                "if(x < 0)"
                "    s = new Circle"
                "    a = area(s)"
                "else"
                "    s = new Rectangle"
                "    a = area(s)"
            }
        }
    }
    { $slide "Def-use chains"
        {
            "SSA example:"
            { $code
                "if(x < 0)"
                "    s1 = new Circle"
                "    a1 = area(s1)"
                "else"
                "    s2 = new Rectangle"
                "    a2 = area(s2)"
                "a = φ(a1,a2)"
            }
        }
    }
    { $slide "Factor compiler overview"
        "High-level SSA IR constructed from stack code"
        "High level optimizer transforms high-level IR"
        "Low-level SSA IR is constructed from high-level IR"
        "Low level optimizer transforms low-level IR"
        "Register allocator runs on low-level IR"
        "Machine IR is constructed from low-level IR"
        "Code generation"
    }
    { $slide "High-level optimizer"
        "Frontend: expand macros, inline higher order functions"
        "Propagation: inline methods, constant folding"
        "Escape analysis: unbox tuples"
        "Dead code elimination: clean up"
    }
    { $slide "Higher-order functions"
        "Almost all control flow is done with higher-order functions"
        { { $link if } ", " { $link times } ", " { $link each } }
        "Calling a block is an indirect jump"
        "Solution: inline higher order functions at the call site"
        "Inline the block body at the higher order call site in the function"
        "Record inlining in deoptimization database"
    }
    { $slide "Generic functions"
        "A generic function contains multiple method bodies"
        "Dispatches on the class of argument(s)"
        "In Factor, generic functions are single dispatch"
        "Almost equivalent to message passing"
    }
    { $slide "Tuple slot access"
        "Slot readers and writers are generic functions"
        "Generated automatically when you define a tuple class"
        { "The generated methods call " { $link slot } ", " { $link set-slot } " primitives" }
        "These primitives are not type safe; the generic dispatch performs the type checking for us"
        "If class of dispatch value known statically, inline method"
        "This may result in more methods inlining from additional specialization"
    }
    { $slide "Generic arithmetic"
        { { $link + } ", " { $link * } ", etc perform a double dispatch on arguments" }
        { "Fixed-precision integers (" { $link fixnum } "s) upgrade to " { $link bignum } "s automatically" }
        "Floats and complex numbers are boxed, heap-allocated"
        "Propagation of classes helps for floats"
        "But not for fixnums, because of overflow checks"
        "So we also propagate integer intervals"
        "Interval arithmetic: [a,b] + [c,d] = [a+c,b+d], etc"
    }
    { $slide "Slot value propagation"
        "Complex numbers are even trickier"
        "We can have a complex number with integer components, float components"
        "Even if we inline complex arithmetic methods, still dispatching on components"
        "Solution: propagate slot info"
    }
    { $slide "Constraint propagation"
        "Contrived example:"
        ! `b` holds the isa() result that the constraint table on the next
        ! slide is keyed on, so the false branch writes to a fresh name `d`.
        { $code
            "x = •"
            "b = isa(x,array)"
            "if(b)"
            "    a = length(x)"
            "else"
            "    d = length(x)"
            "c = φ(a,d)"
        }
        { "We should be able to inline the call to " { $snippet "length" } " in the true branch" }
    }
    { $slide "Constraint propagation"
        "We build a table:"
        { $code
            "b true => x is array"
            "b false => x is ~array"
        }
        { "In true branch, apply all " { $snippet "b true" } " constraints" }
        { "In false branch, apply all " { $snippet "b false" } " constraints" }
    }
    { $slide "Going further"
        "High-level optimizer eliminates some dispatch overhead and allocation"
        {
            { "Let's take a look at the " { $link float+ } " primitive" }
            { $list
                "No type checking anymore... but"
                "Loads two tagged pointers from operand stack"
                "Unboxes floats"
                "Adds two floats"
                "Boxes float result and performs a GC check"
            }
        }
    }
    { $slide "Low-level optimizer"
        "Frontend: construct LL SSA IR from HL SSA IR"
        "Alias analysis: remove redundant slot loads/stores"
        "Value numbering: simplify arithmetic"
        "Representation selection: eliminate boxing"
        "Dead code elimination: clean up"
        "Register allocation"
    }
    { $slide "Constructing low-level IR"
        { "Low-level IR is a " { $emphasis "control flow graph" } " of " { $emphasis "basic blocks" } }
        "A basic block is a list of instructions"
        "Register-based IR; infinite, uniform register file"
        { "Instructions:"
            { $list
                "Subroutine calls"
                "Machine arithmetic"
                "Load/store values on operand stack"
                "Box/unbox values"
            }
        }
    }
    { $slide "Inline allocation and GC checks"
        {
            "Allocation of small objects can be done in a few instructions:"
            { $list
                "Bump allocation pointer"
                "Write object header"
                "Fill in payload"
            }
        }
        "Multiple allocations in the same basic block only need a single GC check; saves on a conditional branch"
    }
    { $slide "Alias analysis"
        "Factor constructors are just ordinary functions"
        { "They call a primitive constructor: " { $link new } }
        "When a new object is constructed, it has to be initialized"
        "... but the user's constructor probably fills in all the slots again with actual values"
        "Local alias analysis eliminates redundant slot loads and stores"
    }
    { $slide "Value numbering"
        { "A form of " { $emphasis "redundancy elimination" } }
        "Requires use of SSA form in order to work"
        "Define an equivalence relation over SSA values"
        "Assign a “value number” to each SSA value"
        "If two values have the same number, they will always be equal at runtime"
    }
    { $slide "Types of value numbering"
        "Many variations: algebraic simplifications, various rewrite rules can be tacked on"
        "Local value numbering: in basic blocks"
        "Global value numbering: entire procedure"
        "Factor only does local value numbering"
    }
    { $slide "Value graph and expressions"
        { $table
            {
                {
                    "Basic block:"
                    { $code
                        "x = •"
                        "y = •"
                        "a = x + 1"
                        "b = a + 1"
                        "c = x + 2"
                        "d = b - c"
                        "e = y + d"
                    }
                }
                {
                    "Value numbers:"
                    ! V1 = x, V2 = y, V3 = 1, V4 = 2; V5..V9 correspond to
                    ! a..e in the basic block, so c = x + 2 is (V1 + V4).
                    { $code
                        "V1: •"
                        "V2: •"
                        "V3: 1"
                        "V4: 2"
                        "V5: (V1 + V3)"
                        "V6: (V5 + V3)"
                        "V7: (V1 + V4)"
                        "V8: (V6 - V7)"
                        "V9: (V2 + V8)"
                    }
                }
            }
        }
    }
    { $slide "Expression simplification"
        {
            "Constant folding: if V1 and V2 are constants "
            { $snippet "(V1 op V2)" }
            " can be evaluated at compile-time"
        }
        {
            "Reassociation: if V2 and V3 are constants "
            { $code "((V1 op V2) op V3) => (V1 op (V2 op V3))" }
        }
        {
            "Algebraic identities: if V2 is constant 0, "
            { $code "(V1 + V2) => V1" }
        }
        {
            "Strength reduction: if V2 is a constant power of two, "
            { $code "(V1 * V2) => (V1 << log2(V2))" }
        }
        "etc, etc, etc"
    }
    { $slide "Representation selection overview"
        "Floats and SIMD vectors need to be boxed"
        "Representation: tagged pointer, unboxed float, unboxed SIMD value..."
        "When IR is built, no boxing or unboxing instructions inserted"
        "Representation selection pass makes IR consistent"
    }
    { $slide "Representation selection algorithm"
        {
            "For each SSA value:"
            { $list
                "Compute possible representations"
                "Compute cost of each representation"
                "Pick representation with minimum cost"
            }
        }
        {
            "For each instruction:"
            { $list
                "If it expects a value to be in a different representation, insert box or unbox code"
            }
        }
    }
    { $slide "Register allocation"
        "Linear scan algorithm used in Java HotSpot Client"
        "Described in Christian Wimmer's master's thesis"
        "Works fine on x86-64, not too great on x86-32"
        "Good enough since basic blocks tend to be short, with lots of procedure calls"
        "Might switch to graph coloring eventually"
    }
    { $slide "Compiler tools"
        "Printing high level IR"
        "Printing low level IR"
        "Disassembly"
        "Display call tree"
        "Display control flow graph"
        "Display dominator tree"
    }
}
|  | 
 | ||
! Entry word for the talk: pushes the jvm-summit-slides deck and passes it
! to slides-window (defined in the slides vocabulary).
! Stack effect ( -- ): takes no inputs and leaves no outputs.
: jvm-summit-talk ( -- ) jvm-summit-slides slides-window ;

! Make jvm-summit-talk the main word of this vocabulary.
MAIN: jvm-summit-talk