From d8ce35aaccd9b2c0941e7bef0b4f7fce28b3d3b0 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Sun, 30 May 2010 20:10:39 -0400 Subject: [PATCH] benchmark.spectral-norm-simd: SIMD implementation of spectral-norm algorithm using SIMD primitives, about 40% faster but takes ages to compile -- good compile-time benchmark --- .../benchmark/spectral-norm-simd/authors.txt | 1 + .../spectral-norm-simd.factor | 68 +++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 extra/benchmark/spectral-norm-simd/authors.txt create mode 100644 extra/benchmark/spectral-norm-simd/spectral-norm-simd.factor diff --git a/extra/benchmark/spectral-norm-simd/authors.txt b/extra/benchmark/spectral-norm-simd/authors.txt new file mode 100644 index 0000000000..7852139357 --- /dev/null +++ b/extra/benchmark/spectral-norm-simd/authors.txt @@ -0,0 +1 @@ +Marc Fauconneau diff --git a/extra/benchmark/spectral-norm-simd/spectral-norm-simd.factor b/extra/benchmark/spectral-norm-simd/spectral-norm-simd.factor new file mode 100644 index 0000000000..ce91813f9d --- /dev/null +++ b/extra/benchmark/spectral-norm-simd/spectral-norm-simd.factor @@ -0,0 +1,68 @@ +! Copyright (C) 2010 Marc Fauconneau. +! See http://factorcode.org/license.txt for BSD license. +USING: alien.c-types specialized-arrays kernel math +math.functions math.vectors sequences sequences.private +prettyprint words typed locals math.vectors.simd +math.vectors.simd.cords ; +SPECIALIZED-ARRAYS: double double-4 ; +IN: benchmark.spectral-norm-simd + +:: inner-loop ( u n quot -- seq ) + n 4 /i iota [| i | + n iota [| j | u i j quot call ] [ v+ ] map-reduce + ] double-4-array{ } map-as ; inline + +: eval-A ( i j -- n ) + [ >float ] bi@ + [ drop ] [ + [ ] [ 1 + ] bi * 0.5 * ] 2bi + + 1 + ; inline + +: vrecip ( u -- v ) double-4{ 1.0 1.0 1.0 1.0 } swap v/ ; inline + +:: eval4-A ( i j -- n ) + i 4 * 0 + j eval-A + i 4 * 1 + j eval-A + i 4 * 2 + j eval-A + i 4 * 3 + j eval-A + double-4-boa vrecip ; inline + +: (eval-A-times-u) ( u i j -- x ) + [ swap nth-unsafe ] [ eval4-A ] bi-curry bi* n*v ; inline + +: eval-A-times-u ( n u -- seq ) + [ (eval-A-times-u) ] inner-loop ; inline + +:: eval4-A' ( i j -- n ) + j i 4 * 0 + eval-A + j i 4 * 1 + eval-A + j i 4 * 2 + eval-A + j i 4 * 3 + eval-A + double-4-boa vrecip ; inline + +: (eval-At-times-u) ( u i j -- x ) + [ swap nth-unsafe ] [ eval4-A' ] bi-curry bi* n*v ; inline + +: eval-At-times-u ( u n -- seq ) + [ double-array-cast ] dip [ (eval-At-times-u) ] inner-loop ; inline + +: eval-AtA-times-u ( u n -- seq ) + [ double-array-cast ] dip [ eval-A-times-u ] [ eval-At-times-u ] bi ; inline + +: ones ( n -- seq ) + 4 /i [ double-4{ 1.0 1.0 1.0 1.0 } ] double-4-array{ } replicate-as ; inline + +:: u/v ( n -- u v ) + n ones dup + 10 [ + drop + n eval-AtA-times-u + [ n eval-AtA-times-u ] keep + ] times ; inline + +TYPED: spectral-norm ( n: fixnum -- norm ) + u/v [ double-array-cast ] bi@ [ v. ] [ norm-sq ] bi /f sqrt ; + +: spectral-norm-main ( -- ) + 2000 spectral-norm . ; + +MAIN: spectral-norm-main