diff --git a/Factor.app/Contents/Info.plist b/Factor.app/Contents/Info.plist index 1c07f95643..591886b196 100644 --- a/Factor.app/Contents/Info.plist +++ b/Factor.app/Contents/Info.plist @@ -32,7 +32,7 @@ CFBundlePackageType APPL CFBundleVersion - 0.93 + 0.94 NSHumanReadableCopyright Copyright © 2003-2010 Factor developers NSServices diff --git a/GNUmakefile b/GNUmakefile index 9f93deedf2..30f44e9eba 100755 --- a/GNUmakefile +++ b/GNUmakefile @@ -4,7 +4,7 @@ ifdef CONFIG AR = ar LD = ld - VERSION = 0.93 + VERSION = 0.94 BUNDLE = Factor.app LIBPATH = -L/usr/X11R6/lib diff --git a/basis/binary-search/binary-search-tests.factor b/basis/binary-search/binary-search-tests.factor index a797219a01..00d67dd7e3 100644 --- a/basis/binary-search/binary-search-tests.factor +++ b/basis/binary-search/binary-search-tests.factor @@ -9,7 +9,9 @@ IN: binary-search.tests [ 4 ] [ 5.5 { 1 2 3 4 5 6 7 8 } [ <=> ] with search drop ] unit-test [ 10 ] [ 10 20 iota [ <=> ] with search drop ] unit-test -[ t ] [ "hello" { "alligator" "cat" "fish" "hello" "ikarus" "java" } sorted-member? ] unit-test +[ 0 ] [ "alligator" { "alligator" "cat" "fish" "hello" "ikarus" "java" } sorted-index ] unit-test [ 3 ] [ "hey" { "alligator" "cat" "fish" "hello" "ikarus" "java" } sorted-index ] unit-test +[ 5 ] [ "java" { "alligator" "cat" "fish" "hello" "ikarus" "java" } sorted-index ] unit-test +[ t ] [ "hello" { "alligator" "cat" "fish" "hello" "ikarus" "java" } sorted-member? ] unit-test [ f ] [ "hello" { "alligator" "cat" "fish" "ikarus" "java" } sorted-member? ] unit-test [ f ] [ "zebra" { "alligator" "cat" "fish" "ikarus" "java" } sorted-member? ] unit-test diff --git a/basis/binary-search/binary-search.factor b/basis/binary-search/binary-search.factor index 83bf9f13f4..36e983a1c8 100644 --- a/basis/binary-search/binary-search.factor +++ b/basis/binary-search/binary-search.factor @@ -1,41 +1,29 @@ -! Copyright (C) 2008, 2009 Slava Pestov. +! Copyright (C) 2008, 2010 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: kernel sequences sequences.private accessors math -math.order combinators hints arrays ; +USING: accessors arrays combinators hints kernel locals math +math.order sequences ; IN: binary-search ) -- i elt ) + from to + 2/ :> midpoint@ + midpoint@ seq nth :> midpoint -: decide ( quot seq -- quot seq <=> ) - [ midpoint swap call ] 2keep rot ; inline - -: finish ( quot slice -- i elt ) - [ [ from>> ] [ midpoint@ ] bi + ] [ seq>> ] bi - [ drop ] [ dup ] [ ] tri* nth ; inline - -DEFER: (search) - -: keep-searching ( seq quot -- slice ) - [ dup midpoint@ ] dip call collapse-slice slice boa (search) ; inline - -: (search) ( ... quot: ( ... elt -- ... <=> ) seq -- ... i elt ) - dup length 1 <= [ - finish + to from - 1 <= [ + midpoint@ midpoint ] [ - decide { - { +eq+ [ finish ] } - { +lt+ [ [ (head) ] keep-searching ] } - { +gt+ [ [ (tail) ] keep-searching ] } + midpoint quot call { + { +eq+ [ midpoint@ midpoint ] } + { +lt+ [ seq from midpoint@ quot (search) ] } + { +gt+ [ seq midpoint@ to quot (search) ] } } case ] if ; inline recursive PRIVATE> -: search ( seq quot -- i elt ) - over empty? [ 2drop f f ] [ swap (search) ] if ; +: search ( seq quot: ( elt -- <=> ) -- i elt ) + over empty? [ 2drop f f ] [ [ 0 over length ] dip (search) ] if ; inline : natural-search ( obj seq -- i elt ) diff --git a/basis/compiler/tests/optimizer.factor b/basis/compiler/tests/optimizer.factor index 2e305b2c39..13917fd6bf 100644 --- a/basis/compiler/tests/optimizer.factor +++ b/basis/compiler/tests/optimizer.factor @@ -193,25 +193,6 @@ M: number detect-number ; ! Regression [ 4 [ + ] ] [ 2 2 [ [ + ] [ call ] keep ] compile-call ] unit-test -! Regression -USE: sorting -USE: binary-search -USE: binary-search.private - -: old-binsearch ( elt quot: ( ..a -- ..b ) seq -- elt quot i ) - dup length 1 <= [ - from>> - ] [ - [ midpoint swap call ] 3keep [ rot ] dip swap dup zero? - [ drop dup from>> swap midpoint@ + ] - [ drop dup midpoint@ head-slice old-binsearch ] if - ] if ; inline recursive - -[ 10 ] [ - 10 20 iota - [ [ - ] swap old-binsearch ] compile-call 2nip -] unit-test - ! Regression : empty-compound ( -- ) ; diff --git a/core/sequences/sequences-docs.factor b/core/sequences/sequences-docs.factor index f7f774ad86..e6c656f2da 100644 --- a/core/sequences/sequences-docs.factor +++ b/core/sequences/sequences-docs.factor @@ -679,16 +679,11 @@ HELP: collapse-slice { $description "Prepares to take the slice of a slice by adjusting the start and end indices accordingly, and replacing the slice with its underlying sequence." } ; -HELP: -{ $values { "seq" sequence } { "slice" slice } } -{ $description "Outputs a slice with the same elements as " { $snippet "seq" } ", and " { $snippet "from" } " equal to 0 and " { $snippet "to" } " equal to the length of " { $snippet "seq" } "." } -{ $notes "Some words create slices then proceed to read the " { $snippet "to" } " and " { $snippet "from" } " slots of the slice. To behave predictably when they are themselves given a slice as input, they apply this word first to get a canonical slice." } ; - HELP: { $values { "from" "a non-negative integer" } { "to" "a non-negative integer" } { "seq" sequence } { "slice" slice } } { $description "Outputs a new virtual sequence sharing storage with the subrange of elements in " { $snippet "seq" } " with indices starting from and including " { $snippet "m" } ", and up to but not including " { $snippet "n" } "." } { $errors "Throws an error if " { $snippet "m" } " or " { $snippet "n" } " is out of bounds." } -{ $notes "Taking the slice of a slice outputs a slice of the underlying sequence of the original slice. Keep this in mind when writing code which depends on the values of " { $snippet "from" } " and " { $snippet "to" } " being equal to the inputs to this word. The " { $link } " word might be helpful in such situations." } ; +{ $notes "Taking the slice of a slice outputs a slice of the underlying sequence, instead of a slice of a slice. This means that you cannot assume that the " { $snippet "from" } " and " { $snippet "to" } " slots of the resulting slice will be equal to the values you passed to " { $link } "." } ; { subseq } related-words @@ -1534,8 +1529,6 @@ $nl { $subsections rest-slice but-last-slice } "Taking a sequence apart into a head and a tail:" { $subsections unclip-slice unclip-last-slice cut-slice } -"A utility for words which use slices as iterators:" -{ $subsections } "Replacing slices with new elements:" { $subsections replace-slice } ; diff --git a/core/sequences/sequences.factor b/core/sequences/sequences.factor index d9c234e717..2155f1439f 100644 --- a/core/sequences/sequences.factor +++ b/core/sequences/sequences.factor @@ -898,11 +898,6 @@ PRIVATE> : unclip-last-slice ( seq -- butlast-slice last ) [ but-last-slice ] [ last ] bi ; inline -: ( seq -- slice ) - dup slice? [ { } like ] when - [ drop 0 ] [ length ] [ ] tri ; - inline - +{ $values + { "pat" sequence } { "bm" boyer-moore } +} +{ $description + "Given a pattern performs pattern preprocessing and returns " + "results as an (opaque) object that is reusable across " + "searches in different sequences via " { $link search-from } + " generic word." +} ; + +HELP: search-from +{ $values + { "seq" sequence } + { "from" "a non-negative integer" } + { "obj" object } + { "i/f" "the index of first match or " { $link f } } +} +{ $description "Performs an attempt to find the first " + "occurence of pattern in " { $snippet "seq" } + " starting from " { $snippet "from" } " using " + "Boyer-Moore search algorithm. Output is the index " + "if the attempt was succeessful and " { $link f } + " otherwise." +} ; + +HELP: search +{ $values + { "seq" sequence } + { "obj" object } + { "i/f" "the index of first match or " { $link f } } +} +{ $description "A simpler variant of " { $link search-from } + " that starts searching from the beginning of the sequence." +} ; + +ARTICLE: "boyer-moore" "The Boyer-Moore algorithm" +{ $heading "Summary" } +"The " { $vocab-link "boyer-moore" } " vocabulary " +"implements a Boyer-Moore string search algorithm with " +"so-called 'strong good suffix shift rule'. Since algorithm is " +"alphabet-independent it is applicable to searching in any " +"collection that implements " { $links "sequence-protocol" } "." + +{ $heading "Complexity" } +"Let " { $snippet "n" } " and " { $snippet "m" } " be lengths " +"of the sequences being searched " { $emphasis "in" } " and " +{ $emphasis "for" } " respectively. Then searching runs in " +{ $snippet "O(n)" } " time in its worst case using additional " +{ $snippet "O(m)" } " space. The preprocessing phase runs in " +{ $snippet "O(m)" } " time." +; + +ABOUT: "boyer-moore" diff --git a/extra/boyer-moore/boyer-moore-tests.factor b/extra/boyer-moore/boyer-moore-tests.factor new file mode 100644 index 0000000000..e444c35189 --- /dev/null +++ b/extra/boyer-moore/boyer-moore-tests.factor @@ -0,0 +1,10 @@ +! Copyright (C) 2010 Dmitry Shubin. +! See http://factorcode.org/license.txt for BSD license. +USING: tools.test boyer-moore ; +IN: boyer-moore.tests + +[ 0 ] [ "qwerty" "" search ] unit-test +[ 0 ] [ "" "" search ] unit-test +[ f ] [ "qw" "qwerty" search ] unit-test +[ 3 ] [ "qwerty" "r" search ] unit-test +[ 8 ] [ "qwerasdfqwer" 2 "qwe" search-from ] unit-test diff --git a/extra/boyer-moore/boyer-moore.factor b/extra/boyer-moore/boyer-moore.factor new file mode 100644 index 0000000000..aba3f614a1 --- /dev/null +++ b/extra/boyer-moore/boyer-moore.factor @@ -0,0 +1,78 @@ +! Copyright (C) 2010 Dmitry Shubin. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors arrays assocs kernel locals math math.order +math.ranges sequences sequences.private z-algorithm ; +IN: boyer-moore + + ] [ [1,b) ] bi ] keep pick + [ (normal-suffixes) ] 2curry each ; inline + +:: (partial-suffixes) ( len old elt i -- len old/new old ) + len elt i 1 + = [ len elt - ] [ old ] if old ; inline + +: partial-suffixes ( zs -- ss ) + [ length dup ] [ ] bi + [ (partial-suffixes) ] map-index 2nip ; inline + +: ( seq -- table ) + z-values [ partial-suffixes ] [ normal-suffixes ] bi + [ [ nip ] when* ] 2map reverse! ; inline + +: insert-bc-shift ( table elt len i -- table ) + 1 + swap - swap pick 2dup key? + [ 3drop ] [ set-at ] if ; inline + +: ( seq -- table ) + H{ } clone swap [ length ] keep + [ insert-bc-shift ] with each-index ; inline + +TUPLE: boyer-moore pattern bc-table gs-table ; + +: gs-shift ( i c bm -- s ) nip gs-table>> nth-unsafe ; inline + +: bc-shift ( i c bm -- s ) bc-table>> at dup 1 ? + ; inline + +: do-shift ( pos i c bm -- newpos ) + [ gs-shift ] [ bc-shift ] bi-curry 2bi max + ; inline + +: match? ( i1 s1 i2 s2 -- ? ) [ nth-unsafe ] 2bi@ = ; inline + +:: mismatch? ( s1 s2 pos len -- i/f ) + len 1 - [ [ pos + s1 ] keep s2 match? not ] + find-last-integer ; inline + +:: (search-from) ( seq from bm -- i/f ) + bm pattern>> :> pat + pat length :> plen + seq length plen - :> lim + from + [ + dup lim <= + [ + seq pat pick plen mismatch? + [ 2dup + seq nth-unsafe bm do-shift t ] [ f ] if* + ] [ drop f f ] if + ] loop ; inline + +PRIVATE> + +: ( pat -- bm ) + dup [ ] [ ] bi + boyer-moore boa ; + +GENERIC: search-from ( seq from obj -- i/f ) + +M: sequence search-from + dup length zero? + [ 3drop 0 ] [ (search-from) ] if ; + +M: boyer-moore search-from (search-from) ; + +: search ( seq obj -- i/f ) [ 0 ] dip search-from ; diff --git a/extra/boyer-moore/summary.txt b/extra/boyer-moore/summary.txt new file mode 100644 index 0000000000..298fcc354b --- /dev/null +++ b/extra/boyer-moore/summary.txt @@ -0,0 +1 @@ +Boyer-Moore string search algorithm diff --git a/extra/boyer-moore/tags.txt b/extra/boyer-moore/tags.txt new file mode 100644 index 0000000000..49b4f2328e --- /dev/null +++ b/extra/boyer-moore/tags.txt @@ -0,0 +1 @@ +algorithms diff --git a/extra/cuda/prefix-sum.cu b/extra/cuda/prefix-sum.cu new file mode 100644 index 0000000000..a77a67f035 --- /dev/null +++ b/extra/cuda/prefix-sum.cu @@ -0,0 +1,103 @@ +#include +#include +#include + +static const int LOG_BANK_COUNT = 4; + +static inline __device__ __host__ unsigned shared_offset(unsigned i) +{ + return i + (i >> LOG_BANK_COUNT); +} + +static inline __device__ __host__ unsigned offset_a(unsigned offset, unsigned i) +{ + return shared_offset(offset * (2*i + 1) - 1); +} + +static inline __device__ __host__ unsigned offset_b(unsigned offset, unsigned i) +{ + return shared_offset(offset * (2*i + 2) - 1); +} + +static inline __device__ __host__ unsigned lpot(unsigned x) +{ + --x; x |= x>>1; x|=x>>2; x|=x>>4; x|=x>>8; x|=x>>16; return ++x; +} + +template +__global__ void prefix_sum_block(T *in, T *out, unsigned n) +{ + extern __shared__ T temp[]; + + int idx = threadIdx.x; + int blocksize = blockDim.x; + + temp[shared_offset(idx )] = (idx < n) ? in[idx ] : 0; + temp[shared_offset(idx + blocksize)] = (idx + blocksize < n) ? in[idx + blocksize] : 0; + + int offset, d; + for (offset = 1, d = blocksize; d > 0; d >>= 1, offset <<= 1) { + __syncthreads(); + if (idx < d) { + unsigned a = offset_a(offset, idx), b = offset_b(offset, idx); + temp[b] += temp[a]; + } + } + + if (idx == 0) temp[shared_offset(blocksize*2 - 1)] = 0; + + for (d = 1; d <= blocksize; d <<= 1) { + offset >>= 1; + __syncthreads(); + + if (idx < d) { + unsigned a = offset_a(offset, idx), b = offset_b(offset, idx); + unsigned t = temp[a]; + temp[a] = temp[b]; + temp[b] += t; + } + } + __syncthreads(); + + if (idx < n) out[idx ] = temp[shared_offset(idx )]; + if (idx + blocksize < n) out[idx + blocksize] = temp[shared_offset(idx + blocksize)]; +} + +template +void prefix_sum(T *in, T *out, unsigned n) +{ + char *device_values; + unsigned n_lpot = lpot(n); + size_t n_pitch; + + cudaError_t error = cudaMallocPitch((void**)&device_values, &n_pitch, sizeof(T)*n, 2); + if (error != 0) { + printf("error %u allocating width %lu height %u\n", error, sizeof(T)*n, 2); + exit(1); + } + + cudaMemcpy(device_values, in, sizeof(T)*n, cudaMemcpyHostToDevice); + + prefix_sum_block<<<1, n_lpot/2, shared_offset(n_lpot)*sizeof(T)>>> + ((T*)device_values, (T*)(device_values + n_pitch), n); + + cudaMemcpy(out, device_values + n_pitch, sizeof(T)*n, cudaMemcpyDeviceToHost); + cudaFree(device_values); +} + +int main() +{ + sranddev(); + + static unsigned in_values[1024], out_values[1024]; + + for (int i = 0; i < 1024; ++i) + in_values[i] = rand() >> 21; + + prefix_sum(in_values, out_values, 1024); + + for (int i = 0; i < 1024; ++i) + printf("%5d => %5d\n", in_values[i], out_values[i]); + + return 0; +} diff --git a/extra/cuda/prefix-sum.ptx b/extra/cuda/prefix-sum.ptx new file mode 100644 index 0000000000..d18917965d --- /dev/null +++ b/extra/cuda/prefix-sum.ptx @@ -0,0 +1,222 @@ + .version 1.4 + .target sm_10, map_f64_to_f32 + // compiled with /usr/local/cuda/bin/../open64/lib//be + // nvopencc 3.0 built on 2010-03-11 + + //----------------------------------------------------------- + // Compiling /tmp/tmpxft_00000236_00000000-7_prefix-sum.cpp3.i (/var/folders/K6/K6oI14wZ2RWhSE+BYqTjA++++TI/-Tmp-/ccBI#.0ATpGM) + //----------------------------------------------------------- + + //----------------------------------------------------------- + // Options: + //----------------------------------------------------------- + // Target:ptx, ISA:sm_10, Endian:little, Pointer Size:32 + // -O3 (Optimization level) + // -g0 (Debug level) + // -m2 (Report advisories) + //----------------------------------------------------------- + + .file 1 "" + .file 2 "/tmp/tmpxft_00000236_00000000-6_prefix-sum.cudafe2.gpu" + .file 3 "/usr/lib/gcc/i686-apple-darwin10/4.2.1/include/stddef.h" + .file 4 "/usr/local/cuda/bin/../include/crt/device_runtime.h" + .file 5 "/usr/local/cuda/bin/../include/host_defines.h" + .file 6 "/usr/local/cuda/bin/../include/builtin_types.h" + .file 7 "/usr/local/cuda/bin/../include/device_types.h" + .file 8 "/usr/local/cuda/bin/../include/driver_types.h" + .file 9 "/usr/local/cuda/bin/../include/texture_types.h" + .file 10 "/usr/local/cuda/bin/../include/vector_types.h" + .file 11 "/usr/local/cuda/bin/../include/device_launch_parameters.h" + .file 12 "/usr/local/cuda/bin/../include/crt/storage_class.h" + .file 13 "/usr/include/i386/_types.h" + .file 14 "/usr/include/time.h" + .file 15 "prefix-sum.cu" + .file 16 "/usr/local/cuda/bin/../include/common_functions.h" + .file 17 "/usr/local/cuda/bin/../include/crt/func_macro.h" + .file 18 "/usr/local/cuda/bin/../include/math_functions.h" + .file 19 "/usr/local/cuda/bin/../include/device_functions.h" + .file 20 "/usr/local/cuda/bin/../include/math_constants.h" + .file 21 "/usr/local/cuda/bin/../include/sm_11_atomic_functions.h" + .file 22 "/usr/local/cuda/bin/../include/sm_12_atomic_functions.h" + .file 23 "/usr/local/cuda/bin/../include/sm_13_double_functions.h" + .file 24 "/usr/local/cuda/bin/../include/common_types.h" + .file 25 "/usr/local/cuda/bin/../include/sm_20_atomic_functions.h" + .file 26 "/usr/local/cuda/bin/../include/sm_20_intrinsics.h" + .file 27 "/usr/local/cuda/bin/../include/texture_fetch_functions.h" + .file 28 "/usr/local/cuda/bin/../include/math_functions_dbl_ptx1.h" + + .extern .shared .align 4 .b8 temp[]; + + .entry _Z16prefix_sum_blockIjEvPT_S1_j ( + .param .u32 __cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_in, + .param .u32 __cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_out, + .param .u32 __cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_n) + { + .reg .u32 %r<81>; + .reg .pred %p<11>; + .loc 15 28 0 +$LBB1__Z16prefix_sum_blockIjEvPT_S1_j: + ld.param.u32 %r1, [__cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_n]; + cvt.s32.u16 %r2, %tid.x; + setp.lt.u32 %p1, %r2, %r1; + @!%p1 bra $Lt_0_7938; + .loc 15 35 0 + ld.param.u32 %r3, [__cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_in]; + mul24.lo.u32 %r4, %r2, 4; + add.u32 %r5, %r3, %r4; + ld.global.u32 %r6, [%r5+0]; + bra.uni $Lt_0_7682; +$Lt_0_7938: + mov.u32 %r6, 0; +$Lt_0_7682: + mov.u32 %r7, temp; + shr.u32 %r8, %r2, 4; + add.u32 %r9, %r2, %r8; + mul.lo.u32 %r10, %r9, 4; + add.u32 %r11, %r10, %r7; + st.shared.u32 [%r11+0], %r6; + cvt.s32.u16 %r12, %ntid.x; + add.s32 %r13, %r12, %r2; + .loc 15 28 0 + ld.param.u32 %r1, [__cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_n]; + .loc 15 35 0 + setp.lt.u32 %p2, %r13, %r1; + @!%p2 bra $Lt_0_8450; + .loc 15 36 0 + ld.param.u32 %r14, [__cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_in]; + mul.lo.u32 %r15, %r13, 4; + add.u32 %r16, %r14, %r15; + ld.global.u32 %r17, [%r16+0]; + bra.uni $Lt_0_8194; +$Lt_0_8450: + mov.u32 %r17, 0; +$Lt_0_8194: + shr.u32 %r18, %r13, 4; + add.u32 %r19, %r13, %r18; + mul.lo.u32 %r20, %r19, 4; + add.u32 %r21, %r20, %r7; + st.shared.u32 [%r21+0], %r17; + .loc 15 39 0 + mov.s32 %r22, %r12; + mov.u32 %r23, 0; + setp.le.s32 %p3, %r12, %r23; + mov.s32 %r24, 1; + @%p3 bra $Lt_0_13314; +$Lt_0_9218: + // Loop body line 39, nesting depth: 1, estimated iterations: unknown + .loc 15 40 0 + bar.sync 0; + setp.le.s32 %p4, %r22, %r2; + @%p4 bra $Lt_0_9474; + // Part of loop body line 39, head labeled $Lt_0_9218 + .loc 15 43 0 + mul24.lo.u32 %r25, %r2, 2; + add.u32 %r26, %r25, 1; + add.u32 %r27, %r25, 2; + mul.lo.u32 %r28, %r24, %r26; + mul.lo.u32 %r29, %r24, %r27; + sub.u32 %r30, %r29, 1; + shr.u32 %r31, %r30, 4; + add.u32 %r32, %r29, %r31; + mul.lo.u32 %r33, %r32, 4; + add.u32 %r34, %r33, %r7; + ld.shared.u32 %r35, [%r34+-4]; + sub.u32 %r36, %r28, 1; + shr.u32 %r37, %r36, 4; + add.u32 %r38, %r28, %r37; + mul.lo.u32 %r39, %r38, 4; + add.u32 %r40, %r7, %r39; + ld.shared.u32 %r41, [%r40+-4]; + add.u32 %r42, %r35, %r41; + st.shared.u32 [%r34+-4], %r42; +$Lt_0_9474: + // Part of loop body line 39, head labeled $Lt_0_9218 + .loc 15 39 0 + shr.s32 %r22, %r22, 1; + shl.b32 %r24, %r24, 1; + mov.u32 %r43, 0; + setp.gt.s32 %p5, %r22, %r43; + @%p5 bra $Lt_0_9218; + bra.uni $Lt_0_8706; +$Lt_0_13314: +$Lt_0_8706: + mov.u32 %r44, 0; + setp.ne.s32 %p6, %r2, %r44; + @%p6 bra $Lt_0_10242; + .loc 15 47 0 + mul24.lo.s32 %r45, %r12, 2; + mov.u32 %r46, 0; + sub.u32 %r47, %r45, 1; + shr.u32 %r48, %r47, 4; + add.u32 %r49, %r45, %r48; + mul.lo.u32 %r50, %r49, 4; + add.u32 %r51, %r7, %r50; + st.shared.u32 [%r51+-4], %r46; +$Lt_0_10242: + mov.u32 %r52, 1; + setp.lt.s32 %p7, %r12, %r52; + @%p7 bra $Lt_0_10754; + mov.s32 %r22, 1; +$Lt_0_11266: + // Loop body line 47, nesting depth: 1, estimated iterations: unknown + .loc 15 50 0 + shr.s32 %r24, %r24, 1; + .loc 15 51 0 + bar.sync 0; + setp.le.s32 %p8, %r22, %r2; + @%p8 bra $Lt_0_11522; + // Part of loop body line 47, head labeled $Lt_0_11266 + .loc 15 55 0 + mul24.lo.u32 %r53, %r2, 2; + add.u32 %r54, %r53, 1; + mul.lo.u32 %r55, %r24, %r54; + sub.u32 %r56, %r55, 1; + shr.u32 %r57, %r56, 4; + add.u32 %r58, %r55, %r57; + mul.lo.u32 %r59, %r58, 4; + add.u32 %r60, %r59, %r7; + ld.shared.u32 %r61, [%r60+-4]; + .loc 15 56 0 + add.u32 %r62, %r53, 2; + mul.lo.u32 %r63, %r24, %r62; + sub.u32 %r64, %r63, 1; + shr.u32 %r65, %r64, 4; + add.u32 %r66, %r63, %r65; + mul.lo.u32 %r67, %r66, 4; + add.u32 %r68, %r67, %r7; + ld.shared.u32 %r69, [%r68+-4]; + st.shared.u32 [%r60+-4], %r69; + .loc 15 57 0 + ld.shared.u32 %r70, [%r68+-4]; + add.u32 %r71, %r70, %r61; + st.shared.u32 [%r68+-4], %r71; +$Lt_0_11522: + // Part of loop body line 47, head labeled $Lt_0_11266 + .loc 15 49 0 + shl.b32 %r22, %r22, 1; + setp.le.s32 %p9, %r22, %r12; + @%p9 bra $Lt_0_11266; +$Lt_0_10754: + .loc 15 60 0 + bar.sync 0; + @!%p1 bra $Lt_0_12290; + .loc 15 62 0 + ld.shared.u32 %r72, [%r11+0]; + ld.param.u32 %r73, [__cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_out]; + mul24.lo.u32 %r74, %r2, 4; + add.u32 %r75, %r73, %r74; + st.global.u32 [%r75+0], %r72; +$Lt_0_12290: + @!%p2 bra $Lt_0_12802; + .loc 15 63 0 + ld.shared.u32 %r76, [%r21+0]; + ld.param.u32 %r77, [__cudaparm__Z16prefix_sum_blockIjEvPT_S1_j_out]; + mul.lo.u32 %r78, %r13, 4; + add.u32 %r79, %r77, %r78; + st.global.u32 [%r79+0], %r76; +$Lt_0_12802: + .loc 15 64 0 + exit; +$LDWend__Z16prefix_sum_blockIjEvPT_S1_j: + } // _Z16prefix_sum_blockIjEvPT_S1_j + diff --git a/extra/cuda/ptx/ptx.factor b/extra/cuda/ptx/ptx.factor new file mode 100644 index 0000000000..8d4925d55f --- /dev/null +++ b/extra/cuda/ptx/ptx.factor @@ -0,0 +1,758 @@ +! (c)2010 Joe Groff bsd license +USING: accessors arrays combinators io kernel math math.parser +roles sequences strings variants words ; +FROM: roles => TUPLE: ; +IN: cuda.ptx + +UNION: dim integer sequence ; +UNION: ?integer POSTPONE: f integer ; +UNION: ?string POSTPONE: f string ; + +VARIANT: ptx-type + .s8 .s16 .s32 .s64 + .u8 .u16 .u32 .u64 + .f16 .f32 .f64 + .b8 .b16 .b32 .b64 + .pred + .texref .samplerref .surfref + .v2: { { of ptx-type } } + .v4: { { of ptx-type } } + .struct: { { name string } } ; + +VARIANT: ptx-arch + sm_10 sm_11 sm_12 sm_13 sm_20 ; +UNION: ?ptx-arch POSTPONE: f ptx-arch ; + +VARIANT: ptx-texmode + .texmode_unified .texmode_independent ; +UNION: ?ptx-texmode POSTPONE: f ptx-texmode ; + +VARIANT: ptx-storage-space + .reg + .sreg + .const: { { bank ?integer } } + .global + .local + .param + .shared + .tex ; +UNION: ?ptx-storage-space POSTPONE: f ptx-storage-space ; + +TUPLE: ptx-target + { arch ?ptx-arch } + { map_f64_to_f32? boolean } + { texmode ?ptx-texmode } ; + +TUPLE: ptx + { version string } + { target ptx-target } + body ; + +TUPLE: ptx-struct-definition + { name string } + members ; + +TUPLE: ptx-variable + { extern? boolean } + { visible? boolean } + { align ?integer } + { storage-space ptx-storage-space } + { type ptx-type } + { name string } + { parameter ?integer } + { dim dim } + { initializer ?string } ; + +TUPLE: ptx-predicate + { negated? boolean } + { variable string } ; +UNION: ?ptx-predicate POSTPONE: f ptx-predicate ; + +TUPLE: ptx-instruction + { label ?string } + { predicate ?ptx-predicate } ; + +TUPLE: ptx-entry + { name string } + params + directives + body ; + +TUPLE: ptx-func < ptx-entry + { return ptx-variable } ; + +TUPLE: ptx-directive ; + +TUPLE: .file < ptx-directive + { info string } ; +TUPLE: .loc < ptx-directive + { info string } ; +TUPLE: .maxnctapersm < ptx-directive + { ncta integer } ; +TUPLE: .minnctapersm < ptx-directive + { ncta integer } ; +TUPLE: .maxnreg < ptx-directive + { n integer } ; +TUPLE: .maxntid < ptx-directive + { dim dim } ; +TUPLE: .pragma < ptx-directive + { pragma string } ; + +VARIANT: ptx-float-rounding-mode + .rn .rz .rm .rp .approx .full ; +VARIANT: ptx-int-rounding-mode + .rni .rzi .rmi .rpi ; +UNION: ?ptx-float-rounding-mode POSTPONE: f ptx-float-rounding-mode ; +UNION: ?ptx-int-rounding-mode POSTPONE: f ptx-int-rounding-mode ; + +UNION: ptx-rounding-mode + ptx-float-rounding-mode ptx-int-rounding-mode ; +UNION: ?ptx-rounding-mode POSTPONE: f ptx-rounding-mode ; + +TUPLE: ptx-typed-instruction < ptx-instruction + { type ptx-type } + { dest string } ; + +TUPLE: ptx-2op-instruction < ptx-typed-instruction + { a string } ; + +TUPLE: ptx-3op-instruction < ptx-typed-instruction + { a string } + { b string } ; + +TUPLE: ptx-4op-instruction < ptx-typed-instruction + { a string } + { b string } + { c string } ; + +TUPLE: ptx-5op-instruction < ptx-typed-instruction + { a string } + { b string } + { c string } + { d string } ; + +TUPLE: ptx-addsub-instruction < ptx-3op-instruction + { sat? boolean } + { cc? boolean } ; + +VARIANT: ptx-mul-mode + .wide ; +UNION: ?ptx-mul-mode POSTPONE: f ptx-mul-mode ; + +TUPLE: ptx-mul-instruction < ptx-3op-instruction + { mode ?ptx-mul-mode } ; + +TUPLE: ptx-mad-instruction < ptx-4op-instruction + { mode ?ptx-mul-mode } + { sat? boolean } ; + +VARIANT: ptx-prmt-mode + .f4e .b4e .rc8 .ecl .ecr .rc16 ; +UNION: ?ptx-prmt-mode POSTPONE: f ptx-prmt-mode ; + +ROLE: ptx-float-ftz + { ftz? boolean } ; +ROLE: ptx-float-env < ptx-float-ftz + { round ?ptx-float-rounding-mode } ; + +VARIANT: ptx-testp-op + .finite .infinite .number .notanumber .normal .subnormal ; + +VARIANT: ptx-cmp-op + .eq .ne + .lt .le .gt .ge + .ls .hs + .equ .neu + .ltu .leu .gtu .geu + .num .nan ; + +VARIANT: ptx-op + .and .or .xor .cas .exch .add .inc .dec .min .max + .popc ; +UNION: ?ptx-op POSTPONE: f ptx-op ; + +SINGLETONS: .lo .hi ; +INSTANCE: .lo ptx-mul-mode +INSTANCE: .lo ptx-cmp-op +INSTANCE: .hi ptx-mul-mode +INSTANCE: .hi ptx-cmp-op + +TUPLE: ptx-set-instruction < ptx-3op-instruction + { cmp-op ptx-cmp-op } + { bool-op ?ptx-op } + { c ?string } + { ftz? boolean } ; + +VARIANT: ptx-cache-op + .ca .cg .cs .lu .cv + .wb .wt ; +UNION: ?ptx-cache-op POSTPONE: f ptx-cache-op ; + +TUPLE: ptx-ldst-instruction < ptx-2op-instruction + { volatile? boolean } + { storage-space ?ptx-storage-space } + { cache-op ?ptx-cache-op } ; + +VARIANT: ptx-cache-level + .L1 .L2 ; + +TUPLE: ptx-branch-instruction < ptx-instruction + { target string } + { uni? boolean } ; + +VARIANT: ptx-membar-level + .cta .gl .sys ; + +VARIANT: ptx-vote-mode + .all .any .uni .ballot ; + +TUPLE: ptx-instruction-not-supported-yet < ptx-instruction ; + +TUPLE: abs <{ ptx-2op-instruction ptx-float-ftz } ; +TUPLE: add <{ ptx-addsub-instruction ptx-float-env } ; +TUPLE: addc < ptx-addsub-instruction ; +TUPLE: and < ptx-3op-instruction ; +TUPLE: atom < ptx-3op-instruction + { storage-space ?ptx-storage-space } + { op ptx-op } + { c ?string } ; +TUPLE: bar.arrive < ptx-instruction + { a string } + { b string } ; +TUPLE: bar.red < ptx-2op-instruction + { op ptx-op } + { b ?string } + { c string } ; +TUPLE: bar.sync < ptx-instruction + { a string } + { b ?string } ; +TUPLE: bfe < ptx-4op-instruction ; +TUPLE: bfi < ptx-5op-instruction ; +TUPLE: bfind < ptx-2op-instruction + { shiftamt? boolean } ; +TUPLE: bra < ptx-branch-instruction ; +TUPLE: brev < ptx-2op-instruction ; +TUPLE: brkpt < ptx-instruction ; +TUPLE: call < ptx-branch-instruction + { return ?string } + params ; +TUPLE: clz < ptx-2op-instruction ; +TUPLE: cnot < ptx-2op-instruction ; +TUPLE: copysign < ptx-3op-instruction ; +TUPLE: cos <{ ptx-2op-instruction ptx-float-env } ; +TUPLE: cvt < ptx-2op-instruction + { rounding-mode ?ptx-rounding-mode } + { ftz? boolean } + { sat? boolean } + { dest-type ptx-type } ; +TUPLE: cvta < ptx-2op-instruction + { to? boolean } + { storage-space ?ptx-storage-space } ; +TUPLE: div <{ ptx-3op-instruction ptx-float-env } ; +TUPLE: ex2 <{ ptx-2op-instruction ptx-float-env } ; +TUPLE: exit < ptx-instruction ; +TUPLE: fma <{ ptx-mad-instruction ptx-float-env } ; +TUPLE: isspacep < ptx-instruction + { storage-space ?ptx-storage-space } + { dest string } + { a string } ; +TUPLE: ld < ptx-ldst-instruction ; +TUPLE: ldu < ptx-ldst-instruction ; +TUPLE: lg2 <{ ptx-2op-instruction ptx-float-env } ; +TUPLE: mad <{ ptx-mad-instruction ptx-float-env } ; +TUPLE: mad24 < ptx-mad-instruction ; +TUPLE: max <{ ptx-3op-instruction ptx-float-ftz } ; +TUPLE: membar < ptx-instruction + { level ptx-membar-level } ; +TUPLE: min <{ ptx-3op-instruction ptx-float-ftz } ; +TUPLE: mov < ptx-2op-instruction ; +TUPLE: mul <{ ptx-mul-instruction ptx-float-env } ; +TUPLE: mul24 < ptx-mul-instruction ; +TUPLE: neg <{ ptx-2op-instruction ptx-float-ftz } ; +TUPLE: not < ptx-2op-instruction ; +TUPLE: or < ptx-3op-instruction ; +TUPLE: pmevent < ptx-instruction + { a string } ; +TUPLE: popc < ptx-2op-instruction ; +TUPLE: prefetch < ptx-instruction + { a string } + { storage-space ?ptx-storage-space } + { level ptx-cache-level } ; +TUPLE: prefetchu < ptx-instruction + { a string } + { level ptx-cache-level } ; +TUPLE: prmt < ptx-4op-instruction + { mode ?ptx-prmt-mode } ; +TUPLE: rcp <{ ptx-2op-instruction ptx-float-env } ; +TUPLE: red < ptx-2op-instruction + { storage-space ?ptx-storage-space } + { op ptx-op } ; +TUPLE: rem < ptx-3op-instruction ; +TUPLE: ret < ptx-instruction ; +TUPLE: rsqrt <{ ptx-2op-instruction ptx-float-env } ; +TUPLE: sad < ptx-4op-instruction ; +TUPLE: selp < ptx-4op-instruction ; +TUPLE: set < ptx-set-instruction + { dest-type ptx-type } ; +TUPLE: setp < ptx-set-instruction + { |dest ?string } ; +TUPLE: shl < ptx-3op-instruction ; +TUPLE: shr < ptx-3op-instruction ; +TUPLE: sin <{ ptx-2op-instruction ptx-float-env } ; +TUPLE: slct < ptx-4op-instruction + { dest-type ptx-type } + { ftz? boolean } ; +TUPLE: sqrt <{ ptx-2op-instruction ptx-float-env } ; +TUPLE: st < ptx-ldst-instruction ; +TUPLE: sub <{ ptx-addsub-instruction ptx-float-env } ; +TUPLE: subc < ptx-addsub-instruction ; +TUPLE: suld < ptx-instruction-not-supported-yet ; +TUPLE: sured < ptx-instruction-not-supported-yet ; +TUPLE: sust < ptx-instruction-not-supported-yet ; +TUPLE: suq < ptx-instruction-not-supported-yet ; +TUPLE: testp < ptx-2op-instruction + { op ptx-testp-op } ; +TUPLE: tex < ptx-instruction-not-supported-yet ; +TUPLE: txq < ptx-instruction-not-supported-yet ; +TUPLE: trap < ptx-instruction ; +TUPLE: vabsdiff < ptx-instruction-not-supported-yet ; +TUPLE: vadd < ptx-instruction-not-supported-yet ; +TUPLE: vmad < ptx-instruction-not-supported-yet ; +TUPLE: vmax < ptx-instruction-not-supported-yet ; +TUPLE: vmin < ptx-instruction-not-supported-yet ; +TUPLE: vset < ptx-instruction-not-supported-yet ; +TUPLE: vshl < ptx-instruction-not-supported-yet ; +TUPLE: vshr < ptx-instruction-not-supported-yet ; +TUPLE: vsub < ptx-instruction-not-supported-yet ; +TUPLE: vote < ptx-2op-instruction + { mode ptx-vote-mode } ; +TUPLE: xor < ptx-3op-instruction ; + +GENERIC: ptx-element-label ( elt -- label ) +M: object ptx-element-label drop f ; + +GENERIC: (write-ptx-element) ( elt -- ) + +: write-ptx-element ( elt -- ) + dup ptx-element-label [ write ":" write ] when* + "\t" write (write-ptx-element) + ";" print ; + +: write-ptx ( ptx -- ) + "\t.version " write dup version>> write ";" print + dup target>> write-ptx-element + body>> [ write-ptx-element ] each ; + +: write-ptx-symbol ( symbol/f -- ) + [ name>> write ] when* ; + +M: f (write-ptx-element) + drop ; + +M: word (write-ptx-element) + name>> write ; + +M: .const (write-ptx-element) + ".const" write + bank>> [ "[" write number>string write "]" write ] when* ; +M: .v2 (write-ptx-element) + ".v2" write of>> (write-ptx-element) ; +M: .v4 (write-ptx-element) + ".v4" write of>> (write-ptx-element) ; +M: .struct (write-ptx-element) + ".struct " write name>> write ; + +M: ptx-target (write-ptx-element) + ".target " write + [ arch>> [ name>> ] [ f ] if* ] + [ map_f64_to_f32?>> [ "map_f64_to_f32" ] [ f ] if ] + [ texmode>> [ name>> ] [ f ] if* ] tri + 3array sift ", " join write ; + +: write-ptx-dim ( dim -- ) + { + { [ dup zero? ] [ drop "[]" write ] } + { [ dup sequence? ] [ [ "[" write number>string write "]" write ] each ] } + [ "[" write number>string write "]" write ] + } cond ; + +M: ptx-variable (write-ptx-element) + dup extern?>> [ ".extern " write ] when + dup visible?>> [ ".visible " write ] when + dup align>> [ ".align " write number>string write " " write ] when* + dup storage-space>> (write-ptx-element) " " write + dup type>> (write-ptx-element) " " write + dup name>> write + dup parameter>> [ "<" write number>string write ">" write ] when* + dup dim>> [ write-ptx-dim ] when* + dup initializer>> [ " = " write write ] when* + drop ; + +: write-params ( params -- ) + "(" write unclip (write-ptx-element) + [ ", " write (write-ptx-element) ] each + ")" write ; + +: write-body ( params -- ) + "\t{" print + [ write-ptx-element ] each + "\t}" write ; + +: write-entry ( entry -- ) + dup name>> write " " write + dup params>> [ write-params ] when* nl + dup directives>> [ (write-ptx-element) ] each nl + dup body>> write-body + drop ; + +M: ptx-entry (write-ptx-element) + ".entry " write + write-entry ; + +M: ptx-func (write-ptx-element) + ".func " write + dup return>> [ "(" write (write-ptx-element) ") " write ] when* + write-entry ; + +M: .file (write-ptx-element) + ".file " write info>> write ; +M: .loc (write-ptx-element) + ".loc " write info>> write ; +M: .maxnctapersm (write-ptx-element) + ".maxnctapersm " write ncta>> number>string write ; +M: .minnctapersm (write-ptx-element) + ".minnctapersm " write ncta>> number>string write ; +M: .maxnreg (write-ptx-element) + ".maxnreg " write n>> number>string write ; +M: .maxntid (write-ptx-element) + ".maxntid " write + dup sequence? [ [ number>string ] map ", " join write ] [ number>string write ] if ; +M: .pragma (write-ptx-element) + ".pragma \"" write pragma>> write "\"" write ; + +M: ptx-instruction ptx-element-label + label>> ; + +: write-insn ( insn name -- insn ) + over predicate>> + [ "@" write dup negated?>> [ "!" write ] when variable>> write " " write ] when* + write ; + +: write-2op ( insn -- ) + dup type>> (write-ptx-element) " " write + dup dest>> write ", " write + dup a>> write + drop ; + +: write-3op ( insn -- ) + dup write-2op ", " write + dup b>> write + drop ; + +: write-4op ( insn -- ) + dup write-3op ", " write + dup c>> write + drop ; + +: write-5op ( insn -- ) + dup write-4op ", " write + dup d>> write + drop ; + +: write-ftz ( insn -- ) + ftz?>> [ ".ftz" write ] when ; + +: write-sat ( insn -- ) + sat?>> [ ".sat" write ] when ; + +: write-float-env ( insn -- ) + dup round>> (write-ptx-element) + write-ftz ; + +: write-int-addsub ( insn -- ) + dup write-sat + dup cc?>> [ ".cc" write ] when + write-3op ; + +: write-addsub ( insn -- ) + dup write-float-env + write-int-addsub ; + +: write-ldst ( insn -- ) + dup volatile?>> [ ".volatile" write ] when + dup storage-space>> (write-ptx-element) + dup cache-op>> (write-ptx-element) + write-2op ; + +: (write-mul) ( insn -- ) + dup mode>> (write-ptx-element) + drop ; + +: write-mul ( insn -- ) + dup write-float-env + dup (write-mul) + write-3op ; + +: write-mad ( insn -- ) + dup write-float-env + dup (write-mul) + dup write-sat + write-4op ; + +: write-uni ( insn -- ) + uni?>> [ ".uni" write ] when ; + +: write-set ( insn -- ) + dup cmp-op>> (write-ptx-element) + dup bool-op>> (write-ptx-element) + write-ftz ; + +M: abs (write-ptx-element) + "abs" write-insn + dup write-ftz + write-2op ; +M: add (write-ptx-element) + "add" write-insn + write-addsub ; +M: addc (write-ptx-element) + "addc" write-insn + write-int-addsub ; +M: and (write-ptx-element) + "and" write-insn + write-3op ; +M: atom (write-ptx-element) + "atom" write-insn + dup storage-space>> (write-ptx-element) + dup op>> (write-ptx-element) + dup write-3op + c>> [ ", " write write ] when* ; +M: bar.arrive (write-ptx-element) + "bar.arrive " write-insn + dup a>> write ", " write + dup b>> write + drop ; +M: bar.red (write-ptx-element) + "bar.red" write-insn + dup op>> (write-ptx-element) + dup write-2op + dup b>> [ ", " write write ] when* + ", " write c>> write ; +M: bar.sync (write-ptx-element) + "bar.arrive " write-insn + dup a>> write + dup b>> [ ", " write write ] when* + drop ; +M: bfe (write-ptx-element) + "bfe" write-insn + write-4op ; +M: bfi (write-ptx-element) + "bfi" write-insn + write-5op ; +M: bfind (write-ptx-element) + "bfind" write-insn + dup shiftamt?>> [ ".shiftamt" write ] when + write-2op ; +M: bra (write-ptx-element) + "bra" write-insn + dup write-uni + " " write target>> write ; +M: brev (write-ptx-element) + "brev" write-insn + write-2op ; +M: brkpt (write-ptx-element) + "brkpt" write-insn drop ; +M: call (write-ptx-element) + "call" write-insn " " write + dup return>> [ "(" write write "), " write ] when* + dup target>> write + dup params>> [ ", (" write ", " join write ")" write ] unless-empty + drop ; +M: clz (write-ptx-element) + "clz" write-insn + write-2op ; +M: cnot (write-ptx-element) + "cnot" write-insn + write-2op ; +M: copysign (write-ptx-element) + "copysign" write-insn + write-3op ; +M: cos (write-ptx-element) + "cos" write-insn + dup write-float-env + write-2op ; +M: cvt (write-ptx-element) + "cvt" write-insn + dup rounding-mode>> (write-ptx-element) + dup write-ftz + dup write-sat + dup dest-type>> (write-ptx-element) + write-2op ; +M: cvta (write-ptx-element) + "cvta" write-insn + dup to?>> [ ".to" write ] when + dup storage-space>> (write-ptx-element) + write-2op ; +M: div (write-ptx-element) + "div" write-insn + dup write-float-env + write-3op ; +M: ex2 (write-ptx-element) + "ex2" write-insn + dup write-float-env + write-2op ; +M: exit (write-ptx-element) + "exit" write-insn drop ; +M: fma (write-ptx-element) + "fma" write-insn + write-mad ; +M: isspacep (write-ptx-element) + "isspacep" write-insn + dup storage-space>> (write-ptx-element) + " " write + dup dest>> write ", " write a>> write ; +M: ld (write-ptx-element) + "ld" write-insn + write-ldst ; +M: ldu (write-ptx-element) + "ldu" write-insn + write-ldst ; +M: lg2 (write-ptx-element) + "lg2" write-insn + dup write-float-env + write-2op ; +M: mad (write-ptx-element) + "mad" write-insn + write-mad ; +M: mad24 (write-ptx-element) + "mad24" write-insn + dup (write-mul) + dup write-sat + write-4op ; +M: max (write-ptx-element) + "max" write-insn + dup write-ftz + write-3op ; +M: membar (write-ptx-element) + "membar" write-insn + dup level>> (write-ptx-element) + drop ; +M: min (write-ptx-element) + "min" write-insn + dup write-ftz + write-3op ; +M: mov (write-ptx-element) + "mov" write-insn + write-2op ; +M: mul (write-ptx-element) + "mul" write-insn + write-mul ; +M: mul24 (write-ptx-element) + "mul24" write-insn + dup (write-mul) + write-3op ; +M: neg (write-ptx-element) + "neg" write-insn + dup write-ftz + write-2op ; +M: not (write-ptx-element) + "not" write-insn + write-2op ; +M: or (write-ptx-element) + "or" write-insn + write-3op ; +M: pmevent (write-ptx-element) + "pmevent" write-insn " " write a>> write ; +M: popc (write-ptx-element) + "popc" write-insn + write-2op ; +M: prefetch (write-ptx-element) + "prefetch" write-insn + dup storage-space>> (write-ptx-element) + dup level>> (write-ptx-element) + " " write a>> write ; +M: prefetchu (write-ptx-element) + "prefetchu" write-insn + dup level>> (write-ptx-element) + " " write a>> write ; +M: prmt (write-ptx-element) + "prmt" write-insn + dup mode>> (write-ptx-element) + write-4op ; +M: rcp (write-ptx-element) + "rcp" write-insn + dup write-float-env + write-3op ; +M: red (write-ptx-element) + "red" write-insn + dup storage-space>> (write-ptx-element) + dup op>> (write-ptx-element) + write-2op ; +M: rem (write-ptx-element) + "rem" write-insn + write-3op ; +M: ret (write-ptx-element) + "ret" write-insn drop ; +M: rsqrt (write-ptx-element) + "rsqrt" write-insn + dup write-float-env + write-2op ; +M: sad (write-ptx-element) + "sad" write-insn + write-4op ; +M: selp (write-ptx-element) + "selp" write-insn + write-4op ; +M: set (write-ptx-element) + "set" write-insn + dup write-set + dup dest-type>> (write-ptx-element) + dup write-3op + c>> [ ", " write write ] when* ; +M: setp (write-ptx-element) + "setp" write-insn + dup write-set + dup type>> (write-ptx-element) " " write + dup dest>> write + dup |dest>> [ "|" write write ] when* ", " write + dup a>> write ", " write + dup b>> write + c>> [ ", " write write ] when* ; +M: shl (write-ptx-element) + "shl" write-insn + write-3op ; +M: shr (write-ptx-element) + "shr" write-insn + write-3op ; +M: sin (write-ptx-element) + "sin" write-insn + dup write-float-env + write-2op ; +M: slct (write-ptx-element) + "slct" write-insn + dup write-ftz + dup dest-type>> (write-ptx-element) + write-4op ; +M: sqrt (write-ptx-element) + "sqrt" write-insn + dup write-float-env + write-2op ; +M: st (write-ptx-element) + "st" write-insn + write-ldst ; +M: sub (write-ptx-element) + "sub" write-insn + write-addsub ; +M: subc (write-ptx-element) + "subc" write-insn + write-int-addsub ; +M: testp (write-ptx-element) + "testp" write-insn + dup op>> (write-ptx-element) + write-2op ; +M: vote (write-ptx-element) + "vote" write-insn + dup mode>> (write-ptx-element) + write-2op ; +M: xor (write-ptx-element) + "or" write-insn + write-3op ; diff --git a/extra/mason/config/config.factor b/extra/mason/config/config.factor index 5ec44df0a9..48f4d307c8 100644 --- a/extra/mason/config/config.factor +++ b/extra/mason/config/config.factor @@ -1,4 +1,4 @@ -! Copyright (C) 2008 Eduardo Cavazos, Slava Pestov. +! Copyright (C) 2008, 2010 Eduardo Cavazos, Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: system io.files io.pathnames namespaces kernel accessors assocs ; @@ -39,11 +39,11 @@ target-os get-global [ ! Keep test-log around? SYMBOL: builder-debug -! Host to send status notifications to. -SYMBOL: status-host +! URL for status notifications. +SYMBOL: status-url -! Username to log in. -SYMBOL: status-username +! Password for status notifications. +SYMBOL: status-secret SYMBOL: upload-help? diff --git a/extra/mason/notify/notify.factor b/extra/mason/notify/notify.factor index d7319c0f20..144f0de122 100644 --- a/extra/mason/notify/notify.factor +++ b/extra/mason/notify/notify.factor @@ -1,57 +1,50 @@ ! Copyright (C) 2009, 2010 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: arrays accessors io io.sockets io.encodings.utf8 io.files -io.launcher kernel make mason.config mason.common mason.email -mason.twitter namespaces sequences prettyprint fry ; +USING: accessors fry http.client io io.encodings.utf8 io.files +kernel mason.common mason.config mason.email mason.twitter +namespaces prettyprint sequences ; IN: mason.notify -: status-notify ( input-file args -- ) - status-host get [ - [ - "ssh" , status-host get , "-l" , status-username get , - "./mason-notify" , - short-host-name , - target-cpu get , - target-os get , - ] { } make prepend - [ 5 ] 2dip '[ - - _ >>stdin - _ >>command - short-running-process - ] retry - ] [ 2drop ] if ; +: status-notify ( report arg message -- ) + [ + short-host-name "host-name" set + target-cpu get "target-cpu" set + target-os get "target-os" set + status-secret get "secret" set + "message" set + "arg" set + "report" set + ] H{ } make-assoc + [ 5 ] dip '[ _ status-url get http-post 2drop ] retry ; : notify-heartbeat ( -- ) - f { "heartbeat" } status-notify ; + f f "heartbeat" status-notify ; : notify-begin-build ( git-id -- ) [ "Starting build of GIT ID " write print flush ] - [ f swap "git-id" swap 2array status-notify ] + [ f swap "git-id" status-notify ] bi ; : notify-make-vm ( -- ) "Compiling VM" print flush - f { "make-vm" } status-notify ; + f f "make-vm" status-notify ; : notify-boot ( -- ) "Bootstrapping" print flush - f { "boot" } status-notify ; + f f "boot" status-notify ; : notify-test ( -- ) "Running tests" print flush - f { "test" } status-notify ; + f f "test" status-notify ; : notify-report ( status -- ) [ "Build finished with status: " write . flush ] [ - [ "report" ] dip - [ [ utf8 file-contents ] dip email-report ] - [ "report" swap name>> 2array status-notify ] - 2bi + [ "report" utf8 file-contents ] dip + [ name>> "report" status-notify ] [ email-report ] 2bi ] bi ; : notify-release ( archive-name -- ) [ "Uploaded " prepend [ print flush ] [ mason-tweet ] bi ] - [ f swap "release" swap 2array status-notify ] + [ f swap "release" status-notify ] bi ; diff --git a/extra/mason/server/notify/authors.txt b/extra/mason/server/notify/authors.txt deleted file mode 100644 index d4f5d6b3ae..0000000000 --- a/extra/mason/server/notify/authors.txt +++ /dev/null @@ -1 +0,0 @@ -Slava Pestov \ No newline at end of file diff --git a/extra/mason/server/notify/notify.factor b/extra/mason/server/notify/notify.factor deleted file mode 100644 index bfa1027d92..0000000000 --- a/extra/mason/server/notify/notify.factor +++ /dev/null @@ -1,80 +0,0 @@ -! Copyright (C) 2009, 2010 Slava Pestov. -! See http://factorcode.org/license.txt for BSD license. -USING: accessors calendar combinators combinators.smart -command-line db.tuples io io.encodings.utf8 io.files kernel -mason.server namespaces present sequences ; -IN: mason.server.notify - -SYMBOLS: host-name target-os target-cpu message message-arg ; - -: parse-args ( command-line -- ) - dup last message-arg set - [ - { - [ host-name set ] - [ target-cpu set ] - [ target-os set ] - [ message set ] - } spread - ] input>host-name - target-os get >>os - target-cpu get >>cpu - dup select-tuple [ ] [ dup insert-tuple ] ?if ; - -: heartbeat ( builder -- ) now >>heartbeat-timestamp drop ; - -: git-id ( builder id -- ) >>current-git-id +starting+ >>status drop ; - -: make-vm ( builder -- ) +make-vm+ >>status drop ; - -: boot ( builder -- ) +boot+ >>status drop ; - -: test ( builder -- ) +test+ >>status drop ; - -: report ( builder status content -- ) - [ >>status ] [ >>last-report ] bi* - dup status>> +clean+ = [ - dup current-git-id>> >>clean-git-id - dup current-timestamp>> >>clean-timestamp - ] when - dup current-git-id>> >>last-git-id - dup current-timestamp>> >>last-timestamp - drop ; - -: release ( builder name -- ) - >>last-release - dup clean-git-id>> >>release-git-id - drop ; - -: update-builder ( builder -- ) - message get { - { "heartbeat" [ heartbeat ] } - { "git-id" [ message-arg get git-id ] } - { "make-vm" [ make-vm ] } - { "boot" [ boot ] } - { "test" [ test ] } - { "report" [ message-arg get contents report ] } - { "release" [ message-arg get release ] } - } case ; - -: handle-update ( command-line timestamp -- ) - [ - [ parse-args find-builder ] dip >>current-timestamp - [ update-builder ] [ update-tuple ] bi - ] with-mason-db ; - -CONSTANT: log-file "resource:mason.log" - -: log-update ( command-line timestamp -- ) - log-file utf8 [ - present write ": " write " " join print - ] with-file-appender ; - -: main ( -- ) - command-line get now [ log-update ] [ handle-update ] 2bi ; - -MAIN: main diff --git a/extra/mason/server/server.factor b/extra/mason/server/server.factor index 26be4df57c..d0fe29b917 100644 --- a/extra/mason/server/server.factor +++ b/extra/mason/server/server.factor @@ -17,8 +17,7 @@ clean-git-id clean-timestamp last-release release-git-id last-git-id last-timestamp last-report current-git-id current-timestamp -status -heartbeat-timestamp ; +status ; builder "BUILDERS" { { "host-name" "HOST_NAME" TEXT +user-assigned-id+ } @@ -39,8 +38,6 @@ builder "BUILDERS" { ! Can't name it CURRENT_TIMESTAMP because of bug in db library { "current-timestamp" "CURR_TIMESTAMP" TIMESTAMP } { "status" "STATUS" TEXT } - - { "heartbeat-timestamp" "HEARTBEAT_TIMESTAMP" TIMESTAMP } } define-persistent : mason-db ( -- db ) "resource:mason.db" ; diff --git a/extra/mason/version/files/files.factor b/extra/mason/version/files/files.factor index 1335885c3d..ba09c6274c 100644 --- a/extra/mason/version/files/files.factor +++ b/extra/mason/version/files/files.factor @@ -10,9 +10,6 @@ IN: mason.version.files : remote-directory ( string -- string' ) [ upload-directory get ] dip "/" glue ; -: remote ( string version -- string ) - remote-directory swap "/" glue ; - : platform ( builder -- string ) [ os>> ] [ cpu>> ] bi (platform) ; @@ -30,10 +27,10 @@ IN: mason.version.files ] [ drop ] 2bi release-directory ; : remote-binary-release-name ( version builder -- string ) - [ binary-release-name ] [ drop ] 2bi remote ; + binary-release-name remote-directory ; : source-release-name ( version -- string ) [ "factor-src-" ".zip" surround ] keep release-directory ; : remote-source-release-name ( version -- string ) - [ source-release-name ] keep remote ; + source-release-name remote-directory ; diff --git a/extra/mason/version/version.factor b/extra/mason/version/version.factor index a2093124f7..bb0fcbf2c3 100644 --- a/extra/mason/version/version.factor +++ b/extra/mason/version/version.factor @@ -13,7 +13,7 @@ IN: mason.version : make-release-directory ( version -- ) "Creating release directory..." print flush - [ "mkdir -p " % "" release-directory % "\n" % ] "" make + [ "mkdir -p " % "" release-directory remote-directory % "\n" % ] "" make execute-on-server ; : tweet-release ( version announcement-url -- ) diff --git a/extra/webapps/mason/download-package.xml b/extra/webapps/mason/download-package.xml index cff9dbe789..27102056f8 100644 --- a/extra/webapps/mason/download-package.xml +++ b/extra/webapps/mason/download-package.xml @@ -28,7 +28,7 @@ - + diff --git a/extra/webapps/mason/make-release/make-release.factor b/extra/webapps/mason/make-release/make-release.factor index c90aaad297..e7cd13a895 100644 --- a/extra/webapps/mason/make-release/make-release.factor +++ b/extra/webapps/mason/make-release/make-release.factor @@ -6,7 +6,12 @@ IN: webapps.mason.make-release : ( -- action ) - [ { { "version" [ v-one-line ] } } validate-params ] >>validate + [ + { + { "version" [ v-one-line ] } + { "announcement-url" [ v-url ] } + } validate-params + ] >>validate [ [ "version" value "announcement-url" value do-release diff --git a/extra/webapps/mason/mason.factor b/extra/webapps/mason/mason.factor index ecb1348532..81eb36a17d 100644 --- a/extra/webapps/mason/mason.factor +++ b/extra/webapps/mason/mason.factor @@ -4,7 +4,7 @@ USING: accessors furnace.auth furnace.db http.server.dispatchers mason.server webapps.mason.grids webapps.mason.make-release webapps.mason.package webapps.mason.release webapps.mason.report -webapps.mason.downloads ; +webapps.mason.downloads webapps.mason.status-update ; IN: webapps.mason TUPLE: mason-app < dispatcher ; @@ -35,5 +35,7 @@ can-make-releases? define-capability "make releases" >>description { can-make-releases? } >>capabilities + "make-release" add-responder - "make-release" add-responder ; + + "status-update" add-responder ; diff --git a/extra/webapps/mason/package/package.factor b/extra/webapps/mason/package/package.factor index 5c36a7f23a..504ba7093f 100644 --- a/extra/webapps/mason/package/package.factor +++ b/extra/webapps/mason/package/package.factor @@ -66,7 +66,7 @@ IN: webapps.mason.package [ current-status "status" set-value ] [ last-build-status "last-build" set-value ] [ clean-build-status "last-clean-build" set-value ] - [ heartbeat-timestamp>> "heartbeat-timestamp" set-value ] + [ current-timestamp>> "current-timestamp" set-value ] [ packages-link "binaries" set-value ] [ clean-image-link "clean-images" set-value ] [ report-link "last-report" set-value ] diff --git a/extra/webapps/mason/status-update/authors.txt b/extra/webapps/mason/status-update/authors.txt new file mode 100644 index 0000000000..1901f27a24 --- /dev/null +++ b/extra/webapps/mason/status-update/authors.txt @@ -0,0 +1 @@ +Slava Pestov diff --git a/extra/webapps/mason/status-update/status-update.factor b/extra/webapps/mason/status-update/status-update.factor new file mode 100644 index 0000000000..5156b1ef70 --- /dev/null +++ b/extra/webapps/mason/status-update/status-update.factor @@ -0,0 +1,74 @@ +! Copyright (C) 2010 Slava Pestov. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors calendar combinators db.tuples furnace.actions +furnace.redirection html.forms http.server.responses io kernel +mason.config mason.server namespaces validators ; +IN: webapps.mason.status-update + +: find-builder ( -- builder ) + builder new + "host-name" value >>host-name + "target-os" value >>os + "target-cpu" value >>cpu + dup select-tuple [ ] [ dup insert-tuple ] ?if ; + +: git-id ( builder id -- ) >>current-git-id +starting+ >>status drop ; + +: make-vm ( builder -- ) +make-vm+ >>status drop ; + +: boot ( builder -- ) +boot+ >>status drop ; + +: test ( builder -- ) +test+ >>status drop ; + +: report ( builder status content -- ) + [ >>status ] [ >>last-report ] bi* + dup status>> +clean+ = [ + dup current-git-id>> >>clean-git-id + dup current-timestamp>> >>clean-timestamp + ] when + dup current-git-id>> >>last-git-id + dup current-timestamp>> >>last-timestamp + drop ; + +: release ( builder name -- ) + >>last-release + dup clean-git-id>> >>release-git-id + drop ; + +: update-builder ( builder -- ) + "message" value { + { "heartbeat" [ drop ] } + { "git-id" [ "arg" value git-id ] } + { "make-vm" [ make-vm ] } + { "boot" [ boot ] } + { "test" [ test ] } + { "report" [ "arg" value "report" value report ] } + { "release" [ "arg" value release ] } + } case ; + +: ( -- action ) + + [ + { + { "host-name" [ v-one-line ] } + { "target-cpu" [ v-one-line ] } + { "target-os" [ v-one-line ] } + { "message" [ v-one-line ] } + { "arg" [ [ v-one-line ] v-optional ] } + { "report" [ ] } + { "secret" [ v-one-line ] } + } validate-params + + "secret" value status-secret get = [ validation-failed ] unless + ] >>validate + + [ + [ + [ + find-builder + now >>current-timestamp + [ update-builder ] [ update-tuple ] bi + ] with-mason-db + "OK" "text/html" + ] if-secure + ] >>submit ; diff --git a/extra/z-algorithm/authors.txt b/extra/z-algorithm/authors.txt new file mode 100644 index 0000000000..e1702c7130 --- /dev/null +++ b/extra/z-algorithm/authors.txt @@ -0,0 +1 @@ +Dmitry Shubin diff --git a/extra/z-algorithm/summary.txt b/extra/z-algorithm/summary.txt new file mode 100644 index 0000000000..c7fadf9e81 --- /dev/null +++ b/extra/z-algorithm/summary.txt @@ -0,0 +1 @@ +Z algorithm for pattern preprocessing diff --git a/extra/z-algorithm/tags.txt b/extra/z-algorithm/tags.txt new file mode 100644 index 0000000000..49b4f2328e --- /dev/null +++ b/extra/z-algorithm/tags.txt @@ -0,0 +1 @@ +algorithms diff --git a/extra/z-algorithm/z-algorithm-docs.factor b/extra/z-algorithm/z-algorithm-docs.factor new file mode 100644 index 0000000000..395dd4952d --- /dev/null +++ b/extra/z-algorithm/z-algorithm-docs.factor @@ -0,0 +1,49 @@ +! Copyright (C) 2010 Dmitry Shubin. +! See http://factorcode.org/license.txt for BSD license. +USING: arrays help.markup help.syntax sequences ; +IN: z-algorithm + +HELP: lcp +{ $values + { "seq1" sequence } { "seq2" sequence } + { "n" "a non-negative integer" } +} +{ $description + "Outputs the length of longest common prefix of two sequences." +} ; + +HELP: z-values +{ $values + { "seq" sequence } { "Z" array } +} +{ $description + "Outputs an array of the same length as " { $snippet "seq" } + ", containing Z-values for given sequence. See " + { $link "z-algorithm" } " for details." +} ; + +ARTICLE: "z-algorithm" "Z algorithm" +{ $heading "Definition" } +"Given the sequence " { $snippet "S" } " and the index " +{ $snippet "i" } ", let " { $snippet "i" } "-th Z value of " +{ $snippet "S" } " be the length of the longest subsequence of " +{ $snippet "S" } " that starts at " { $snippet "i" } +" and matches the prefix of " { $snippet "S" } "." + +{ $heading "Example" } +"Here is an example for string " { $snippet "\"abababaca\"" } ":" +{ $table + { { $snippet "i:" } "0" "1" "2" "3" "4" "5" "6" "7" "8" } + { { $snippet "S:" } "a" "b" "a" "b" "a" "b" "a" "c" "a" } + { { $snippet "Z:" } "9" "0" "5" "0" "3" "0" "1" "0" "1" } +} + +{ $heading "Summary" } +"The " { $vocab-link "z-algorithm" } +" vocabulary implements algorithm for finding all Z values for sequence " +{ $snippet "S" } +" in linear time. In contrast to naive approach which takes " +{ $snippet "Θ(n^2)" } " time." +; + +ABOUT: "z-algorithm" diff --git a/extra/z-algorithm/z-algorithm-tests.factor b/extra/z-algorithm/z-algorithm-tests.factor new file mode 100644 index 0000000000..8a8fd97480 --- /dev/null +++ b/extra/z-algorithm/z-algorithm-tests.factor @@ -0,0 +1,13 @@ +! Copyright (C) 2010 Dmitry Shubin. +! See http://factorcode.org/license.txt for BSD license. +USING: tools.test z-algorithm ; +IN: z-algorithm.tests + +[ 0 ] [ "qwerty" "" lcp ] unit-test +[ 0 ] [ "qwerty" "asdf" lcp ] unit-test +[ 3 ] [ "qwerty" "qwe" lcp ] unit-test +[ 3 ] [ "qwerty" "qwet" lcp ] unit-test + +[ { } ] [ "" z-values ] unit-test +[ { 1 } ] [ "q" z-values ] unit-test +[ { 9 0 5 0 3 0 1 0 1 } ] [ "abababaca" z-values ] unit-test diff --git a/extra/z-algorithm/z-algorithm.factor b/extra/z-algorithm/z-algorithm.factor new file mode 100644 index 0000000000..bd312755a3 --- /dev/null +++ b/extra/z-algorithm/z-algorithm.factor @@ -0,0 +1,38 @@ +! Copyright (C) 2010 Dmitry Shubin. +! See http://factorcode.org/license.txt for BSD license. +USING: arrays combinators.smart kernel locals math math.ranges +sequences sequences.private ; +IN: z-algorithm + +: lcp ( seq1 seq2 -- n ) + [ min-length ] 2keep mismatch [ nip ] when* ; + + Zk + Zk Z push seq Z + Zk 0 > [ k Zk k + 1 - ] [ l r ] if ; inline + +:: inside-zbox ( seq Z l r k -- seq Z l r ) + k l - Z nth :> Zk' + r k - 1 + :> b + seq Z Zk' b < + [ Zk' Z push l r ] ! still inside + [ + seq r 1 + seq b [ tail-slice ] 2bi@ lcp :> q + q b + Z push k q r + + ] if ; inline + +: (z-value) ( seq Z l r k -- seq Z l r ) + 2dup < [ out-of-zbox ] [ inside-zbox ] if ; inline + +:: (z-values) ( seq -- Z ) + V{ } clone 0 0 seq length :> ( Z l r len ) + len Z push [ seq Z l r 1 len [a,b) [ (z-value) ] each ] + drop-outputs Z ; inline + +PRIVATE> + +: z-values ( seq -- Z ) + dup length 0 > [ (z-values) ] when >array ; diff --git a/misc/fuel/fuel-mode.el b/misc/fuel/fuel-mode.el index 6f42b4efc4..98aad10e22 100644 --- a/misc/fuel/fuel-mode.el +++ b/misc/fuel/fuel-mode.el @@ -174,8 +174,11 @@ interacting with a factor listener is at your disposal. (setq fuel-stack-mode-string "/S") (when fuel-mode-stack-p (fuel-stack-mode fuel-mode)) - (when (and fuel-mode (not (file-exists-p (buffer-file-name)))) - (fuel-scaffold--maybe-insert))) + (let ((file-name (buffer-file-name))) + (when (and fuel-mode + file-name + (not (file-exists-p file-name))) + (fuel-scaffold--maybe-insert)))) ;;; Keys:
Host name:
Last heartbeat:
Last heartbeat:
Current status:
Last build:
Last clean build: