From e4a2b671d30beda698b120e35a3c440d2bd9ef84 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Mon, 24 Nov 2008 22:16:29 -0600 Subject: [PATCH 1/9] remove flags from transition-tables --- basis/regexp/transition-tables/transition-tables.factor | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/basis/regexp/transition-tables/transition-tables.factor b/basis/regexp/transition-tables/transition-tables.factor index 80317a1b66..5375d813e1 100644 --- a/basis/regexp/transition-tables/transition-tables.factor +++ b/basis/regexp/transition-tables/transition-tables.factor @@ -20,18 +20,19 @@ TUPLE: default ; : ( from to obj -- transition ) literal-transition make-transition ; + : ( from to obj -- transition ) class-transition make-transition ; + : ( from to -- transition ) t default-transition make-transition ; -TUPLE: transition-table transitions start-state final-states flags ; +TUPLE: transition-table transitions start-state final-states ; : ( -- transition-table ) transition-table new H{ } clone >>transitions - H{ } clone >>final-states - H{ } clone >>flags ; + H{ } clone >>final-states ; : maybe-initialize-key ( key hashtable -- ) 2dup key? [ 2drop ] [ H{ } clone -rot set-at ] if ; From 384a11eceec67996d05c67e73aeea0ef459ca52a Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Mon, 24 Nov 2008 22:17:47 -0600 Subject: [PATCH 2/9] move things around. the parser no longer adjusts nodes based on options, instead opting for nfa to handle it (case-insensitive, multiline, dotall, reversed..) --- basis/regexp/backend/backend.factor | 5 +- basis/regexp/classes/classes.factor | 25 ++++- basis/regexp/nfa/nfa.factor | 85 +++++++++++------ basis/regexp/parser/parser-tests.factor | 4 +- basis/regexp/parser/parser.factor | 118 ++++++++++-------------- basis/regexp/regexp-tests.factor | 39 ++++---- basis/regexp/regexp.factor | 21 +---- basis/regexp/utils/utils.factor | 17 ---- 8 files changed, 158 insertions(+), 156 deletions(-) diff --git a/basis/regexp/backend/backend.factor b/basis/regexp/backend/backend.factor index 4c82876650..5eff0579c8 100644 --- a/basis/regexp/backend/backend.factor +++ b/basis/regexp/backend/backend.factor @@ -5,12 +5,13 @@ IN: regexp.backend TUPLE: regexp raw - { stack vector } - parse-tree { options hashtable } + stack + parse-tree nfa-table dfa-table minimized-table + matchers { nfa-traversal-flags hashtable } { dfa-traversal-flags hashtable } { state integer } diff --git a/basis/regexp/classes/classes.factor b/basis/regexp/classes/classes.factor index f143bebdf7..eec0d309b1 100644 --- a/basis/regexp/classes/classes.factor +++ b/basis/regexp/classes/classes.factor @@ -1,12 +1,25 @@ ! Copyright (C) 2008 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors kernel math math.order symbols regexp.parser +USING: accessors kernel math math.order symbols words regexp.utils unicode.categories combinators.short-circuit ; IN: regexp.classes +SINGLETONS: any-char any-char-no-nl +letter-class LETTER-class Letter-class digit-class +alpha-class non-newline-blank-class +ascii-class punctuation-class java-printable-class blank-class +control-character-class hex-digit-class java-blank-class c-identifier-class +unmatchable-class terminator-class word-boundary-class ; + +SINGLETONS: beginning-of-input beginning-of-line +end-of-input end-of-line ; + +MIXIN: node +TUPLE: character-class-range from to ; INSTANCE: character-class-range node + GENERIC: class-member? ( obj class -- ? ) -M: word class-member? ( obj class -- ? ) 2drop f ; +M: t class-member? ( obj class -- ? ) 2drop f ; M: integer class-member? ( obj class -- ? ) 2drop f ; @@ -18,7 +31,7 @@ M: any-char class-member? ( obj class -- ? ) M: any-char-no-nl class-member? ( obj class -- ? ) drop CHAR: \n = not ; - + M: letter-class class-member? ( obj class -- ? ) drop letter? ; @@ -70,3 +83,9 @@ M: terminator-class class-member? ( obj class -- ? ) [ CHAR: \u002028 = ] [ CHAR: \u002029 = ] } 1|| ; + +M: beginning-of-line class-member? ( obj class -- ? ) + 2drop f ; + +M: end-of-line class-member? ( obj class -- ? ) + 2drop f ; diff --git a/basis/regexp/nfa/nfa.factor b/basis/regexp/nfa/nfa.factor index 99d94b4bcb..7620652948 100644 --- a/basis/regexp/nfa/nfa.factor +++ b/basis/regexp/nfa/nfa.factor @@ -3,7 +3,7 @@ USING: accessors arrays assocs grouping kernel regexp.backend locals math namespaces regexp.parser sequences fry quotations math.order math.ranges vectors unicode.categories regexp.utils -regexp.transition-tables words sets ; +regexp.transition-tables words sets regexp.classes unicode.case ; IN: regexp.nfa SYMBOL: negation-mode @@ -22,8 +22,13 @@ SINGLETON: front-anchor INSTANCE: front-anchor traversal-flag SINGLETON: back-anchor INSTANCE: back-anchor traversal-flag SINGLETON: word-boundary INSTANCE: word-boundary traversal-flag -: add-global-flag ( flag -- ) - current-regexp get nfa-table>> flags>> conjoin ; +: options ( -- obj ) current-regexp get options>> ; + +: option? ( obj -- ? ) options key? ; + +: option-on ( obj -- ) options conjoin ; + +: option-off ( obj -- ) options delete-at ; : next-state ( regexp -- state ) [ state>> ] [ [ 1+ ] change-state drop ] bi ; @@ -106,6 +111,7 @@ M: kleene-star nfa-node ( node -- ) M: concatenation nfa-node ( node -- ) seq>> + reversed-regexp option? [ ] when [ [ nfa-node ] each ] [ length 1- [ concatenate-nodes ] times ] bi ; @@ -115,16 +121,59 @@ M: alternation nfa-node ( node -- ) [ length 1- [ alternate-nodes ] times ] bi ; M: constant nfa-node ( node -- ) - char>> literal-transition add-simple-entry ; + case-insensitive option? [ + dup char>> [ ch>lower ] [ ch>upper ] bi + 2dup = [ + 2drop + char>> literal-transition add-simple-entry + ] [ + [ literal-transition add-simple-entry ] bi@ + alternate-nodes drop + ] if + ] [ + char>> literal-transition add-simple-entry + ] if ; M: epsilon nfa-node ( node -- ) drop eps literal-transition add-simple-entry ; -M: word nfa-node ( node -- ) +M: word nfa-node ( node -- ) class-transition add-simple-entry ; + +M: any-char nfa-node ( node -- ) + [ dotall option? ] dip any-char-no-nl ? class-transition add-simple-entry ; +! M: beginning-of-text nfa-node ( node -- ) ; + +M: beginning-of-line nfa-node ( node -- ) class-transition add-simple-entry ; + +M: end-of-line nfa-node ( node -- ) class-transition add-simple-entry ; + +: choose-letter-class ( node -- node' ) + case-insensitive option? Letter-class rot ? ; + +M: letter-class nfa-node ( node -- ) + choose-letter-class class-transition add-simple-entry ; + +M: LETTER-class nfa-node ( node -- ) + choose-letter-class class-transition add-simple-entry ; + M: character-class-range nfa-node ( node -- ) - class-transition add-simple-entry ; + case-insensitive option? [ + dup [ from>> ] [ to>> ] bi + 2dup [ Letter? ] bi@ and [ + rot drop + [ [ ch>lower ] bi@ character-class-range boa ] + [ [ ch>upper ] bi@ character-class-range boa ] 2bi + [ class-transition add-simple-entry ] bi@ + alternate-nodes + ] [ + 2drop + class-transition add-simple-entry + ] if + ] [ + class-transition add-simple-entry + ] if ; M: capture-group nfa-node ( node -- ) eps literal-transition add-simple-entry @@ -141,26 +190,6 @@ M: non-capture-group nfa-node ( node -- ) M: reluctant-kleene-star nfa-node ( node -- ) term>> nfa-node ; -M: beginning-of-line nfa-node ( node -- ) - drop - eps literal-transition add-simple-entry - beginning-of-line add-global-flag ; - -M: end-of-line nfa-node ( node -- ) - drop - eps literal-transition add-simple-entry - end-of-line add-global-flag ; - -M: beginning-of-input nfa-node ( node -- ) - drop - eps literal-transition add-simple-entry - beginning-of-input add-global-flag ; - -M: end-of-input nfa-node ( node -- ) - drop - eps literal-transition add-simple-entry - end-of-input add-global-flag ; - M: negation nfa-node ( node -- ) negation-mode inc term>> nfa-node @@ -182,6 +211,10 @@ M: lookbehind nfa-node ( node -- ) lookbehind-off add-traversal-flag 2 [ concatenate-nodes ] times ; +M: option nfa-node ( node -- ) + [ option>> ] [ on?>> ] bi [ option-on ] [ option-off ] if + eps literal-transition add-simple-entry ; + : construct-nfa ( regexp -- ) [ reset-regexp diff --git a/basis/regexp/parser/parser-tests.factor b/basis/regexp/parser/parser-tests.factor index 0f25b2e3bf..fe4d2f1d1a 100644 --- a/basis/regexp/parser/parser-tests.factor +++ b/basis/regexp/parser/parser-tests.factor @@ -19,8 +19,8 @@ IN: regexp.parser [ ] [ "(?:a)" test-regexp ] unit-test [ ] [ "(?i:a)" test-regexp ] unit-test [ ] [ "(?-i:a)" test-regexp ] unit-test -[ "(?z:a)" test-regexp ] [ bad-option? ] must-fail-with -[ "(?-z:a)" test-regexp ] [ bad-option? ] must-fail-with +[ "(?z:a)" test-regexp ] [ unknown-regexp-option? ] must-fail-with +[ "(?-z:a)" test-regexp ] [ unknown-regexp-option? ] must-fail-with [ ] [ "(?=a)" test-regexp ] unit-test diff --git a/basis/regexp/parser/parser.factor b/basis/regexp/parser/parser.factor index 71a3e067f3..4d8f3ddfbc 100644 --- a/basis/regexp/parser/parser.factor +++ b/basis/regexp/parser/parser.factor @@ -4,12 +4,11 @@ USING: accessors arrays assocs combinators io io.streams.string kernel math math.parser namespaces qualified sets quotations sequences splitting symbols vectors math.order unicode.categories strings regexp.backend regexp.utils -unicode.case words locals ; +unicode.case words locals regexp.classes ; IN: regexp.parser FROM: math.ranges => [a,b] ; -MIXIN: node TUPLE: concatenation seq ; INSTANCE: concatenation node TUPLE: alternation seq ; INSTANCE: alternation node TUPLE: kleene-star term ; INSTANCE: kleene-star node @@ -40,38 +39,31 @@ INSTANCE: independent-group parentheses-group TUPLE: comment-group term ; INSTANCE: comment-group node INSTANCE: comment-group parentheses-group -TUPLE: character-class-range from to ; INSTANCE: character-class-range node SINGLETON: epsilon INSTANCE: epsilon node -SINGLETON: any-char INSTANCE: any-char node -SINGLETON: any-char-no-nl INSTANCE: any-char-no-nl node -SINGLETON: beginning-of-input INSTANCE: beginning-of-input node -SINGLETON: end-of-input INSTANCE: end-of-input node -SINGLETON: beginning-of-line INSTANCE: beginning-of-line node -SINGLETON: end-of-line INSTANCE: end-of-line node -TUPLE: option-on option ; INSTANCE: option-on node -TUPLE: option-off option ; INSTANCE: option-off node +TUPLE: option option on? ; INSTANCE: option node + SINGLETONS: unix-lines dotall multiline comments case-insensitive unicode-case reversed-regexp ; -SINGLETONS: letter-class LETTER-class Letter-class digit-class -alpha-class non-newline-blank-class -ascii-class punctuation-class java-printable-class blank-class -control-character-class hex-digit-class java-blank-class c-identifier-class -unmatchable-class terminator-class word-boundary-class ; - -SINGLETONS: beginning-of-group end-of-group -beginning-of-character-class end-of-character-class +SINGLETONS: beginning-of-character-class end-of-character-class left-parenthesis pipe caret dash ; -: get-option ( option -- ? ) current-regexp get options>> at ; -: get-unix-lines ( -- ? ) unix-lines get-option ; -: get-dotall ( -- ? ) dotall get-option ; -: get-multiline ( -- ? ) multiline get-option ; -: get-comments ( -- ? ) comments get-option ; -: get-case-insensitive ( -- ? ) case-insensitive get-option ; -: get-unicode-case ( -- ? ) unicode-case get-option ; -: get-reversed-regexp ( -- ? ) reversed-regexp get-option ; +: push1 ( obj -- ) input-stream get stream>> push ; +: peek1 ( -- obj ) input-stream get stream>> [ f ] [ peek ] if-empty ; +: pop3 ( seq -- obj1 obj2 obj3 ) [ pop ] [ pop ] [ pop ] tri spin ; +: drop1 ( -- ) read1 drop ; + +: stack ( -- obj ) current-regexp get stack>> ; +: change-whole-stack ( quot -- ) + current-regexp get + [ stack>> swap call ] keep (>>stack) ; inline +: push-stack ( obj -- ) stack push ; +: pop-stack ( -- obj ) stack pop ; +: cut-out ( vector n -- vector' vector ) cut rest ; +ERROR: cut-stack-error ; +: cut-stack ( obj vector -- vector' vector ) + tuck last-index [ cut-stack-error ] unless* cut-out swap ; : ( obj -- kleene ) possessive-kleene-star boa ; : ( obj -- kleene ) reluctant-kleene-star boa ; @@ -80,18 +72,11 @@ left-parenthesis pipe caret dash ; : ( obj -- negation ) negation boa ; : ( seq -- concatenation ) - >vector get-reversed-regexp [ reverse ] when - [ epsilon ] [ concatenation boa ] if-empty ; + >vector [ epsilon ] [ concatenation boa ] if-empty ; : ( seq -- alternation ) >vector alternation boa ; : ( obj -- capture-group ) capture-group boa ; : ( obj -- kleene-star ) kleene-star boa ; -: ( obj -- constant ) - dup Letter? get-case-insensitive and [ - [ ch>lower ] [ ch>upper ] bi - [ constant boa ] bi@ 2array - ] [ - constant boa - ] if ; +: ( obj -- constant ) constant boa ; : first|concatenation ( seq -- first/concatenation ) dup length 1 = [ first ] [ ] if ; @@ -100,21 +85,14 @@ left-parenthesis pipe caret dash ; dup length 1 = [ first ] [ ] if ; : ( from to -- obj ) - 2dup [ Letter? ] bi@ or get-case-insensitive and [ - [ [ ch>lower ] bi@ character-class-range boa ] - [ [ ch>upper ] bi@ character-class-range boa ] 2bi - 2array [ [ from>> ] [ to>> ] bi < ] filter - [ unmatchable-class ] [ first|alternation ] if-empty - ] [ - 2dup < - [ character-class-range boa ] [ 2drop unmatchable-class ] if - ] if ; + 2dup < + [ character-class-range boa ] [ 2drop unmatchable-class ] if ; ERROR: unmatched-parentheses ; -ERROR: bad-option ch ; +ERROR: unknown-regexp-option option ; -: option ( ch -- singleton ) +: ch>option ( ch -- singleton ) { { CHAR: i [ case-insensitive ] } { CHAR: d [ unix-lines ] } @@ -124,13 +102,21 @@ ERROR: bad-option ch ; { CHAR: s [ dotall ] } { CHAR: u [ unicode-case ] } { CHAR: x [ comments ] } - [ bad-option ] + [ unknown-regexp-option ] } case ; -: option-on ( option -- ) current-regexp get options>> conjoin ; -: option-off ( option -- ) current-regexp get options>> delete-at ; +: option>ch ( option -- string ) + { + { case-insensitive [ CHAR: i ] } + { multiline [ CHAR: m ] } + { reversed-regexp [ CHAR: r ] } + { dotall [ CHAR: s ] } + [ unknown-regexp-option ] + } case ; + +: toggle-option ( ch ? -- ) + [ ch>option ] dip option boa push-stack ; -: toggle-option ( ch ? -- ) [ option ] dip [ option-on ] [ option-off ] if ; : (parse-options) ( string ? -- ) [ toggle-option ] curry each ; : parse-options ( string -- ) @@ -176,7 +162,7 @@ DEFER: (parse-regexp) [ drop1 (parse-special-group) ] [ capture-group f nested-parse-regexp ] if ; -: handle-dot ( -- ) get-dotall any-char any-char-no-nl ? push-stack ; +: handle-dot ( -- ) any-char push-stack ; : handle-pipe ( -- ) pipe push-stack ; : (handle-star) ( obj -- kleene-star ) peek1 { @@ -234,11 +220,8 @@ ERROR: invalid-range a b ; [ [ nip at-most-n ] [ at-least-n ] if* ] if ] [ drop 0 max exactly-n ] if ; -: handle-front-anchor ( -- ) - get-multiline beginning-of-line beginning-of-input ? push-stack ; - -: handle-back-anchor ( -- ) - get-multiline end-of-line end-of-input ? push-stack ; +: handle-front-anchor ( -- ) beginning-of-line push-stack ; +: handle-back-anchor ( -- ) end-of-line push-stack ; ERROR: bad-character-class obj ; ERROR: expected-posix-class ; @@ -247,8 +230,8 @@ ERROR: expected-posix-class ; read1 CHAR: { = [ expected-posix-class ] unless "}" read-until [ bad-character-class ] unless { - { "Lower" [ get-case-insensitive Letter-class letter-class ? ] } - { "Upper" [ get-case-insensitive Letter-class LETTER-class ? ] } + { "Lower" [ letter-class ] } + { "Upper" [ LETTER-class ] } { "Alpha" [ Letter-class ] } { "ASCII" [ ascii-class ] } { "Digit" [ digit-class ] } @@ -412,7 +395,8 @@ DEFER: handle-left-bracket [ first|concatenation ] map first|alternation ; : handle-right-parenthesis ( -- ) - stack dup [ parentheses-group "members" word-prop member? ] find-last -rot cut rest + stack dup [ parentheses-group "members" word-prop member? ] find-last + -rot cut rest [ [ push ] keep current-regexp get (>>stack) ] [ finish-regexp-parse push-stack ] bi* ; @@ -429,12 +413,9 @@ DEFER: handle-left-bracket { CHAR: [ [ handle-left-bracket t ] } { CHAR: \ [ handle-escape t ] } [ - dup CHAR: $ = peek1 f = and [ - drop - handle-back-anchor f - ] [ - push-constant t - ] if + dup CHAR: $ = peek1 f = and + [ drop handle-back-anchor f ] + [ push-constant t ] if ] } case ; @@ -451,7 +432,6 @@ DEFER: handle-left-bracket parse-regexp-beginning (parse-regexp) ] with-input-stream ] unless-empty - current-regexp get - stack finish-regexp-parse - >>parse-tree drop + current-regexp get [ finish-regexp-parse ] change-stack + dup stack>> >>parse-tree drop ] with-variable ; diff --git a/basis/regexp/regexp-tests.factor b/basis/regexp/regexp-tests.factor index e4bab990a4..27936eea1c 100644 --- a/basis/regexp/regexp-tests.factor +++ b/basis/regexp/regexp-tests.factor @@ -238,7 +238,7 @@ IN: regexp-tests [ t ] [ "abc" R/ abc/r matches? ] unit-test [ t ] [ "abc" R/ a[bB][cC]/r matches? ] unit-test -[ t ] [ "adcbe" R/ a(?r)(bcd)(?-r)e/r matches? ] unit-test +[ t ] [ "adcbe" R/ a(?r)(bcd)(?-r)e/ matches? ] unit-test [ t ] [ "s@f" "[a-z.-]@[a-z]" matches? ] unit-test [ f ] [ "a" "[a-z.-]@[a-z]" matches? ] unit-test @@ -307,17 +307,30 @@ IN: regexp-tests ! Bug in parsing word [ t ] [ "a" R' a' matches? ] unit-test +! Convert to lowercase until E +[ f ] [ "AA" R/ \LAA\E/ matches? ] unit-test +[ t ] [ "aa" R/ \LAA\E/ matches? ] unit-test + +! Convert to uppercase until E +[ t ] [ "AA" R/ \Uaa\E/ matches? ] unit-test +[ f ] [ "aa" R/ \Uaa\E/ matches? ] unit-test + ! [ "{Lower}" ] [ invalid-range? ] must-fail-with -[ t ] [ "a" R/ ^a/ matches? ] unit-test -[ f ] [ "\na" R/ ^a/ matches? ] unit-test -[ f ] [ "\r\na" R/ ^a/ matches? ] unit-test -[ f ] [ "\ra" R/ ^a/ matches? ] unit-test +! [ t ] [ "a" R/ ^a/ matches? ] unit-test +! [ f ] [ "\na" R/ ^a/ matches? ] unit-test +! [ f ] [ "\r\na" R/ ^a/ matches? ] unit-test +! [ f ] [ "\ra" R/ ^a/ matches? ] unit-test -[ t ] [ "a" R/ a$/ matches? ] unit-test -[ f ] [ "a\n" R/ a$/ matches? ] unit-test -[ f ] [ "a\r" R/ a$/ matches? ] unit-test -[ f ] [ "a\r\n" R/ a$/ matches? ] unit-test +! [ t ] [ "a" R/ a$/ matches? ] unit-test +! [ f ] [ "a\n" R/ a$/ matches? ] unit-test +! [ f ] [ "a\r" R/ a$/ matches? ] unit-test +! [ f ] [ "a\r\n" R/ a$/ matches? ] unit-test + +! [ t ] [ "a" R/ a$|b$/ matches? ] unit-test +! [ t ] [ "b" R/ a$|b$/ matches? ] unit-test +! [ t ] [ "ab" R/ a$|b$/ matches? ] unit-test +! [ t ] [ "ba" R/ ba$|b$/ matches? ] unit-test ! [ t ] [ "a" R/ \Aa/ matches? ] unit-test ! [ f ] [ "\na" R/ \Aaa/ matches? ] unit-test @@ -347,14 +360,6 @@ IN: regexp-tests ! [ t ] [ "\r\na" R/ ^a/m matches? ] unit-test ! [ t ] [ "\ra" R/ ^a/m matches? ] unit-test -! Convert to lowercase until E -[ f ] [ "AA" R/ \LAA\E/ matches? ] unit-test -[ t ] [ "aa" R/ \LAA\E/ matches? ] unit-test - -! Convert to uppercase until E -[ t ] [ "AA" R/ \Uaa\E/ matches? ] unit-test -[ f ] [ "aa" R/ \Uaa\E/ matches? ] unit-test - ! [ t ] [ "a" "a$" R/ a$/m matches? ] unit-test ! [ t ] [ "a\n" "a$" R/ a$/m matches? ] unit-test ! [ t ] [ "a\r" "a$" R/ a$/m matches? ] unit-test diff --git a/basis/regexp/regexp.factor b/basis/regexp/regexp.factor index e61d5692f4..b41e4d271e 100644 --- a/basis/regexp/regexp.factor +++ b/basis/regexp/regexp.factor @@ -16,6 +16,7 @@ IN: regexp H{ } clone >>nfa-traversal-flags H{ } clone >>dfa-traversal-flags H{ } clone >>options + H{ } clone >>matchers reset-regexp ; : construct-regexp ( regexp -- regexp' ) @@ -93,26 +94,6 @@ IN: regexp { "R| " "|" } } swap [ subseq? not nip ] curry assoc-find drop ; -ERROR: unknown-regexp-option option ; - -: option>ch ( option -- string ) - { - { case-insensitive [ CHAR: i ] } - { multiline [ CHAR: m ] } - { reversed-regexp [ CHAR: r ] } - { dotall [ CHAR: s ] } - [ unknown-regexp-option ] - } case ; - -: ch>option ( ch -- option ) - { - { CHAR: i [ case-insensitive ] } - { CHAR: m [ multiline ] } - { CHAR: r [ reversed-regexp ] } - { CHAR: s [ dotall ] } - [ unknown-regexp-option ] - } case ; - : string>options ( string -- options ) [ ch>option dup ] H{ } map>assoc ; diff --git a/basis/regexp/utils/utils.factor b/basis/regexp/utils/utils.factor index 5116dd2b7e..af1b2fa1fb 100644 --- a/basis/regexp/utils/utils.factor +++ b/basis/regexp/utils/utils.factor @@ -26,23 +26,6 @@ IN: regexp.utils : ?insert-at ( value key hash/f -- hash ) [ H{ } clone ] unless* [ insert-at ] keep ; -: last-state ( regexp -- range ) stack>> peek first2 [a,b] ; -: push1 ( obj -- ) input-stream get stream>> push ; -: peek1 ( -- obj ) input-stream get stream>> [ f ] [ peek ] if-empty ; -: pop3 ( seq -- obj1 obj2 obj3 ) [ pop ] [ pop ] [ pop ] tri spin ; -: drop1 ( -- ) read1 drop ; - -: stack ( -- obj ) current-regexp get stack>> ; -: change-whole-stack ( quot -- ) - current-regexp get - [ stack>> swap call ] keep (>>stack) ; inline -: push-stack ( obj -- ) stack push ; -: pop-stack ( -- obj ) stack pop ; -: cut-out ( vector n -- vector' vector ) cut rest ; -ERROR: cut-stack-error ; -: cut-stack ( obj vector -- vector' vector ) - tuck last-index [ cut-stack-error ] unless* cut-out swap ; - ERROR: bad-octal number ; ERROR: bad-hex number ; : check-octal ( octal -- octal ) dup 255 > [ bad-octal ] when ; From 7b13604b7b48ce265c7d7089717ea867c80a3618 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Fri, 28 Nov 2008 16:14:55 -0600 Subject: [PATCH 3/9] clarify docs for io.files.unique, add a couple more tests, remove usages of >r r> --- extra/io/files/unique/unique-docs.factor | 53 ++++++++++++----------- extra/io/files/unique/unique-tests.factor | 19 +++++--- extra/io/files/unique/unique.factor | 29 ++++++++----- 3 files changed, 57 insertions(+), 44 deletions(-) diff --git a/extra/io/files/unique/unique-docs.factor b/extra/io/files/unique/unique-docs.factor index bb4e9ef01f..825eb212f1 100644 --- a/extra/io/files/unique/unique-docs.factor +++ b/extra/io/files/unique/unique-docs.factor @@ -2,7 +2,32 @@ USING: help.markup help.syntax io io.ports kernel math io.files.unique.private math.parser io.files ; IN: io.files.unique -ARTICLE: "unique" "Making and using unique files" +HELP: make-unique-file ( prefix suffix -- path ) +{ $values { "prefix" "a string" } { "suffix" "a string" } +{ "path" "a pathname string" } } +{ $description "Creates a file that is guaranteed not to exist in a platform-specific temporary directory. The file name is composed of a prefix, a number of random digits and letters, and the suffix. Returns the full pathname." } +{ $errors "Throws an error if a new unique file cannot be created after a number of tries. The most likely error is incorrect directory permissions on the temporary directory." } +{ $see-also with-unique-file } ; + +HELP: with-unique-file ( prefix suffix quot: ( path -- ) -- ) +{ $values { "prefix" "a string" } { "suffix" "a string" } +{ "quot" "a quotation" } } +{ $description "Creates a file with " { $link make-unique-file } " and calls the quotation with the path name on the stack." } +{ $notes "The unique file will be deleted after calling this word." } ; + +HELP: make-unique-directory ( -- path ) +{ $values { "path" "a pathname string" } } +{ $description "Creates a directory that is guaranteed not to exist in a platform-specific temporary directory and returns the full pathname." } +{ $errors "Throws an error if the directory cannot be created after a number of tries. The most likely error is incorrect directory permissions on the temporary directory." } +{ $see-also with-unique-directory } ; + +HELP: with-unique-directory ( quot -- ) +{ $values { "quot" "a quotation" } } +{ $description "Creates a directory with " { $link make-unique-directory } " and calls the quotation with the pathname on the stack using the " { $link with-directory } " combinator. The quotation can access the " { $link current-directory } " symbol for the name of the temporary directory." } +{ $notes "The directory will be deleted after calling this word, even if an error is thrown in the quotation." } ; + +ARTICLE: "io.files.unique" "Temporary files" +"The " { $vocab-link "io.files.unique" } " vocabulary implements cross-platform temporary file creation in a high-level and secure way." $nl "Files:" { $subsection make-unique-file } { $subsection with-unique-file } @@ -10,28 +35,4 @@ ARTICLE: "unique" "Making and using unique files" { $subsection make-unique-directory } { $subsection with-unique-directory } ; -ABOUT: "unique" - -HELP: make-unique-file ( prefix suffix -- path ) -{ $values { "prefix" "a string" } { "suffix" "a string" } -{ "path" "a pathname string" } } -{ $description "Creates a file that is guaranteed not to exist in a platform-specific temporary directory. The file name is composed of a prefix, a number of random digits and letters, and the suffix. Returns the full pathname." } -{ $errors "Throws an error if a new unique file cannot be created after a number of tries. Since each try generates a new random name, the most likely error is incorrect directory permissions on the temporary directory." } -{ $see-also with-unique-file } ; - -HELP: make-unique-directory ( -- path ) -{ $values { "path" "a pathname string" } } -{ $description "Creates a directory that is guaranteed not to exist in a platform-specific temporary directory and returns the full pathname." } -{ $errors "Throws an error if the directory cannot be created after a number of tries. Since each try generates a new random name, the most likely error is incorrect directory permissions on the temporary directory." } -{ $see-also with-unique-directory } ; - -HELP: with-unique-file ( prefix suffix quot -- ) -{ $values { "prefix" "a string" } { "suffix" "a string" } -{ "quot" "a quotation" } } -{ $description "Creates a file with " { $link make-unique-file } " and calls the quotation with the path name on the stack." } -{ $notes "The unique file will be deleted after calling this word." } ; - -HELP: with-unique-directory ( quot -- ) -{ $values { "quot" "a quotation" } } -{ $description "Creates a directory with " { $link make-unique-directory } " and calls the quotation with the pathname on the stack." } -{ $notes "The directory will be deleted after calling this word." } ; +ABOUT: "io.files.unique" diff --git a/extra/io/files/unique/unique-tests.factor b/extra/io/files/unique/unique-tests.factor index c29a94f395..178e4572d0 100644 --- a/extra/io/files/unique/unique-tests.factor +++ b/extra/io/files/unique/unique-tests.factor @@ -1,13 +1,20 @@ USING: io.encodings.ascii sequences strings io io.files accessors -tools.test kernel io.files.unique ; +tools.test kernel io.files.unique namespaces continuations ; IN: io.files.unique.tests [ 123 ] [ "core" ".test" [ - [ - ascii [ - 123 CHAR: a >string write - ] with-file-writer - ] keep file-info size>> + [ [ 123 CHAR: a ] dip ascii set-file-contents ] + [ file-info size>> ] bi ] with-unique-file ] unit-test + +[ t ] [ + [ current-directory get file-info directory? ] with-unique-directory +] unit-test + +[ t ] [ + current-directory get + [ [ "FAILDOG" throw ] with-unique-directory ] [ drop ] recover + current-directory get = +] unit-test diff --git a/extra/io/files/unique/unique.factor b/extra/io/files/unique/unique.factor index db11471a7a..3a6c556846 100644 --- a/extra/io/files/unique/unique.factor +++ b/extra/io/files/unique/unique.factor @@ -3,10 +3,17 @@ USING: kernel math math.bitwise combinators.lib math.parser random sequences sequences.lib continuations namespaces io.files io arrays io.files.unique.backend system -combinators vocabs.loader ; +combinators vocabs.loader fry ; IN: io.files.unique +SYMBOL: unique-length +SYMBOL: unique-retries + +10 unique-length set-global +10 unique-retries set-global + : make-unique-file ( prefix suffix -- path ) temporary-path -rot [ - unique-length random-name swap 3append append-path + unique-length get random-name swap 3append append-path dup (make-unique-file) - ] 3curry unique-retries retry ; + ] 3curry unique-retries get retry ; -: with-unique-file ( prefix suffix quot -- ) - >r make-unique-file r> keep delete-file ; inline +: with-unique-file ( prefix suffix quot: ( path -- ) -- ) + [ make-unique-file ] dip [ delete-file ] bi ; inline : make-unique-directory ( -- path ) [ - temporary-path unique-length random-name append-path + temporary-path unique-length get random-name append-path dup make-directory - ] unique-retries retry ; + ] unique-retries get retry ; -: with-unique-directory ( quot -- ) - >r make-unique-directory r> - [ with-directory ] curry keep delete-tree ; inline +: with-unique-directory ( quot: ( -- ) -- ) + [ make-unique-directory ] dip + '[ _ with-directory ] [ delete-tree ] bi ; inline { { [ os unix? ] [ "io.unix.files.unique" ] } From 8c7a3c8a81175123291fb409048a32c9f3cdcfe3 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Sat, 29 Nov 2008 00:30:36 -0600 Subject: [PATCH 4/9] better docs for io.encodings --- core/io/encodings/encodings-docs.factor | 110 +++++++++++++++++------- 1 file changed, 81 insertions(+), 29 deletions(-) diff --git a/core/io/encodings/encodings-docs.factor b/core/io/encodings/encodings-docs.factor index ba25e79509..b893e7f717 100644 --- a/core/io/encodings/encodings-docs.factor +++ b/core/io/encodings/encodings-docs.factor @@ -1,21 +1,6 @@ -USING: help.markup help.syntax ; +USING: help.markup help.syntax io quotations ; IN: io.encodings -ABOUT: "io.encodings" - -ARTICLE: "io.encodings" "I/O encodings" -"Bytes can't be understood in isolation as text. They must be interpreted under a certain encoding. Factor provides utilities for dealing with encoded text by declaring that a stream has a particular encoding, and utilities to encode and decode strings." -{ $subsection "encodings-descriptors" } -{ $subsection "encodings-constructors" } -{ $subsection "io.encodings.string" } -"New types of encodings can be defined:" -{ $subsection "encodings-protocol" } ; - -ARTICLE: "encodings-constructors" "Manually constructing an encoded stream" -"The following words can be used to construct encoded streams. Note that they are usually not used directly, but rather by the stream constructors themselves. Most stream constructors take an encoding descriptor as a parameter and internally call these constructors." -{ $subsection } -{ $subsection } ; - HELP: { $values { "stream" "an output stream" } { "encoding" "an encoding descriptor" } @@ -30,8 +15,66 @@ HELP: { $description "Wraps the given stream in a new stream using the given encoding for all input. The encoding descriptor can either be a class or an instance of something conforming to the " { $link "encodings-protocol" } "." } $low-level-note ; +HELP: decode-char +{ $values { "stream" "an underlying input stream" } + { "encoding" "An encoding descriptor tuple" } { "char/f" "a code point or " { $link f } } } +{ $contract "Reads a single code point from the underlying stream, interpreting it by the encoding. Returns " { $link f } " if the stream is reached." } +$low-level-note ; + +HELP: encode-char +{ $values { "char" "a character" } + { "stream" "an underlying output stream" } + { "encoding" "an encoding descriptor" } } +{ $contract "Writes the code point to the underlying stream in the given encoding." } +$low-level-note ; + +{ encode-char decode-char } related-words + +HELP: decode-input +{ $values + { "encoding" "an encoding descriptor" } +} +{ $description "Changes the encoding of the current input stream stored in the " { $link input-stream } " variable." } ; + +HELP: encode-output +{ $values + { "encoding" "an encoding descriptor" } +} +{ $description "Changes the encoding of the current output stream stored in the " { $link output-stream } " variable." } ; + +HELP: re-decode +{ $values + { "stream" "a stream" } { "encoding" "an encoding descriptor" } + { "newstream" "a new stream" } +} +{ $description "Creates a new decoding stream with the supplied encoding descriptor from an existing stream by calling the " { $link } " word." } ; + +HELP: re-encode +{ $values + { "stream" "a stream" } { "encoding" "an encoding descriptor" } + { "newstream" "a new stream" } +} +{ $description "Creates a new encoding stream with the supplied encoding descriptor from an existing stream by calling the " { $link } " word." } ; + +{ re-decode re-encode } related-words + +HELP: with-decoded-input +{ $values + { "encoding" "an encoding descriptor" } { "quot" quotation } +} +{ $description "Creates a new decoding stream with the given encoding descriptor and calls the quotation with this stream set to the " { $link input-stream } " variable. The original decoder stream is restored after the quotation returns and the stream is kept open for future input operations." } ; + +HELP: with-encoded-output +{ $values + { "encoding" "an encoding descriptor" } { "quot" quotation } +} +{ $description "Creates a new encoder with the given encoding descriptor and calls the quotation using this encoder. The original encoder object is restored after the quotation returns and the stream is kept open for future output operations." } ; + +HELP: replacement-char +{ $description "A code point that replaces input that could not be decoded. The presence of this character in the decoded data usually signifies an error." } ; + ARTICLE: "encodings-descriptors" "Encoding descriptors" -"An encoding descriptor is something which can be used for input or output streams to encode or decode files. It must conform to the " { $link "encodings-protocol" } ". Encodings which you can use are defined in the following vocabularies:" +"An encoding descriptor is something which can be used for input or output streams to encode or decode bytes stored in a certain representation. It must conform to the " { $link "encodings-protocol" } ". Encodings which you can use are defined in the following vocabularies:" { $subsection "io.encodings.binary" } { $subsection "io.encodings.utf8" } { $subsection "io.encodings.utf16" } @@ -50,17 +93,26 @@ ARTICLE: "encodings-protocol" "Encoding protocol" { $subsection encode-char } { $see-also "encodings-introduction" } ; -HELP: decode-char -{ $values { "stream" "an underlying input stream" } - { "encoding" "An encoding descriptor tuple" } { "char/f" "a code point or " { $link f } } } -{ $contract "Reads a single code point from the underlying stream, interpreting it by the encoding." } -$low-level-note ; +ARTICLE: "encodings-constructors" "Manually constructing an encoded stream" +"The following words can be used to construct encoded streams. Note that they are usually not used directly, but rather by the stream constructors themselves. Most stream constructors take an encoding descriptor as a parameter and call these constructors internally." +{ $subsection } +{ $subsection } ; -HELP: encode-char -{ $values { "char" "a character" } - { "stream" "an underlying output stream" } - { "encoding" "an encoding descriptor" } } -{ $contract "Writes the code point in the encoding to the underlying stream given." } -$low-level-note ; +ARTICLE: "io.encodings" "I/O encodings" +"The " { $vocab-link "io.encodings" } " vocabulary provides utilities for encoding and decoding bytes that represent text. Both strings and streams may be encoded." +{ $subsection "encodings-descriptors" } +{ $subsection "encodings-constructors" } +{ $subsection "io.encodings.string" } +"New types of encodings can be defined:" +{ $subsection "encodings-protocol" } +"Setting encodings on the current streams:" +{ $subsection encode-output } +{ $subsection decode-input } +"Setting encodings on streams:" +{ $subsection re-encode } +{ $subsection re-decode } +"Combinators to change the encoding:" +{ $subsection with-encoded-output } +{ $subsection with-decoded-input } ; -{ encode-char decode-char } related-words +ABOUT: "io.encodings" From 9d03a591cffa27689aeeefe152e3c166dfa243f5 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Sat, 29 Nov 2008 00:35:50 -0600 Subject: [PATCH 5/9] trivial doc for error-stream --- core/io/io-docs.factor | 3 +++ 1 file changed, 3 insertions(+) diff --git a/core/io/io-docs.factor b/core/io/io-docs.factor index 43f66657a7..c55377e4a0 100644 --- a/core/io/io-docs.factor +++ b/core/io/io-docs.factor @@ -114,6 +114,9 @@ HELP: input-stream HELP: output-stream { $var-description "Holds an output stream for various implicit stream operations. Rebound using " { $link with-output-stream } " and " { $link with-output-stream* } "." } ; +HELP: error-stream +{ $var-description "Holds an error stream." } ; + HELP: readln { $values { "str/f" "a string or " { $link f } } } { $description "Reads a line of input from " { $link input-stream } ". Outputs " { $link f } " on stream exhaustion." } From a4860c3173ba2e118630e80a5d0f92ef11003724 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Sat, 29 Nov 2008 00:37:36 -0600 Subject: [PATCH 6/9] add toggle-bit word to math.bitwise --- basis/math/bitwise/bitwise-docs.factor | 19 ++++++++++++++++++- basis/math/bitwise/bitwise-tests.factor | 3 +++ basis/math/bitwise/bitwise.factor | 1 + 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/basis/math/bitwise/bitwise-docs.factor b/basis/math/bitwise/bitwise-docs.factor index 9ed164330b..18ae8e1497 100644 --- a/basis/math/bitwise/bitwise-docs.factor +++ b/basis/math/bitwise/bitwise-docs.factor @@ -204,8 +204,25 @@ HELP: on-bits "64 on-bits .h" "ffffffffffffffff" } +} ; + +HELP: toggle-bit +{ $values + { "m" integer } + { "n" integer } + { "m'" integer } } -; +{ $description "Toggles the nth bit of an integer." } +{ $examples + { $example "USING: math.bitwise kernel prettyprint ;" + "0 3 toggle-bit .b" + "1000" + } + { $example "USING: math.bitwise kernel prettyprint ;" + "BIN: 1000 3 toggle-bit .b" + "0" + } +} ; HELP: set-bit { $values diff --git a/basis/math/bitwise/bitwise-tests.factor b/basis/math/bitwise/bitwise-tests.factor index 4422992956..979c62dcfb 100644 --- a/basis/math/bitwise/bitwise-tests.factor +++ b/basis/math/bitwise/bitwise-tests.factor @@ -29,3 +29,6 @@ IN: math.bitwise.tests \ foo must-infer [ 1 ] [ { 1 } flags ] unit-test + +[ 8 ] [ 0 3 toggle-bit ] unit-test +[ 0 ] [ 8 3 toggle-bit ] unit-test diff --git a/basis/math/bitwise/bitwise.factor b/basis/math/bitwise/bitwise.factor index 71ada945cd..89a21b65ab 100644 --- a/basis/math/bitwise/bitwise.factor +++ b/basis/math/bitwise/bitwise.factor @@ -17,6 +17,7 @@ IN: math.bitwise : bits ( m n -- m' ) 2^ wrap ; inline : mask-bit ( m n -- m' ) 2^ mask ; inline : on-bits ( n -- m ) 2^ 1- ; inline +: toggle-bit ( m n -- m' ) 2^ bitxor ; inline : shift-mod ( n s w -- n ) [ shift ] dip 2^ wrap ; inline From 5b3e01f20a4abeb8810c1e11c71d03c19d6fb55c Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Sat, 29 Nov 2008 00:42:15 -0600 Subject: [PATCH 7/9] hexdump should only work on byte-arrays --- basis/tools/hexdump/hexdump-docs.factor | 7 ++++--- basis/tools/hexdump/hexdump-tests.factor | 8 ++++---- basis/tools/hexdump/hexdump.factor | 8 +++++--- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/basis/tools/hexdump/hexdump-docs.factor b/basis/tools/hexdump/hexdump-docs.factor index 9579fb7f81..612195d997 100644 --- a/basis/tools/hexdump/hexdump-docs.factor +++ b/basis/tools/hexdump/hexdump-docs.factor @@ -1,14 +1,15 @@ ! Copyright (C) 2008 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. -USING: help.markup help.syntax kernel sequences strings ; +USING: help.markup help.syntax kernel sequences byte-arrays +strings ; IN: tools.hexdump HELP: hexdump. -{ $values { "seq" sequence } } +{ $values { "byte-array" byte-array } } { $description "Converts a sequence to its hexadecimal and ASCII representation sixteen characters at a time and writes it to standard out." } ; HELP: hexdump -{ $values { "seq" sequence } { "str" string } } +{ $values { "byte-array" byte-array } { "str" string } } { $description "Converts a sequence to its hexadecimal and ASCII representation sixteen characters at a time. Lines are separated by a newline character." } { $see-also hexdump. } ; diff --git a/basis/tools/hexdump/hexdump-tests.factor b/basis/tools/hexdump/hexdump-tests.factor index 7202e4402c..1a8ed35510 100644 --- a/basis/tools/hexdump/hexdump-tests.factor +++ b/basis/tools/hexdump/hexdump-tests.factor @@ -1,10 +1,10 @@ -USING: tools.hexdump kernel sequences tools.test ; +USING: tools.hexdump kernel sequences tools.test byte-arrays ; IN: tools.hexdump.tests -[ t ] [ "" hexdump "Length: 0, 0h\n" = ] unit-test -[ t ] [ "abcdefghijklmnopqrstuvwxyz" hexdump "Length: 26, 1ah\n00000000h: 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f 70 abcdefghijklmnop\n00000010h: 71 72 73 74 75 76 77 78 79 7a qrstuvwxyz\n" = ] unit-test +[ t ] [ B{ } hexdump "Length: 0, 0h\n" = ] unit-test +[ t ] [ "abcdefghijklmnopqrstuvwxyz" >byte-array hexdump "Length: 26, 1ah\n00000000h: 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f 70 abcdefghijklmnop\n00000010h: 71 72 73 74 75 76 77 78 79 7a qrstuvwxyz\n" = ] unit-test -[ t ] [ 256 [ ] map hexdump "Length: 256, 100h\n00000000h: 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f ................\n00000010h: 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f ................\n00000020h: 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f !\"#$%&'()*+,-./\n00000030h: 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f 0123456789:;<=>?\n00000040h: 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO\n00000050h: 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f PQRSTUVWXYZ[\\]^_\n00000060h: 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f `abcdefghijklmno\n00000070h: 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f pqrstuvwxyz{|}~.\n00000080h: 80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f ................\n00000090h: 90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f ................\n000000a0h: a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aa ab ac ad ae af ................\n000000b0h: b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf ................\n000000c0h: c0 c1 c2 c3 c4 c5 c6 c7 c8 c9 ca cb cc cd ce cf ................\n000000d0h: d0 d1 d2 d3 d4 d5 d6 d7 d8 d9 da db dc dd de df ................\n000000e0h: e0 e1 e2 e3 e4 e5 e6 e7 e8 e9 ea eb ec ed ee ef ................\n000000f0h: f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb fc fd fe ff ................\n" = ] unit-test +[ t ] [ 256 [ ] B{ } map-as hexdump "Length: 256, 100h\n00000000h: 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f ................\n00000010h: 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f ................\n00000020h: 20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f !\"#$%&'()*+,-./\n00000030h: 30 31 32 33 34 35 36 37 38 39 3a 3b 3c 3d 3e 3f 0123456789:;<=>?\n00000040h: 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO\n00000050h: 50 51 52 53 54 55 56 57 58 59 5a 5b 5c 5d 5e 5f PQRSTUVWXYZ[\\]^_\n00000060h: 60 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f `abcdefghijklmno\n00000070h: 70 71 72 73 74 75 76 77 78 79 7a 7b 7c 7d 7e 7f pqrstuvwxyz{|}~.\n00000080h: 80 81 82 83 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f ................\n00000090h: 90 91 92 93 94 95 96 97 98 99 9a 9b 9c 9d 9e 9f ................\n000000a0h: a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 aa ab ac ad ae af ................\n000000b0h: b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf ................\n000000c0h: c0 c1 c2 c3 c4 c5 c6 c7 c8 c9 ca cb cc cd ce cf ................\n000000d0h: d0 d1 d2 d3 d4 d5 d6 d7 d8 d9 da db dc dd de df ................\n000000e0h: e0 e1 e2 e3 e4 e5 e6 e7 e8 e9 ea eb ec ed ee ef ................\n000000f0h: f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb fc fd fe ff ................\n" = ] unit-test [ diff --git a/basis/tools/hexdump/hexdump.factor b/basis/tools/hexdump/hexdump.factor index c8b9f4accc..d16d6b2595 100644 --- a/basis/tools/hexdump/hexdump.factor +++ b/basis/tools/hexdump/hexdump.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2008 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. USING: arrays io io.streams.string kernel math math.parser -namespaces sequences splitting grouping strings ascii ; +namespaces sequences splitting grouping strings ascii byte-arrays ; IN: tools.hexdump -: hexdump. ( seq -- ) +GENERIC: hexdump. ( byte-array -- ) + +M: byte-array hexdump. [ length write-header ] [ 16 [ write-hex-line ] each-index ] bi ; -: hexdump ( seq -- str ) +: hexdump ( byte-array -- str ) [ hexdump. ] with-string-writer ; From 3e59cca630081e7fdf5ab8269a22e74a1c3f3a8a Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Sat, 29 Nov 2008 00:44:39 -0600 Subject: [PATCH 8/9] use math.bitwise here, add a couple tests --- extra/math/floating-point/floating-point-tests.factor | 5 ++++- extra/math/floating-point/floating-point.factor | 7 +++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/extra/math/floating-point/floating-point-tests.factor b/extra/math/floating-point/floating-point-tests.factor index 2a60d30d02..7f3a87f9a5 100644 --- a/extra/math/floating-point/floating-point-tests.factor +++ b/extra/math/floating-point/floating-point-tests.factor @@ -1,4 +1,7 @@ ! Copyright (C) 2008 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. -USING: tools.test math.floating-point ; +USING: tools.test math.floating-point math.constants kernel ; IN: math.floating-point.tests + +[ t ] [ pi >double< >double pi = ] unit-test +[ t ] [ -1.0 >double< >double -1.0 = ] unit-test diff --git a/extra/math/floating-point/floating-point.factor b/extra/math/floating-point/floating-point.factor index 3792d6ba9b..0d224bfc9d 100644 --- a/extra/math/floating-point/floating-point.factor +++ b/extra/math/floating-point/floating-point.factor @@ -1,20 +1,20 @@ ! Copyright (C) 2008 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. USING: kernel math sequences prettyprint math.parser io -math.functions ; +math.functions math.bitwise ; IN: math.floating-point : (double-sign) ( bits -- n ) -63 shift ; inline : double-sign ( double -- n ) double>bits (double-sign) ; : (double-exponent-bits) ( bits -- n ) - -52 shift 11 2^ 1- bitand ; inline + -52 shift 11 on-bits mask ; inline : double-exponent-bits ( double -- n ) double>bits (double-exponent-bits) ; : (double-mantissa-bits) ( double -- n ) - 52 2^ 1- bitand ; + 52 on-bits mask ; : double-mantissa-bits ( double -- n ) double>bits (double-mantissa-bits) ; @@ -37,4 +37,3 @@ IN: math.floating-point (double-mantissa-bits) >bin 52 CHAR: 0 pad-left 11 [ bl ] times print ] tri ; - From 26c002d23fbaa46fa3aa5a903e0920ffe2d1d9a6 Mon Sep 17 00:00:00 2001 From: sheeple Date: Sat, 29 Nov 2008 00:44:46 -0600 Subject: [PATCH 9/9] Fix fixnum* problem; result was shifted to the right by 3 bits if both input registers were equal --- basis/compiler/tests/codegen.factor | 3 +++ basis/cpu/ppc/ppc.factor | 12 ++++++------ extra/math/matrices/matrices-tests.factor | 2 ++ 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/basis/compiler/tests/codegen.factor b/basis/compiler/tests/codegen.factor index eb818972fc..b32a55fdd5 100644 --- a/basis/compiler/tests/codegen.factor +++ b/basis/compiler/tests/codegen.factor @@ -261,3 +261,6 @@ TUPLE: id obj ; [ 3 ] [ 1 t fixnum-overflow-control-flow-test ] unit-test [ 2 ] [ 1 f fixnum-overflow-control-flow-test ] unit-test + +[ 4 ] [ 2 [ dup fixnum* ] compile-call ] unit-test +[ 7 ] [ 2 [ dup fixnum* 3 fixnum+fast ] compile-call ] unit-test diff --git a/basis/cpu/ppc/ppc.factor b/basis/cpu/ppc/ppc.factor index aa9126fef0..8632d236cc 100644 --- a/basis/cpu/ppc/ppc.factor +++ b/basis/cpu/ppc/ppc.factor @@ -228,12 +228,12 @@ M:: ppc %fixnum-mul ( src1 src2 -- ) "no-overflow" define-label 0 0 LI 0 MTXER - src1 src1 tag-bits get SRAWI - scratch-reg src1 src2 MULLWO. + scratch-reg src1 tag-bits get SRAWI + scratch-reg scratch-reg src2 MULLWO. scratch-reg ds-reg 0 STW "no-overflow" get BNO src2 src2 tag-bits get SRAWI - src1 src2 move>args + scratch-reg src2 move>args %prepare-alien-invoke "overflow_fixnum_multiply" f %alien-invoke "no-overflow" resolve-label ; @@ -242,14 +242,14 @@ M:: ppc %fixnum-mul-tail ( src1 src2 -- ) "overflow" define-label 0 0 LI 0 MTXER - src1 src1 tag-bits get SRAWI - scratch-reg src1 src2 MULLWO. + scratch-reg src1 tag-bits get SRAWI + scratch-reg scratch-reg src2 MULLWO. "overflow" get BO scratch-reg ds-reg 0 STW BLR "overflow" resolve-label src2 src2 tag-bits get SRAWI - src1 src2 move>args + scratch-reg src2 move>args %prepare-alien-invoke "overflow_fixnum_multiply" f %alien-invoke-tail ; diff --git a/extra/math/matrices/matrices-tests.factor b/extra/math/matrices/matrices-tests.factor index ee2516e9a6..6f87109ba0 100644 --- a/extra/math/matrices/matrices-tests.factor +++ b/extra/math/matrices/matrices-tests.factor @@ -102,3 +102,5 @@ USING: math.matrices math.vectors tools.test math ; [ { 0 0 -1 } ] [ { 1 0 0 } { 0 1 0 } cross ] unit-test [ { 1 0 0 } ] [ { 0 1 0 } { 0 0 1 } cross ] unit-test [ { 0 1 0 } ] [ { 0 0 1 } { 1 0 0 } cross ] unit-test + +[ { 1 0 0 } ] [ { 1 1 0 } { 1 0 0 } proj ] unit-test