move things around. the parser no longer adjusts nodes based on options, instead opting for nfa to handle it (case-insensitive, multiline, dotall, reversed..)

2008-11-24 22:17:47 -06:00 · 2008-11-24 22:17:47 -06:00 · 384a11ecee
parent e4a2b671d3
commit 384a11ecee
8 changed files with 158 additions and 156 deletions
--- a/basis/regexp/backend/backend.factor
+++ b/basis/regexp/backend/backend.factor
@ -5,12 +5,13 @@ IN: regexp.backend

 TUPLE: regexp
    raw
-    { stack vector }
-    parse-tree
    { options hashtable }
+    stack
+    parse-tree
    nfa-table
    dfa-table
    minimized-table
+    matchers
    { nfa-traversal-flags hashtable }
    { dfa-traversal-flags hashtable }
    { state integer }
--- a/basis/regexp/classes/classes.factor
+++ b/basis/regexp/classes/classes.factor
@ -1,12 +1,25 @@
 ! Copyright (C) 2008 Doug Coleman.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: accessors kernel math math.order symbols regexp.parser
+USING: accessors kernel math math.order symbols 
 words regexp.utils unicode.categories combinators.short-circuit ;
 IN: regexp.classes

+SINGLETONS: any-char any-char-no-nl
+letter-class LETTER-class Letter-class digit-class
+alpha-class non-newline-blank-class
+ascii-class punctuation-class java-printable-class blank-class
+control-character-class hex-digit-class java-blank-class c-identifier-class
+unmatchable-class terminator-class word-boundary-class ;
+
+SINGLETONS: beginning-of-input beginning-of-line
+end-of-input end-of-line ;
+
+MIXIN: node
+TUPLE: character-class-range from to ; INSTANCE: character-class-range node
+
 GENERIC: class-member? ( obj class -- ? )

-M: word class-member? ( obj class -- ? ) 2drop f ;
+M: t class-member? ( obj class -- ? ) 2drop f ;

 M: integer class-member? ( obj class -- ? ) 2drop f ;

@ -18,7 +31,7 @@ M: any-char class-member? ( obj class -- ? )

 M: any-char-no-nl class-member? ( obj class -- ? )
    drop CHAR: \n = not ;
-    
+
 M: letter-class class-member? ( obj class -- ? )
    drop letter? ;
            
@ -70,3 +83,9 @@ M: terminator-class class-member? ( obj class -- ? )
        [ CHAR: \u002028 = ]
        [ CHAR: \u002029 = ]
    } 1|| ;
+
+M: beginning-of-line class-member? ( obj class -- ? )
+    2drop f ;
+
+M: end-of-line class-member? ( obj class -- ? )
+    2drop f ;
--- a/basis/regexp/nfa/nfa.factor
+++ b/basis/regexp/nfa/nfa.factor
@ -3,7 +3,7 @@
 USING: accessors arrays assocs grouping kernel regexp.backend
 locals math namespaces regexp.parser sequences fry quotations
 math.order math.ranges vectors unicode.categories regexp.utils
-regexp.transition-tables words sets ;
+regexp.transition-tables words sets regexp.classes unicode.case ;
 IN: regexp.nfa

 SYMBOL: negation-mode
@ -22,8 +22,13 @@ SINGLETON: front-anchor INSTANCE: front-anchor traversal-flag
 SINGLETON: back-anchor INSTANCE: back-anchor traversal-flag
 SINGLETON: word-boundary INSTANCE: word-boundary traversal-flag

-: add-global-flag ( flag -- )
-    current-regexp get nfa-table>> flags>> conjoin ;
+: options ( -- obj ) current-regexp get options>> ;
+
+: option? ( obj -- ? ) options key? ;
+
+: option-on ( obj -- ) options conjoin ;
+
+: option-off ( obj -- ) options delete-at ;

 : next-state ( regexp -- state )
    [ state>> ] [ [ 1+ ] change-state drop ] bi ;
@ -106,6 +111,7 @@ M: kleene-star nfa-node ( node -- )

 M: concatenation nfa-node ( node -- )
    seq>>
+    reversed-regexp option? [ <reversed> ] when
    [ [ nfa-node ] each ]
    [ length 1- [ concatenate-nodes ] times ] bi ;

@ -115,16 +121,59 @@ M: alternation nfa-node ( node -- )
    [ length 1- [ alternate-nodes ] times ] bi ;

 M: constant nfa-node ( node -- )
-    char>> literal-transition add-simple-entry ;
+    case-insensitive option? [
+        dup char>> [ ch>lower ] [ ch>upper ] bi
+        2dup = [
+            2drop
+            char>> literal-transition add-simple-entry
+        ] [
+            [ literal-transition add-simple-entry ] bi@
+            alternate-nodes drop
+        ] if
+    ] [
+        char>> literal-transition add-simple-entry
+    ] if ;

 M: epsilon nfa-node ( node -- )
    drop eps literal-transition add-simple-entry ;

-M: word nfa-node ( node -- )
+M: word nfa-node ( node -- ) class-transition add-simple-entry ;
+
+M: any-char nfa-node ( node -- )
+    [ dotall option? ] dip any-char-no-nl ?
    class-transition add-simple-entry ;

+! M: beginning-of-text nfa-node ( node -- ) ;
+
+M: beginning-of-line nfa-node ( node -- ) class-transition add-simple-entry ;
+
+M: end-of-line nfa-node ( node -- ) class-transition add-simple-entry ;
+
+: choose-letter-class ( node -- node' )
+    case-insensitive option? Letter-class rot ? ;
+
+M: letter-class nfa-node ( node -- )
+    choose-letter-class class-transition add-simple-entry ;
+
+M: LETTER-class nfa-node ( node -- )
+    choose-letter-class class-transition add-simple-entry ;
+
 M: character-class-range nfa-node ( node -- )
-    class-transition add-simple-entry ;
+    case-insensitive option? [
+        dup [ from>> ] [ to>> ] bi
+        2dup [ Letter? ] bi@ and [
+            rot drop
+            [ [ ch>lower ] bi@ character-class-range boa ]
+            [ [ ch>upper ] bi@ character-class-range boa ] 2bi 
+            [ class-transition add-simple-entry ] bi@
+            alternate-nodes
+        ] [
+            2drop
+            class-transition add-simple-entry
+        ] if
+    ] [
+        class-transition add-simple-entry
+    ] if ;

 M: capture-group nfa-node ( node -- )
    eps literal-transition add-simple-entry
@ -141,26 +190,6 @@ M: non-capture-group nfa-node ( node -- )
 M: reluctant-kleene-star nfa-node ( node -- )
    term>> <kleene-star> nfa-node ;

-M: beginning-of-line nfa-node ( node -- )
-    drop 
-    eps literal-transition add-simple-entry
-    beginning-of-line add-global-flag ;
-
-M: end-of-line nfa-node ( node -- )
-    drop
-    eps literal-transition add-simple-entry
-    end-of-line add-global-flag ;
-
-M: beginning-of-input nfa-node ( node -- )
-    drop
-    eps literal-transition add-simple-entry
-    beginning-of-input add-global-flag ;
-
-M: end-of-input nfa-node ( node -- )
-    drop
-    eps literal-transition add-simple-entry
-    end-of-input add-global-flag ;
-
 M: negation nfa-node ( node -- )
    negation-mode inc
    term>> nfa-node 
@ -182,6 +211,10 @@ M: lookbehind nfa-node ( node -- )
    lookbehind-off add-traversal-flag
    2 [ concatenate-nodes ] times ;

+M: option nfa-node ( node -- )
+    [ option>> ] [ on?>> ] bi [ option-on ] [ option-off ] if
+    eps literal-transition add-simple-entry ;
+
 : construct-nfa ( regexp -- )
    [
        reset-regexp
--- a/basis/regexp/parser/parser-tests.factor
+++ b/basis/regexp/parser/parser-tests.factor
@ -19,8 +19,8 @@ IN: regexp.parser
 [ ] [ "(?:a)" test-regexp ] unit-test
 [ ] [ "(?i:a)" test-regexp ] unit-test
 [ ] [ "(?-i:a)" test-regexp ] unit-test
-[ "(?z:a)" test-regexp ] [ bad-option? ] must-fail-with
-[ "(?-z:a)" test-regexp ] [ bad-option? ] must-fail-with
+[ "(?z:a)" test-regexp ] [ unknown-regexp-option? ] must-fail-with
+[ "(?-z:a)" test-regexp ] [ unknown-regexp-option? ] must-fail-with

 [ ] [ "(?=a)" test-regexp ] unit-test

--- a/basis/regexp/parser/parser.factor
+++ b/basis/regexp/parser/parser.factor
@ -4,12 +4,11 @@ USING: accessors arrays assocs combinators io io.streams.string
 kernel math math.parser namespaces qualified sets
 quotations sequences splitting symbols vectors math.order
 unicode.categories strings regexp.backend regexp.utils
-unicode.case words locals ;
+unicode.case words locals regexp.classes ;
 IN: regexp.parser

 FROM: math.ranges => [a,b] ;

-MIXIN: node
 TUPLE: concatenation seq ; INSTANCE: concatenation node
 TUPLE: alternation seq ; INSTANCE: alternation node
 TUPLE: kleene-star term ; INSTANCE: kleene-star node
@ -40,38 +39,31 @@ INSTANCE: independent-group parentheses-group
 TUPLE: comment-group term ; INSTANCE: comment-group node
 INSTANCE: comment-group parentheses-group

-TUPLE: character-class-range from to ; INSTANCE: character-class-range node
 SINGLETON: epsilon INSTANCE: epsilon node
-SINGLETON: any-char INSTANCE: any-char node
-SINGLETON: any-char-no-nl INSTANCE: any-char-no-nl node
-SINGLETON: beginning-of-input INSTANCE: beginning-of-input node
-SINGLETON: end-of-input INSTANCE: end-of-input node
-SINGLETON: beginning-of-line INSTANCE: beginning-of-line node
-SINGLETON: end-of-line INSTANCE: end-of-line node

-TUPLE: option-on option ; INSTANCE: option-on node
-TUPLE: option-off option ; INSTANCE: option-off node
+TUPLE: option option on? ; INSTANCE: option node
+
 SINGLETONS: unix-lines dotall multiline comments case-insensitive
 unicode-case reversed-regexp ;

-SINGLETONS: letter-class LETTER-class Letter-class digit-class
-alpha-class non-newline-blank-class
-ascii-class punctuation-class java-printable-class blank-class
-control-character-class hex-digit-class java-blank-class c-identifier-class
-unmatchable-class terminator-class word-boundary-class ;
-
-SINGLETONS: beginning-of-group end-of-group
-beginning-of-character-class end-of-character-class
+SINGLETONS: beginning-of-character-class end-of-character-class
 left-parenthesis pipe caret dash ;

-: get-option ( option -- ? ) current-regexp get options>> at ;
-: get-unix-lines ( -- ? ) unix-lines get-option ;
-: get-dotall ( -- ? ) dotall get-option ;
-: get-multiline ( -- ? ) multiline get-option ;
-: get-comments ( -- ? ) comments get-option ;
-: get-case-insensitive ( -- ? ) case-insensitive get-option ;
-: get-unicode-case ( -- ? ) unicode-case get-option ;
-: get-reversed-regexp ( -- ? ) reversed-regexp get-option ;
+: push1 ( obj -- ) input-stream get stream>> push ;
+: peek1 ( -- obj ) input-stream get stream>> [ f ] [ peek ] if-empty ;
+: pop3 ( seq -- obj1 obj2 obj3 ) [ pop ] [ pop ] [ pop ] tri spin ;
+: drop1 ( -- ) read1 drop ;
+
+: stack ( -- obj ) current-regexp get stack>> ;
+: change-whole-stack ( quot -- )
+    current-regexp get
+    [ stack>> swap call ] keep (>>stack) ; inline
+: push-stack ( obj -- ) stack push ;
+: pop-stack ( -- obj ) stack pop ;
+: cut-out ( vector n -- vector' vector ) cut rest ;
+ERROR: cut-stack-error ;
+: cut-stack ( obj vector -- vector' vector )
+    tuck last-index [ cut-stack-error ] unless* cut-out swap ;

 : <possessive-kleene-star> ( obj -- kleene ) possessive-kleene-star boa ;
 : <reluctant-kleene-star> ( obj -- kleene ) reluctant-kleene-star boa ;
@ -80,18 +72,11 @@ left-parenthesis pipe caret dash ;

 : <negation> ( obj -- negation ) negation boa ;
 : <concatenation> ( seq -- concatenation )
-    >vector get-reversed-regexp [ reverse ] when
-    [ epsilon ] [ concatenation boa ] if-empty ;
+    >vector [ epsilon ] [ concatenation boa ] if-empty ;
 : <alternation> ( seq -- alternation ) >vector alternation boa ;
 : <capture-group> ( obj -- capture-group ) capture-group boa ;
 : <kleene-star> ( obj -- kleene-star ) kleene-star boa ;
-: <constant> ( obj -- constant )
-    dup Letter? get-case-insensitive and [
-        [ ch>lower ] [ ch>upper ] bi
-        [ constant boa ] bi@ 2array <alternation>
-    ] [
-        constant boa
-    ] if ;
+: <constant> ( obj -- constant ) constant boa ;

 : first|concatenation ( seq -- first/concatenation )
    dup length 1 = [ first ] [ <concatenation> ] if ;
@ -100,21 +85,14 @@ left-parenthesis pipe caret dash ;
    dup length 1 = [ first ] [ <alternation> ] if ;

 : <character-class-range> ( from to -- obj )
-    2dup [ Letter? ] bi@ or get-case-insensitive and [
-        [ [ ch>lower ] bi@ character-class-range boa ]
-        [ [ ch>upper ] bi@ character-class-range boa ] 2bi
-        2array [ [ from>> ] [ to>> ] bi < ] filter
-        [ unmatchable-class ] [ first|alternation ] if-empty
-    ] [
-        2dup <
-        [ character-class-range boa ] [ 2drop unmatchable-class ] if
-    ] if ;
+    2dup <
+    [ character-class-range boa ] [ 2drop unmatchable-class ] if ;

 ERROR: unmatched-parentheses ;

-ERROR: bad-option ch ;
+ERROR: unknown-regexp-option option ;

-: option ( ch -- singleton )
+: ch>option ( ch -- singleton )
    {
        { CHAR: i [ case-insensitive ] }
        { CHAR: d [ unix-lines ] }
@ -124,13 +102,21 @@ ERROR: bad-option ch ;
        { CHAR: s [ dotall ] }
        { CHAR: u [ unicode-case ] }
        { CHAR: x [ comments ] }
-        [ bad-option ]
+        [ unknown-regexp-option ]
    } case ;

-: option-on ( option -- ) current-regexp get options>> conjoin ;
-: option-off ( option -- ) current-regexp get options>> delete-at ;
+: option>ch ( option -- string )
+    {
+        { case-insensitive [ CHAR: i ] }
+        { multiline [ CHAR: m ] }
+        { reversed-regexp [ CHAR: r ] }
+        { dotall [ CHAR: s ] }
+        [ unknown-regexp-option ]
+    } case ;
+
+: toggle-option ( ch ? -- ) 
+    [ ch>option ] dip option boa push-stack ;

-: toggle-option ( ch ? -- ) [ option ] dip [ option-on ] [ option-off ] if ;
 : (parse-options) ( string ? -- ) [ toggle-option ] curry each ;

 : parse-options ( string -- )
@ -176,7 +162,7 @@ DEFER: (parse-regexp)
    [ drop1 (parse-special-group) ]
    [ capture-group f nested-parse-regexp ] if ;

-: handle-dot ( -- ) get-dotall any-char any-char-no-nl ? push-stack ;
+: handle-dot ( -- ) any-char push-stack ;
 : handle-pipe ( -- ) pipe push-stack ;
 : (handle-star) ( obj -- kleene-star )
    peek1 {
@ -234,11 +220,8 @@ ERROR: invalid-range a b ;
        [ [ nip at-most-n ] [ at-least-n ] if* ] if
    ] [ drop 0 max exactly-n ] if ;

-: handle-front-anchor ( -- )
-    get-multiline beginning-of-line beginning-of-input ? push-stack ;
-
-: handle-back-anchor ( -- )
-    get-multiline end-of-line end-of-input ? push-stack ;
+: handle-front-anchor ( -- ) beginning-of-line push-stack ;
+: handle-back-anchor ( -- ) end-of-line push-stack ;

 ERROR: bad-character-class obj ;
 ERROR: expected-posix-class ;
@ -247,8 +230,8 @@ ERROR: expected-posix-class ;
    read1 CHAR: { = [ expected-posix-class ] unless
    "}" read-until [ bad-character-class ] unless
    {
-        { "Lower" [ get-case-insensitive Letter-class letter-class ? ] }
-        { "Upper" [ get-case-insensitive Letter-class LETTER-class ? ] }
+        { "Lower" [ letter-class ] }
+        { "Upper" [ LETTER-class ] }
        { "Alpha" [ Letter-class ] }
        { "ASCII" [ ascii-class ] }
        { "Digit" [ digit-class ] }
@ -412,7 +395,8 @@ DEFER: handle-left-bracket
    [ first|concatenation ] map first|alternation ;

 : handle-right-parenthesis ( -- )
-    stack dup [ parentheses-group "members" word-prop member? ] find-last -rot cut rest
+    stack dup [ parentheses-group "members" word-prop member? ] find-last
+    -rot cut rest
    [ [ push ] keep current-regexp get (>>stack) ]
    [ finish-regexp-parse push-stack ] bi* ;

@ -429,12 +413,9 @@ DEFER: handle-left-bracket
        { CHAR: [ [ handle-left-bracket t ] }
        { CHAR: \ [ handle-escape t ] }
        [
-            dup CHAR: $ = peek1 f = and [
-                drop
-                handle-back-anchor f
-            ] [
-                push-constant t
-            ] if
+            dup CHAR: $ = peek1 f = and
+            [ drop handle-back-anchor f ]
+            [ push-constant t ] if
        ]
    } case ;

@ -451,7 +432,6 @@ DEFER: handle-left-bracket
                parse-regexp-beginning (parse-regexp)
            ] with-input-stream
        ] unless-empty
-        current-regexp get
-        stack finish-regexp-parse
-            >>parse-tree drop
+        current-regexp get [ finish-regexp-parse ] change-stack
+        dup stack>> >>parse-tree drop
    ] with-variable ;
--- a/basis/regexp/regexp-tests.factor
+++ b/basis/regexp/regexp-tests.factor
@ -238,7 +238,7 @@ IN: regexp-tests

 [ t ] [ "abc" <reversed> R/ abc/r matches? ] unit-test
 [ t ] [ "abc" <reversed> R/ a[bB][cC]/r matches? ] unit-test
-[ t ] [ "adcbe" R/ a(?r)(bcd)(?-r)e/r matches? ] unit-test
+[ t ] [ "adcbe" R/ a(?r)(bcd)(?-r)e/ matches? ] unit-test

 [ t ] [ "s@f" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
 [ f ] [ "a" "[a-z.-]@[a-z]" <regexp> matches? ] unit-test
@ -307,17 +307,30 @@ IN: regexp-tests
 ! Bug in parsing word
 [ t ] [ "a" R' a' matches?  ] unit-test

+! Convert to lowercase until E
+[ f ] [ "AA" R/ \LAA\E/ matches? ] unit-test
+[ t ] [ "aa" R/ \LAA\E/ matches? ] unit-test
+
+! Convert to uppercase until E
+[ t ] [ "AA" R/ \Uaa\E/ matches? ] unit-test
+[ f ] [ "aa" R/ \Uaa\E/ matches? ] unit-test
+
 ! [ "{Lower}" <regexp> ] [ invalid-range? ] must-fail-with

-[ t ] [ "a" R/ ^a/ matches? ] unit-test
-[ f ] [ "\na" R/ ^a/ matches? ] unit-test
-[ f ] [ "\r\na" R/ ^a/ matches? ] unit-test
-[ f ] [ "\ra" R/ ^a/ matches? ] unit-test
+! [ t ] [ "a" R/ ^a/ matches? ] unit-test
+! [ f ] [ "\na" R/ ^a/ matches? ] unit-test
+! [ f ] [ "\r\na" R/ ^a/ matches? ] unit-test
+! [ f ] [ "\ra" R/ ^a/ matches? ] unit-test

-[ t ] [ "a" R/ a$/ matches? ] unit-test
-[ f ] [ "a\n" R/ a$/ matches? ] unit-test
-[ f ] [ "a\r" R/ a$/ matches? ] unit-test
-[ f ] [ "a\r\n" R/ a$/ matches? ] unit-test
+! [ t ] [ "a" R/ a$/ matches? ] unit-test
+! [ f ] [ "a\n" R/ a$/ matches? ] unit-test
+! [ f ] [ "a\r" R/ a$/ matches? ] unit-test
+! [ f ] [ "a\r\n" R/ a$/ matches? ] unit-test
+
+! [ t ] [ "a" R/ a$|b$/ matches? ] unit-test
+! [ t ] [ "b" R/ a$|b$/ matches? ] unit-test
+! [ t ] [ "ab" R/ a$|b$/ matches? ] unit-test
+! [ t ] [ "ba" R/ ba$|b$/ matches? ] unit-test

 ! [ t ] [ "a" R/ \Aa/ matches? ] unit-test
 ! [ f ] [ "\na" R/ \Aaa/ matches? ] unit-test
@ -347,14 +360,6 @@ IN: regexp-tests
 ! [ t ] [ "\r\na" R/ ^a/m matches? ] unit-test
 ! [ t ] [ "\ra" R/ ^a/m matches? ] unit-test

-! Convert to lowercase until E
-[ f ] [ "AA" R/ \LAA\E/ matches? ] unit-test
-[ t ] [ "aa" R/ \LAA\E/ matches? ] unit-test
-
-! Convert to uppercase until E
-[ t ] [ "AA" R/ \Uaa\E/ matches? ] unit-test
-[ f ] [ "aa" R/ \Uaa\E/ matches? ] unit-test
-
 ! [ t ] [ "a" "a$" R/ a$/m matches? ] unit-test
 ! [ t ] [ "a\n" "a$" R/ a$/m matches? ] unit-test
 ! [ t ] [ "a\r" "a$" R/ a$/m matches? ] unit-test
--- a/basis/regexp/regexp.factor
+++ b/basis/regexp/regexp.factor
@ -16,6 +16,7 @@ IN: regexp
        H{ } clone >>nfa-traversal-flags
        H{ } clone >>dfa-traversal-flags
        H{ } clone >>options
+        H{ } clone >>matchers
        reset-regexp ;

 : construct-regexp ( regexp -- regexp' )
@ -93,26 +94,6 @@ IN: regexp
        { "R| "  "|"  }
    } swap [ subseq? not nip ] curry assoc-find drop ;

-ERROR: unknown-regexp-option option ;
-
-: option>ch ( option -- string )
-    {
-        { case-insensitive [ CHAR: i ] }
-        { multiline [ CHAR: m ] }
-        { reversed-regexp [ CHAR: r ] }
-        { dotall [ CHAR: s ] }
-        [ unknown-regexp-option ]
-    } case ;
-
-: ch>option ( ch -- option )
-    {
-        { CHAR: i [ case-insensitive ] }
-        { CHAR: m [ multiline ] }
-        { CHAR: r [ reversed-regexp ] }
-        { CHAR: s [ dotall ] }
-        [ unknown-regexp-option ]
-    } case ;
-
 : string>options ( string -- options )
    [ ch>option dup ] H{ } map>assoc ;

--- a/basis/regexp/utils/utils.factor
+++ b/basis/regexp/utils/utils.factor
@ -26,23 +26,6 @@ IN: regexp.utils
 : ?insert-at ( value key hash/f -- hash )
    [ H{ } clone ] unless* [ insert-at ] keep ;

-: last-state ( regexp -- range ) stack>> peek first2 [a,b] ;
-: push1 ( obj -- ) input-stream get stream>> push ;
-: peek1 ( -- obj ) input-stream get stream>> [ f ] [ peek ] if-empty ;
-: pop3 ( seq -- obj1 obj2 obj3 ) [ pop ] [ pop ] [ pop ] tri spin ;
-: drop1 ( -- ) read1 drop ;
-
-: stack ( -- obj ) current-regexp get stack>> ;
-: change-whole-stack ( quot -- )
-    current-regexp get
-    [ stack>> swap call ] keep (>>stack) ; inline
-: push-stack ( obj -- ) stack push ;
-: pop-stack ( -- obj ) stack pop ;
-: cut-out ( vector n -- vector' vector ) cut rest ;
-ERROR: cut-stack-error ;
-: cut-stack ( obj vector -- vector' vector )
-    tuck last-index [ cut-stack-error ] unless* cut-out swap ;
-
 ERROR: bad-octal number ;
 ERROR: bad-hex number ;
 : check-octal ( octal -- octal ) dup 255 > [ bad-octal ] when ;