Merge branch 'master' of git://factorcode.org/git/factor

db4
Doug Coleman 2009-03-21 09:47:05 -05:00
commit 6585133cc1
119 changed files with 668 additions and 589 deletions

View File

@ -26,4 +26,4 @@ M: F-destructor dispose* alien>> F ;
;FUNCTOR
: DESTRUCTOR: scan-word define-destructor ; parsing
SYNTAX: DESTRUCTOR: scan-word define-destructor ;

View File

@ -416,7 +416,7 @@ PRIVATE>
: define-fortran-record ( name vocab fields -- )
[ >lower ] [ ] [ fortran-record>c-struct ] tri* define-struct ;
: RECORD: scan in get parse-definition define-fortran-record ; parsing
SYNTAX: RECORD: scan in get parse-definition define-fortran-record ;
: set-fortran-abi ( library -- )
library-fortran-abis get-global at fortran-abi set ;
@ -437,16 +437,16 @@ MACRO: fortran-invoke ( return library function parameters -- )
return library function parameters return [ "void" ] unless* parse-arglist
[ \ fortran-invoke 5 [ ] nsequence ] dip define-declared ;
: SUBROUTINE:
SYNTAX: SUBROUTINE:
f "c-library" get scan ";" parse-tokens
[ "()" subseq? not ] filter define-fortran-function ; parsing
[ "()" subseq? not ] filter define-fortran-function ;
: FUNCTION:
SYNTAX: FUNCTION:
scan "c-library" get scan ";" parse-tokens
[ "()" subseq? not ] filter define-fortran-function ; parsing
[ "()" subseq? not ] filter define-fortran-function ;
: LIBRARY:
SYNTAX: LIBRARY:
scan
[ "c-library" set ]
[ set-fortran-abi ] bi ; parsing
[ set-fortran-abi ] bi ;

View File

@ -7,35 +7,34 @@ effects assocs combinators lexer strings.parser alien.parser
fry vocabs.parser words.constant ;
IN: alien.syntax
: DLL" lexer get skip-blank parse-string dlopen parsed ; parsing
SYNTAX: DLL" lexer get skip-blank parse-string dlopen parsed ;
: ALIEN: scan string>number <alien> parsed ; parsing
SYNTAX: ALIEN: scan string>number <alien> parsed ;
: BAD-ALIEN <bad-alien> parsed ; parsing
SYNTAX: BAD-ALIEN <bad-alien> parsed ;
: LIBRARY: scan "c-library" set ; parsing
SYNTAX: LIBRARY: scan "c-library" set ;
: FUNCTION:
SYNTAX: FUNCTION:
scan "c-library" get scan ";" parse-tokens
[ "()" subseq? not ] filter
define-function ; parsing
define-function ;
: TYPEDEF:
scan scan typedef ; parsing
SYNTAX: TYPEDEF:
scan scan typedef ;
: C-STRUCT:
scan in get parse-definition define-struct ; parsing
SYNTAX: C-STRUCT:
scan in get parse-definition define-struct ;
: C-UNION:
scan parse-definition define-union ; parsing
SYNTAX: C-UNION:
scan parse-definition define-union ;
: C-ENUM:
SYNTAX: C-ENUM:
";" parse-tokens
[ [ create-in ] dip define-constant ] each-index ;
parsing
: address-of ( name library -- value )
load-library dlsym [ "No such symbol" throw ] unless* ;
: &:
scan "c-library" get '[ _ _ address-of ] over push-all ; parsing
SYNTAX: &:
scan "c-library" get '[ _ _ address-of ] over push-all ;

View File

@ -68,7 +68,7 @@ M: bit-array resize
M: bit-array byte-length length 7 + -3 shift ;
: ?{ \ } [ >bit-array ] parse-literal ; parsing
SYNTAX: ?{ \ } [ >bit-array ] parse-literal ;
: integer>bit-array ( n -- bit-array )
dup 0 = [

View File

@ -31,7 +31,7 @@ M: bit-array new-resizable drop <bit-vector> ;
INSTANCE: bit-vector growable
: ?V{ \ } [ >bit-vector ] parse-literal ; parsing
SYNTAX: ?V{ \ } [ >bit-vector ] parse-literal ;
M: bit-vector >pprint-sequence ;
M: bit-vector pprint-delims drop \ ?V{ \ } ;

View File

@ -42,7 +42,7 @@ M: byte-array like
M: byte-array new-resizable drop <byte-vector> ;
: BV{ \ } [ >byte-vector ] parse-literal ; parsing
SYNTAX: BV{ \ } [ >byte-vector ] parse-literal ;
M: byte-vector pprint* pprint-object ;
M: byte-vector pprint-delims drop \ BV{ \ } ;

View File

@ -14,18 +14,14 @@ SYMBOL: sent-messages
: remember-send ( selector -- )
sent-messages (remember-send) ;
: ->
scan dup remember-send parsed \ send parsed ;
parsing
SYNTAX: -> scan dup remember-send parsed \ send parsed ;
SYMBOL: super-sent-messages
: remember-super-send ( selector -- )
super-sent-messages (remember-send) ;
: SUPER->
scan dup remember-super-send parsed \ super-send parsed ;
parsing
SYNTAX: SUPER-> scan dup remember-super-send parsed \ super-send parsed ;
SYMBOL: frameworks
@ -33,9 +29,9 @@ frameworks [ V{ } clone ] initialize
[ frameworks get [ load-framework ] each ] "cocoa.messages" add-init-hook
: FRAMEWORK: scan [ load-framework ] [ frameworks get push ] bi ; parsing
SYNTAX: FRAMEWORK: scan [ load-framework ] [ frameworks get push ] bi ;
: IMPORT: scan [ ] import-objc-class ; parsing
SYNTAX: IMPORT: scan [ ] import-objc-class ;
"Compiling Objective C bridge..." print

View File

@ -76,6 +76,6 @@ SYMBOL: +superclass+
import-objc-class
] bind ;
: CLASS:
SYNTAX: CLASS:
parse-definition unclip
>hashtable define-objc-class ; parsing
>hashtable define-objc-class ;

View File

@ -30,4 +30,4 @@ ERROR: no-such-color name ;
: named-color ( name -- color )
dup rgb.txt at [ ] [ no-such-color ] ?if ;
: COLOR: scan named-color parsed ; parsing
SYNTAX: COLOR: scan named-color parsed ;

View File

@ -13,10 +13,10 @@ IN: compiler.cfg.instructions.syntax
: insn-effect ( word -- effect )
boa-effect in>> but-last f <effect> ;
: INSN:
SYNTAX: INSN:
parse-tuple-definition "regs" suffix
[ dup tuple eq? [ drop insn-word ] when ] dip
[ define-tuple-class ]
[ 2drop save-location ]
[ 2drop [ ] [ '[ f _ boa , ] ] [ insn-effect ] tri define-inline ]
3tri ; parsing
3tri ;

View File

@ -17,6 +17,6 @@ C: <ds-loc> ds-loc
TUPLE: rs-loc < loc ;
C: <rs-loc> rs-loc
: V scan-word scan-word vreg boa parsed ; parsing
: D scan-word <ds-loc> parsed ; parsing
: R scan-word <rs-loc> parsed ; parsing
SYNTAX: V scan-word scan-word vreg boa parsed ;
SYNTAX: D scan-word <ds-loc> parsed ;
SYNTAX: R scan-word <rs-loc> parsed ;

View File

@ -16,8 +16,8 @@ MACRO: set-slots ( slots -- quot )
[ [ in>> '[ _ _ construct ] ] dip compose ] [ drop ] 2bi
define-declared ;
: CONSTRUCTOR:
SYNTAX: CONSTRUCTOR:
scan-word [ name>> "<" ">" surround create-in ] keep
"(" expect ")" parse-effect
parse-definition
define-constructor ; parsing
define-constructor ;

View File

@ -3,7 +3,7 @@
USING: words parser alien alien.c-types kernel fry accessors ;
IN: core-text.utilities
: C-GLOBAL:
SYNTAX: C-GLOBAL:
CREATE-WORD
dup name>> '[ _ f dlsym *void* ]
(( -- value )) define-declared ; parsing
(( -- value )) define-declared ;

View File

@ -21,7 +21,7 @@ IN: cpu.ppc.assembler.backend
: define-d-insn ( word opcode -- )
[ d-insn ] curry (( d a simm -- )) define-declared ;
: D: CREATE scan-word define-d-insn ; parsing
SYNTAX: D: CREATE scan-word define-d-insn ;
: sd-insn ( d a simm opcode -- )
[ s>u16 { 0 21 16 } bitfield ] dip insn ;
@ -29,7 +29,7 @@ IN: cpu.ppc.assembler.backend
: define-sd-insn ( word opcode -- )
[ sd-insn ] curry (( d a simm -- )) define-declared ;
: SD: CREATE scan-word define-sd-insn ; parsing
SYNTAX: SD: CREATE scan-word define-sd-insn ;
: i-insn ( li aa lk opcode -- )
[ { 0 1 0 } bitfield ] dip insn ;
@ -40,26 +40,26 @@ IN: cpu.ppc.assembler.backend
: (X) ( -- word quot )
CREATE scan-word scan-word scan-word [ x-insn ] 3curry ;
: X: (X) (( a s b -- )) define-declared ; parsing
SYNTAX: X: (X) (( a s b -- )) define-declared ;
: (1) ( quot -- quot' ) [ 0 ] prepose ;
: X1: (X) (1) (( a s -- )) define-declared ; parsing
SYNTAX: X1: (X) (1) (( a s -- )) define-declared ;
: xfx-insn ( d spr xo opcode -- )
[ { 1 11 21 } bitfield ] dip insn ;
: CREATE-MF ( -- word ) scan "MF" prepend create-in ;
: MFSPR:
SYNTAX: MFSPR:
CREATE-MF scan-word 5 shift [ 339 31 xfx-insn ] curry
(( d -- )) define-declared ; parsing
(( d -- )) define-declared ;
: CREATE-MT ( -- word ) scan "MT" prepend create-in ;
: MTSPR:
SYNTAX: MTSPR:
CREATE-MT scan-word 5 shift [ 467 31 xfx-insn ] curry
(( d -- )) define-declared ; parsing
(( d -- )) define-declared ;
: xo-insn ( d a b oe rc xo opcode -- )
[ { 1 0 10 11 16 21 } bitfield ] dip insn ;
@ -68,9 +68,9 @@ IN: cpu.ppc.assembler.backend
CREATE scan-word scan-word scan-word scan-word
[ xo-insn ] 2curry 2curry ;
: XO: (XO) (( a s b -- )) define-declared ; parsing
SYNTAX: XO: (XO) (( a s b -- )) define-declared ;
: XO1: (XO) (1) (( a s -- )) define-declared ; parsing
SYNTAX: XO1: (XO) (1) (( a s -- )) define-declared ;
GENERIC# (B) 2 ( dest aa lk -- )
M: integer (B) 18 i-insn ;
@ -84,11 +84,11 @@ M: label BC [ 0 BC ] dip rc-relative-ppc-2 label-fixup ;
: CREATE-B ( -- word ) scan "B" prepend create-in ;
: BC:
SYNTAX: BC:
CREATE-B scan-word scan-word
[ rot BC ] 2curry (( c -- )) define-declared ; parsing
[ rot BC ] 2curry (( c -- )) define-declared ;
: B:
SYNTAX: B:
CREATE-B scan-word scan-word scan-word scan-word scan-word
[ b-insn ] curry curry curry curry curry
(( bo -- )) define-declared ; parsing
(( bo -- )) define-declared ;

View File

@ -11,5 +11,4 @@ IN: cpu.x86.assembler.syntax
: define-registers ( names size -- )
'[ _ define-register ] each-index ;
: REGISTERS: ( -- )
scan-word ";" parse-tokens swap define-registers ; parsing
SYNTAX: REGISTERS: scan-word ";" parse-tokens swap define-registers ;

View File

@ -14,10 +14,10 @@ GENERIC: definition-icon ( definition -- path )
<<
: ICON:
SYNTAX: ICON:
scan-word \ definition-icon create-method
scan '[ drop _ definition-icon-path ]
define ; parsing
define ;
>>

View File

@ -85,9 +85,9 @@ PRIVATE>
: define-consult ( consultation -- )
[ register-consult ] [ consult-methods ] bi ;
: CONSULT:
SYNTAX: CONSULT:
scan-word scan-word parse-definition <consultation>
[ save-location ] [ define-consult ] bi ; parsing
[ save-location ] [ define-consult ] bi ;
M: consultation where loc>> ;
@ -144,8 +144,8 @@ PRIVATE>
[ initialize-protocol-props ] 2tri
] 2bi ;
: PROTOCOL:
CREATE-WORD parse-definition define-protocol ; parsing
SYNTAX: PROTOCOL:
CREATE-WORD parse-definition define-protocol ;
PREDICATE: protocol < word protocol-words ; ! Subclass of symbol?
@ -159,7 +159,7 @@ M: protocol definer drop \ PROTOCOL: \ ; ;
M: protocol group-words protocol-words ;
: SLOT-PROTOCOL:
SYNTAX: SLOT-PROTOCOL:
CREATE-WORD ";" parse-tokens
[ [ reader-word ] [ writer-word ] bi 2array ] map concat
define-protocol ; parsing
define-protocol ;

View File

@ -53,4 +53,4 @@ M: callable deep-fry
M: object deep-fry , ;
: '[ parse-quotation fry over push-all ; parsing
SYNTAX: '[ parse-quotation fry over push-all ;

View File

@ -16,6 +16,8 @@ IN: functors
: define* ( word def effect -- ) pick set-word define-declared ;
: define-syntax* ( word def -- ) over set-word define-syntax ;
TUPLE: fake-quotation seq ;
GENERIC: >fake-quotations ( quot -- fake )
@ -41,7 +43,7 @@ M: object fake-quotations> ;
: DEFINE* ( accum -- accum ) effect get parsed \ define* parsed ;
: `TUPLE:
SYNTAX: `TUPLE:
scan-param parsed
scan {
{ ";" [ tuple parsed f parsed ] }
@ -52,40 +54,44 @@ M: object fake-quotations> ;
make parsed
]
} case
\ define-tuple-class parsed ; parsing
\ define-tuple-class parsed ;
: `M:
SYNTAX: `M:
effect off
scan-param parsed
scan-param parsed
\ create-method-in parsed
parse-definition*
DEFINE* ; parsing
DEFINE* ;
: `C:
SYNTAX: `C:
effect off
scan-param parsed
scan-param parsed
[ [ boa ] curry ] over push-all
DEFINE* ; parsing
DEFINE* ;
: `:
SYNTAX: `:
effect off
scan-param parsed
parse-definition*
DEFINE* ; parsing
DEFINE* ;
: `INSTANCE:
SYNTAX: `SYNTAX:
effect off
scan-param parsed
parse-definition*
\ define-syntax* parsed ;
SYNTAX: `INSTANCE:
scan-param parsed
scan-param parsed
\ add-mixin-instance parsed ; parsing
\ add-mixin-instance parsed ;
: `inline [ word make-inline ] over push-all ; parsing
SYNTAX: `inline [ word make-inline ] over push-all ;
: `parsing [ word make-parsing ] over push-all ; parsing
: `(
")" parse-effect effect set ; parsing
SYNTAX: `(
")" parse-effect effect set ;
: (INTERPOLATE) ( accum quot -- accum )
[ scan interpolate-locals ] dip
@ -93,11 +99,11 @@ M: object fake-quotations> ;
PRIVATE>
: IS [ dup search [ ] [ no-word ] ?if ] (INTERPOLATE) ; parsing
SYNTAX: IS [ dup search [ ] [ no-word ] ?if ] (INTERPOLATE) ;
: DEFINES [ create-in ] (INTERPOLATE) ; parsing
SYNTAX: DEFINES [ create-in ] (INTERPOLATE) ;
: DEFINES-CLASS [ create-class-in ] (INTERPOLATE) ; parsing
SYNTAX: DEFINES-CLASS [ create-class-in ] (INTERPOLATE) ;
DEFER: ;FUNCTOR delimiter
@ -110,8 +116,8 @@ DEFER: ;FUNCTOR delimiter
{ "C:" POSTPONE: `C: }
{ ":" POSTPONE: `: }
{ "INSTANCE:" POSTPONE: `INSTANCE: }
{ "SYNTAX:" POSTPONE: `SYNTAX: }
{ "inline" POSTPONE: `inline }
{ "parsing" POSTPONE: `parsing }
{ "(" POSTPONE: `( }
} ;
@ -132,4 +138,4 @@ DEFER: ;FUNCTOR delimiter
PRIVATE>
: FUNCTOR: (FUNCTOR:) define ; parsing
SYNTAX: FUNCTOR: (FUNCTOR:) define ;

View File

@ -5,19 +5,19 @@ help.topics namespaces vocabs definitions compiler.units
vocabs.parser ;
IN: help.syntax
: HELP:
SYNTAX: HELP:
scan-word bootstrap-word
dup set-word
dup >link save-location
\ ; parse-until >array swap set-word-help ; parsing
\ ; parse-until >array swap set-word-help ;
: ARTICLE:
SYNTAX: ARTICLE:
location [
\ ; parse-until >array [ first2 ] keep 2 tail <article>
over add-article >link
] dip remember-definition ; parsing
] dip remember-definition ;
: ABOUT:
SYNTAX: ABOUT:
in get vocab
dup changed-definition
scan-object >>help drop ; parsing
scan-object >>help drop ;

View File

@ -59,12 +59,11 @@ M: object specializer-declaration class ;
: specialized-length ( specializer -- n )
dup [ array? ] all? [ first ] when length ;
: HINTS:
SYNTAX: HINTS:
scan-object
dup method-spec? [ first2 method ] when
[ redefined ]
[ parse-definition "specializer" set-word-prop ] bi ;
parsing
! Default specializers
{ first first2 first3 first4 }

View File

@ -25,8 +25,7 @@ M: tuple-class component-tag ( tag class -- )
[ compile-component-attrs ] 2bi
[ render ] [code] ;
: COMPONENT:
SYNTAX: COMPONENT:
scan-word
[ name>> ] [ '[ _ component-tag ] ] bi
define-chloe-tag ;
parsing

View File

@ -15,8 +15,8 @@ tags [ H{ } clone ] initialize
: define-chloe-tag ( name quot -- ) swap tags get set-at ;
: CHLOE:
scan parse-definition define-chloe-tag ; parsing
SYNTAX: CHLOE:
scan parse-definition define-chloe-tag ;
CONSTANT: chloe-ns "http://factorcode.org/chloe/1.0"

View File

@ -49,7 +49,7 @@ DEFER: <% delimiter
drop
] if ;
: %> lexer get parse-%> ; parsing
SYNTAX: %> lexer get parse-%> ;
: parse-template-lines ( lines -- quot )
<template-lexer> [

View File

@ -38,6 +38,6 @@ MACRO: interpolate ( string -- )
: interpolate-locals ( string -- quot )
[ search [ ] ] (interpolate) ;
: I[
SYNTAX: I[
"]I" parse-multiline-string
interpolate-locals over push-all ; parsing
interpolate-locals over push-all ;

View File

@ -63,6 +63,6 @@ SYMBOL: euc-table
PRIVATE>
: EUC:
SYNTAX: EUC:
! EUC: euc-kr "vocab:io/encodings/korean/cp949.txt"
CREATE-CLASS scan-object define-euc ; parsing
CREATE-CLASS scan-object define-euc ;

View File

@ -7,30 +7,30 @@ IN: io.encodings.iso2022
[ "hello" ] [ "hello" >byte-array iso2022 decode ] unit-test
[ "hello" ] [ "hello" iso2022 encode >string ] unit-test
[ "hi" ] [ B{ CHAR: h $ ESC CHAR: ( CHAR: B CHAR: i } iso2022 decode ] unit-test
[ "hi" ] [ B{ CHAR: h CHAR: i $ ESC CHAR: ( CHAR: B } iso2022 decode ] unit-test
[ "hi\u00fffd" ] [ B{ CHAR: h CHAR: i $ ESC CHAR: ( } iso2022 decode ] unit-test
[ "hi\u00fffd" ] [ B{ CHAR: h CHAR: i $ ESC } iso2022 decode ] unit-test
[ "hi" ] [ B{ CHAR: h ESC CHAR: ( CHAR: B CHAR: i } iso2022 decode ] unit-test
[ "hi" ] [ B{ CHAR: h CHAR: i ESC CHAR: ( CHAR: B } iso2022 decode ] unit-test
[ "hi\u00fffd" ] [ B{ CHAR: h CHAR: i ESC CHAR: ( } iso2022 decode ] unit-test
[ "hi\u00fffd" ] [ B{ CHAR: h CHAR: i ESC } iso2022 decode ] unit-test
[ B{ CHAR: h $ ESC CHAR: ( CHAR: J HEX: D8 } ] [ "h\u00ff98" iso2022 encode ] unit-test
[ "h\u00ff98" ] [ B{ CHAR: h $ ESC CHAR: ( CHAR: J HEX: D8 } iso2022 decode ] unit-test
[ "hi" ] [ B{ CHAR: h $ ESC CHAR: ( CHAR: J CHAR: i } iso2022 decode ] unit-test
[ "h" ] [ B{ CHAR: h $ ESC CHAR: ( CHAR: J } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h $ ESC CHAR: ( CHAR: J HEX: 80 } iso2022 decode ] unit-test
[ B{ CHAR: h ESC CHAR: ( CHAR: J HEX: D8 } ] [ "h\u00ff98" iso2022 encode ] unit-test
[ "h\u00ff98" ] [ B{ CHAR: h ESC CHAR: ( CHAR: J HEX: D8 } iso2022 decode ] unit-test
[ "hi" ] [ B{ CHAR: h ESC CHAR: ( CHAR: J CHAR: i } iso2022 decode ] unit-test
[ "h" ] [ B{ CHAR: h ESC CHAR: ( CHAR: J } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h ESC CHAR: ( CHAR: J HEX: 80 } iso2022 decode ] unit-test
[ B{ CHAR: h $ ESC CHAR: $ CHAR: B HEX: 3E HEX: 47 } ] [ "h\u007126" iso2022 encode ] unit-test
[ "h\u007126" ] [ B{ CHAR: h $ ESC CHAR: $ CHAR: B HEX: 3E HEX: 47 } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h $ ESC CHAR: $ CHAR: B HEX: 3E } iso2022 decode ] unit-test
[ "h" ] [ B{ CHAR: h $ ESC CHAR: $ CHAR: B } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h $ ESC CHAR: $ } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h $ ESC } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h $ ESC CHAR: $ CHAR: B HEX: 80 HEX: 80 } iso2022 decode ] unit-test
[ B{ CHAR: h ESC CHAR: $ CHAR: B HEX: 3E HEX: 47 } ] [ "h\u007126" iso2022 encode ] unit-test
[ "h\u007126" ] [ B{ CHAR: h ESC CHAR: $ CHAR: B HEX: 3E HEX: 47 } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h ESC CHAR: $ CHAR: B HEX: 3E } iso2022 decode ] unit-test
[ "h" ] [ B{ CHAR: h ESC CHAR: $ CHAR: B } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h ESC CHAR: $ } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h ESC } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h ESC CHAR: $ CHAR: B HEX: 80 HEX: 80 } iso2022 decode ] unit-test
[ B{ CHAR: h $ ESC CHAR: $ CHAR: ( CHAR: D HEX: 38 HEX: 54 } ] [ "h\u0058ce" iso2022 encode ] unit-test
[ "h\u0058ce" ] [ B{ CHAR: h $ ESC CHAR: $ CHAR: ( CHAR: D HEX: 38 HEX: 54 } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h $ ESC CHAR: $ CHAR: ( CHAR: D HEX: 38 } iso2022 decode ] unit-test
[ "h" ] [ B{ CHAR: h $ ESC CHAR: $ CHAR: ( CHAR: D } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h $ ESC CHAR: $ CHAR: ( } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h $ ESC CHAR: $ CHAR: ( CHAR: D HEX: 70 HEX: 70 } iso2022 decode ] unit-test
[ B{ CHAR: h ESC CHAR: $ CHAR: ( CHAR: D HEX: 38 HEX: 54 } ] [ "h\u0058ce" iso2022 encode ] unit-test
[ "h\u0058ce" ] [ B{ CHAR: h ESC CHAR: $ CHAR: ( CHAR: D HEX: 38 HEX: 54 } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h ESC CHAR: $ CHAR: ( CHAR: D HEX: 38 } iso2022 decode ] unit-test
[ "h" ] [ B{ CHAR: h ESC CHAR: $ CHAR: ( CHAR: D } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h ESC CHAR: $ CHAR: ( } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h ESC CHAR: $ CHAR: ( CHAR: D HEX: 70 HEX: 70 } iso2022 decode ] unit-test
[ "\u{syriac-music}" iso2022 encode ] must-fail

View File

@ -2,7 +2,7 @@
! See http://factorcode.org/license.txt for BSD license.
USING: io.encodings kernel sequences io simple-flat-file sets math
combinators.short-circuit io.binary values arrays assocs
locals accessors combinators literals biassocs byte-arrays ;
locals accessors combinators biassocs byte-arrays parser ;
IN: io.encodings.iso2022
SINGLETON: iso2022
@ -31,12 +31,12 @@ M: iso2022 <encoder>
M: iso2022 <decoder>
make-iso-coder <decoder> ;
CONSTANT: ESC HEX: 16
<< SYNTAX: ESC HEX: 16 parsed ; >>
CONSTANT: switch-ascii B{ $ ESC CHAR: ( CHAR: B }
CONSTANT: switch-jis201 B{ $ ESC CHAR: ( CHAR: J }
CONSTANT: switch-jis208 B{ $ ESC CHAR: $ CHAR: B }
CONSTANT: switch-jis212 B{ $ ESC CHAR: $ CHAR: ( CHAR: D }
CONSTANT: switch-ascii B{ ESC CHAR: ( CHAR: B }
CONSTANT: switch-jis201 B{ ESC CHAR: ( CHAR: J }
CONSTANT: switch-jis208 B{ ESC CHAR: $ CHAR: B }
CONSTANT: switch-jis212 B{ ESC CHAR: $ CHAR: ( CHAR: D }
: find-type ( char -- code type )
{

View File

@ -3,7 +3,7 @@ tools.test parser math namespaces continuations vocabs kernel
compiler.units eval vocabs.parser ;
IN: listener.tests
: hello "Hi" print ; parsing
SYNTAX: hello "Hi" print ;
: parse-interactive ( string -- quot )
<string-reader> stream-read-quot ;

View File

@ -5,27 +5,25 @@ vocabs.loader words kernel namespaces locals.parser locals.types
locals.errors ;
IN: locals
: :>
SYNTAX: :>
scan locals get [ :>-outside-lambda-error ] unless*
[ make-local ] bind <def> parsed ; parsing
[ make-local ] bind <def> parsed ;
: [| parse-lambda over push-all ; parsing
SYNTAX: [| parse-lambda over push-all ;
: [let parse-let over push-all ; parsing
SYNTAX: [let parse-let over push-all ;
: [let* parse-let* over push-all ; parsing
SYNTAX: [let* parse-let* over push-all ;
: [wlet parse-wlet over push-all ; parsing
SYNTAX: [wlet parse-wlet over push-all ;
: :: (::) define ; parsing
SYNTAX: :: (::) define ;
: M:: (M::) define ; parsing
SYNTAX: M:: (M::) define ;
: MACRO:: (::) define-macro ; parsing
SYNTAX: MACRO:: (::) define-macro ;
: MEMO:: (::) define-memoized ; parsing
USE: syntax
SYNTAX: MEMO:: (::) define-memoized ;
{
"locals.macros"

View File

@ -135,11 +135,11 @@ PRIVATE>
[ [ input-logging-quot ] 2keep drop error-logging-quot ]
(define-logging) ;
: LOG:
SYNTAX: LOG:
#! Syntax: name level
CREATE-WORD dup scan-word
'[ 1array stack>message _ _ log-message ]
(( message -- )) define-declared ; parsing
(( message -- )) define-declared ;
USE: vocabs.loader

View File

@ -16,7 +16,7 @@ PRIVATE>
[ over real-macro-effect memoize-quot [ call ] append define ]
2bi ;
: MACRO: (:) define-macro ; parsing
SYNTAX: MACRO: (:) define-macro ;
PREDICATE: macro < word "macro" word-prop >boolean ;

View File

@ -16,8 +16,8 @@ SYMBOL: _
: define-match-vars ( seq -- )
[ define-match-var ] each ;
: MATCH-VARS: ! vars ...
";" parse-tokens define-match-vars ; parsing
SYNTAX: MATCH-VARS: ! vars ...
";" parse-tokens define-match-vars ;
: match-var? ( symbol -- bool )
dup word? [ "match-var" word-prop ] [ drop f ] if ;

View File

@ -289,7 +289,7 @@ M: MATRIX n*V(*)V+M!
M: MATRIX n*V(*)Vconj+M!
(prepare-ger) [ XGERC ] dip ;
: XMATRIX{ \ } [ >MATRIX ] parse-literal ; parsing
SYNTAX: XMATRIX{ \ } [ >MATRIX ] parse-literal ;
M: MATRIX pprint-delims
drop \ XMATRIX{ \ } ;

View File

@ -179,7 +179,7 @@ M: VECTOR n*V+V!
M: VECTOR n*V!
(prepare-scal) [ XSCAL ] dip ;
: XVECTOR{ \ } [ >VECTOR ] parse-literal ; parsing
SYNTAX: XVECTOR{ \ } [ >VECTOR ] parse-literal ;
M: VECTOR pprint-delims
drop \ XVECTOR{ \ } ;

View File

@ -31,7 +31,7 @@ M: complex sqrt >polar [ fsqrt ] [ 2.0 / ] bi* polar> ;
IN: syntax
: C{ \ } [ first2 rect> ] parse-literal ; parsing
SYNTAX: C{ \ } [ first2 rect> ] parse-literal ;
USE: prettyprint.custom

View File

@ -41,7 +41,7 @@ PRIVATE>
[ drop "memoize" set-word-prop ]
3tri ;
: MEMO: (:) define-memoized ; parsing
SYNTAX: MEMO: (:) define-memoized ;
PREDICATE: memoized < word "memoize" word-prop ;

View File

@ -76,18 +76,6 @@ ERROR: end-of-stream multipart ;
: empty-name? ( string -- ? )
{ "''" "\"\"" "" f } member? ;
: quote? ( ch -- ? ) "'\"" member? ;
: quoted? ( str -- ? )
{
[ length 1 > ]
[ first quote? ]
[ [ first ] [ peek ] bi = ]
} 1&& ;
: unquote ( str -- newstr )
dup quoted? [ but-last-slice rest-slice >string ] when ;
: save-uploaded-file ( multipart -- )
dup filename>> empty-name? [
drop

View File

@ -20,10 +20,10 @@ PRIVATE>
[ (parse-here) ] "" make but-last
lexer get next-line ;
: STRING:
SYNTAX: STRING:
CREATE-WORD
parse-here 1quotation
(( -- string )) define-inline ; parsing
(( -- string )) define-inline ;
<PRIVATE
@ -48,16 +48,16 @@ PRIVATE>
change-column drop
] "" make ;
: <"
"\">" parse-multiline-string parsed ; parsing
SYNTAX: <"
"\">" parse-multiline-string parsed ;
: <'
"'>" parse-multiline-string parsed ; parsing
SYNTAX: <'
"'>" parse-multiline-string parsed ;
: {'
"'}" parse-multiline-string parsed ; parsing
SYNTAX: {'
"'}" parse-multiline-string parsed ;
: {"
"\"}" parse-multiline-string parsed ; parsing
SYNTAX: {"
"\"}" parse-multiline-string parsed ;
: /* "*/" parse-multiline-string drop ; parsing
SYNTAX: /* "*/" parse-multiline-string drop ;

View File

@ -62,7 +62,7 @@ M: nibble-array resize
M: nibble-array byte-length length nibbles>bytes ;
: N{ \ } [ >nibble-array ] parse-literal ; parsing
SYNTAX: N{ \ } [ >nibble-array ] parse-literal ;
INSTANCE: nibble-array sequence

View File

@ -47,7 +47,7 @@ reset-gl-function-number-counter
parameters return parse-arglist [ abi indirect-quot ] dip
define-declared ;
: GL-FUNCTION:
SYNTAX: GL-FUNCTION:
gl-function-calling-convention
scan
scan dup
@ -55,5 +55,4 @@ reset-gl-function-number-counter
gl-function-number
[ gl-function-pointer ] 2curry swap
";" parse-tokens [ "()" subseq? not ] filter
define-indirect
; parsing
define-indirect ;

View File

@ -279,12 +279,12 @@ H{ } clone verify-messages set-global
: verify-message ( n -- word ) verify-messages get-global at ;
: X509_V_:
SYNTAX: X509_V_:
scan "X509_V_" prepend create-in
scan-word
[ 1quotation (( -- value )) define-inline ]
[ verify-messages get set-at ]
2bi ; parsing
2bi ;
>>

View File

@ -34,9 +34,9 @@ TUPLE: tokenizer any one many ;
: reset-tokenizer ( -- )
default-tokenizer \ tokenizer set-global ;
: TOKENIZER:
SYNTAX: TOKENIZER:
scan search [ "Tokenizer not found" throw ] unless*
execute( -- tokenizer ) \ tokenizer set-global ; parsing
execute( -- tokenizer ) \ tokenizer set-global ;
TUPLE: ebnf-non-terminal symbol ;
TUPLE: ebnf-terminal symbol ;
@ -522,16 +522,14 @@ M: ebnf-non-terminal (transform) ( ast -- parser )
parse-ebnf dup dup parser [ main swap at compile ] with-variable
[ compiled-parse ] curry [ with-scope ast>> ] curry ;
: <EBNF "EBNF>" reset-tokenizer parse-multiline-string parse-ebnf main swap at
parsed reset-tokenizer ; parsing
SYNTAX: <EBNF "EBNF>" reset-tokenizer parse-multiline-string parse-ebnf main swap at
parsed reset-tokenizer ;
: [EBNF "EBNF]" reset-tokenizer parse-multiline-string ebnf>quot nip
parsed \ call parsed reset-tokenizer ; parsing
SYNTAX: [EBNF "EBNF]" reset-tokenizer parse-multiline-string ebnf>quot nip
parsed \ call parsed reset-tokenizer ;
: EBNF:
SYNTAX: EBNF:
reset-tokenizer CREATE-WORD dup ";EBNF" parse-multiline-string
ebnf>quot swapd (( input -- ast )) define-declared "ebnf-parser" set-word-prop
reset-tokenizer ; parsing
reset-tokenizer ;

View File

@ -616,7 +616,7 @@ PRIVATE>
ERROR: parse-failed input word ;
: PEG:
SYNTAX: PEG:
(:)
[let | def [ ] word [ ] |
[
@ -630,7 +630,7 @@ ERROR: parse-failed input word ;
]
] with-compilation-unit
] over push-all
] ; parsing
] ;
USING: vocabs vocabs.loader ;

View File

@ -48,7 +48,7 @@ M: persistent-hash hashcode* nip assoc-size ;
M: persistent-hash clone ;
: PH{ \ } [ >persistent-hash ] parse-literal ; parsing
SYNTAX: PH{ \ } [ >persistent-hash ] parse-literal ;
M: persistent-hash pprint-delims drop \ PH{ \ } ;
M: persistent-hash >pprint-sequence >alist ;

View File

@ -179,7 +179,7 @@ M: persistent-vector equal?
: >persistent-vector ( seq -- pvec )
T{ persistent-vector } like ;
: PV{ \ } [ >persistent-vector ] parse-literal ; parsing
SYNTAX: PV{ \ } [ >persistent-vector ] parse-literal ;
M: persistent-vector pprint-delims drop \ PV{ \ } ;
M: persistent-vector >pprint-sequence ;

View File

@ -96,12 +96,12 @@ ARTICLE: "prettyprint-literal" "Literal prettyprinting protocol"
{ $code
"TUPLE: rect w h ;"
""
": RECT["
"SYNTAX: RECT["
" scan-word"
" scan-word \\ * assert="
" scan-word"
" scan-word \\ ] assert="
" <rect> parsed ; parsing"
" <rect> parsed ;"
}
"An example literal might be:"
{ $code "RECT[ 100 * 200 ]" }

View File

@ -0,0 +1 @@
Doug Coleman

View File

@ -0,0 +1,16 @@
! Copyright (C) 2009 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: sequences math kernel strings combinators.short-circuit ;
IN: quoting
: quote? ( ch -- ? ) "'\"" member? ;
: quoted? ( str -- ? )
{
[ length 1 > ]
[ first quote? ]
[ [ first ] [ peek ] bi = ]
} 1&& ;
: unquote ( str -- newstr )
dup quoted? [ but-last-slice rest-slice >string ] when ;

View File

@ -1 +1,2 @@
Doug Coleman
Daniel Ehrenberg

View File

@ -230,7 +230,10 @@ TUPLE: class-partition integers not-integers simples not-simples and or other ;
dup or-class flatten partition-classes
dup not-integers>> length {
{ 0 [ nip make-or-class ] }
{ 1 [ not-integers>> first [ class>> '[ _ swap class-member? ] any? ] keep or ] }
{ 1 [
not-integers>> first
[ class>> '[ _ swap class-member? ] any? ] keep or
] }
[ 3drop t ]
} case ;
@ -251,6 +254,12 @@ M: or-class <not-class>
M: t <not-class> drop f ;
M: f <not-class> drop t ;
: <minus-class> ( a b -- a-b )
<not-class> 2array <and-class> ;
: <sym-diff-class> ( a b -- a~b )
2array [ <or-class> ] [ <and-class> ] bi <minus-class> ;
M: primitive-class class-member?
class>> class-member? ;

View File

@ -11,7 +11,7 @@ IN: regexp.parser.tests
"a|b" "a.b" "a|b|c" "abc|b" "a|bcd" "a|(b)" "(?-i:a)" "||"
"(a)|b" "(a|b)" "((a)|(b))" "(?:a)" "(?i:a)" "|b" "b|"
"[abc]" "[a-c]" "[^a-c]" "[^]]" "[]a]" "[[]" "[]-a]" "[a-]" "[-]"
"[--a]" "foo*" "(foo)*" "(a|b)|c" "(foo){2,3}" "(foo){2,}"
"foo*" "(foo)*" "(a|b)|c" "(foo){2,3}" "(foo){2,}"
"(foo){2}" "{2,3}" "{," "{,}" "}" "foo}" "[^]-a]" "[^-]a]"
"[a-]" "[^a-]" "[^a-]" "a{,2}" "(?#foobar)"
"\\p{Space}" "\\t" "\\[" "[\\]]" "\\P{Space}"

View File

@ -148,19 +148,29 @@ Character = EscapeSequence
| "^" => [[ ^ <tagged-epsilon> ]]
| . ?[ allowed-char? ]?
AnyRangeCharacter = EscapeSequence | .
AnyRangeCharacter = !("&&"|"||"|"--"|"~~") (EscapeSequence | .)
RangeCharacter = !("]") AnyRangeCharacter
Range = RangeCharacter:a "-" RangeCharacter:b => [[ a b <range-class> ]]
Range = RangeCharacter:a "-" !("-") RangeCharacter:b => [[ a b <range-class> ]]
| RangeCharacter
StartRange = AnyRangeCharacter:a "-" RangeCharacter:b => [[ a b <range-class> ]]
StartRange = AnyRangeCharacter:a "-" !("-") RangeCharacter:b => [[ a b <range-class> ]]
| AnyRangeCharacter
Ranges = StartRange:s Range*:r => [[ r s prefix ]]
CharClass = "^"?:n Ranges:e => [[ e n char-class ]]
BasicCharClass = "^"?:n Ranges:e => [[ e n char-class ]]
CharClass = BasicCharClass:b "&&" CharClass:c
=> [[ b c 2array <and-class> ]]
| BasicCharClass:b "||" CharClass:c
=> [[ b c 2array <or-class> ]]
| BasicCharClass:b "~~" CharClass:c
=> [[ b c <sym-diff-class> ]]
| BasicCharClass:b "--" CharClass:c
=> [[ b c <minus-class> ]]
| BasicCharClass
Options = [idmsux]*

View File

@ -45,11 +45,11 @@ ARTICLE: { "regexp" "construction" } "Constructing regular expressions"
ARTICLE: { "regexp" "syntax" } "Regular expression syntax"
"Regexp syntax is largely compatible with Perl, Java and extended POSIX regexps, but not completely. Below, the syntax is documented."
{ $heading "Characters" }
"At its core, regular expressions consist of character literals. For example, " { $snippet "R/ f/" } " is a regular expression matching just the string 'f'. In addition, the normal escape codes are provided, like " { $snippet "\\t" } " for the tab character and " { $snippet "\\uxxxxxx" } "for an arbitrary Unicode code point, by its hex value. In addition, any character can be preceded by a backslash to escape it, unless this has special meaning. For example, to match a literal opening parenthesis, use " { $snippet "\\(" } "."
"At its core, regular expressions consist of character literals. For example, " { $snippet "R/ f/" } " is a regular expression matching just the string 'f'. In addition, the normal escape codes are provided, like " { $snippet "\\t" } " for the tab character and " { $snippet "\\uxxxxxx" } " for an arbitrary Unicode code point, by its hex value. In addition, any character can be preceded by a backslash to escape it, unless this has special meaning. For example, to match a literal opening parenthesis, use " { $snippet "\\(" } "."
{ $heading "Concatenation, alternation and grouping" }
"Regular expressions can be built out of multiple characters by concatenation. For example, " { $snippet "R/ ab/" } " matches a followed by b. The " { $snippet "|" } " (alternation) operator can construct a regexp which matches one of two alternatives. Parentheses can be used for gropuing. So " { $snippet "R/ f(oo|ar)/" } " would match either 'foo' or 'far'."
"Regular expressions can be built out of multiple characters by concatenation. For example, " { $snippet "R/ ab/" } " matches a followed by b. The " { $snippet "|" } " (alternation) operator can construct a regexp which matches one of two alternatives. Parentheses can be used for grouping. So " { $snippet "R/ f(oo|ar)/" } " would match either 'foo' or 'far'."
{ $heading "Character classes" }
"Square brackets define a convenient way to refer to a set of characters. For example, " { $snippet "[ab]" } " refers to either a or b. And " { $snippet "[a-z]" } " refers to all of the characters between a and z, in code point order. You can use these together, as in " { $snippet "[ac-fz]" } " which matches all of the characters between c and f, in addition to a and z. Character classes can be negated using a carat, as in " { $snippet "[^a]" } " which matches all characters which are not a."
"Square brackets define a convenient way to refer to a set of characters. For example, " { $snippet "[ab]" } " refers to either a or b. And " { $snippet "[a-z]" } " refers to all of the characters between a and z, in code point order. You can use these together, as in " { $snippet "[ac-fz]" } " which matches all of the characters between c and f, in addition to a and z. Character classes can be negated using a caret, as in " { $snippet "[^a]" } " which matches all characters which are not a."
{ $heading "Predefined character classes" }
"Several character classes are predefined, both for convenience and because they are too large to represent directly. In Factor regular expressions, all character classes are Unicode-aware."
{ $table
@ -72,10 +72,12 @@ ARTICLE: { "regexp" "syntax" } "Regular expression syntax"
{ { $snippet "\\p{blank}" } "Non-newline whitespace" }
{ { $snippet "\\p{cntrl}" } "Control character" }
{ { $snippet "\\p{space}" } "Whitespace" }
{ { $snippet "\\p{xdigit}" } "Hexidecimal digit" }
{ { $snippet "\\p{xdigit}" } "Hexadecimal digit" }
{ { $snippet "\\p{Nd}" } "Character in Unicode category Nd" }
{ { $snippet "\\p{Z}" } "Character in Unicode category beginning with Z" }
{ { $snippet "\\p{script=Cham}" } "Character in the Cham writing system" } }
{ $heading "Character class operations" }
"Character classes can be composed using four binary operations: " { $snippet "|| && ~~ --" } ". These do the operations union, intersection, symmetric difference and difference, respectively. For example, characters which are lower-case but not Latin script could be matched as " { $snippet "[\\p{lower}--\\p{script=latin}]" } ". These operations are right-associative, and " { $snippet "^" } " binds tighter than them. There is no syntax for grouping."
{ $heading "Boundaries" }
"Special operators exist to match certain points in the string. These are called 'zero-width' because they do not consume any characters."
{ $table
@ -107,9 +109,18 @@ ARTICLE: { "regexp" "syntax" } "Regular expression syntax"
{ $heading "Quotation" }
"To make it convenient to have a long string which uses regexp operators, a special syntax is provided. If a substring begins with " { $snippet "\\Q" } " then everything until " { $snippet "\\E" } " is quoted (escaped). For example, " { $snippet "R/ \\Qfoo\\bar|baz()\\E/" } " matches exactly the string " { $snippet "\"foo\\bar|baz()\"" } "."
{ $heading "Unsupported features" }
"One missing feature is backreferences. This is because of a design decision to allow only regular expressions following the formal theory of regular languages. For more information, see " { $link { "regexp" "theory" } } ". You can create a new regular expression to match a particular string using " { $vocab-link "regexp.combinators" } " and group capture is available to extract parts of a regular expression match." $nl
"Another feature is Perl's " { $snippet "\\G" } " syntax, which references the previous match, is not included. This is because that sequence is inherently stateful, and Factor regexps don't hold state." $nl
"None of the operations which embed code into a regexp are supported, as this would require the inclusion of the Factor parser and compiler in any application which wants to expose regexps to the user. None of the casing operations are included of Perl like \\L, for simplicity." ; ! Also describe syntax, from the beginning
{ $subheading "Group capture" }
{ $subheading "Reluctant and posessive quantifiers" }
{ $subheading "Backreferences" }
"Backreferences were omitted because of a design decision to allow only regular expressions following the formal theory of regular languages. For more information, see " { $link { "regexp" "theory" } } "."
$nl
"To work around the lack of backreferences, consider using group capture and then creating a new regular expression to match the captured string using " { $vocab-link "regexp.combinators" } "."
{ $subheading "Previous match" }
"Another feature that is not included is Perl's " { $snippet "\\G" } " syntax, which references the previous match. This is because that sequence is inherently stateful, and Factor regexps don't hold state."
{ $subheading "Embedding code" }
"Operations which embed code into a regexp are not supported. This would require the inclusion of the Factor parser and compiler in any deployed application which wants to expose regexps to the user, leading to an undesirable increase in the code size."
{ $heading "Casing operations" }
"No special casing operations are included, for example Perl's " { $snippet "\\L" } "." ;
ARTICLE: { "regexp" "options" } "Regular expression options"
"When " { $link { "regexp" "construction" } } ", various options can be provided. Options have single-character names. A string of options has one of the following two forms:"
@ -152,7 +163,7 @@ ARTICLE: { "regexp" "theory" } "The theory of regular expressions"
"This implies, by DeMorgan's law, that, if you have two regular languages, their intersection is also regular. That is, for any two regular expressions, there exists a regular expression which matches strings that match both inputs." $nl
"Traditionally, regular expressions on computer support an additional operation: backreferences. For example, the Perl regexp " { $snippet "/(.*)$1/" } " matches a string repated twice. If a backreference refers to a string with a predetermined maximum length, then the resulting language is still regular." $nl
"But, if not, the language is not regular. There is strong evidence that there is no efficient way to parse with backreferences in the general case. Perl uses a naive backtracking algorithm which has pathological behavior in some cases, taking exponential time to match even if backreferences aren't used. Additionally, expressions with backreferences don't have the properties with negation and intersection described above." $nl
"The Factor regular expression engine was built with the design decision to support negation and intersection at the expense of backreferences. This lets us have a guaranteed linear-time matching algorithm. Systems like Ragel and Lex also use this algorithm, but in the Factor regular expression engine, all other features of regexps are still present." ;
"The Factor regular expression engine was built with the design decision to support negation and intersection at the expense of backreferences. This lets us have a guaranteed linear-time matching algorithm. Systems like Ragel and Lex use the same algorithm." ;
ARTICLE: { "regexp" "operations" } "Matching operations with regular expressions"
"Testing if a string matches a regular expression:"

View File

@ -508,3 +508,29 @@ IN: regexp-tests
[ t ] [ " " R/ \P{LL}/ matches? ] unit-test
[ f ] [ "a" R/ \P{sCriPt = latin}/ matches? ] unit-test
[ t ] [ " " R/ \P{SCRIPT = laTIn}/ matches? ] unit-test
! Logical operators
[ t ] [ "a" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
[ t ] [ "π" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
[ t ] [ "A" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
[ f ] [ "3" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
[ t ] [ "a" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
[ t ] [ "π" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
[ t ] [ "A" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
[ f ] [ "3" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
[ t ] [ "a" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
[ f ] [ "π" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
[ f ] [ "A" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
[ f ] [ "3" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
[ f ] [ "a" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
[ t ] [ "π" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
[ t ] [ "A" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
[ f ] [ "3" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
[ f ] [ "a" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
[ f ] [ "π" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
[ t ] [ "A" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
[ f ] [ "3" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test

View File

@ -204,17 +204,17 @@ PRIVATE>
PRIVATE>
: R! CHAR: ! parsing-regexp ; parsing
: R" CHAR: " parsing-regexp ; parsing
: R# CHAR: # parsing-regexp ; parsing
: R' CHAR: ' parsing-regexp ; parsing
: R( CHAR: ) parsing-regexp ; parsing
: R/ CHAR: / parsing-regexp ; parsing
: R@ CHAR: @ parsing-regexp ; parsing
: R[ CHAR: ] parsing-regexp ; parsing
: R` CHAR: ` parsing-regexp ; parsing
: R{ CHAR: } parsing-regexp ; parsing
: R| CHAR: | parsing-regexp ; parsing
SYNTAX: R! CHAR: ! parsing-regexp ;
SYNTAX: R" CHAR: " parsing-regexp ;
SYNTAX: R# CHAR: # parsing-regexp ;
SYNTAX: R' CHAR: ' parsing-regexp ;
SYNTAX: R( CHAR: ) parsing-regexp ;
SYNTAX: R/ CHAR: / parsing-regexp ;
SYNTAX: R@ CHAR: @ parsing-regexp ;
SYNTAX: R[ CHAR: ] parsing-regexp ;
SYNTAX: R` CHAR: ` parsing-regexp ;
SYNTAX: R{ CHAR: } parsing-regexp ;
SYNTAX: R| CHAR: | parsing-regexp ;
USING: vocabs vocabs.loader ;

View File

@ -74,4 +74,4 @@ PRIVATE>
: roman/mod ( str1 str2 -- str3 str4 )
[ /mod ] binary-roman-op [ >roman ] dip ;
: ROMAN: scan roman> parsed ; parsing
SYNTAX: ROMAN: scan roman> parsed ;

View File

@ -93,7 +93,6 @@ M: object declarations. drop ;
M: word declarations.
{
POSTPONE: parsing
POSTPONE: delimiter
POSTPONE: inline
POSTPONE: recursive

View File

@ -19,8 +19,8 @@ MACRO: shuffle-effect ( effect -- )
[ [ at \ swap \ nth [ ] 3sequence ] curry map , \ cleave , ] 2bi
] [ ] make ;
: shuffle(
")" parse-effect parsed \ shuffle-effect parsed ; parsing
SYNTAX: shuffle(
")" parse-effect parsed \ shuffle-effect parsed ;
: 2swap ( x y z t -- z t x y ) 2 2 mnswap ; inline

View File

@ -1,8 +1,24 @@
USING: help.syntax help.markup strings ;
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: help.syntax help.markup strings biassocs arrays ;
IN: simple-flat-file
ABOUT: "simple-flat-file"
ARTICLE: "simple-flat-file" "Parsing simple flat files"
"The " { $vocab-link "simple-flat-file" } " vocabulary provides words for loading and parsing simple flat files in a particular format which is common for encoding tasks."
{ $subsection flat-file>biassoc } ;
"The " { $vocab-link "simple-flat-file" } " vocabulary provides words for loading and parsing simple flat files in a particular format which is common for encoding and Unicode tasks."
{ $subsection flat-file>biassoc }
{ $subsection load-interval-file }
{ $subsection data } ;
HELP: load-interval-file
{ $values { "filename" string } { "table" "an interval map" } }
{ $description "This loads a file that looks like Script.txt in the Unicode Character Database and converts it into an efficient interval map, where the keys are characters and the values are strings for the properties." } ;
HELP: data
{ $values { "filename" string } { "data" array } }
{ $description "This loads a file that's delineated by semicolons and lines, returning an array of lines, where each line is an array split by the semicolons, with whitespace trimmed off." } ;
HELP: flat-file>biassoc
{ $values { "filename" string } { "biassoc" biassoc } }
{ $description "This loads a flat file, in the form that many encoding resource files are in, with two columns of numeric data in hex, and returns a biassoc associating them." } ;

View File

@ -1,7 +1,7 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: sequences splitting kernel math.parser io.files io.encodings.utf8
biassocs ascii ;
biassocs ascii namespaces arrays make assocs interval-maps sets ;
IN: simple-flat-file
: drop-comments ( seq -- newseq )
@ -30,3 +30,25 @@ IN: simple-flat-file
: data ( filename -- data )
utf8 file-lines drop-comments [ split-; ] map ;
SYMBOL: interned
: range, ( value key -- )
swap interned get
[ = ] with find nip 2array , ;
: expand-ranges ( assoc -- interval-map )
[
[
swap CHAR: . over member? [
".." split1 [ hex> ] bi@ 2array
] [ hex> ] if range,
] assoc-each
] { } make <interval-map> ;
: process-interval-file ( ranges -- table )
dup values prune interned
[ expand-ranges ] with-variable ;
: load-interval-file ( filename -- table )
data process-interval-file ;

View File

@ -70,7 +70,7 @@ M: A >pprint-sequence ;
M: A pprint* pprint-object ;
: A{ \ } [ >A ] parse-literal ; parsing
SYNTAX: A{ \ } [ >A ] parse-literal ;
INSTANCE: A sequence

View File

@ -39,7 +39,7 @@ M: V >pprint-sequence ;
M: V pprint* pprint-object ;
: V{ \ } [ >V ] parse-literal ; parsing
SYNTAX: V{ \ } [ >V ] parse-literal ;
INSTANCE: V growable

View File

@ -66,3 +66,8 @@ DEFER: curry-folding-test ( quot -- )
{ 3 0 } [ [ 1 2 3 ] curry-folding-test ] must-infer-as
{ 3 0 } [ 1 [ 2 3 ] curry curry-folding-test ] must-infer-as
{ 3 0 } [ [ 1 2 ] 3 [ ] curry compose curry-folding-test ] must-infer-as
: member?-test ( a -- ? ) { 1 2 3 10 7 58 } member? ;
[ f ] [ 1.0 member?-test ] unit-test
[ t ] [ \ member?-test def>> first [ member?-test ] all? ] unit-test

View File

@ -1,12 +1,12 @@
! Copyright (C) 2007, 2009 Slava Pestov, Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: fry accessors arrays kernel kernel.private combinators.private
words sequences generic math namespaces make quotations assocs
combinators classes.tuple classes.tuple.private effects summary
hashtables classes generic sets definitions generic.standard
slots.private continuations locals generalizations
stack-checker.backend stack-checker.state stack-checker.visitor
stack-checker.errors stack-checker.values
words sequences generic math math.order namespaces make quotations assocs
combinators combinators.short-circuit classes.tuple
classes.tuple.private effects summary hashtables classes generic sets
definitions generic.standard slots.private continuations locals
generalizations stack-checker.backend stack-checker.state
stack-checker.visitor stack-checker.errors stack-checker.values
stack-checker.recursive-state ;
IN: stack-checker.transforms
@ -107,36 +107,28 @@ IN: stack-checker.transforms
] 1 define-transform
! Membership testing
CONSTANT: bit-member-n 256
CONSTANT: bit-member-max 256
: bit-member? ( seq -- ? )
#! Can we use a fast byte array test here?
{
{ [ dup length 8 < ] [ f ] }
{ [ dup [ integer? not ] any? ] [ f ] }
{ [ dup [ 0 < ] any? ] [ f ] }
{ [ dup [ bit-member-n >= ] any? ] [ f ] }
[ t ]
} cond nip ;
[ length 4 > ]
[ [ integer? ] all? ]
[ [ 0 bit-member-max between? ] any? ]
} 1&& ;
: bit-member-seq ( seq -- flags )
bit-member-n swap [ member? 1 0 ? ] curry B{ } map-as ;
: exact-float? ( f -- ? )
dup float? [ dup >integer >float = ] [ drop f ] if ; inline
[ supremum 1+ ] keep '[ _ member? 1 0 ? ] B{ } map-as ;
: bit-member-quot ( seq -- newquot )
[
bit-member-seq ,
[
{
{ [ over fixnum? ] [ ?nth 1 eq? ] }
{ [ over bignum? ] [ ?nth 1 eq? ] }
{ [ over exact-float? ] [ ?nth 1 eq? ] }
[ 2drop f ]
} cond
] %
] [ ] make ;
bit-member-seq
'[
_ {
{ [ over fixnum? ] [ ?nth 1 eq? ] }
{ [ over bignum? ] [ ?nth 1 eq? ] }
[ 2drop f ]
} cond
] ;
: member-quot ( seq -- newquot )
dup bit-member? [

View File

@ -32,7 +32,7 @@ PRIVATE>
: >suffix-array ( seq -- array )
[ suffixes ] map concat natural-sort ;
: SA{ \ } [ >suffix-array ] parse-literal ; parsing
SYNTAX: SA{ \ } [ >suffix-array ] parse-literal ;
: query ( begin suffix-array -- matches )
2dup find-index dup

View File

@ -40,10 +40,9 @@ M: bad-tr summary
PRIVATE>
: TR:
SYNTAX: TR:
scan parse-definition
unclip-last [ unclip-last ] dip compute-tr
[ check-tr ]
[ [ create-tr ] dip define-tr ]
[ [ "-fast" append create-tr ] dip define-fast-tr ] 2tri ;
parsing

View File

@ -4,8 +4,9 @@ USING: combinators.short-circuit unicode.categories kernel math
combinators splitting sequences math.parser io.files io assocs
arrays namespaces make math.ranges unicode.normalize
unicode.normalize.private values io.encodings.ascii
unicode.syntax unicode.data compiler.units fry
alien.syntax sets accessors interval-maps memoize locals words ;
unicode.data compiler.units fry unicode.categories.syntax
alien.syntax sets accessors interval-maps memoize locals words
simple-flat-file ;
IN: unicode.breaks
<PRIVATE
@ -31,9 +32,9 @@ CATEGORY: grapheme-control Zl Zp Cc Cf ;
[ drop Control ]
} case ;
CATEGORY: (extend) Me Mn ;
: extend? ( ch -- ? )
{ [ (extend)? ] [ "Other_Grapheme_Extend" property? ] } 1|| ;
CATEGORY: extend
Me Mn |
"Other_Grapheme_Extend" property? ;
: loe? ( ch -- ? )
"Logical_Order_Exception" property? ;
@ -127,7 +128,7 @@ to: grapheme-table
VALUE: word-break-table
"vocab:unicode/data/WordBreakProperty.txt" load-key-value
"vocab:unicode/data/WordBreakProperty.txt" load-interval-file
to: word-break-table
C-ENUM: wOther wCR wLF wNewline wExtend wFormat wKatakana wALetter wMidLetter

View File

@ -1,8 +1,8 @@
! Copyright (C) 2008, 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: unicode.data sequences namespaces
sbufs make unicode.syntax unicode.normalize math hints
unicode.categories combinators unicode.syntax assocs combinators.short-circuit
sbufs make unicode.normalize math hints
unicode.categories combinators assocs combinators.short-circuit
strings splitting kernel accessors unicode.breaks fry locals ;
QUALIFIED: ascii
IN: unicode.case

View File

@ -12,6 +12,9 @@ HELP: Letter
HELP: alpha
{ $class-description "The class of alphanumeric characters." } ;
HELP: math
{ $class-description "The class of Unicode math characters." } ;
HELP: blank
{ $class-description "The class of whitespace characters." } ;
@ -54,6 +57,8 @@ ARTICLE: "unicode.categories" "Character classes"
{ $subsection uncased }
{ $subsection uncased? }
{ $subsection character }
{ $subsection character? } ;
{ $subsection character? }
{ $subsection math }
{ $subsection math? } ;
ABOUT: "unicode.categories"

View File

@ -1,15 +1,16 @@
! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: unicode.syntax ;
USING: unicode.categories.syntax sequences unicode.data ;
IN: unicode.categories
CATEGORY: blank Zs Zl Zp \r\n ;
CATEGORY: letter Ll ;
CATEGORY: LETTER Lu ;
CATEGORY: Letter Lu Ll Lt Lm Lo ;
CATEGORY: blank Zs Zl Zp | "\r\n" member? ;
CATEGORY: letter Ll | "Other_Lowercase" property? ;
CATEGORY: LETTER Lu | "Other_Uppercase" property? ;
CATEGORY: Letter Lu Ll Lt Lm Lo Nl ;
CATEGORY: digit Nd Nl No ;
CATEGORY-NOT: printable Cc Cf Cs Co Cn ;
CATEGORY: alpha Lu Ll Lt Lm Lo Nd Nl No ;
CATEGORY: alpha Lu Ll Lt Lm Lo Nd Nl No | "Other_Alphabetic" property? ;
CATEGORY: control Cc ;
CATEGORY-NOT: uncased Lu Ll Lt Lm Mn Me ;
CATEGORY-NOT: character Cn ;
CATEGORY: math Sm | "Other_Math" property? ;

View File

@ -0,0 +1,19 @@
! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: help.syntax help.markup ;
IN: unicode.categories.syntax
ABOUT: "unicode.categories.syntax"
ARTICLE: "unicode.categories.syntax" "Unicode category syntax"
"There is special syntax sugar for making predicate classes which are unions of Unicode general categories, plus some other code."
{ $subsection POSTPONE: CATEGORY: }
{ $subsection POSTPONE: CATEGORY-NOT: } ;
HELP: CATEGORY:
{ $syntax "CATEGORY: foo Nl Pd Lu | \"Diacritic\" property? ;" }
{ $description "This defines a predicate class which is a subset of code points. In this example, " { $snippet "foo" } " is the class of characters which are in the general category Nl or Pd or Lu, or which have the Diacritic property." } ;
HELP: CATEGORY-NOT:
{ $syntax "CATEGORY-NOT: foo Nl Pd Lu | \"Diacritic\" property? ;" }
{ $description "This defines a predicate class which is a subset of code points, the complement of what " { $link POSTPONE: CATEGORY: } " would define. In this example, " { $snippet "foo" } " is the class of characters which are neither in the general category Nl or Pd or Lu, nor have the Diacritic property." } ;

View File

@ -0,0 +1,3 @@
! Copyright (C) 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.

View File

@ -0,0 +1,34 @@
! Copyright (C) 2008, 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: unicode.data kernel math sequences parser unicode.data.private
bit-arrays namespaces sequences.private arrays classes.parser
assocs classes.predicate sets fry splitting accessors ;
IN: unicode.categories.syntax
! For use in CATEGORY:
SYMBOLS: Cn Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So Zs Zl Zp Cc Cf Cs Co | ;
<PRIVATE
: [category] ( categories code -- quot )
'[ dup category# _ member? [ drop t ] _ if ] ;
: integer-predicate-class ( word predicate -- )
integer swap define-predicate-class ;
: define-category ( word categories code -- )
[category] integer-predicate-class ;
: define-not-category ( word categories code -- )
[category] [ not ] compose integer-predicate-class ;
: parse-category ( -- word tokens quot )
CREATE-CLASS \ ; parse-until { | } split1
[ [ name>> categories-map at ] map ]
[ [ [ ] like ] [ [ drop f ] ] if* ] bi* ;
PRIVATE>
SYNTAX: CATEGORY: parse-category define-category ;
SYNTAX: CATEGORY-NOT: parse-category define-not-category ;

View File

@ -4,7 +4,7 @@ USING: combinators.short-circuit sequences io.files
io.encodings.ascii kernel values splitting accessors math.parser
ascii io assocs strings math namespaces make sorting combinators
math.order arrays unicode.normalize unicode.data locals
unicode.syntax macros sequences.deep words unicode.breaks
macros sequences.deep words unicode.breaks
quotations combinators.short-circuit simple-flat-file ;
IN: unicode.collation

View File

@ -6,7 +6,7 @@ IN: unicode.data
ABOUT: "unicode.data"
ARTICLE: "unicode.data" "Unicode data tables"
"The " { $vocab-link "unicode.data" "unicode.data" } " vocabulary contains core Unicode data tables and code for parsing this from files."
"The " { $vocab-link "unicode.data" "unicode.data" } " vocabulary contains core Unicode data tables and code for parsing this from files. The following words access these data tables."
{ $subsection canonical-entry }
{ $subsection combine-chars }
{ $subsection combining-class }
@ -14,7 +14,11 @@ ARTICLE: "unicode.data" "Unicode data tables"
{ $subsection name>char }
{ $subsection char>name }
{ $subsection property? }
{ $subsection load-key-value } ;
{ $subsection category }
{ $subsection ch>upper }
{ $subsection ch>lower }
{ $subsection ch>title }
{ $subsection special-case } ;
HELP: canonical-entry
{ $values { "char" "a code point" } { "seq" string } }
@ -48,6 +52,22 @@ HELP: property?
{ $values { "char" "a code point" } { "property" string } { "?" "a boolean" } }
{ $description "Tests whether the code point is listed under the given property in PropList.txt in the Unicode Character Database." } ;
HELP: load-key-value
{ $values { "filename" string } { "table" "an interval map" } }
{ $description "This loads a file that looks like Script.txt in the Unicode Character Database and converts it into an efficient interval map, where the keys are characters and the values are strings for the properties." } ;
HELP: category
{ $values { "char" "a code point" } { "category" string } }
{ $description "Returns the general category of a code point, in the form of a string. This will always be a string within the ASCII range of length two. If the code point is unassigned, then it returns " { $snippet "Cn" } "." } ;
HELP: ch>upper
{ $values { "ch" "a code point" } { "upper" "a code point" } }
{ $description "Returns the simple upper-cased version of the code point, if it exists. This does not handle context-sensitive or locale-dependent properties of linguistically accurate case conversion, and does not correctly handle characters which become multiple characters on conversion to this case." } ;
HELP: ch>lower
{ $values { "ch" "a code point" } { "lower" "a code point" } }
{ $description "Returns the simple lower-cased version of the code point, if it exists. This does not handle context-sensitive or locale-dependent properties of linguistically accurate case conversion, and does not correctly handle characters which become multiple characters on conversion to this case." } ;
HELP: ch>title
{ $values { "ch" "a code point" } { "title" "a code point" } }
{ $description "Returns the simple title-cased version of the code point, if it exists. This does not handle context-sensitive or locale-dependent properties of linguistically accurate case conversion, and does not correctly handle characters which become multiple characters on conversion to this case." } ;
HELP: special-case
{ $values { "ch" "a code point" } { "casing-tuple" { "a tuple, or " { $link f } } } }
{ $description "If a code point has special casing behavior, returns a tuple which represents that information." } ;

View File

@ -58,7 +58,7 @@ CONSTANT: num-chars HEX: 2FA1E
PRIVATE>
: category# ( char -- category )
: category# ( char -- n )
! There are a few characters that should be Cn
! that this gives Cf or Mn
! Cf = 26; Mn = 5; Cn = 29
@ -219,27 +219,3 @@ load-properties to: properties
[ name>char [ "Invalid character" throw ] unless* ]
name>char-hook set-global
SYMBOL: interned
: range, ( value key -- )
swap interned get
[ = ] with find nip 2array , ;
: expand-ranges ( assoc -- interval-map )
[
[
swap CHAR: . over member? [
".." split1 [ hex> ] bi@ 2array
] [ hex> ] if range,
] assoc-each
] { } make <interval-map> ;
: process-key-value ( ranges -- table )
dup values prune interned
[ expand-ranges ] with-variable ;
PRIVATE>
: load-key-value ( filename -- table )
data process-key-value ;

View File

@ -2,7 +2,7 @@
! See http://factorcode.org/license.txt for BSD license.
USING: ascii sequences namespaces make unicode.data kernel math arrays
locals sorting.insertion accessors assocs math.order combinators
unicode.syntax strings sbufs hints combinators.short-circuit vectors ;
strings sbufs hints combinators.short-circuit vectors ;
IN: unicode.normalize
<PRIVATE

View File

@ -1,17 +1,13 @@
! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors values kernel sequences assocs io.files
io.encodings ascii math.ranges io splitting math.parser
namespaces make byte-arrays locals math sets io.encodings.ascii
words words.symbol compiler.units arrays interval-maps
unicode.data ;
USING: values interval-maps simple-flat-file ;
IN: unicode.script
<PRIVATE
VALUE: script-table
"vocab:unicode/script/Scripts.txt" load-key-value
"vocab:unicode/script/Scripts.txt" load-interval-file
to: script-table
PRIVATE>

View File

@ -1,38 +0,0 @@
! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: unicode.data kernel math sequences parser lexer
bit-arrays namespaces make sequences.private arrays quotations
assocs classes.predicate math.order strings.parser ;
IN: unicode.syntax
<PRIVATE
: >category-array ( categories -- bitarray )
categories [ swap member? ] with map >bit-array ;
: as-string ( strings -- bit-array )
concat unescape-string ;
: [category] ( categories -- quot )
[
[ [ categories member? not ] filter as-string ] keep
[ categories member? ] filter >category-array
[ dup category# ] % , [ nth-unsafe [ drop t ] ] %
\ member? 2array >quotation ,
\ if ,
] [ ] make ;
: define-category ( word categories -- )
[category] integer swap define-predicate-class ;
PRIVATE>
: CATEGORY:
CREATE ";" parse-tokens define-category ; parsing
: seq-minus ( seq1 seq2 -- diff )
[ member? not ] curry filter ;
: CATEGORY-NOT:
CREATE ";" parse-tokens
categories swap seq-minus define-category ; parsing

View File

@ -15,7 +15,7 @@ $nl
{ $vocab-subsection "Word and grapheme breaks" "unicode.breaks" }
{ $vocab-subsection "Unicode normalization" "unicode.normalize" }
"The following are mostly for internal use:"
{ $vocab-subsection "Unicode syntax" "unicode.syntax" }
{ $vocab-subsection "Unicode category syntax" "unicode.categories.syntax" }
{ $vocab-subsection "Unicode data tables" "unicode.data" }
{ $see-also "ascii" "io.encodings" } ;

View File

@ -179,7 +179,7 @@ PRIVATE>
dup protocol>> '[ _ protocol-port or ] change-port ;
! Literal syntax
: URL" lexer get skip-blank parse-string >url parsed ; parsing
SYNTAX: URL" lexer get skip-blank parse-string >url parsed ;
USING: vocabs vocabs.loader ;

View File

@ -30,11 +30,11 @@ PREDICATE: value-word < word
[ second \ obj>> = ]
} 1&& ;
: VALUE:
SYNTAX: VALUE:
CREATE-WORD
dup t "no-def-strip" set-word-prop
T{ value-holder } clone [ obj>> ] curry
(( -- value )) define-declared ; parsing
(( -- value )) define-declared ;
M: value-word definer drop \ VALUE: f ;
@ -43,9 +43,9 @@ M: value-word definition drop f ;
: set-value ( value word -- )
def>> first (>>obj) ;
: to:
SYNTAX: to:
scan-word literalize parsed
\ set-value parsed ; parsing
\ set-value parsed ;
: get-value ( word -- value )
def>> first obj>> ;

View File

@ -50,7 +50,7 @@ M: vlist like
INSTANCE: vlist immutable-sequence
: VL{ \ } [ >vlist ] parse-literal ; parsing
SYNTAX: VL{ \ } [ >vlist ] parse-literal ;
M: vlist pprint-delims drop \ VL{ \ } ;
M: vlist >pprint-sequence ;
@ -87,7 +87,7 @@ M: valist assoc-like
INSTANCE: valist assoc
: VA{ \ } [ >valist ] parse-literal ; parsing
SYNTAX: VA{ \ } [ >valist ] parse-literal ;
M: valist pprint-delims drop \ VA{ \ } ;
M: valist >pprint-sequence >alist ;

View File

@ -90,14 +90,13 @@ unless
PRIVATE>
: COM-INTERFACE:
SYNTAX: COM-INTERFACE:
scan
scan find-com-interface-definition
scan string>guid
parse-com-functions
<com-interface-definition>
dup save-com-interface-definition
define-words-for-com-interface
; parsing
define-words-for-com-interface ;
: GUID: scan string>guid parsed ; parsing
SYNTAX: GUID: scan string>guid parsed ;

View File

@ -1,19 +1,26 @@
! Copyright (C) 2005, 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: kernel sequences unicode.syntax math math.order combinators
hints ;
USING: kernel sequences unicode.categories.syntax math math.order
combinators hints combinators.short-circuit ;
IN: xml.char-classes
CATEGORY: 1.0name-start* Ll Lu Lo Lt Nl \u000559\u0006E5\u0006E6_: ;
: 1.0name-start? ( char -- ? )
dup 1.0name-start*? [ drop t ]
[ HEX: 2BB HEX: 2C1 between? ] if ;
CATEGORY: 1.0name-start
Ll Lu Lo Lt Nl | {
[ HEX: 2BB HEX: 2C1 between? ]
[ "\u000559\u0006E5\u0006E6_:" member? ]
} 1|| ;
CATEGORY: 1.0name-char Ll Lu Lo Lt Nl Mc Me Mn Lm Nd _-.\u000387: ;
CATEGORY: 1.0name-char
Ll Lu Lo Lt Nl Mc Me Mn Lm Nd |
"_-.\u000387:" member? ;
CATEGORY: 1.1name-start Ll Lu Lo Lm Ln Nl _: ;
CATEGORY: 1.1name-start
Ll Lu Lo Lm Nl |
"_:" member? ;
CATEGORY: 1.1name-char Ll Lu Lo Lm Ln Nl Mc Mn Nd Pc Cf _-.\u0000b7: ;
CATEGORY: 1.1name-char
Ll Lu Lo Lm Nl Mc Mn Nd Pc Cf |
"_-.\u0000b7:" member? ;
: name-start? ( 1.0? char -- ? )
swap [ 1.0name-start? ] [ 1.1name-start? ] if ;

View File

@ -26,17 +26,17 @@ M: no-tag summary
PRIVATE>
: TAGS:
SYNTAX: TAGS:
CREATE
[ H{ } clone "xtable" set-word-prop ]
[ define-tags ] bi ; parsing
[ define-tags ] bi ;
: TAG:
scan scan-word parse-definition define-tag ; parsing
SYNTAX: TAG:
scan scan-word parse-definition define-tag ;
: XML-NS:
SYNTAX: XML-NS:
CREATE-WORD (( string -- name )) over set-stack-effect
scan '[ f swap _ <name> ] define-memoized ; parsing
scan '[ f swap _ <name> ] define-memoized ;
<PRIVATE
@ -168,11 +168,11 @@ MACRO: interpolate-xml ( xml -- quot )
PRIVATE>
: <XML
"XML>" [ string>doc ] parse-def ; parsing
SYNTAX: <XML
"XML>" [ string>doc ] parse-def ;
: [XML
"XML]" [ string>chunk ] parse-def ; parsing
SYNTAX: [XML
"XML]" [ string>chunk ] parse-def ;
<PRIVATE

View File

@ -10,11 +10,11 @@ IN: xmode.loader.syntax
: (parse-rule-tag) ( rule-set tag specs class -- )
new swap init-from-tag swap add-rule ; inline
: RULE:
SYNTAX: RULE:
scan scan-word scan-word [
[ parse-definition call( -- ) ] { } make
swap [ (parse-rule-tag) ] 2curry
] dip swap define-tag ; parsing
] dip swap define-tag ;
! Attribute utilities
: string>boolean ( string -- ? ) "TRUE" = ;

View File

@ -57,6 +57,7 @@ IN: bootstrap.syntax
"EXCLUDE:"
"RENAME:"
"ALIAS:"
"SYNTAX:"
"V{"
"W{"
"["
@ -68,7 +69,6 @@ IN: bootstrap.syntax
"foldable"
"inline"
"recursive"
"parsing"
"t"
"{"
"}"

View File

@ -54,8 +54,10 @@ $nl
ARTICLE: "parsing-words" "Parsing words"
"The Factor parser follows a simple recursive-descent design. The parser reads successive tokens from the input; if the token identifies a number or an ordinary word, it is added to an accumulator vector. Otherwise if the token identifies a parsing word, the parsing word is executed immediately."
$nl
"Parsing words are marked by suffixing the definition with a " { $link POSTPONE: parsing } " declaration. Here is the simplest possible parsing word; it prints a greeting at parse time:"
{ $code ": hello \"Hello world\" print ; parsing" }
"Parsing words are defined using the a defining word:"
{ $subsection POSTPONE: SYNTAX: }
"Parsing words have uppercase names by convention. Here is the simplest possible parsing word; it prints a greeting at parse time:"
{ $code "SYNTAX: HELLO \"Hello world\" print ;" }
"Parsing words must not pop or push items from the stack; however, they are permitted to access the accumulator vector supplied by the parser at the top of the stack. That is, parsing words must have stack effect " { $snippet "( accum -- accum )" } ", where " { $snippet "accum" } " is the accumulator vector supplied by the parser."
$nl
"Parsing words can read input, add word definitions to the dictionary, and do anything an ordinary word can."

View File

@ -106,7 +106,7 @@ IN: parser.tests
] unit-test
DEFER: foo
"IN: parser.tests USING: math prettyprint ; : foo 2 2 + . ; parsing" eval
"IN: parser.tests USING: math prettyprint ; SYNTAX: foo 2 2 + . ;" eval
[ ] [ "USE: parser.tests foo" eval ] unit-test
@ -487,7 +487,7 @@ IN: parser.tests
[ "DEFER: blahy" eval ] [ error>> error>> no-current-vocab? ] must-fail-with
[
"IN: parser.tests : blahy ; parsing FORGET: blahy" eval
"IN: parser.tests SYNTAX: blahy ; FORGET: blahy" eval
] [
error>> staging-violation?
] must-fail-with

View File

@ -177,10 +177,10 @@ HELP: delimiter
{ $syntax ": foo ... ; delimiter" }
{ $description "Declares the most recently defined word as a delimiter. Delimiters are words which are only ever valid as the end of a nested block to be read by " { $link parse-until } ". An unpaired occurrence of a delimiter is a parse error." } ;
HELP: parsing
{ $syntax ": foo ... ; parsing" }
{ $description "Declares the most recently defined word as a parsing word." }
{ $examples "In the below example, the " { $snippet "world" } " word is never called, however its body references a parsing word which executes immediately:" { $example "USE: io" "IN: scratchpad" "<< : hello \"Hello parser!\" print ; parsing >>\n: world hello ;" "Hello parser!" } } ;
HELP: SYNTAX:
{ $syntax "SYNTAX: foo ... ;" }
{ $description "Defines a parsing word." }
{ $examples "In the below example, the " { $snippet "world" } " word is never called, however its body references a parsing word which executes immediately:" { $example "USE: io" "IN: scratchpad" "<< SYNTAX: HELLO \"Hello parser!\" print ; >>\n: world HELLO ;" "Hello parser!" } } ;
HELP: inline
{ $syntax ": foo ... ; inline" }

View File

@ -22,58 +22,58 @@ IN: bootstrap.syntax
: define-delimiter ( name -- )
"syntax" lookup t "delimiter" set-word-prop ;
: define-syntax ( name quot -- )
[ dup "syntax" lookup [ dup ] [ no-word-error ] ?if ] dip
define make-parsing ;
: define-core-syntax ( name quot -- )
[ dup "syntax" lookup [ ] [ no-word-error ] ?if ] dip
define-syntax ;
[
{ "]" "}" ";" ">>" } [ define-delimiter ] each
"PRIMITIVE:" [
"Primitive definition is not supported" throw
] define-syntax
] define-core-syntax
"CS{" [
"Call stack literals are not supported" throw
] define-syntax
] define-core-syntax
"!" [ lexer get next-line ] define-syntax
"!" [ lexer get next-line ] define-core-syntax
"#!" [ POSTPONE: ! ] define-syntax
"#!" [ POSTPONE: ! ] define-core-syntax
"IN:" [ scan set-in ] define-syntax
"IN:" [ scan set-in ] define-core-syntax
"PRIVATE>" [ in get ".private" ?tail drop set-in ] define-syntax
"PRIVATE>" [ in get ".private" ?tail drop set-in ] define-core-syntax
"<PRIVATE" [
POSTPONE: PRIVATE> in get ".private" append set-in
] define-syntax
] define-core-syntax
"USE:" [ scan use+ ] define-syntax
"USE:" [ scan use+ ] define-core-syntax
"USING:" [ ";" parse-tokens add-use ] define-syntax
"USING:" [ ";" parse-tokens add-use ] define-core-syntax
"QUALIFIED:" [ scan dup add-qualified ] define-syntax
"QUALIFIED:" [ scan dup add-qualified ] define-core-syntax
"QUALIFIED-WITH:" [ scan scan add-qualified ] define-syntax
"QUALIFIED-WITH:" [ scan scan add-qualified ] define-core-syntax
"FROM:" [
scan "=>" expect ";" parse-tokens swap add-words-from
] define-syntax
] define-core-syntax
"EXCLUDE:" [
scan "=>" expect ";" parse-tokens swap add-words-excluding
] define-syntax
] define-core-syntax
"RENAME:" [
scan scan "=>" expect scan add-renamed-word
] define-syntax
] define-core-syntax
"HEX:" [ 16 parse-base ] define-syntax
"OCT:" [ 8 parse-base ] define-syntax
"BIN:" [ 2 parse-base ] define-syntax
"HEX:" [ 16 parse-base ] define-core-syntax
"OCT:" [ 8 parse-base ] define-core-syntax
"BIN:" [ 2 parse-base ] define-core-syntax
"f" [ f parsed ] define-syntax
"f" [ f parsed ] define-core-syntax
"t" "syntax" lookup define-singleton-class
"CHAR:" [
@ -82,157 +82,160 @@ IN: bootstrap.syntax
{ [ "\\" ?head ] [ next-escape >string "" assert= ] }
[ name>char-hook get call( name -- char ) ]
} cond parsed
] define-syntax
] define-core-syntax
"\"" [ parse-string parsed ] define-syntax
"\"" [ parse-string parsed ] define-core-syntax
"SBUF\"" [
lexer get skip-blank parse-string >sbuf parsed
] define-syntax
] define-core-syntax
"P\"" [
lexer get skip-blank parse-string <pathname> parsed
] define-syntax
] define-core-syntax
"[" [ parse-quotation parsed ] define-syntax
"{" [ \ } [ >array ] parse-literal ] define-syntax
"V{" [ \ } [ >vector ] parse-literal ] define-syntax
"B{" [ \ } [ >byte-array ] parse-literal ] define-syntax
"H{" [ \ } [ >hashtable ] parse-literal ] define-syntax
"T{" [ parse-tuple-literal parsed ] define-syntax
"W{" [ \ } [ first <wrapper> ] parse-literal ] define-syntax
"[" [ parse-quotation parsed ] define-core-syntax
"{" [ \ } [ >array ] parse-literal ] define-core-syntax
"V{" [ \ } [ >vector ] parse-literal ] define-core-syntax
"B{" [ \ } [ >byte-array ] parse-literal ] define-core-syntax
"H{" [ \ } [ >hashtable ] parse-literal ] define-core-syntax
"T{" [ parse-tuple-literal parsed ] define-core-syntax
"W{" [ \ } [ first <wrapper> ] parse-literal ] define-core-syntax
"POSTPONE:" [ scan-word parsed ] define-syntax
"\\" [ scan-word <wrapper> parsed ] define-syntax
"inline" [ word make-inline ] define-syntax
"recursive" [ word make-recursive ] define-syntax
"foldable" [ word make-foldable ] define-syntax
"flushable" [ word make-flushable ] define-syntax
"delimiter" [ word t "delimiter" set-word-prop ] define-syntax
"parsing" [ word make-parsing ] define-syntax
"POSTPONE:" [ scan-word parsed ] define-core-syntax
"\\" [ scan-word <wrapper> parsed ] define-core-syntax
"inline" [ word make-inline ] define-core-syntax
"recursive" [ word make-recursive ] define-core-syntax
"foldable" [ word make-foldable ] define-core-syntax
"flushable" [ word make-flushable ] define-core-syntax
"delimiter" [ word t "delimiter" set-word-prop ] define-core-syntax
"SYNTAX:" [
(:) define-syntax
] define-core-syntax
"SYMBOL:" [
CREATE-WORD define-symbol
] define-syntax
] define-core-syntax
"SYMBOLS:" [
";" parse-tokens
[ create-in dup reset-generic define-symbol ] each
] define-syntax
] define-core-syntax
"SINGLETONS:" [
";" parse-tokens
[ create-class-in define-singleton-class ] each
] define-syntax
] define-core-syntax
"ALIAS:" [
CREATE-WORD scan-word define-alias
] define-syntax
] define-core-syntax
"CONSTANT:" [
CREATE scan-object define-constant
] define-syntax
] define-core-syntax
"DEFER:" [
scan current-vocab create
[ fake-definition ] [ set-word ] [ [ undefined ] define ] tri
] define-syntax
] define-core-syntax
":" [
(:) define
] define-syntax
] define-core-syntax
"GENERIC:" [
CREATE-GENERIC define-simple-generic
] define-syntax
] define-core-syntax
"GENERIC#" [
CREATE-GENERIC
scan-word <standard-combination> define-generic
] define-syntax
] define-core-syntax
"MATH:" [
CREATE-GENERIC
T{ math-combination } define-generic
] define-syntax
] define-core-syntax
"HOOK:" [
CREATE-GENERIC scan-word
<hook-combination> define-generic
] define-syntax
] define-core-syntax
"M:" [
(M:) define
] define-syntax
] define-core-syntax
"UNION:" [
CREATE-CLASS parse-definition define-union-class
] define-syntax
] define-core-syntax
"INTERSECTION:" [
CREATE-CLASS parse-definition define-intersection-class
] define-syntax
] define-core-syntax
"MIXIN:" [
CREATE-CLASS define-mixin-class
] define-syntax
] define-core-syntax
"INSTANCE:" [
location [
scan-word scan-word 2dup add-mixin-instance
<mixin-instance>
] dip remember-definition
] define-syntax
] define-core-syntax
"PREDICATE:" [
CREATE-CLASS
scan "<" assert=
scan-word
parse-definition define-predicate-class
] define-syntax
] define-core-syntax
"SINGLETON:" [
CREATE-CLASS define-singleton-class
] define-syntax
] define-core-syntax
"TUPLE:" [
parse-tuple-definition define-tuple-class
] define-syntax
] define-core-syntax
"SLOT:" [
scan define-protocol-slot
] define-syntax
] define-core-syntax
"C:" [
CREATE-WORD scan-word define-boa-word
] define-syntax
] define-core-syntax
"ERROR:" [
parse-tuple-definition
pick save-location
define-error-class
] define-syntax
] define-core-syntax
"FORGET:" [
scan-object forget
] define-syntax
] define-core-syntax
"(" [
")" parse-effect
word dup [ set-stack-effect ] [ 2drop ] if
] define-syntax
] define-core-syntax
"((" [
"))" parse-effect parsed
] define-syntax
] define-core-syntax
"MAIN:" [ scan-word in get vocab (>>main) ] define-syntax
"MAIN:" [ scan-word in get vocab (>>main) ] define-core-syntax
"<<" [
[
\ >> parse-until >quotation
] with-nested-compilation-unit call( -- )
] define-syntax
] define-core-syntax
"call-next-method" [
current-method get [
@ -241,13 +244,13 @@ IN: bootstrap.syntax
] [
not-in-a-method-error
] if*
] define-syntax
] define-core-syntax
"initial:" "syntax" lookup define-symbol
"read-only" "syntax" lookup define-symbol
"call(" [ \ call-effect parse-call( ] define-syntax
"call(" [ \ call-effect parse-call( ] define-core-syntax
"execute(" [ \ execute-effect parse-call( ] define-syntax
"execute(" [ \ execute-effect parse-call( ] define-core-syntax
] with-compilation-unit

View File

@ -57,16 +57,12 @@ $nl
} ;
ARTICLE: "declarations" "Declarations"
"Declarations give special behavior to a word. Declarations are parsing words that set a word property in the most recently defined word."
$nl
"The first declaration specifies the time when a word runs. It affects both the non-optimizing and optimizing compilers:"
{ $subsection POSTPONE: parsing }
"The remaining declarations only affect definitions compiled with the optimizing compiler. They do not change evaluation semantics of a word, but instead declare that the word follows a certain contract, and thus may be compiled differently."
{ $warning "If a generic word is declared " { $link POSTPONE: foldable } " or " { $link POSTPONE: flushable } ", all methods must satisfy the contract, otherwise unpredicable behavior will occur." }
"Declarations are parsing words that set a word property in the most recently defined word. Declarations only affect definitions compiled with the optimizing compiler. They do not change evaluation semantics of a word, but instead declare that the word follows a certain contract, and thus may be compiled differently."
{ $subsection POSTPONE: inline }
{ $subsection POSTPONE: foldable }
{ $subsection POSTPONE: flushable }
{ $subsection POSTPONE: recursive }
{ $warning "If a generic word is declared " { $link POSTPONE: foldable } " or " { $link POSTPONE: flushable } ", all methods must satisfy the contract, otherwise unpredicable behavior will occur." }
"Stack effect declarations are documented in " { $link "effect-declaration" } "." ;
ARTICLE: "word-definition" "Defining words"
@ -279,7 +275,7 @@ HELP: bootstrap-word
HELP: parsing-word? ( obj -- ? )
{ $values { "obj" object } { "?" "a boolean" } }
{ $description "Tests if an object is a parsing word declared by " { $link POSTPONE: parsing } "." }
{ $description "Tests if an object is a parsing word declared by " { $link POSTPONE: SYNTAX: } "." }
{ $notes "Outputs " { $link f } " if the object is not a word." } ;
HELP: define-declared

View File

@ -232,7 +232,10 @@ ERROR: bad-create name vocab ;
PREDICATE: parsing-word < word "parsing" word-prop ;
: make-parsing ( word -- ) t "parsing" set-word-prop ;
M: parsing-word definer drop \ SYNTAX: \ ; ;
: define-syntax ( word quot -- )
[ drop ] [ define ] 2bi t "parsing" set-word-prop ;
: delimiter? ( obj -- ? )
dup word? [ "delimiter" word-prop ] [ drop f ] if ;

View File

@ -56,8 +56,8 @@ PRIVATE>
: unadvise ( word -- )
[ reset ] [ { before after around advised } [ f swap set-word-prop ] with each ] bi ;
: ADVISE: ! word adname location => word adname quot loc
scan-word scan scan-word parse-definition swap [ spin ] dip advise ; parsing
SYNTAX: ADVISE: ! word adname location => word adname quot loc
scan-word scan scan-word parse-definition swap [ spin ] dip advise ;
: UNADVISE:
scan-word parsed \ unadvise parsed ; parsing
SYNTAX: UNADVISE:
scan-word parsed \ unadvise parsed ;

View File

@ -24,7 +24,7 @@ NAMEs. DEFINES ${NAME}s.
WHERE
: (NAME) ( str -- ) drop ; inline
: !NAME (parse-annotation) \ (NAME) parsed ; parsing
SYNTAX: !NAME (parse-annotation) \ (NAME) parsed ;
: NAMEs ( -- usages )
\ (NAME) (non-annotation-usage) ;

Some files were not shown because too many files have changed in this diff Show More