Merge branch 'master' of git://factorcode.org/git/factor into clean-linux-x86-32
commit
2c19aa1f25
|
@ -26,4 +26,4 @@ M: F-destructor dispose* alien>> F ;
|
|||
|
||||
;FUNCTOR
|
||||
|
||||
: DESTRUCTOR: scan-word define-destructor ; parsing
|
||||
SYNTAX: DESTRUCTOR: scan-word define-destructor ;
|
|
@ -416,7 +416,7 @@ PRIVATE>
|
|||
: define-fortran-record ( name vocab fields -- )
|
||||
[ >lower ] [ ] [ fortran-record>c-struct ] tri* define-struct ;
|
||||
|
||||
: RECORD: scan in get parse-definition define-fortran-record ; parsing
|
||||
SYNTAX: RECORD: scan in get parse-definition define-fortran-record ;
|
||||
|
||||
: set-fortran-abi ( library -- )
|
||||
library-fortran-abis get-global at fortran-abi set ;
|
||||
|
@ -437,16 +437,16 @@ MACRO: fortran-invoke ( return library function parameters -- )
|
|||
return library function parameters return [ "void" ] unless* parse-arglist
|
||||
[ \ fortran-invoke 5 [ ] nsequence ] dip define-declared ;
|
||||
|
||||
: SUBROUTINE:
|
||||
SYNTAX: SUBROUTINE:
|
||||
f "c-library" get scan ";" parse-tokens
|
||||
[ "()" subseq? not ] filter define-fortran-function ; parsing
|
||||
[ "()" subseq? not ] filter define-fortran-function ;
|
||||
|
||||
: FUNCTION:
|
||||
SYNTAX: FUNCTION:
|
||||
scan "c-library" get scan ";" parse-tokens
|
||||
[ "()" subseq? not ] filter define-fortran-function ; parsing
|
||||
[ "()" subseq? not ] filter define-fortran-function ;
|
||||
|
||||
: LIBRARY:
|
||||
SYNTAX: LIBRARY:
|
||||
scan
|
||||
[ "c-library" set ]
|
||||
[ set-fortran-abi ] bi ; parsing
|
||||
[ set-fortran-abi ] bi ;
|
||||
|
||||
|
|
|
@ -7,35 +7,34 @@ effects assocs combinators lexer strings.parser alien.parser
|
|||
fry vocabs.parser words.constant ;
|
||||
IN: alien.syntax
|
||||
|
||||
: DLL" lexer get skip-blank parse-string dlopen parsed ; parsing
|
||||
SYNTAX: DLL" lexer get skip-blank parse-string dlopen parsed ;
|
||||
|
||||
: ALIEN: scan string>number <alien> parsed ; parsing
|
||||
SYNTAX: ALIEN: scan string>number <alien> parsed ;
|
||||
|
||||
: BAD-ALIEN <bad-alien> parsed ; parsing
|
||||
SYNTAX: BAD-ALIEN <bad-alien> parsed ;
|
||||
|
||||
: LIBRARY: scan "c-library" set ; parsing
|
||||
SYNTAX: LIBRARY: scan "c-library" set ;
|
||||
|
||||
: FUNCTION:
|
||||
SYNTAX: FUNCTION:
|
||||
scan "c-library" get scan ";" parse-tokens
|
||||
[ "()" subseq? not ] filter
|
||||
define-function ; parsing
|
||||
define-function ;
|
||||
|
||||
: TYPEDEF:
|
||||
scan scan typedef ; parsing
|
||||
SYNTAX: TYPEDEF:
|
||||
scan scan typedef ;
|
||||
|
||||
: C-STRUCT:
|
||||
scan in get parse-definition define-struct ; parsing
|
||||
SYNTAX: C-STRUCT:
|
||||
scan in get parse-definition define-struct ;
|
||||
|
||||
: C-UNION:
|
||||
scan parse-definition define-union ; parsing
|
||||
SYNTAX: C-UNION:
|
||||
scan parse-definition define-union ;
|
||||
|
||||
: C-ENUM:
|
||||
SYNTAX: C-ENUM:
|
||||
";" parse-tokens
|
||||
[ [ create-in ] dip define-constant ] each-index ;
|
||||
parsing
|
||||
|
||||
: address-of ( name library -- value )
|
||||
load-library dlsym [ "No such symbol" throw ] unless* ;
|
||||
|
||||
: &:
|
||||
scan "c-library" get '[ _ _ address-of ] over push-all ; parsing
|
||||
SYNTAX: &:
|
||||
scan "c-library" get '[ _ _ address-of ] over push-all ;
|
||||
|
|
|
@ -68,7 +68,7 @@ M: bit-array resize
|
|||
|
||||
M: bit-array byte-length length 7 + -3 shift ;
|
||||
|
||||
: ?{ \ } [ >bit-array ] parse-literal ; parsing
|
||||
SYNTAX: ?{ \ } [ >bit-array ] parse-literal ;
|
||||
|
||||
: integer>bit-array ( n -- bit-array )
|
||||
dup 0 = [
|
||||
|
|
|
@ -31,7 +31,7 @@ M: bit-array new-resizable drop <bit-vector> ;
|
|||
|
||||
INSTANCE: bit-vector growable
|
||||
|
||||
: ?V{ \ } [ >bit-vector ] parse-literal ; parsing
|
||||
SYNTAX: ?V{ \ } [ >bit-vector ] parse-literal ;
|
||||
|
||||
M: bit-vector >pprint-sequence ;
|
||||
M: bit-vector pprint-delims drop \ ?V{ \ } ;
|
||||
|
|
|
@ -42,7 +42,7 @@ M: byte-array like
|
|||
|
||||
M: byte-array new-resizable drop <byte-vector> ;
|
||||
|
||||
: BV{ \ } [ >byte-vector ] parse-literal ; parsing
|
||||
SYNTAX: BV{ \ } [ >byte-vector ] parse-literal ;
|
||||
|
||||
M: byte-vector pprint* pprint-object ;
|
||||
M: byte-vector pprint-delims drop \ BV{ \ } ;
|
||||
|
|
|
@ -14,18 +14,14 @@ SYMBOL: sent-messages
|
|||
: remember-send ( selector -- )
|
||||
sent-messages (remember-send) ;
|
||||
|
||||
: ->
|
||||
scan dup remember-send parsed \ send parsed ;
|
||||
parsing
|
||||
SYNTAX: -> scan dup remember-send parsed \ send parsed ;
|
||||
|
||||
SYMBOL: super-sent-messages
|
||||
|
||||
: remember-super-send ( selector -- )
|
||||
super-sent-messages (remember-send) ;
|
||||
|
||||
: SUPER->
|
||||
scan dup remember-super-send parsed \ super-send parsed ;
|
||||
parsing
|
||||
SYNTAX: SUPER-> scan dup remember-super-send parsed \ super-send parsed ;
|
||||
|
||||
SYMBOL: frameworks
|
||||
|
||||
|
@ -33,9 +29,9 @@ frameworks [ V{ } clone ] initialize
|
|||
|
||||
[ frameworks get [ load-framework ] each ] "cocoa.messages" add-init-hook
|
||||
|
||||
: FRAMEWORK: scan [ load-framework ] [ frameworks get push ] bi ; parsing
|
||||
SYNTAX: FRAMEWORK: scan [ load-framework ] [ frameworks get push ] bi ;
|
||||
|
||||
: IMPORT: scan [ ] import-objc-class ; parsing
|
||||
SYNTAX: IMPORT: scan [ ] import-objc-class ;
|
||||
|
||||
"Compiling Objective C bridge..." print
|
||||
|
||||
|
|
|
@ -76,6 +76,6 @@ SYMBOL: +superclass+
|
|||
import-objc-class
|
||||
] bind ;
|
||||
|
||||
: CLASS:
|
||||
SYNTAX: CLASS:
|
||||
parse-definition unclip
|
||||
>hashtable define-objc-class ; parsing
|
||||
>hashtable define-objc-class ;
|
||||
|
|
|
@ -30,4 +30,4 @@ ERROR: no-such-color name ;
|
|||
: named-color ( name -- color )
|
||||
dup rgb.txt at [ ] [ no-such-color ] ?if ;
|
||||
|
||||
: COLOR: scan named-color parsed ; parsing
|
||||
SYNTAX: COLOR: scan named-color parsed ;
|
|
@ -13,10 +13,10 @@ IN: compiler.cfg.instructions.syntax
|
|||
: insn-effect ( word -- effect )
|
||||
boa-effect in>> but-last f <effect> ;
|
||||
|
||||
: INSN:
|
||||
SYNTAX: INSN:
|
||||
parse-tuple-definition "regs" suffix
|
||||
[ dup tuple eq? [ drop insn-word ] when ] dip
|
||||
[ define-tuple-class ]
|
||||
[ 2drop save-location ]
|
||||
[ 2drop [ ] [ '[ f _ boa , ] ] [ insn-effect ] tri define-inline ]
|
||||
3tri ; parsing
|
||||
3tri ;
|
||||
|
|
|
@ -17,6 +17,6 @@ C: <ds-loc> ds-loc
|
|||
TUPLE: rs-loc < loc ;
|
||||
C: <rs-loc> rs-loc
|
||||
|
||||
: V scan-word scan-word vreg boa parsed ; parsing
|
||||
: D scan-word <ds-loc> parsed ; parsing
|
||||
: R scan-word <rs-loc> parsed ; parsing
|
||||
SYNTAX: V scan-word scan-word vreg boa parsed ;
|
||||
SYNTAX: D scan-word <ds-loc> parsed ;
|
||||
SYNTAX: R scan-word <rs-loc> parsed ;
|
||||
|
|
|
@ -16,8 +16,8 @@ MACRO: set-slots ( slots -- quot )
|
|||
[ [ in>> '[ _ _ construct ] ] dip compose ] [ drop ] 2bi
|
||||
define-declared ;
|
||||
|
||||
: CONSTRUCTOR:
|
||||
SYNTAX: CONSTRUCTOR:
|
||||
scan-word [ name>> "<" ">" surround create-in ] keep
|
||||
"(" expect ")" parse-effect
|
||||
parse-definition
|
||||
define-constructor ; parsing
|
||||
define-constructor ;
|
|
@ -3,7 +3,7 @@
|
|||
USING: words parser alien alien.c-types kernel fry accessors ;
|
||||
IN: core-text.utilities
|
||||
|
||||
: C-GLOBAL:
|
||||
SYNTAX: C-GLOBAL:
|
||||
CREATE-WORD
|
||||
dup name>> '[ _ f dlsym *void* ]
|
||||
(( -- value )) define-declared ; parsing
|
||||
(( -- value )) define-declared ;
|
||||
|
|
|
@ -21,7 +21,7 @@ IN: cpu.ppc.assembler.backend
|
|||
: define-d-insn ( word opcode -- )
|
||||
[ d-insn ] curry (( d a simm -- )) define-declared ;
|
||||
|
||||
: D: CREATE scan-word define-d-insn ; parsing
|
||||
SYNTAX: D: CREATE scan-word define-d-insn ;
|
||||
|
||||
: sd-insn ( d a simm opcode -- )
|
||||
[ s>u16 { 0 21 16 } bitfield ] dip insn ;
|
||||
|
@ -29,7 +29,7 @@ IN: cpu.ppc.assembler.backend
|
|||
: define-sd-insn ( word opcode -- )
|
||||
[ sd-insn ] curry (( d a simm -- )) define-declared ;
|
||||
|
||||
: SD: CREATE scan-word define-sd-insn ; parsing
|
||||
SYNTAX: SD: CREATE scan-word define-sd-insn ;
|
||||
|
||||
: i-insn ( li aa lk opcode -- )
|
||||
[ { 0 1 0 } bitfield ] dip insn ;
|
||||
|
@ -40,26 +40,26 @@ IN: cpu.ppc.assembler.backend
|
|||
: (X) ( -- word quot )
|
||||
CREATE scan-word scan-word scan-word [ x-insn ] 3curry ;
|
||||
|
||||
: X: (X) (( a s b -- )) define-declared ; parsing
|
||||
SYNTAX: X: (X) (( a s b -- )) define-declared ;
|
||||
|
||||
: (1) ( quot -- quot' ) [ 0 ] prepose ;
|
||||
|
||||
: X1: (X) (1) (( a s -- )) define-declared ; parsing
|
||||
SYNTAX: X1: (X) (1) (( a s -- )) define-declared ;
|
||||
|
||||
: xfx-insn ( d spr xo opcode -- )
|
||||
[ { 1 11 21 } bitfield ] dip insn ;
|
||||
|
||||
: CREATE-MF ( -- word ) scan "MF" prepend create-in ;
|
||||
|
||||
: MFSPR:
|
||||
SYNTAX: MFSPR:
|
||||
CREATE-MF scan-word 5 shift [ 339 31 xfx-insn ] curry
|
||||
(( d -- )) define-declared ; parsing
|
||||
(( d -- )) define-declared ;
|
||||
|
||||
: CREATE-MT ( -- word ) scan "MT" prepend create-in ;
|
||||
|
||||
: MTSPR:
|
||||
SYNTAX: MTSPR:
|
||||
CREATE-MT scan-word 5 shift [ 467 31 xfx-insn ] curry
|
||||
(( d -- )) define-declared ; parsing
|
||||
(( d -- )) define-declared ;
|
||||
|
||||
: xo-insn ( d a b oe rc xo opcode -- )
|
||||
[ { 1 0 10 11 16 21 } bitfield ] dip insn ;
|
||||
|
@ -68,9 +68,9 @@ IN: cpu.ppc.assembler.backend
|
|||
CREATE scan-word scan-word scan-word scan-word
|
||||
[ xo-insn ] 2curry 2curry ;
|
||||
|
||||
: XO: (XO) (( a s b -- )) define-declared ; parsing
|
||||
SYNTAX: XO: (XO) (( a s b -- )) define-declared ;
|
||||
|
||||
: XO1: (XO) (1) (( a s -- )) define-declared ; parsing
|
||||
SYNTAX: XO1: (XO) (1) (( a s -- )) define-declared ;
|
||||
|
||||
GENERIC# (B) 2 ( dest aa lk -- )
|
||||
M: integer (B) 18 i-insn ;
|
||||
|
@ -84,11 +84,11 @@ M: label BC [ 0 BC ] dip rc-relative-ppc-2 label-fixup ;
|
|||
|
||||
: CREATE-B ( -- word ) scan "B" prepend create-in ;
|
||||
|
||||
: BC:
|
||||
SYNTAX: BC:
|
||||
CREATE-B scan-word scan-word
|
||||
[ rot BC ] 2curry (( c -- )) define-declared ; parsing
|
||||
[ rot BC ] 2curry (( c -- )) define-declared ;
|
||||
|
||||
: B:
|
||||
SYNTAX: B:
|
||||
CREATE-B scan-word scan-word scan-word scan-word scan-word
|
||||
[ b-insn ] curry curry curry curry curry
|
||||
(( bo -- )) define-declared ; parsing
|
||||
(( bo -- )) define-declared ;
|
||||
|
|
|
@ -11,5 +11,4 @@ IN: cpu.x86.assembler.syntax
|
|||
: define-registers ( names size -- )
|
||||
'[ _ define-register ] each-index ;
|
||||
|
||||
: REGISTERS: ( -- )
|
||||
scan-word ";" parse-tokens swap define-registers ; parsing
|
||||
SYNTAX: REGISTERS: scan-word ";" parse-tokens swap define-registers ;
|
||||
|
|
|
@ -14,10 +14,10 @@ GENERIC: definition-icon ( definition -- path )
|
|||
|
||||
<<
|
||||
|
||||
: ICON:
|
||||
SYNTAX: ICON:
|
||||
scan-word \ definition-icon create-method
|
||||
scan '[ drop _ definition-icon-path ]
|
||||
define ; parsing
|
||||
define ;
|
||||
|
||||
>>
|
||||
|
||||
|
|
|
@ -85,9 +85,9 @@ PRIVATE>
|
|||
: define-consult ( consultation -- )
|
||||
[ register-consult ] [ consult-methods ] bi ;
|
||||
|
||||
: CONSULT:
|
||||
SYNTAX: CONSULT:
|
||||
scan-word scan-word parse-definition <consultation>
|
||||
[ save-location ] [ define-consult ] bi ; parsing
|
||||
[ save-location ] [ define-consult ] bi ;
|
||||
|
||||
M: consultation where loc>> ;
|
||||
|
||||
|
@ -144,8 +144,8 @@ PRIVATE>
|
|||
[ initialize-protocol-props ] 2tri
|
||||
] 2bi ;
|
||||
|
||||
: PROTOCOL:
|
||||
CREATE-WORD parse-definition define-protocol ; parsing
|
||||
SYNTAX: PROTOCOL:
|
||||
CREATE-WORD parse-definition define-protocol ;
|
||||
|
||||
PREDICATE: protocol < word protocol-words ; ! Subclass of symbol?
|
||||
|
||||
|
@ -159,7 +159,7 @@ M: protocol definer drop \ PROTOCOL: \ ; ;
|
|||
|
||||
M: protocol group-words protocol-words ;
|
||||
|
||||
: SLOT-PROTOCOL:
|
||||
SYNTAX: SLOT-PROTOCOL:
|
||||
CREATE-WORD ";" parse-tokens
|
||||
[ [ reader-word ] [ writer-word ] bi 2array ] map concat
|
||||
define-protocol ; parsing
|
||||
define-protocol ;
|
|
@ -53,4 +53,4 @@ M: callable deep-fry
|
|||
|
||||
M: object deep-fry , ;
|
||||
|
||||
: '[ parse-quotation fry over push-all ; parsing
|
||||
SYNTAX: '[ parse-quotation fry over push-all ;
|
||||
|
|
|
@ -16,6 +16,8 @@ IN: functors
|
|||
|
||||
: define* ( word def effect -- ) pick set-word define-declared ;
|
||||
|
||||
: define-syntax* ( word def -- ) over set-word define-syntax ;
|
||||
|
||||
TUPLE: fake-quotation seq ;
|
||||
|
||||
GENERIC: >fake-quotations ( quot -- fake )
|
||||
|
@ -41,7 +43,7 @@ M: object fake-quotations> ;
|
|||
|
||||
: DEFINE* ( accum -- accum ) effect get parsed \ define* parsed ;
|
||||
|
||||
: `TUPLE:
|
||||
SYNTAX: `TUPLE:
|
||||
scan-param parsed
|
||||
scan {
|
||||
{ ";" [ tuple parsed f parsed ] }
|
||||
|
@ -52,40 +54,44 @@ M: object fake-quotations> ;
|
|||
make parsed
|
||||
]
|
||||
} case
|
||||
\ define-tuple-class parsed ; parsing
|
||||
\ define-tuple-class parsed ;
|
||||
|
||||
: `M:
|
||||
SYNTAX: `M:
|
||||
effect off
|
||||
scan-param parsed
|
||||
scan-param parsed
|
||||
\ create-method-in parsed
|
||||
parse-definition*
|
||||
DEFINE* ; parsing
|
||||
DEFINE* ;
|
||||
|
||||
: `C:
|
||||
SYNTAX: `C:
|
||||
effect off
|
||||
scan-param parsed
|
||||
scan-param parsed
|
||||
[ [ boa ] curry ] over push-all
|
||||
DEFINE* ; parsing
|
||||
DEFINE* ;
|
||||
|
||||
: `:
|
||||
SYNTAX: `:
|
||||
effect off
|
||||
scan-param parsed
|
||||
parse-definition*
|
||||
DEFINE* ; parsing
|
||||
DEFINE* ;
|
||||
|
||||
: `INSTANCE:
|
||||
SYNTAX: `SYNTAX:
|
||||
effect off
|
||||
scan-param parsed
|
||||
parse-definition*
|
||||
\ define-syntax* parsed ;
|
||||
|
||||
SYNTAX: `INSTANCE:
|
||||
scan-param parsed
|
||||
scan-param parsed
|
||||
\ add-mixin-instance parsed ; parsing
|
||||
\ add-mixin-instance parsed ;
|
||||
|
||||
: `inline [ word make-inline ] over push-all ; parsing
|
||||
SYNTAX: `inline [ word make-inline ] over push-all ;
|
||||
|
||||
: `parsing [ word make-parsing ] over push-all ; parsing
|
||||
|
||||
: `(
|
||||
")" parse-effect effect set ; parsing
|
||||
SYNTAX: `(
|
||||
")" parse-effect effect set ;
|
||||
|
||||
: (INTERPOLATE) ( accum quot -- accum )
|
||||
[ scan interpolate-locals ] dip
|
||||
|
@ -93,11 +99,11 @@ M: object fake-quotations> ;
|
|||
|
||||
PRIVATE>
|
||||
|
||||
: IS [ dup search [ ] [ no-word ] ?if ] (INTERPOLATE) ; parsing
|
||||
SYNTAX: IS [ dup search [ ] [ no-word ] ?if ] (INTERPOLATE) ;
|
||||
|
||||
: DEFINES [ create-in ] (INTERPOLATE) ; parsing
|
||||
SYNTAX: DEFINES [ create-in ] (INTERPOLATE) ;
|
||||
|
||||
: DEFINES-CLASS [ create-class-in ] (INTERPOLATE) ; parsing
|
||||
SYNTAX: DEFINES-CLASS [ create-class-in ] (INTERPOLATE) ;
|
||||
|
||||
DEFER: ;FUNCTOR delimiter
|
||||
|
||||
|
@ -110,8 +116,8 @@ DEFER: ;FUNCTOR delimiter
|
|||
{ "C:" POSTPONE: `C: }
|
||||
{ ":" POSTPONE: `: }
|
||||
{ "INSTANCE:" POSTPONE: `INSTANCE: }
|
||||
{ "SYNTAX:" POSTPONE: `SYNTAX: }
|
||||
{ "inline" POSTPONE: `inline }
|
||||
{ "parsing" POSTPONE: `parsing }
|
||||
{ "(" POSTPONE: `( }
|
||||
} ;
|
||||
|
||||
|
@ -132,4 +138,4 @@ DEFER: ;FUNCTOR delimiter
|
|||
|
||||
PRIVATE>
|
||||
|
||||
: FUNCTOR: (FUNCTOR:) define ; parsing
|
||||
SYNTAX: FUNCTOR: (FUNCTOR:) define ;
|
||||
|
|
|
@ -5,19 +5,19 @@ help.topics namespaces vocabs definitions compiler.units
|
|||
vocabs.parser ;
|
||||
IN: help.syntax
|
||||
|
||||
: HELP:
|
||||
SYNTAX: HELP:
|
||||
scan-word bootstrap-word
|
||||
dup set-word
|
||||
dup >link save-location
|
||||
\ ; parse-until >array swap set-word-help ; parsing
|
||||
\ ; parse-until >array swap set-word-help ;
|
||||
|
||||
: ARTICLE:
|
||||
SYNTAX: ARTICLE:
|
||||
location [
|
||||
\ ; parse-until >array [ first2 ] keep 2 tail <article>
|
||||
over add-article >link
|
||||
] dip remember-definition ; parsing
|
||||
] dip remember-definition ;
|
||||
|
||||
: ABOUT:
|
||||
SYNTAX: ABOUT:
|
||||
in get vocab
|
||||
dup changed-definition
|
||||
scan-object >>help drop ; parsing
|
||||
scan-object >>help drop ;
|
||||
|
|
|
@ -59,12 +59,11 @@ M: object specializer-declaration class ;
|
|||
: specialized-length ( specializer -- n )
|
||||
dup [ array? ] all? [ first ] when length ;
|
||||
|
||||
: HINTS:
|
||||
SYNTAX: HINTS:
|
||||
scan-object
|
||||
dup method-spec? [ first2 method ] when
|
||||
[ redefined ]
|
||||
[ parse-definition "specializer" set-word-prop ] bi ;
|
||||
parsing
|
||||
|
||||
! Default specializers
|
||||
{ first first2 first3 first4 }
|
||||
|
|
|
@ -25,8 +25,7 @@ M: tuple-class component-tag ( tag class -- )
|
|||
[ compile-component-attrs ] 2bi
|
||||
[ render ] [code] ;
|
||||
|
||||
: COMPONENT:
|
||||
SYNTAX: COMPONENT:
|
||||
scan-word
|
||||
[ name>> ] [ '[ _ component-tag ] ] bi
|
||||
define-chloe-tag ;
|
||||
parsing
|
||||
|
|
|
@ -15,8 +15,8 @@ tags [ H{ } clone ] initialize
|
|||
|
||||
: define-chloe-tag ( name quot -- ) swap tags get set-at ;
|
||||
|
||||
: CHLOE:
|
||||
scan parse-definition define-chloe-tag ; parsing
|
||||
SYNTAX: CHLOE:
|
||||
scan parse-definition define-chloe-tag ;
|
||||
|
||||
CONSTANT: chloe-ns "http://factorcode.org/chloe/1.0"
|
||||
|
||||
|
|
|
@ -49,7 +49,7 @@ DEFER: <% delimiter
|
|||
drop
|
||||
] if ;
|
||||
|
||||
: %> lexer get parse-%> ; parsing
|
||||
SYNTAX: %> lexer get parse-%> ;
|
||||
|
||||
: parse-template-lines ( lines -- quot )
|
||||
<template-lexer> [
|
||||
|
|
|
@ -38,6 +38,6 @@ MACRO: interpolate ( string -- )
|
|||
: interpolate-locals ( string -- quot )
|
||||
[ search [ ] ] (interpolate) ;
|
||||
|
||||
: I[
|
||||
SYNTAX: I[
|
||||
"]I" parse-multiline-string
|
||||
interpolate-locals over push-all ; parsing
|
||||
interpolate-locals over push-all ;
|
||||
|
|
|
@ -1,22 +1,32 @@
|
|||
USING: help.markup help.syntax ;
|
||||
! Copyright (C) 2008, 2009 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: help.markup help.syntax assocs kernel sequences ;
|
||||
IN: interval-maps
|
||||
|
||||
HELP: interval-at*
|
||||
{ $values { "key" "an object" } { "map" "an interval map" } { "value" "the value for the key, or f" } { "?" "whether the key is present" } }
|
||||
{ $values { "key" object } { "map" interval-map } { "value" "the value for the key, or f" } { "?" "whether the key is present" } }
|
||||
{ $description "Looks up a key in an interval map, returning the corresponding value if the item is in an interval in the map, and a boolean flag. The operation takes O(log n) time." } ;
|
||||
|
||||
HELP: interval-at
|
||||
{ $values { "key" "an object" } { "map" "an interval map" } { "value" "the value for the key, or f" } }
|
||||
{ $values { "key" object } { "map" interval-map } { "value" "the value for the key, or f" } }
|
||||
{ $description "Looks up a key in an interval map, returning the value of the corresponding interval, or f if the interval is not present in the map." } ;
|
||||
|
||||
HELP: interval-key?
|
||||
{ $values { "key" "an object" } { "map" "an interval map" } { "?" "a boolean" } }
|
||||
{ $values { "key" object } { "map" interval-map } { "?" "a boolean" } }
|
||||
{ $description "Tests whether an object is in an interval in the interval map, returning t if the object is present." } ;
|
||||
|
||||
HELP: <interval-map>
|
||||
{ $values { "specification" "an assoc" } { "map" "an interval map" } }
|
||||
{ $values { "specification" assoc } { "map" interval-map } }
|
||||
{ $description "From a specification, produce an interval tree. The specification is an assoc where the keys are intervals, or pairs of numbers to represent intervals, or individual numbers to represent singleton intervals. The values are the values int he interval map. Construction time is O(n log n)." } ;
|
||||
|
||||
HELP: interval-values
|
||||
{ $values { "map" interval-map } { "values" sequence } }
|
||||
{ $description "Constructs a list of all of the values that interval map keys are associated with. This list may contain duplicates." } ;
|
||||
|
||||
HELP: coalesce
|
||||
{ $values { "alist" "an association list with integer keys" } { "specification" { "array of the format used by " { $link <interval-map> } } } }
|
||||
{ $description "Finds ranges used in the given alist, coalescing them into a single range." } ;
|
||||
|
||||
ARTICLE: "interval-maps" "Interval maps"
|
||||
"The " { $vocab-link "interval-maps" } " vocabulary implements a data structure, similar to assocs, where a set of closed intervals of keys are associated with values. As such, interval maps do not conform to the assoc protocol, because intervals of floats, for example, can be used, and it is impossible to get a list of keys in between."
|
||||
$nl
|
||||
|
@ -24,7 +34,9 @@ $nl
|
|||
{ $subsection interval-at* }
|
||||
{ $subsection interval-at }
|
||||
{ $subsection interval-key? }
|
||||
{ $subsection interval-values }
|
||||
"Use the following to construct interval maps"
|
||||
{ $subsection <interval-map> } ;
|
||||
{ $subsection <interval-map> }
|
||||
{ $subsection coalesce } ;
|
||||
|
||||
ABOUT: "interval-maps"
|
||||
|
|
|
@ -8,17 +8,21 @@ TUPLE: interval-map array ;
|
|||
|
||||
<PRIVATE
|
||||
|
||||
ALIAS: start first
|
||||
ALIAS: end second
|
||||
ALIAS: value third
|
||||
|
||||
: find-interval ( key interval-map -- interval-node )
|
||||
[ first <=> ] with search nip ;
|
||||
array>> [ start <=> ] with search nip ;
|
||||
|
||||
: interval-contains? ( key interval-node -- ? )
|
||||
first2 between? ;
|
||||
[ start ] [ end ] bi between? ;
|
||||
|
||||
: all-intervals ( sequence -- intervals )
|
||||
[ [ dup number? [ dup 2array ] when ] dip ] { } assoc-map-as ;
|
||||
|
||||
: disjoint? ( node1 node2 -- ? )
|
||||
[ second ] [ first ] bi* < ;
|
||||
[ end ] [ start ] bi* < ;
|
||||
|
||||
: ensure-disjoint ( intervals -- intervals )
|
||||
dup [ disjoint? ] monotonic?
|
||||
|
@ -30,14 +34,17 @@ TUPLE: interval-map array ;
|
|||
PRIVATE>
|
||||
|
||||
: interval-at* ( key map -- value ? )
|
||||
[ drop ] [ array>> find-interval ] 2bi
|
||||
[ drop ] [ find-interval ] 2bi
|
||||
[ nip ] [ interval-contains? ] 2bi
|
||||
[ third t ] [ drop f f ] if ;
|
||||
[ value t ] [ drop f f ] if ;
|
||||
|
||||
: interval-at ( key map -- value ) interval-at* drop ;
|
||||
|
||||
: interval-key? ( key map -- ? ) interval-at* nip ;
|
||||
|
||||
: interval-values ( map -- values )
|
||||
array>> [ value ] map ;
|
||||
|
||||
: <interval-map> ( specification -- map )
|
||||
all-intervals [ [ first second ] compare ] sort
|
||||
>intervals ensure-disjoint interval-map boa ;
|
||||
|
|
|
@ -63,6 +63,6 @@ SYMBOL: euc-table
|
|||
|
||||
PRIVATE>
|
||||
|
||||
: EUC:
|
||||
SYNTAX: EUC:
|
||||
! EUC: euc-kr "vocab:io/encodings/korean/cp949.txt"
|
||||
CREATE-CLASS scan-object define-euc ; parsing
|
||||
CREATE-CLASS scan-object define-euc ;
|
||||
|
|
|
@ -0,0 +1,208 @@
|
|||
#
|
||||
# Name: JIS X 0201 (1976) to Unicode 1.1 Table
|
||||
# Unicode version: 1.1
|
||||
# Table version: 0.9
|
||||
# Table format: Format A
|
||||
# Date: 8 March 1994
|
||||
#
|
||||
# Copyright (c) 1991-1994 Unicode, Inc. All Rights reserved.
|
||||
#
|
||||
# This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
|
||||
# No claims are made as to fitness for any particular purpose. No
|
||||
# warranties of any kind are expressed or implied. The recipient
|
||||
# agrees to determine applicability of information provided. If this
|
||||
# file has been provided on magnetic media by Unicode, Inc., the sole
|
||||
# remedy for any claim will be exchange of defective media within 90
|
||||
# days of receipt.
|
||||
#
|
||||
# Recipient is granted the right to make copies in any form for
|
||||
# internal distribution and to freely use the information supplied
|
||||
# in the creation of products supporting Unicode. Unicode, Inc.
|
||||
# specifically excludes the right to re-distribute this file directly
|
||||
# to third parties or other organizations whether for profit or not.
|
||||
#
|
||||
# General notes:
|
||||
#
|
||||
#
|
||||
# This table contains one set of mappings from JIS X 0201 into Unicode.
|
||||
# Note that these data are *possible* mappings only and may not be the
|
||||
# same as those used by actual products, nor may they be the best suited
|
||||
# for all uses. For more information on the mappings between various code
|
||||
# pages incorporating the repertoire of JIS X 0201 and Unicode, consult the
|
||||
# VENDORS mapping data. Normative information on the mapping between
|
||||
# JIS X 0201 and Unicode may be found in the Unihan.txt file in the
|
||||
# latest Unicode Character Database.
|
||||
#
|
||||
# If you have carefully considered the fact that the mappings in
|
||||
# this table are only one possible set of mappings between JIS X 0201 and
|
||||
# Unicode and have no normative status, but still feel that you
|
||||
# have located an error in the table that requires fixing, you may
|
||||
# report any such error to errata@unicode.org.
|
||||
#
|
||||
#
|
||||
# Format: Three tab-separated columns
|
||||
# Column #1 is the shift JIS code (in hex as 0xXX)
|
||||
# Column #2 is the Unicode (in hex as 0xXXXX)
|
||||
# Column #3 the Unicode (ISO 10646) name (follows a comment sign)
|
||||
#
|
||||
# The entries are in JIS order
|
||||
#
|
||||
#
|
||||
0x20 0x0020 # SPACE
|
||||
0x21 0x0021 # EXCLAMATION MARK
|
||||
0x22 0x0022 # QUOTATION MARK
|
||||
0x23 0x0023 # NUMBER SIGN
|
||||
0x24 0x0024 # DOLLAR SIGN
|
||||
0x25 0x0025 # PERCENT SIGN
|
||||
0x26 0x0026 # AMPERSAND
|
||||
0x27 0x0027 # APOSTROPHE
|
||||
0x28 0x0028 # LEFT PARENTHESIS
|
||||
0x29 0x0029 # RIGHT PARENTHESIS
|
||||
0x2A 0x002A # ASTERISK
|
||||
0x2B 0x002B # PLUS SIGN
|
||||
0x2C 0x002C # COMMA
|
||||
0x2D 0x002D # HYPHEN-MINUS
|
||||
0x2E 0x002E # FULL STOP
|
||||
0x2F 0x002F # SOLIDUS
|
||||
0x30 0x0030 # DIGIT ZERO
|
||||
0x31 0x0031 # DIGIT ONE
|
||||
0x32 0x0032 # DIGIT TWO
|
||||
0x33 0x0033 # DIGIT THREE
|
||||
0x34 0x0034 # DIGIT FOUR
|
||||
0x35 0x0035 # DIGIT FIVE
|
||||
0x36 0x0036 # DIGIT SIX
|
||||
0x37 0x0037 # DIGIT SEVEN
|
||||
0x38 0x0038 # DIGIT EIGHT
|
||||
0x39 0x0039 # DIGIT NINE
|
||||
0x3A 0x003A # COLON
|
||||
0x3B 0x003B # SEMICOLON
|
||||
0x3C 0x003C # LESS-THAN SIGN
|
||||
0x3D 0x003D # EQUALS SIGN
|
||||
0x3E 0x003E # GREATER-THAN SIGN
|
||||
0x3F 0x003F # QUESTION MARK
|
||||
0x40 0x0040 # COMMERCIAL AT
|
||||
0x41 0x0041 # LATIN CAPITAL LETTER A
|
||||
0x42 0x0042 # LATIN CAPITAL LETTER B
|
||||
0x43 0x0043 # LATIN CAPITAL LETTER C
|
||||
0x44 0x0044 # LATIN CAPITAL LETTER D
|
||||
0x45 0x0045 # LATIN CAPITAL LETTER E
|
||||
0x46 0x0046 # LATIN CAPITAL LETTER F
|
||||
0x47 0x0047 # LATIN CAPITAL LETTER G
|
||||
0x48 0x0048 # LATIN CAPITAL LETTER H
|
||||
0x49 0x0049 # LATIN CAPITAL LETTER I
|
||||
0x4A 0x004A # LATIN CAPITAL LETTER J
|
||||
0x4B 0x004B # LATIN CAPITAL LETTER K
|
||||
0x4C 0x004C # LATIN CAPITAL LETTER L
|
||||
0x4D 0x004D # LATIN CAPITAL LETTER M
|
||||
0x4E 0x004E # LATIN CAPITAL LETTER N
|
||||
0x4F 0x004F # LATIN CAPITAL LETTER O
|
||||
0x50 0x0050 # LATIN CAPITAL LETTER P
|
||||
0x51 0x0051 # LATIN CAPITAL LETTER Q
|
||||
0x52 0x0052 # LATIN CAPITAL LETTER R
|
||||
0x53 0x0053 # LATIN CAPITAL LETTER S
|
||||
0x54 0x0054 # LATIN CAPITAL LETTER T
|
||||
0x55 0x0055 # LATIN CAPITAL LETTER U
|
||||
0x56 0x0056 # LATIN CAPITAL LETTER V
|
||||
0x57 0x0057 # LATIN CAPITAL LETTER W
|
||||
0x58 0x0058 # LATIN CAPITAL LETTER X
|
||||
0x59 0x0059 # LATIN CAPITAL LETTER Y
|
||||
0x5A 0x005A # LATIN CAPITAL LETTER Z
|
||||
0x5B 0x005B # LEFT SQUARE BRACKET
|
||||
0x5C 0x00A5 # YEN SIGN
|
||||
0x5D 0x005D # RIGHT SQUARE BRACKET
|
||||
0x5E 0x005E # CIRCUMFLEX ACCENT
|
||||
0x5F 0x005F # LOW LINE
|
||||
0x60 0x0060 # GRAVE ACCENT
|
||||
0x61 0x0061 # LATIN SMALL LETTER A
|
||||
0x62 0x0062 # LATIN SMALL LETTER B
|
||||
0x63 0x0063 # LATIN SMALL LETTER C
|
||||
0x64 0x0064 # LATIN SMALL LETTER D
|
||||
0x65 0x0065 # LATIN SMALL LETTER E
|
||||
0x66 0x0066 # LATIN SMALL LETTER F
|
||||
0x67 0x0067 # LATIN SMALL LETTER G
|
||||
0x68 0x0068 # LATIN SMALL LETTER H
|
||||
0x69 0x0069 # LATIN SMALL LETTER I
|
||||
0x6A 0x006A # LATIN SMALL LETTER J
|
||||
0x6B 0x006B # LATIN SMALL LETTER K
|
||||
0x6C 0x006C # LATIN SMALL LETTER L
|
||||
0x6D 0x006D # LATIN SMALL LETTER M
|
||||
0x6E 0x006E # LATIN SMALL LETTER N
|
||||
0x6F 0x006F # LATIN SMALL LETTER O
|
||||
0x70 0x0070 # LATIN SMALL LETTER P
|
||||
0x71 0x0071 # LATIN SMALL LETTER Q
|
||||
0x72 0x0072 # LATIN SMALL LETTER R
|
||||
0x73 0x0073 # LATIN SMALL LETTER S
|
||||
0x74 0x0074 # LATIN SMALL LETTER T
|
||||
0x75 0x0075 # LATIN SMALL LETTER U
|
||||
0x76 0x0076 # LATIN SMALL LETTER V
|
||||
0x77 0x0077 # LATIN SMALL LETTER W
|
||||
0x78 0x0078 # LATIN SMALL LETTER X
|
||||
0x79 0x0079 # LATIN SMALL LETTER Y
|
||||
0x7A 0x007A # LATIN SMALL LETTER Z
|
||||
0x7B 0x007B # LEFT CURLY BRACKET
|
||||
0x7C 0x007C # VERTICAL LINE
|
||||
0x7D 0x007D # RIGHT CURLY BRACKET
|
||||
0x7E 0x203E # OVERLINE
|
||||
0xA1 0xFF61 # HALFWIDTH IDEOGRAPHIC FULL STOP
|
||||
0xA2 0xFF62 # HALFWIDTH LEFT CORNER BRACKET
|
||||
0xA3 0xFF63 # HALFWIDTH RIGHT CORNER BRACKET
|
||||
0xA4 0xFF64 # HALFWIDTH IDEOGRAPHIC COMMA
|
||||
0xA5 0xFF65 # HALFWIDTH KATAKANA MIDDLE DOT
|
||||
0xA6 0xFF66 # HALFWIDTH KATAKANA LETTER WO
|
||||
0xA7 0xFF67 # HALFWIDTH KATAKANA LETTER SMALL A
|
||||
0xA8 0xFF68 # HALFWIDTH KATAKANA LETTER SMALL I
|
||||
0xA9 0xFF69 # HALFWIDTH KATAKANA LETTER SMALL U
|
||||
0xAA 0xFF6A # HALFWIDTH KATAKANA LETTER SMALL E
|
||||
0xAB 0xFF6B # HALFWIDTH KATAKANA LETTER SMALL O
|
||||
0xAC 0xFF6C # HALFWIDTH KATAKANA LETTER SMALL YA
|
||||
0xAD 0xFF6D # HALFWIDTH KATAKANA LETTER SMALL YU
|
||||
0xAE 0xFF6E # HALFWIDTH KATAKANA LETTER SMALL YO
|
||||
0xAF 0xFF6F # HALFWIDTH KATAKANA LETTER SMALL TU
|
||||
0xB0 0xFF70 # HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
|
||||
0xB1 0xFF71 # HALFWIDTH KATAKANA LETTER A
|
||||
0xB2 0xFF72 # HALFWIDTH KATAKANA LETTER I
|
||||
0xB3 0xFF73 # HALFWIDTH KATAKANA LETTER U
|
||||
0xB4 0xFF74 # HALFWIDTH KATAKANA LETTER E
|
||||
0xB5 0xFF75 # HALFWIDTH KATAKANA LETTER O
|
||||
0xB6 0xFF76 # HALFWIDTH KATAKANA LETTER KA
|
||||
0xB7 0xFF77 # HALFWIDTH KATAKANA LETTER KI
|
||||
0xB8 0xFF78 # HALFWIDTH KATAKANA LETTER KU
|
||||
0xB9 0xFF79 # HALFWIDTH KATAKANA LETTER KE
|
||||
0xBA 0xFF7A # HALFWIDTH KATAKANA LETTER KO
|
||||
0xBB 0xFF7B # HALFWIDTH KATAKANA LETTER SA
|
||||
0xBC 0xFF7C # HALFWIDTH KATAKANA LETTER SI
|
||||
0xBD 0xFF7D # HALFWIDTH KATAKANA LETTER SU
|
||||
0xBE 0xFF7E # HALFWIDTH KATAKANA LETTER SE
|
||||
0xBF 0xFF7F # HALFWIDTH KATAKANA LETTER SO
|
||||
0xC0 0xFF80 # HALFWIDTH KATAKANA LETTER TA
|
||||
0xC1 0xFF81 # HALFWIDTH KATAKANA LETTER TI
|
||||
0xC2 0xFF82 # HALFWIDTH KATAKANA LETTER TU
|
||||
0xC3 0xFF83 # HALFWIDTH KATAKANA LETTER TE
|
||||
0xC4 0xFF84 # HALFWIDTH KATAKANA LETTER TO
|
||||
0xC5 0xFF85 # HALFWIDTH KATAKANA LETTER NA
|
||||
0xC6 0xFF86 # HALFWIDTH KATAKANA LETTER NI
|
||||
0xC7 0xFF87 # HALFWIDTH KATAKANA LETTER NU
|
||||
0xC8 0xFF88 # HALFWIDTH KATAKANA LETTER NE
|
||||
0xC9 0xFF89 # HALFWIDTH KATAKANA LETTER NO
|
||||
0xCA 0xFF8A # HALFWIDTH KATAKANA LETTER HA
|
||||
0xCB 0xFF8B # HALFWIDTH KATAKANA LETTER HI
|
||||
0xCC 0xFF8C # HALFWIDTH KATAKANA LETTER HU
|
||||
0xCD 0xFF8D # HALFWIDTH KATAKANA LETTER HE
|
||||
0xCE 0xFF8E # HALFWIDTH KATAKANA LETTER HO
|
||||
0xCF 0xFF8F # HALFWIDTH KATAKANA LETTER MA
|
||||
0xD0 0xFF90 # HALFWIDTH KATAKANA LETTER MI
|
||||
0xD1 0xFF91 # HALFWIDTH KATAKANA LETTER MU
|
||||
0xD2 0xFF92 # HALFWIDTH KATAKANA LETTER ME
|
||||
0xD3 0xFF93 # HALFWIDTH KATAKANA LETTER MO
|
||||
0xD4 0xFF94 # HALFWIDTH KATAKANA LETTER YA
|
||||
0xD5 0xFF95 # HALFWIDTH KATAKANA LETTER YU
|
||||
0xD6 0xFF96 # HALFWIDTH KATAKANA LETTER YO
|
||||
0xD7 0xFF97 # HALFWIDTH KATAKANA LETTER RA
|
||||
0xD8 0xFF98 # HALFWIDTH KATAKANA LETTER RI
|
||||
0xD9 0xFF99 # HALFWIDTH KATAKANA LETTER RU
|
||||
0xDA 0xFF9A # HALFWIDTH KATAKANA LETTER RE
|
||||
0xDB 0xFF9B # HALFWIDTH KATAKANA LETTER RO
|
||||
0xDC 0xFF9C # HALFWIDTH KATAKANA LETTER WA
|
||||
0xDD 0xFF9D # HALFWIDTH KATAKANA LETTER N
|
||||
0xDE 0xFF9E # HALFWIDTH KATAKANA VOICED SOUND MARK
|
||||
0xDF 0xFF9F # HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
0
basis/unicode/syntax/authors.txt → basis/io/encodings/iso2022/authors.txt
Executable file → Normal file
0
basis/unicode/syntax/authors.txt → basis/io/encodings/iso2022/authors.txt
Executable file → Normal file
|
@ -0,0 +1,13 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: help.syntax help.markup ;
|
||||
IN: io.encodings.iso2022
|
||||
|
||||
HELP: iso2022
|
||||
{ $class-description "This encoding class implements ISO 2022-JP-1, a Japanese text encoding commonly used for email." }
|
||||
{ $see-also "encodings-introduction" } ;
|
||||
|
||||
ARTICLE: "io.encodings.iso2022" "ISO 2022-JP-1 encoding"
|
||||
{ $subsection iso2022 } ;
|
||||
|
||||
ABOUT: "io.encodings.iso2022"
|
|
@ -0,0 +1,36 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: io.encodings.string io.encodings.iso2022 tools.test
|
||||
io.encodings.iso2022.private literals strings byte-arrays ;
|
||||
IN: io.encodings.iso2022
|
||||
|
||||
[ "hello" ] [ "hello" >byte-array iso2022 decode ] unit-test
|
||||
[ "hello" ] [ "hello" iso2022 encode >string ] unit-test
|
||||
|
||||
[ "hi" ] [ B{ CHAR: h ESC CHAR: ( CHAR: B CHAR: i } iso2022 decode ] unit-test
|
||||
[ "hi" ] [ B{ CHAR: h CHAR: i ESC CHAR: ( CHAR: B } iso2022 decode ] unit-test
|
||||
[ "hi\u00fffd" ] [ B{ CHAR: h CHAR: i ESC CHAR: ( } iso2022 decode ] unit-test
|
||||
[ "hi\u00fffd" ] [ B{ CHAR: h CHAR: i ESC } iso2022 decode ] unit-test
|
||||
|
||||
[ B{ CHAR: h ESC CHAR: ( CHAR: J HEX: D8 } ] [ "h\u00ff98" iso2022 encode ] unit-test
|
||||
[ "h\u00ff98" ] [ B{ CHAR: h ESC CHAR: ( CHAR: J HEX: D8 } iso2022 decode ] unit-test
|
||||
[ "hi" ] [ B{ CHAR: h ESC CHAR: ( CHAR: J CHAR: i } iso2022 decode ] unit-test
|
||||
[ "h" ] [ B{ CHAR: h ESC CHAR: ( CHAR: J } iso2022 decode ] unit-test
|
||||
[ "h\u00fffd" ] [ B{ CHAR: h ESC CHAR: ( CHAR: J HEX: 80 } iso2022 decode ] unit-test
|
||||
|
||||
[ B{ CHAR: h ESC CHAR: $ CHAR: B HEX: 3E HEX: 47 } ] [ "h\u007126" iso2022 encode ] unit-test
|
||||
[ "h\u007126" ] [ B{ CHAR: h ESC CHAR: $ CHAR: B HEX: 3E HEX: 47 } iso2022 decode ] unit-test
|
||||
[ "h\u00fffd" ] [ B{ CHAR: h ESC CHAR: $ CHAR: B HEX: 3E } iso2022 decode ] unit-test
|
||||
[ "h" ] [ B{ CHAR: h ESC CHAR: $ CHAR: B } iso2022 decode ] unit-test
|
||||
[ "h\u00fffd" ] [ B{ CHAR: h ESC CHAR: $ } iso2022 decode ] unit-test
|
||||
[ "h\u00fffd" ] [ B{ CHAR: h ESC } iso2022 decode ] unit-test
|
||||
[ "h\u00fffd" ] [ B{ CHAR: h ESC CHAR: $ CHAR: B HEX: 80 HEX: 80 } iso2022 decode ] unit-test
|
||||
|
||||
[ B{ CHAR: h ESC CHAR: $ CHAR: ( CHAR: D HEX: 38 HEX: 54 } ] [ "h\u0058ce" iso2022 encode ] unit-test
|
||||
[ "h\u0058ce" ] [ B{ CHAR: h ESC CHAR: $ CHAR: ( CHAR: D HEX: 38 HEX: 54 } iso2022 decode ] unit-test
|
||||
[ "h\u00fffd" ] [ B{ CHAR: h ESC CHAR: $ CHAR: ( CHAR: D HEX: 38 } iso2022 decode ] unit-test
|
||||
[ "h" ] [ B{ CHAR: h ESC CHAR: $ CHAR: ( CHAR: D } iso2022 decode ] unit-test
|
||||
[ "h\u00fffd" ] [ B{ CHAR: h ESC CHAR: $ CHAR: ( } iso2022 decode ] unit-test
|
||||
[ "h\u00fffd" ] [ B{ CHAR: h ESC CHAR: $ CHAR: ( CHAR: D HEX: 70 HEX: 70 } iso2022 decode ] unit-test
|
||||
|
||||
[ "\u{syriac-music}" iso2022 encode ] must-fail
|
|
@ -0,0 +1,107 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: io.encodings kernel sequences io simple-flat-file sets math
|
||||
combinators.short-circuit io.binary values arrays assocs
|
||||
locals accessors combinators biassocs byte-arrays parser ;
|
||||
IN: io.encodings.iso2022
|
||||
|
||||
SINGLETON: iso2022
|
||||
|
||||
<PRIVATE
|
||||
|
||||
VALUE: jis201
|
||||
VALUE: jis208
|
||||
VALUE: jis212
|
||||
|
||||
"vocab:io/encodings/iso2022/201.txt" flat-file>biassoc to: jis201
|
||||
"vocab:io/encodings/iso2022/208.txt" flat-file>biassoc to: jis208
|
||||
"vocab:io/encodings/iso2022/212.txt" flat-file>biassoc to: jis212
|
||||
|
||||
VALUE: ascii
|
||||
128 unique >biassoc to: ascii
|
||||
|
||||
TUPLE: iso2022-state type ;
|
||||
|
||||
: make-iso-coder ( encoding -- state )
|
||||
drop ascii iso2022-state boa ;
|
||||
|
||||
M: iso2022 <encoder>
|
||||
make-iso-coder <encoder> ;
|
||||
|
||||
M: iso2022 <decoder>
|
||||
make-iso-coder <decoder> ;
|
||||
|
||||
<< SYNTAX: ESC HEX: 16 parsed ; >>
|
||||
|
||||
CONSTANT: switch-ascii B{ ESC CHAR: ( CHAR: B }
|
||||
CONSTANT: switch-jis201 B{ ESC CHAR: ( CHAR: J }
|
||||
CONSTANT: switch-jis208 B{ ESC CHAR: $ CHAR: B }
|
||||
CONSTANT: switch-jis212 B{ ESC CHAR: $ CHAR: ( CHAR: D }
|
||||
|
||||
: find-type ( char -- code type )
|
||||
{
|
||||
{ [ dup ascii value? ] [ drop switch-ascii ascii ] }
|
||||
{ [ dup jis201 value? ] [ drop switch-jis201 jis201 ] }
|
||||
{ [ dup jis208 value? ] [ drop switch-jis208 jis208 ] }
|
||||
{ [ dup jis212 value? ] [ drop switch-jis212 jis212 ] }
|
||||
[ encode-error ]
|
||||
} cond ;
|
||||
|
||||
: stream-write-num ( num stream -- )
|
||||
over 256 >=
|
||||
[ [ h>b/b swap 2byte-array ] dip stream-write ]
|
||||
[ stream-write1 ] if ;
|
||||
|
||||
M:: iso2022-state encode-char ( char stream encoding -- )
|
||||
char encoding type>> value? [
|
||||
char find-type
|
||||
[ stream stream-write ]
|
||||
[ encoding (>>type) ] bi*
|
||||
] unless
|
||||
char encoding type>> value-at stream stream-write-num ;
|
||||
|
||||
: read-escape ( stream -- type/f )
|
||||
dup stream-read1 {
|
||||
{ CHAR: ( [
|
||||
stream-read1 {
|
||||
{ CHAR: B [ ascii ] }
|
||||
{ CHAR: J [ jis201 ] }
|
||||
[ drop f ]
|
||||
} case
|
||||
] }
|
||||
{ CHAR: $ [
|
||||
dup stream-read1 {
|
||||
{ CHAR: @ [ drop jis208 ] } ! want: JIS X 0208-1978
|
||||
{ CHAR: B [ drop jis208 ] }
|
||||
{ CHAR: ( [
|
||||
stream-read1 CHAR: D = jis212 f ?
|
||||
] }
|
||||
[ 2drop f ]
|
||||
} case
|
||||
] }
|
||||
[ 2drop f ]
|
||||
} case ;
|
||||
|
||||
: double-width? ( type -- ? )
|
||||
{ [ jis208 eq? ] [ jis212 eq? ] } 1|| ;
|
||||
|
||||
: finish-decode ( num encoding -- char )
|
||||
type>> at replacement-char or ;
|
||||
|
||||
M:: iso2022-state decode-char ( stream encoding -- char )
|
||||
stream stream-read1 {
|
||||
{ ESC [
|
||||
stream read-escape [
|
||||
encoding (>>type)
|
||||
stream encoding decode-char
|
||||
] [ replacement-char ] if*
|
||||
] }
|
||||
{ f [ f ] }
|
||||
[
|
||||
encoding type>> double-width? [
|
||||
stream stream-read1
|
||||
[ 2byte-array be> encoding finish-decode ]
|
||||
[ drop replacement-char ] if*
|
||||
] [ encoding finish-decode ] if
|
||||
]
|
||||
} case ;
|
|
@ -0,0 +1 @@
|
|||
ISO-2022-JP-1 text encoding
|
|
@ -3,7 +3,7 @@ tools.test parser math namespaces continuations vocabs kernel
|
|||
compiler.units eval vocabs.parser ;
|
||||
IN: listener.tests
|
||||
|
||||
: hello "Hi" print ; parsing
|
||||
SYNTAX: hello "Hi" print ;
|
||||
|
||||
: parse-interactive ( string -- quot )
|
||||
<string-reader> stream-read-quot ;
|
||||
|
|
|
@ -5,27 +5,25 @@ vocabs.loader words kernel namespaces locals.parser locals.types
|
|||
locals.errors ;
|
||||
IN: locals
|
||||
|
||||
: :>
|
||||
SYNTAX: :>
|
||||
scan locals get [ :>-outside-lambda-error ] unless*
|
||||
[ make-local ] bind <def> parsed ; parsing
|
||||
[ make-local ] bind <def> parsed ;
|
||||
|
||||
: [| parse-lambda over push-all ; parsing
|
||||
SYNTAX: [| parse-lambda over push-all ;
|
||||
|
||||
: [let parse-let over push-all ; parsing
|
||||
SYNTAX: [let parse-let over push-all ;
|
||||
|
||||
: [let* parse-let* over push-all ; parsing
|
||||
SYNTAX: [let* parse-let* over push-all ;
|
||||
|
||||
: [wlet parse-wlet over push-all ; parsing
|
||||
SYNTAX: [wlet parse-wlet over push-all ;
|
||||
|
||||
: :: (::) define ; parsing
|
||||
SYNTAX: :: (::) define ;
|
||||
|
||||
: M:: (M::) define ; parsing
|
||||
SYNTAX: M:: (M::) define ;
|
||||
|
||||
: MACRO:: (::) define-macro ; parsing
|
||||
SYNTAX: MACRO:: (::) define-macro ;
|
||||
|
||||
: MEMO:: (::) define-memoized ; parsing
|
||||
|
||||
USE: syntax
|
||||
SYNTAX: MEMO:: (::) define-memoized ;
|
||||
|
||||
{
|
||||
"locals.macros"
|
||||
|
|
|
@ -135,11 +135,11 @@ PRIVATE>
|
|||
[ [ input-logging-quot ] 2keep drop error-logging-quot ]
|
||||
(define-logging) ;
|
||||
|
||||
: LOG:
|
||||
SYNTAX: LOG:
|
||||
#! Syntax: name level
|
||||
CREATE-WORD dup scan-word
|
||||
'[ 1array stack>message _ _ log-message ]
|
||||
(( message -- )) define-declared ; parsing
|
||||
(( message -- )) define-declared ;
|
||||
|
||||
USE: vocabs.loader
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@ PRIVATE>
|
|||
[ over real-macro-effect memoize-quot [ call ] append define ]
|
||||
2bi ;
|
||||
|
||||
: MACRO: (:) define-macro ; parsing
|
||||
SYNTAX: MACRO: (:) define-macro ;
|
||||
|
||||
PREDICATE: macro < word "macro" word-prop >boolean ;
|
||||
|
||||
|
|
|
@ -16,8 +16,8 @@ SYMBOL: _
|
|||
: define-match-vars ( seq -- )
|
||||
[ define-match-var ] each ;
|
||||
|
||||
: MATCH-VARS: ! vars ...
|
||||
";" parse-tokens define-match-vars ; parsing
|
||||
SYNTAX: MATCH-VARS: ! vars ...
|
||||
";" parse-tokens define-match-vars ;
|
||||
|
||||
: match-var? ( symbol -- bool )
|
||||
dup word? [ "match-var" word-prop ] [ drop f ] if ;
|
||||
|
|
|
@ -289,7 +289,7 @@ M: MATRIX n*V(*)V+M!
|
|||
M: MATRIX n*V(*)Vconj+M!
|
||||
(prepare-ger) [ XGERC ] dip ;
|
||||
|
||||
: XMATRIX{ \ } [ >MATRIX ] parse-literal ; parsing
|
||||
SYNTAX: XMATRIX{ \ } [ >MATRIX ] parse-literal ;
|
||||
|
||||
M: MATRIX pprint-delims
|
||||
drop \ XMATRIX{ \ } ;
|
||||
|
|
|
@ -179,7 +179,7 @@ M: VECTOR n*V+V!
|
|||
M: VECTOR n*V!
|
||||
(prepare-scal) [ XSCAL ] dip ;
|
||||
|
||||
: XVECTOR{ \ } [ >VECTOR ] parse-literal ; parsing
|
||||
SYNTAX: XVECTOR{ \ } [ >VECTOR ] parse-literal ;
|
||||
|
||||
M: VECTOR pprint-delims
|
||||
drop \ XVECTOR{ \ } ;
|
||||
|
|
|
@ -31,7 +31,7 @@ M: complex sqrt >polar [ fsqrt ] [ 2.0 / ] bi* polar> ;
|
|||
|
||||
IN: syntax
|
||||
|
||||
: C{ \ } [ first2 rect> ] parse-literal ; parsing
|
||||
SYNTAX: C{ \ } [ first2 rect> ] parse-literal ;
|
||||
|
||||
USE: prettyprint.custom
|
||||
|
||||
|
|
|
@ -41,7 +41,7 @@ PRIVATE>
|
|||
[ drop "memoize" set-word-prop ]
|
||||
3tri ;
|
||||
|
||||
: MEMO: (:) define-memoized ; parsing
|
||||
SYNTAX: MEMO: (:) define-memoized ;
|
||||
|
||||
PREDICATE: memoized < word "memoize" word-prop ;
|
||||
|
||||
|
|
|
@ -20,10 +20,10 @@ PRIVATE>
|
|||
[ (parse-here) ] "" make but-last
|
||||
lexer get next-line ;
|
||||
|
||||
: STRING:
|
||||
SYNTAX: STRING:
|
||||
CREATE-WORD
|
||||
parse-here 1quotation
|
||||
(( -- string )) define-inline ; parsing
|
||||
(( -- string )) define-inline ;
|
||||
|
||||
<PRIVATE
|
||||
|
||||
|
@ -48,16 +48,16 @@ PRIVATE>
|
|||
change-column drop
|
||||
] "" make ;
|
||||
|
||||
: <"
|
||||
"\">" parse-multiline-string parsed ; parsing
|
||||
SYNTAX: <"
|
||||
"\">" parse-multiline-string parsed ;
|
||||
|
||||
: <'
|
||||
"'>" parse-multiline-string parsed ; parsing
|
||||
SYNTAX: <'
|
||||
"'>" parse-multiline-string parsed ;
|
||||
|
||||
: {'
|
||||
"'}" parse-multiline-string parsed ; parsing
|
||||
SYNTAX: {'
|
||||
"'}" parse-multiline-string parsed ;
|
||||
|
||||
: {"
|
||||
"\"}" parse-multiline-string parsed ; parsing
|
||||
SYNTAX: {"
|
||||
"\"}" parse-multiline-string parsed ;
|
||||
|
||||
: /* "*/" parse-multiline-string drop ; parsing
|
||||
SYNTAX: /* "*/" parse-multiline-string drop ;
|
||||
|
|
|
@ -62,7 +62,7 @@ M: nibble-array resize
|
|||
|
||||
M: nibble-array byte-length length nibbles>bytes ;
|
||||
|
||||
: N{ \ } [ >nibble-array ] parse-literal ; parsing
|
||||
SYNTAX: N{ \ } [ >nibble-array ] parse-literal ;
|
||||
|
||||
INSTANCE: nibble-array sequence
|
||||
|
||||
|
|
|
@ -47,7 +47,7 @@ reset-gl-function-number-counter
|
|||
parameters return parse-arglist [ abi indirect-quot ] dip
|
||||
define-declared ;
|
||||
|
||||
: GL-FUNCTION:
|
||||
SYNTAX: GL-FUNCTION:
|
||||
gl-function-calling-convention
|
||||
scan
|
||||
scan dup
|
||||
|
@ -55,5 +55,4 @@ reset-gl-function-number-counter
|
|||
gl-function-number
|
||||
[ gl-function-pointer ] 2curry swap
|
||||
";" parse-tokens [ "()" subseq? not ] filter
|
||||
define-indirect
|
||||
; parsing
|
||||
define-indirect ;
|
||||
|
|
|
@ -279,12 +279,12 @@ H{ } clone verify-messages set-global
|
|||
|
||||
: verify-message ( n -- word ) verify-messages get-global at ;
|
||||
|
||||
: X509_V_:
|
||||
SYNTAX: X509_V_:
|
||||
scan "X509_V_" prepend create-in
|
||||
scan-word
|
||||
[ 1quotation (( -- value )) define-inline ]
|
||||
[ verify-messages get set-at ]
|
||||
2bi ; parsing
|
||||
2bi ;
|
||||
|
||||
>>
|
||||
|
||||
|
|
|
@ -34,9 +34,9 @@ TUPLE: tokenizer any one many ;
|
|||
: reset-tokenizer ( -- )
|
||||
default-tokenizer \ tokenizer set-global ;
|
||||
|
||||
: TOKENIZER:
|
||||
SYNTAX: TOKENIZER:
|
||||
scan search [ "Tokenizer not found" throw ] unless*
|
||||
execute( -- tokenizer ) \ tokenizer set-global ; parsing
|
||||
execute( -- tokenizer ) \ tokenizer set-global ;
|
||||
|
||||
TUPLE: ebnf-non-terminal symbol ;
|
||||
TUPLE: ebnf-terminal symbol ;
|
||||
|
@ -522,16 +522,14 @@ M: ebnf-non-terminal (transform) ( ast -- parser )
|
|||
parse-ebnf dup dup parser [ main swap at compile ] with-variable
|
||||
[ compiled-parse ] curry [ with-scope ast>> ] curry ;
|
||||
|
||||
: <EBNF "EBNF>" reset-tokenizer parse-multiline-string parse-ebnf main swap at
|
||||
parsed reset-tokenizer ; parsing
|
||||
SYNTAX: <EBNF "EBNF>" reset-tokenizer parse-multiline-string parse-ebnf main swap at
|
||||
parsed reset-tokenizer ;
|
||||
|
||||
: [EBNF "EBNF]" reset-tokenizer parse-multiline-string ebnf>quot nip
|
||||
parsed \ call parsed reset-tokenizer ; parsing
|
||||
SYNTAX: [EBNF "EBNF]" reset-tokenizer parse-multiline-string ebnf>quot nip
|
||||
parsed \ call parsed reset-tokenizer ;
|
||||
|
||||
: EBNF:
|
||||
SYNTAX: EBNF:
|
||||
reset-tokenizer CREATE-WORD dup ";EBNF" parse-multiline-string
|
||||
ebnf>quot swapd (( input -- ast )) define-declared "ebnf-parser" set-word-prop
|
||||
reset-tokenizer ; parsing
|
||||
|
||||
|
||||
reset-tokenizer ;
|
||||
|
||||
|
|
|
@ -616,7 +616,7 @@ PRIVATE>
|
|||
|
||||
ERROR: parse-failed input word ;
|
||||
|
||||
: PEG:
|
||||
SYNTAX: PEG:
|
||||
(:)
|
||||
[let | def [ ] word [ ] |
|
||||
[
|
||||
|
@ -630,7 +630,7 @@ ERROR: parse-failed input word ;
|
|||
]
|
||||
] with-compilation-unit
|
||||
] over push-all
|
||||
] ; parsing
|
||||
] ;
|
||||
|
||||
USING: vocabs vocabs.loader ;
|
||||
|
||||
|
|
|
@ -48,7 +48,7 @@ M: persistent-hash hashcode* nip assoc-size ;
|
|||
|
||||
M: persistent-hash clone ;
|
||||
|
||||
: PH{ \ } [ >persistent-hash ] parse-literal ; parsing
|
||||
SYNTAX: PH{ \ } [ >persistent-hash ] parse-literal ;
|
||||
|
||||
M: persistent-hash pprint-delims drop \ PH{ \ } ;
|
||||
M: persistent-hash >pprint-sequence >alist ;
|
||||
|
|
|
@ -179,7 +179,7 @@ M: persistent-vector equal?
|
|||
: >persistent-vector ( seq -- pvec )
|
||||
T{ persistent-vector } like ;
|
||||
|
||||
: PV{ \ } [ >persistent-vector ] parse-literal ; parsing
|
||||
SYNTAX: PV{ \ } [ >persistent-vector ] parse-literal ;
|
||||
|
||||
M: persistent-vector pprint-delims drop \ PV{ \ } ;
|
||||
M: persistent-vector >pprint-sequence ;
|
||||
|
|
|
@ -96,12 +96,12 @@ ARTICLE: "prettyprint-literal" "Literal prettyprinting protocol"
|
|||
{ $code
|
||||
"TUPLE: rect w h ;"
|
||||
""
|
||||
": RECT["
|
||||
"SYNTAX: RECT["
|
||||
" scan-word"
|
||||
" scan-word \\ * assert="
|
||||
" scan-word"
|
||||
" scan-word \\ ] assert="
|
||||
" <rect> parsed ; parsing"
|
||||
" <rect> parsed ;"
|
||||
}
|
||||
"An example literal might be:"
|
||||
{ $code "RECT[ 100 * 200 ]" }
|
||||
|
|
|
@ -1,32 +0,0 @@
|
|||
! Copyright (C) 2009 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: help.markup help.syntax strings ;
|
||||
IN: quoting
|
||||
|
||||
HELP: quote?
|
||||
{ $values
|
||||
{ "ch" "a character" }
|
||||
{ "?" "a boolean" }
|
||||
}
|
||||
{ $description "Returns true if the character is a single or double quote." } ;
|
||||
|
||||
HELP: quoted?
|
||||
{ $values
|
||||
{ "str" string }
|
||||
{ "?" "a boolean" }
|
||||
}
|
||||
{ $description "Returns true if a string is surrounded by matching single or double quotes as the first and last characters." } ;
|
||||
|
||||
HELP: unquote
|
||||
{ $values
|
||||
{ "str" string }
|
||||
{ "newstr" string }
|
||||
}
|
||||
{ $description "Removes a pair of matching single or double quotes from a string." } ;
|
||||
|
||||
ARTICLE: "quoting" "Quotation marks"
|
||||
"The " { $vocab-link "quoting" } " vocabulary is for removing quotes from a string." $nl
|
||||
"Removing quotes:"
|
||||
{ $subsection unquote } ;
|
||||
|
||||
ABOUT: "quoting"
|
|
@ -1,10 +0,0 @@
|
|||
! Copyright (C) 2009 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: tools.test quoting ;
|
||||
IN: quoting.tests
|
||||
|
||||
|
||||
[ "abc" ] [ "'abc'" unquote ] unit-test
|
||||
[ "abc" ] [ "\"abc\"" unquote ] unit-test
|
||||
[ "'abc" ] [ "'abc" unquote ] unit-test
|
||||
[ "abc'" ] [ "abc'" unquote ] unit-test
|
|
@ -1,6 +1,6 @@
|
|||
! Copyright (C) 2009 Doug Coleman.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: combinators.short-circuit kernel math sequences strings ;
|
||||
USING: sequences math kernel strings combinators.short-circuit ;
|
||||
IN: quoting
|
||||
|
||||
: quote? ( ch -- ? ) "'\"" member? ;
|
||||
|
@ -13,4 +13,4 @@ IN: quoting
|
|||
} 1&& ;
|
||||
|
||||
: unquote ( str -- newstr )
|
||||
dup quoted? [ but-last-slice rest-slice >string ] when ;
|
||||
dup quoted? [ but-last-slice rest-slice >string ] when ;
|
|
@ -1 +1,2 @@
|
|||
Doug Coleman
|
||||
Daniel Ehrenberg
|
||||
|
|
|
@ -108,21 +108,24 @@ M: terminator-class class-member? ( obj class -- ? )
|
|||
|
||||
M: f class-member? 2drop f ;
|
||||
|
||||
: same? ( obj1 obj2 quot1: ( obj1 -- val1 ) quot2: ( obj2 -- val2 ) -- ? )
|
||||
bi* = ; inline
|
||||
|
||||
M: script-class class-member?
|
||||
[ script-of ] [ script>> ] bi* = ;
|
||||
[ script-of ] [ script>> ] same? ;
|
||||
|
||||
M: category-class class-member?
|
||||
[ category# ] [ category>> ] bi* = ;
|
||||
[ category ] [ category>> ] same? ;
|
||||
|
||||
M: category-range-class class-member?
|
||||
[ category first ] [ category>> ] bi* = ;
|
||||
[ category first ] [ category>> ] same? ;
|
||||
|
||||
TUPLE: not-class class ;
|
||||
|
||||
PREDICATE: not-integer < not-class class>> integer? ;
|
||||
|
||||
UNION: simple-class
|
||||
primitive-class range-class category-class category-range-class dot ;
|
||||
primitive-class range-class dot ;
|
||||
PREDICATE: not-simple < not-class class>> simple-class? ;
|
||||
|
||||
M: not-class class-member?
|
||||
|
@ -227,7 +230,10 @@ TUPLE: class-partition integers not-integers simples not-simples and or other ;
|
|||
dup or-class flatten partition-classes
|
||||
dup not-integers>> length {
|
||||
{ 0 [ nip make-or-class ] }
|
||||
{ 1 [ not-integers>> first [ class>> '[ _ swap class-member? ] any? ] keep or ] }
|
||||
{ 1 [
|
||||
not-integers>> first
|
||||
[ class>> '[ _ swap class-member? ] any? ] keep or
|
||||
] }
|
||||
[ 3drop t ]
|
||||
} case ;
|
||||
|
||||
|
@ -248,6 +254,12 @@ M: or-class <not-class>
|
|||
M: t <not-class> drop f ;
|
||||
M: f <not-class> drop t ;
|
||||
|
||||
: <minus-class> ( a b -- a-b )
|
||||
<not-class> 2array <and-class> ;
|
||||
|
||||
: <sym-diff-class> ( a b -- a~b )
|
||||
2array [ <or-class> ] [ <and-class> ] bi <minus-class> ;
|
||||
|
||||
M: primitive-class class-member?
|
||||
class>> class-member? ;
|
||||
|
||||
|
|
|
@ -11,7 +11,7 @@ IN: regexp.parser.tests
|
|||
"a|b" "a.b" "a|b|c" "abc|b" "a|bcd" "a|(b)" "(?-i:a)" "||"
|
||||
"(a)|b" "(a|b)" "((a)|(b))" "(?:a)" "(?i:a)" "|b" "b|"
|
||||
"[abc]" "[a-c]" "[^a-c]" "[^]]" "[]a]" "[[]" "[]-a]" "[a-]" "[-]"
|
||||
"[--a]" "foo*" "(foo)*" "(a|b)|c" "(foo){2,3}" "(foo){2,}"
|
||||
"foo*" "(foo)*" "(a|b)|c" "(foo){2,3}" "(foo){2,}"
|
||||
"(foo){2}" "{2,3}" "{," "{,}" "}" "foo}" "[^]-a]" "[^-]a]"
|
||||
"[a-]" "[^a-]" "[^a-]" "a{,2}" "(?#foobar)"
|
||||
"\\p{Space}" "\\t" "\\[" "[\\]]" "\\P{Space}"
|
||||
|
|
|
@ -2,7 +2,8 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: peg.ebnf kernel math.parser sequences assocs arrays fry math
|
||||
combinators regexp.classes strings splitting peg locals accessors
|
||||
regexp.ast unicode.case ;
|
||||
regexp.ast unicode.case unicode.script.private unicode.categories
|
||||
memoize interval-maps sets unicode.data combinators.short-circuit ;
|
||||
IN: regexp.parser
|
||||
|
||||
: allowed-char? ( ch -- ? )
|
||||
|
@ -18,15 +19,41 @@ ERROR: bad-number ;
|
|||
|
||||
ERROR: bad-class name ;
|
||||
|
||||
: simple ( str -- simple )
|
||||
! Alternatively, first collation key level?
|
||||
>case-fold [ " \t_" member? not ] filter ;
|
||||
|
||||
: simple-table ( seq -- table )
|
||||
[ [ simple ] keep ] H{ } map>assoc ;
|
||||
|
||||
MEMO: simple-script-table ( -- table )
|
||||
script-table interval-values prune simple-table ;
|
||||
|
||||
MEMO: simple-category-table ( -- table )
|
||||
categories simple-table ;
|
||||
|
||||
: parse-unicode-class ( name -- class )
|
||||
! Implement this!
|
||||
drop f ;
|
||||
{
|
||||
{ [ dup { [ length 1 = ] [ first "clmnpsz" member? ] } 1&& ] [
|
||||
>upper first
|
||||
<category-range-class>
|
||||
] }
|
||||
{ [ dup >title categories member? ] [
|
||||
simple-category-table at <category-class>
|
||||
] }
|
||||
{ [ "script=" ?head ] [
|
||||
dup simple-script-table at
|
||||
[ <script-class> ]
|
||||
[ "script=" prepend bad-class ] ?if
|
||||
] }
|
||||
[ bad-class ]
|
||||
} cond ;
|
||||
|
||||
: unicode-class ( name -- class )
|
||||
dup parse-unicode-class [ ] [ bad-class ] ?if ;
|
||||
|
||||
: name>class ( name -- class )
|
||||
>string >case-fold {
|
||||
>string simple {
|
||||
{ "lower" letter-class }
|
||||
{ "upper" LETTER-class }
|
||||
{ "alpha" Letter-class }
|
||||
|
@ -121,19 +148,29 @@ Character = EscapeSequence
|
|||
| "^" => [[ ^ <tagged-epsilon> ]]
|
||||
| . ?[ allowed-char? ]?
|
||||
|
||||
AnyRangeCharacter = EscapeSequence | .
|
||||
AnyRangeCharacter = !("&&"|"||"|"--"|"~~") (EscapeSequence | .)
|
||||
|
||||
RangeCharacter = !("]") AnyRangeCharacter
|
||||
|
||||
Range = RangeCharacter:a "-" RangeCharacter:b => [[ a b <range-class> ]]
|
||||
Range = RangeCharacter:a "-" !("-") RangeCharacter:b => [[ a b <range-class> ]]
|
||||
| RangeCharacter
|
||||
|
||||
StartRange = AnyRangeCharacter:a "-" RangeCharacter:b => [[ a b <range-class> ]]
|
||||
StartRange = AnyRangeCharacter:a "-" !("-") RangeCharacter:b => [[ a b <range-class> ]]
|
||||
| AnyRangeCharacter
|
||||
|
||||
Ranges = StartRange:s Range*:r => [[ r s prefix ]]
|
||||
|
||||
CharClass = "^"?:n Ranges:e => [[ e n char-class ]]
|
||||
BasicCharClass = "^"?:n Ranges:e => [[ e n char-class ]]
|
||||
|
||||
CharClass = BasicCharClass:b "&&" CharClass:c
|
||||
=> [[ b c 2array <and-class> ]]
|
||||
| BasicCharClass:b "||" CharClass:c
|
||||
=> [[ b c 2array <or-class> ]]
|
||||
| BasicCharClass:b "~~" CharClass:c
|
||||
=> [[ b c <sym-diff-class> ]]
|
||||
| BasicCharClass:b "--" CharClass:c
|
||||
=> [[ b c <minus-class> ]]
|
||||
| BasicCharClass
|
||||
|
||||
Options = [idmsux]*
|
||||
|
||||
|
|
|
@ -45,11 +45,11 @@ ARTICLE: { "regexp" "construction" } "Constructing regular expressions"
|
|||
ARTICLE: { "regexp" "syntax" } "Regular expression syntax"
|
||||
"Regexp syntax is largely compatible with Perl, Java and extended POSIX regexps, but not completely. Below, the syntax is documented."
|
||||
{ $heading "Characters" }
|
||||
"At its core, regular expressions consist of character literals. For example, " { $snippet "R/ f/" } " is a regular expression matching just the string 'f'. In addition, the normal escape codes are provided, like " { $snippet "\\t" } " for the tab character and " { $snippet "\\uxxxxxx" } "for an arbitrary Unicode code point, by its hex value. In addition, any character can be preceded by a backslash to escape it, unless this has special meaning. For example, to match a literal opening parenthesis, use " { $snippet "\\(" } "."
|
||||
"At its core, regular expressions consist of character literals. For example, " { $snippet "R/ f/" } " is a regular expression matching just the string 'f'. In addition, the normal escape codes are provided, like " { $snippet "\\t" } " for the tab character and " { $snippet "\\uxxxxxx" } " for an arbitrary Unicode code point, by its hex value. In addition, any character can be preceded by a backslash to escape it, unless this has special meaning. For example, to match a literal opening parenthesis, use " { $snippet "\\(" } "."
|
||||
{ $heading "Concatenation, alternation and grouping" }
|
||||
"Regular expressions can be built out of multiple characters by concatenation. For example, " { $snippet "R/ ab/" } " matches a followed by b. The " { $snippet "|" } " (alternation) operator can construct a regexp which matches one of two alternatives. Parentheses can be used for gropuing. So " { $snippet "R/ f(oo|ar)/" } " would match either 'foo' or 'far'."
|
||||
"Regular expressions can be built out of multiple characters by concatenation. For example, " { $snippet "R/ ab/" } " matches a followed by b. The " { $snippet "|" } " (alternation) operator can construct a regexp which matches one of two alternatives. Parentheses can be used for grouping. So " { $snippet "R/ f(oo|ar)/" } " would match either 'foo' or 'far'."
|
||||
{ $heading "Character classes" }
|
||||
"Square brackets define a convenient way to refer to a set of characters. For example, " { $snippet "[ab]" } " refers to either a or b. And " { $snippet "[a-z]" } " refers to all of the characters between a and z, in code point order. You can use these together, as in " { $snippet "[ac-fz]" } " which matches all of the characters between c and f, in addition to a and z. Character classes can be negated using a carat, as in " { $snippet "[^a]" } " which matches all characters which are not a."
|
||||
"Square brackets define a convenient way to refer to a set of characters. For example, " { $snippet "[ab]" } " refers to either a or b. And " { $snippet "[a-z]" } " refers to all of the characters between a and z, in code point order. You can use these together, as in " { $snippet "[ac-fz]" } " which matches all of the characters between c and f, in addition to a and z. Character classes can be negated using a caret, as in " { $snippet "[^a]" } " which matches all characters which are not a."
|
||||
{ $heading "Predefined character classes" }
|
||||
"Several character classes are predefined, both for convenience and because they are too large to represent directly. In Factor regular expressions, all character classes are Unicode-aware."
|
||||
{ $table
|
||||
|
@ -72,8 +72,12 @@ ARTICLE: { "regexp" "syntax" } "Regular expression syntax"
|
|||
{ { $snippet "\\p{blank}" } "Non-newline whitespace" }
|
||||
{ { $snippet "\\p{cntrl}" } "Control character" }
|
||||
{ { $snippet "\\p{space}" } "Whitespace" }
|
||||
{ { $snippet "\\p{xdigit}" } "Hexidecimal digit" } } ! In the future: Unicode
|
||||
"Full unicode properties are not yet supported."
|
||||
{ { $snippet "\\p{xdigit}" } "Hexadecimal digit" }
|
||||
{ { $snippet "\\p{Nd}" } "Character in Unicode category Nd" }
|
||||
{ { $snippet "\\p{Z}" } "Character in Unicode category beginning with Z" }
|
||||
{ { $snippet "\\p{script=Cham}" } "Character in the Cham writing system" } }
|
||||
{ $heading "Character class operations" }
|
||||
"Character classes can be composed using four binary operations: " { $snippet "|| && ~~ --" } ". These do the operations union, intersection, symmetric difference and difference, respectively. For example, characters which are lower-case but not Latin script could be matched as " { $snippet "[\\p{lower}--\\p{script=latin}]" } ". These operations are right-associative, and " { $snippet "^" } " binds tighter than them. There is no syntax for grouping."
|
||||
{ $heading "Boundaries" }
|
||||
"Special operators exist to match certain points in the string. These are called 'zero-width' because they do not consume any characters."
|
||||
{ $table
|
||||
|
@ -105,9 +109,18 @@ ARTICLE: { "regexp" "syntax" } "Regular expression syntax"
|
|||
{ $heading "Quotation" }
|
||||
"To make it convenient to have a long string which uses regexp operators, a special syntax is provided. If a substring begins with " { $snippet "\\Q" } " then everything until " { $snippet "\\E" } " is quoted (escaped). For example, " { $snippet "R/ \\Qfoo\\bar|baz()\\E/" } " matches exactly the string " { $snippet "\"foo\\bar|baz()\"" } "."
|
||||
{ $heading "Unsupported features" }
|
||||
"One missing feature is backreferences. This is because of a design decision to allow only regular expressions following the formal theory of regular languages. For more information, see " { $link { "regexp" "theory" } } ". You can create a new regular expression to match a particular string using " { $vocab-link "regexp.combinators" } " and group capture is available to extract parts of a regular expression match." $nl
|
||||
"Another feature is Perl's " { $snippet "\\G" } " syntax, which references the previous match, is not included. This is because that sequence is inherently stateful, and Factor regexps don't hold state." $nl
|
||||
"None of the operations which embed code into a regexp are supported, as this would require the inclusion of the Factor parser and compiler in any application which wants to expose regexps to the user. None of the casing operations are included of Perl like \\L, for simplicity." ; ! Also describe syntax, from the beginning
|
||||
{ $subheading "Group capture" }
|
||||
{ $subheading "Reluctant and posessive quantifiers" }
|
||||
{ $subheading "Backreferences" }
|
||||
"Backreferences were omitted because of a design decision to allow only regular expressions following the formal theory of regular languages. For more information, see " { $link { "regexp" "theory" } } "."
|
||||
$nl
|
||||
"To work around the lack of backreferences, consider using group capture and then creating a new regular expression to match the captured string using " { $vocab-link "regexp.combinators" } "."
|
||||
{ $subheading "Previous match" }
|
||||
"Another feature that is not included is Perl's " { $snippet "\\G" } " syntax, which references the previous match. This is because that sequence is inherently stateful, and Factor regexps don't hold state."
|
||||
{ $subheading "Embedding code" }
|
||||
"Operations which embed code into a regexp are not supported. This would require the inclusion of the Factor parser and compiler in any deployed application which wants to expose regexps to the user, leading to an undesirable increase in the code size."
|
||||
{ $heading "Casing operations" }
|
||||
"No special casing operations are included, for example Perl's " { $snippet "\\L" } "." ;
|
||||
|
||||
ARTICLE: { "regexp" "options" } "Regular expression options"
|
||||
"When " { $link { "regexp" "construction" } } ", various options can be provided. Options have single-character names. A string of options has one of the following two forms:"
|
||||
|
@ -150,7 +163,7 @@ ARTICLE: { "regexp" "theory" } "The theory of regular expressions"
|
|||
"This implies, by DeMorgan's law, that, if you have two regular languages, their intersection is also regular. That is, for any two regular expressions, there exists a regular expression which matches strings that match both inputs." $nl
|
||||
"Traditionally, regular expressions on computer support an additional operation: backreferences. For example, the Perl regexp " { $snippet "/(.*)$1/" } " matches a string repated twice. If a backreference refers to a string with a predetermined maximum length, then the resulting language is still regular." $nl
|
||||
"But, if not, the language is not regular. There is strong evidence that there is no efficient way to parse with backreferences in the general case. Perl uses a naive backtracking algorithm which has pathological behavior in some cases, taking exponential time to match even if backreferences aren't used. Additionally, expressions with backreferences don't have the properties with negation and intersection described above." $nl
|
||||
"The Factor regular expression engine was built with the design decision to support negation and intersection at the expense of backreferences. This lets us have a guaranteed linear-time matching algorithm. Systems like Ragel and Lex also use this algorithm, but in the Factor regular expression engine, all other features of regexps are still present." ;
|
||||
"The Factor regular expression engine was built with the design decision to support negation and intersection at the expense of backreferences. This lets us have a guaranteed linear-time matching algorithm. Systems like Ragel and Lex use the same algorithm." ;
|
||||
|
||||
ARTICLE: { "regexp" "operations" } "Matching operations with regular expressions"
|
||||
"Testing if a string matches a regular expression:"
|
||||
|
|
|
@ -480,3 +480,57 @@ IN: regexp-tests
|
|||
[ f ] [ "a\r" R/ a$./mds matches? ] unit-test
|
||||
[ t ] [ "a\n" R/ a$./ms matches? ] unit-test
|
||||
[ t ] [ "a\n" R/ a$./mds matches? ] unit-test
|
||||
|
||||
! Unicode categories
|
||||
[ t ] [ "a" R/ \p{L}/ matches? ] unit-test
|
||||
[ t ] [ "A" R/ \p{L}/ matches? ] unit-test
|
||||
[ f ] [ " " R/ \p{L}/ matches? ] unit-test
|
||||
[ f ] [ "a" R/ \P{L}/ matches? ] unit-test
|
||||
[ f ] [ "A" R/ \P{L}/ matches? ] unit-test
|
||||
[ t ] [ " " R/ \P{L}/ matches? ] unit-test
|
||||
|
||||
[ t ] [ "a" R/ \p{Ll}/ matches? ] unit-test
|
||||
[ f ] [ "A" R/ \p{Ll}/ matches? ] unit-test
|
||||
[ f ] [ " " R/ \p{Ll}/ matches? ] unit-test
|
||||
[ f ] [ "a" R/ \P{Ll}/ matches? ] unit-test
|
||||
[ t ] [ "A" R/ \P{Ll}/ matches? ] unit-test
|
||||
[ t ] [ " " R/ \P{Ll}/ matches? ] unit-test
|
||||
|
||||
[ t ] [ "a" R/ \p{script=Latin}/ matches? ] unit-test
|
||||
[ f ] [ " " R/ \p{script=Latin}/ matches? ] unit-test
|
||||
[ f ] [ "a" R/ \P{script=Latin}/ matches? ] unit-test
|
||||
[ t ] [ " " R/ \P{script=Latin}/ matches? ] unit-test
|
||||
|
||||
! These should be case-insensitive
|
||||
[ f ] [ " " R/ \p{l}/ matches? ] unit-test
|
||||
[ f ] [ "a" R/ \P{l}/ matches? ] unit-test
|
||||
[ f ] [ "a" R/ \P{ll}/ matches? ] unit-test
|
||||
[ t ] [ " " R/ \P{LL}/ matches? ] unit-test
|
||||
[ f ] [ "a" R/ \P{sCriPt = latin}/ matches? ] unit-test
|
||||
[ t ] [ " " R/ \P{SCRIPT = laTIn}/ matches? ] unit-test
|
||||
|
||||
! Logical operators
|
||||
[ t ] [ "a" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
|
||||
[ t ] [ "π" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
|
||||
[ t ] [ "A" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
|
||||
[ f ] [ "3" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
|
||||
|
||||
[ t ] [ "a" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
|
||||
[ t ] [ "π" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
|
||||
[ t ] [ "A" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
|
||||
[ f ] [ "3" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
|
||||
|
||||
[ t ] [ "a" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
|
||||
[ f ] [ "π" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
|
||||
[ f ] [ "A" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
|
||||
[ f ] [ "3" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
|
||||
|
||||
[ f ] [ "a" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
|
||||
[ t ] [ "π" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
|
||||
[ t ] [ "A" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
|
||||
[ f ] [ "3" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
|
||||
|
||||
[ f ] [ "a" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
|
||||
[ f ] [ "π" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
|
||||
[ t ] [ "A" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
|
||||
[ f ] [ "3" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
|
||||
|
|
|
@ -204,17 +204,17 @@ PRIVATE>
|
|||
|
||||
PRIVATE>
|
||||
|
||||
: R! CHAR: ! parsing-regexp ; parsing
|
||||
: R" CHAR: " parsing-regexp ; parsing
|
||||
: R# CHAR: # parsing-regexp ; parsing
|
||||
: R' CHAR: ' parsing-regexp ; parsing
|
||||
: R( CHAR: ) parsing-regexp ; parsing
|
||||
: R/ CHAR: / parsing-regexp ; parsing
|
||||
: R@ CHAR: @ parsing-regexp ; parsing
|
||||
: R[ CHAR: ] parsing-regexp ; parsing
|
||||
: R` CHAR: ` parsing-regexp ; parsing
|
||||
: R{ CHAR: } parsing-regexp ; parsing
|
||||
: R| CHAR: | parsing-regexp ; parsing
|
||||
SYNTAX: R! CHAR: ! parsing-regexp ;
|
||||
SYNTAX: R" CHAR: " parsing-regexp ;
|
||||
SYNTAX: R# CHAR: # parsing-regexp ;
|
||||
SYNTAX: R' CHAR: ' parsing-regexp ;
|
||||
SYNTAX: R( CHAR: ) parsing-regexp ;
|
||||
SYNTAX: R/ CHAR: / parsing-regexp ;
|
||||
SYNTAX: R@ CHAR: @ parsing-regexp ;
|
||||
SYNTAX: R[ CHAR: ] parsing-regexp ;
|
||||
SYNTAX: R` CHAR: ` parsing-regexp ;
|
||||
SYNTAX: R{ CHAR: } parsing-regexp ;
|
||||
SYNTAX: R| CHAR: | parsing-regexp ;
|
||||
|
||||
USING: vocabs vocabs.loader ;
|
||||
|
||||
|
|
|
@ -74,4 +74,4 @@ PRIVATE>
|
|||
: roman/mod ( str1 str2 -- str3 str4 )
|
||||
[ /mod ] binary-roman-op [ >roman ] dip ;
|
||||
|
||||
: ROMAN: scan roman> parsed ; parsing
|
||||
SYNTAX: ROMAN: scan roman> parsed ;
|
||||
|
|
|
@ -93,7 +93,6 @@ M: object declarations. drop ;
|
|||
|
||||
M: word declarations.
|
||||
{
|
||||
POSTPONE: parsing
|
||||
POSTPONE: delimiter
|
||||
POSTPONE: inline
|
||||
POSTPONE: recursive
|
||||
|
|
|
@ -19,8 +19,8 @@ MACRO: shuffle-effect ( effect -- )
|
|||
[ [ at \ swap \ nth [ ] 3sequence ] curry map , \ cleave , ] 2bi
|
||||
] [ ] make ;
|
||||
|
||||
: shuffle(
|
||||
")" parse-effect parsed \ shuffle-effect parsed ; parsing
|
||||
SYNTAX: shuffle(
|
||||
")" parse-effect parsed \ shuffle-effect parsed ;
|
||||
|
||||
: 2swap ( x y z t -- z t x y ) 2 2 mnswap ; inline
|
||||
|
||||
|
|
|
@ -1,8 +1,24 @@
|
|||
USING: help.syntax help.markup strings ;
|
||||
! Copyright (C) 2009 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: help.syntax help.markup strings biassocs arrays ;
|
||||
IN: simple-flat-file
|
||||
|
||||
ABOUT: "simple-flat-file"
|
||||
|
||||
ARTICLE: "simple-flat-file" "Parsing simple flat files"
|
||||
"The " { $vocab-link "simple-flat-file" } " vocabulary provides words for loading and parsing simple flat files in a particular format which is common for encoding tasks."
|
||||
{ $subsection flat-file>biassoc } ;
|
||||
"The " { $vocab-link "simple-flat-file" } " vocabulary provides words for loading and parsing simple flat files in a particular format which is common for encoding and Unicode tasks."
|
||||
{ $subsection flat-file>biassoc }
|
||||
{ $subsection load-interval-file }
|
||||
{ $subsection data } ;
|
||||
|
||||
HELP: load-interval-file
|
||||
{ $values { "filename" string } { "table" "an interval map" } }
|
||||
{ $description "This loads a file that looks like Script.txt in the Unicode Character Database and converts it into an efficient interval map, where the keys are characters and the values are strings for the properties." } ;
|
||||
|
||||
HELP: data
|
||||
{ $values { "filename" string } { "data" array } }
|
||||
{ $description "This loads a file that's delineated by semicolons and lines, returning an array of lines, where each line is an array split by the semicolons, with whitespace trimmed off." } ;
|
||||
|
||||
HELP: flat-file>biassoc
|
||||
{ $values { "filename" string } { "biassoc" biassoc } }
|
||||
{ $description "This loads a flat file, in the form that many encoding resource files are in, with two columns of numeric data in hex, and returns a biassoc associating them." } ;
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: sequences splitting kernel math.parser io.files io.encodings.utf8
|
||||
biassocs ascii ;
|
||||
biassocs ascii namespaces arrays make assocs interval-maps sets ;
|
||||
IN: simple-flat-file
|
||||
|
||||
: drop-comments ( seq -- newseq )
|
||||
|
@ -30,3 +30,25 @@ IN: simple-flat-file
|
|||
|
||||
: data ( filename -- data )
|
||||
utf8 file-lines drop-comments [ split-; ] map ;
|
||||
|
||||
SYMBOL: interned
|
||||
|
||||
: range, ( value key -- )
|
||||
swap interned get
|
||||
[ = ] with find nip 2array , ;
|
||||
|
||||
: expand-ranges ( assoc -- interval-map )
|
||||
[
|
||||
[
|
||||
swap CHAR: . over member? [
|
||||
".." split1 [ hex> ] bi@ 2array
|
||||
] [ hex> ] if range,
|
||||
] assoc-each
|
||||
] { } make <interval-map> ;
|
||||
|
||||
: process-interval-file ( ranges -- table )
|
||||
dup values prune interned
|
||||
[ expand-ranges ] with-variable ;
|
||||
|
||||
: load-interval-file ( filename -- table )
|
||||
data process-interval-file ;
|
||||
|
|
|
@ -70,7 +70,7 @@ M: A >pprint-sequence ;
|
|||
|
||||
M: A pprint* pprint-object ;
|
||||
|
||||
: A{ \ } [ >A ] parse-literal ; parsing
|
||||
SYNTAX: A{ \ } [ >A ] parse-literal ;
|
||||
|
||||
INSTANCE: A sequence
|
||||
|
||||
|
|
|
@ -39,7 +39,7 @@ M: V >pprint-sequence ;
|
|||
|
||||
M: V pprint* pprint-object ;
|
||||
|
||||
: V{ \ } [ >V ] parse-literal ; parsing
|
||||
SYNTAX: V{ \ } [ >V ] parse-literal ;
|
||||
|
||||
INSTANCE: V growable
|
||||
|
||||
|
|
|
@ -65,4 +65,9 @@ DEFER: curry-folding-test ( quot -- )
|
|||
|
||||
{ 3 0 } [ [ 1 2 3 ] curry-folding-test ] must-infer-as
|
||||
{ 3 0 } [ 1 [ 2 3 ] curry curry-folding-test ] must-infer-as
|
||||
{ 3 0 } [ [ 1 2 ] 3 [ ] curry compose curry-folding-test ] must-infer-as
|
||||
{ 3 0 } [ [ 1 2 ] 3 [ ] curry compose curry-folding-test ] must-infer-as
|
||||
|
||||
: member?-test ( a -- ? ) { 1 2 3 10 7 58 } member? ;
|
||||
|
||||
[ f ] [ 1.0 member?-test ] unit-test
|
||||
[ t ] [ \ member?-test def>> first [ member?-test ] all? ] unit-test
|
|
@ -1,12 +1,12 @@
|
|||
! Copyright (C) 2007, 2009 Slava Pestov, Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: fry accessors arrays kernel kernel.private combinators.private
|
||||
words sequences generic math namespaces make quotations assocs
|
||||
combinators classes.tuple classes.tuple.private effects summary
|
||||
hashtables classes generic sets definitions generic.standard
|
||||
slots.private continuations locals generalizations
|
||||
stack-checker.backend stack-checker.state stack-checker.visitor
|
||||
stack-checker.errors stack-checker.values
|
||||
words sequences generic math math.order namespaces make quotations assocs
|
||||
combinators combinators.short-circuit classes.tuple
|
||||
classes.tuple.private effects summary hashtables classes generic sets
|
||||
definitions generic.standard slots.private continuations locals
|
||||
generalizations stack-checker.backend stack-checker.state
|
||||
stack-checker.visitor stack-checker.errors stack-checker.values
|
||||
stack-checker.recursive-state ;
|
||||
IN: stack-checker.transforms
|
||||
|
||||
|
@ -107,36 +107,28 @@ IN: stack-checker.transforms
|
|||
] 1 define-transform
|
||||
|
||||
! Membership testing
|
||||
CONSTANT: bit-member-n 256
|
||||
CONSTANT: bit-member-max 256
|
||||
|
||||
: bit-member? ( seq -- ? )
|
||||
#! Can we use a fast byte array test here?
|
||||
{
|
||||
{ [ dup length 8 < ] [ f ] }
|
||||
{ [ dup [ integer? not ] any? ] [ f ] }
|
||||
{ [ dup [ 0 < ] any? ] [ f ] }
|
||||
{ [ dup [ bit-member-n >= ] any? ] [ f ] }
|
||||
[ t ]
|
||||
} cond nip ;
|
||||
[ length 4 > ]
|
||||
[ [ integer? ] all? ]
|
||||
[ [ 0 bit-member-max between? ] any? ]
|
||||
} 1&& ;
|
||||
|
||||
: bit-member-seq ( seq -- flags )
|
||||
bit-member-n swap [ member? 1 0 ? ] curry B{ } map-as ;
|
||||
|
||||
: exact-float? ( f -- ? )
|
||||
dup float? [ dup >integer >float = ] [ drop f ] if ; inline
|
||||
[ supremum 1+ ] keep '[ _ member? 1 0 ? ] B{ } map-as ;
|
||||
|
||||
: bit-member-quot ( seq -- newquot )
|
||||
[
|
||||
bit-member-seq ,
|
||||
[
|
||||
{
|
||||
{ [ over fixnum? ] [ ?nth 1 eq? ] }
|
||||
{ [ over bignum? ] [ ?nth 1 eq? ] }
|
||||
{ [ over exact-float? ] [ ?nth 1 eq? ] }
|
||||
[ 2drop f ]
|
||||
} cond
|
||||
] %
|
||||
] [ ] make ;
|
||||
bit-member-seq
|
||||
'[
|
||||
_ {
|
||||
{ [ over fixnum? ] [ ?nth 1 eq? ] }
|
||||
{ [ over bignum? ] [ ?nth 1 eq? ] }
|
||||
[ 2drop f ]
|
||||
} cond
|
||||
] ;
|
||||
|
||||
: member-quot ( seq -- newquot )
|
||||
dup bit-member? [
|
||||
|
|
|
@ -32,7 +32,7 @@ PRIVATE>
|
|||
: >suffix-array ( seq -- array )
|
||||
[ suffixes ] map concat natural-sort ;
|
||||
|
||||
: SA{ \ } [ >suffix-array ] parse-literal ; parsing
|
||||
SYNTAX: SA{ \ } [ >suffix-array ] parse-literal ;
|
||||
|
||||
: query ( begin suffix-array -- matches )
|
||||
2dup find-index dup
|
||||
|
|
|
@ -18,7 +18,7 @@ M: word disassemble word-xt 2array disassemble ;
|
|||
|
||||
M: method-spec disassemble first2 method disassemble ;
|
||||
|
||||
cpu x86? os unix? and
|
||||
cpu x86?
|
||||
"tools.disassembler.udis"
|
||||
"tools.disassembler.gdb" ?
|
||||
require
|
||||
|
|
|
@ -30,9 +30,9 @@ CONSTANT: UD_VENDOR_AMD 0
|
|||
CONSTANT: UD_VENDOR_INTEL 1
|
||||
|
||||
FUNCTION: void ud_init ( ud* u ) ;
|
||||
FUNCTION: void ud_set_mode ( ud* u, uint8_t mode ) ;
|
||||
FUNCTION: void ud_set_mode ( ud* u, uchar mode ) ;
|
||||
FUNCTION: void ud_set_pc ( ud* u, ulonglong pc ) ;
|
||||
FUNCTION: void ud_set_input_buffer ( ud* u, uint8_t* offset, size_t size ) ;
|
||||
FUNCTION: void ud_set_input_buffer ( ud* u, uchar* offset, size_t size ) ;
|
||||
FUNCTION: void ud_set_vendor ( ud* u, uint vendor ) ;
|
||||
FUNCTION: void ud_set_syntax ( ud* u, void* syntax ) ;
|
||||
FUNCTION: void ud_input_skip ( ud* u, size_t size ) ;
|
||||
|
|
|
@ -40,10 +40,9 @@ M: bad-tr summary
|
|||
|
||||
PRIVATE>
|
||||
|
||||
: TR:
|
||||
SYNTAX: TR:
|
||||
scan parse-definition
|
||||
unclip-last [ unclip-last ] dip compute-tr
|
||||
[ check-tr ]
|
||||
[ [ create-tr ] dip define-tr ]
|
||||
[ [ "-fast" append create-tr ] dip define-fast-tr ] 2tri ;
|
||||
parsing
|
||||
|
|
|
@ -4,8 +4,9 @@ USING: combinators.short-circuit unicode.categories kernel math
|
|||
combinators splitting sequences math.parser io.files io assocs
|
||||
arrays namespaces make math.ranges unicode.normalize
|
||||
unicode.normalize.private values io.encodings.ascii
|
||||
unicode.syntax unicode.data compiler.units fry
|
||||
alien.syntax sets accessors interval-maps memoize locals words ;
|
||||
unicode.data compiler.units fry unicode.categories.syntax
|
||||
alien.syntax sets accessors interval-maps memoize locals words
|
||||
simple-flat-file ;
|
||||
IN: unicode.breaks
|
||||
|
||||
<PRIVATE
|
||||
|
@ -31,9 +32,9 @@ CATEGORY: grapheme-control Zl Zp Cc Cf ;
|
|||
[ drop Control ]
|
||||
} case ;
|
||||
|
||||
CATEGORY: (extend) Me Mn ;
|
||||
: extend? ( ch -- ? )
|
||||
{ [ (extend)? ] [ "Other_Grapheme_Extend" property? ] } 1|| ;
|
||||
CATEGORY: extend
|
||||
Me Mn |
|
||||
"Other_Grapheme_Extend" property? ;
|
||||
|
||||
: loe? ( ch -- ? )
|
||||
"Logical_Order_Exception" property? ;
|
||||
|
@ -127,7 +128,7 @@ to: grapheme-table
|
|||
|
||||
VALUE: word-break-table
|
||||
|
||||
"vocab:unicode/data/WordBreakProperty.txt" load-key-value
|
||||
"vocab:unicode/data/WordBreakProperty.txt" load-interval-file
|
||||
to: word-break-table
|
||||
|
||||
C-ENUM: wOther wCR wLF wNewline wExtend wFormat wKatakana wALetter wMidLetter
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
! Copyright (C) 2008, 2009 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: unicode.data sequences namespaces
|
||||
sbufs make unicode.syntax unicode.normalize math hints
|
||||
unicode.categories combinators unicode.syntax assocs combinators.short-circuit
|
||||
sbufs make unicode.normalize math hints
|
||||
unicode.categories combinators assocs combinators.short-circuit
|
||||
strings splitting kernel accessors unicode.breaks fry locals ;
|
||||
QUALIFIED: ascii
|
||||
IN: unicode.case
|
||||
|
|
|
@ -12,6 +12,9 @@ HELP: Letter
|
|||
HELP: alpha
|
||||
{ $class-description "The class of alphanumeric characters." } ;
|
||||
|
||||
HELP: math
|
||||
{ $class-description "The class of Unicode math characters." } ;
|
||||
|
||||
HELP: blank
|
||||
{ $class-description "The class of whitespace characters." } ;
|
||||
|
||||
|
@ -54,6 +57,8 @@ ARTICLE: "unicode.categories" "Character classes"
|
|||
{ $subsection uncased }
|
||||
{ $subsection uncased? }
|
||||
{ $subsection character }
|
||||
{ $subsection character? } ;
|
||||
{ $subsection character? }
|
||||
{ $subsection math }
|
||||
{ $subsection math? } ;
|
||||
|
||||
ABOUT: "unicode.categories"
|
||||
|
|
|
@ -1,15 +1,16 @@
|
|||
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: unicode.syntax ;
|
||||
USING: unicode.categories.syntax sequences unicode.data ;
|
||||
IN: unicode.categories
|
||||
|
||||
CATEGORY: blank Zs Zl Zp \r\n ;
|
||||
CATEGORY: letter Ll ;
|
||||
CATEGORY: LETTER Lu ;
|
||||
CATEGORY: Letter Lu Ll Lt Lm Lo ;
|
||||
CATEGORY: blank Zs Zl Zp | "\r\n" member? ;
|
||||
CATEGORY: letter Ll | "Other_Lowercase" property? ;
|
||||
CATEGORY: LETTER Lu | "Other_Uppercase" property? ;
|
||||
CATEGORY: Letter Lu Ll Lt Lm Lo Nl ;
|
||||
CATEGORY: digit Nd Nl No ;
|
||||
CATEGORY-NOT: printable Cc Cf Cs Co Cn ;
|
||||
CATEGORY: alpha Lu Ll Lt Lm Lo Nd Nl No ;
|
||||
CATEGORY: alpha Lu Ll Lt Lm Lo Nd Nl No | "Other_Alphabetic" property? ;
|
||||
CATEGORY: control Cc ;
|
||||
CATEGORY-NOT: uncased Lu Ll Lt Lm Mn Me ;
|
||||
CATEGORY-NOT: character Cn ;
|
||||
CATEGORY: math Sm | "Other_Math" property? ;
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: help.syntax help.markup ;
|
||||
IN: unicode.categories.syntax
|
||||
|
||||
ABOUT: "unicode.categories.syntax"
|
||||
|
||||
ARTICLE: "unicode.categories.syntax" "Unicode category syntax"
|
||||
"There is special syntax sugar for making predicate classes which are unions of Unicode general categories, plus some other code."
|
||||
{ $subsection POSTPONE: CATEGORY: }
|
||||
{ $subsection POSTPONE: CATEGORY-NOT: } ;
|
||||
|
||||
HELP: CATEGORY:
|
||||
{ $syntax "CATEGORY: foo Nl Pd Lu | \"Diacritic\" property? ;" }
|
||||
{ $description "This defines a predicate class which is a subset of code points. In this example, " { $snippet "foo" } " is the class of characters which are in the general category Nl or Pd or Lu, or which have the Diacritic property." } ;
|
||||
|
||||
HELP: CATEGORY-NOT:
|
||||
{ $syntax "CATEGORY-NOT: foo Nl Pd Lu | \"Diacritic\" property? ;" }
|
||||
{ $description "This defines a predicate class which is a subset of code points, the complement of what " { $link POSTPONE: CATEGORY: } " would define. In this example, " { $snippet "foo" } " is the class of characters which are neither in the general category Nl or Pd or Lu, nor have the Diacritic property." } ;
|
|
@ -0,0 +1,3 @@
|
|||
! Copyright (C) 2009 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
! Copyright (C) 2008, 2009 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: unicode.data kernel math sequences parser unicode.data.private
|
||||
bit-arrays namespaces sequences.private arrays classes.parser
|
||||
assocs classes.predicate sets fry splitting accessors ;
|
||||
IN: unicode.categories.syntax
|
||||
|
||||
! For use in CATEGORY:
|
||||
SYMBOLS: Cn Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So Zs Zl Zp Cc Cf Cs Co | ;
|
||||
|
||||
<PRIVATE
|
||||
|
||||
: [category] ( categories code -- quot )
|
||||
'[ dup category# _ member? [ drop t ] _ if ] ;
|
||||
|
||||
: integer-predicate-class ( word predicate -- )
|
||||
integer swap define-predicate-class ;
|
||||
|
||||
: define-category ( word categories code -- )
|
||||
[category] integer-predicate-class ;
|
||||
|
||||
: define-not-category ( word categories code -- )
|
||||
[category] [ not ] compose integer-predicate-class ;
|
||||
|
||||
: parse-category ( -- word tokens quot )
|
||||
CREATE-CLASS \ ; parse-until { | } split1
|
||||
[ [ name>> categories-map at ] map ]
|
||||
[ [ [ ] like ] [ [ drop f ] ] if* ] bi* ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
SYNTAX: CATEGORY: parse-category define-category ;
|
||||
|
||||
SYNTAX: CATEGORY-NOT: parse-category define-not-category ;
|
|
@ -4,7 +4,7 @@ USING: combinators.short-circuit sequences io.files
|
|||
io.encodings.ascii kernel values splitting accessors math.parser
|
||||
ascii io assocs strings math namespaces make sorting combinators
|
||||
math.order arrays unicode.normalize unicode.data locals
|
||||
unicode.syntax macros sequences.deep words unicode.breaks
|
||||
macros sequences.deep words unicode.breaks
|
||||
quotations combinators.short-circuit simple-flat-file ;
|
||||
IN: unicode.collation
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@ IN: unicode.data
|
|||
ABOUT: "unicode.data"
|
||||
|
||||
ARTICLE: "unicode.data" "Unicode data tables"
|
||||
"The " { $vocab-link "unicode.data" "unicode.data" } " vocabulary contains core Unicode data tables and code for parsing this from files."
|
||||
"The " { $vocab-link "unicode.data" "unicode.data" } " vocabulary contains core Unicode data tables and code for parsing this from files. The following words access these data tables."
|
||||
{ $subsection canonical-entry }
|
||||
{ $subsection combine-chars }
|
||||
{ $subsection combining-class }
|
||||
|
@ -14,7 +14,11 @@ ARTICLE: "unicode.data" "Unicode data tables"
|
|||
{ $subsection name>char }
|
||||
{ $subsection char>name }
|
||||
{ $subsection property? }
|
||||
{ $subsection load-key-value } ;
|
||||
{ $subsection category }
|
||||
{ $subsection ch>upper }
|
||||
{ $subsection ch>lower }
|
||||
{ $subsection ch>title }
|
||||
{ $subsection special-case } ;
|
||||
|
||||
HELP: canonical-entry
|
||||
{ $values { "char" "a code point" } { "seq" string } }
|
||||
|
@ -48,6 +52,22 @@ HELP: property?
|
|||
{ $values { "char" "a code point" } { "property" string } { "?" "a boolean" } }
|
||||
{ $description "Tests whether the code point is listed under the given property in PropList.txt in the Unicode Character Database." } ;
|
||||
|
||||
HELP: load-key-value
|
||||
{ $values { "filename" string } { "table" "an interval map" } }
|
||||
{ $description "This loads a file that looks like Script.txt in the Unicode Character Database and converts it into an efficient interval map, where the keys are characters and the values are strings for the properties." } ;
|
||||
HELP: category
|
||||
{ $values { "char" "a code point" } { "category" string } }
|
||||
{ $description "Returns the general category of a code point, in the form of a string. This will always be a string within the ASCII range of length two. If the code point is unassigned, then it returns " { $snippet "Cn" } "." } ;
|
||||
|
||||
HELP: ch>upper
|
||||
{ $values { "ch" "a code point" } { "upper" "a code point" } }
|
||||
{ $description "Returns the simple upper-cased version of the code point, if it exists. This does not handle context-sensitive or locale-dependent properties of linguistically accurate case conversion, and does not correctly handle characters which become multiple characters on conversion to this case." } ;
|
||||
|
||||
HELP: ch>lower
|
||||
{ $values { "ch" "a code point" } { "lower" "a code point" } }
|
||||
{ $description "Returns the simple lower-cased version of the code point, if it exists. This does not handle context-sensitive or locale-dependent properties of linguistically accurate case conversion, and does not correctly handle characters which become multiple characters on conversion to this case." } ;
|
||||
|
||||
HELP: ch>title
|
||||
{ $values { "ch" "a code point" } { "title" "a code point" } }
|
||||
{ $description "Returns the simple title-cased version of the code point, if it exists. This does not handle context-sensitive or locale-dependent properties of linguistically accurate case conversion, and does not correctly handle characters which become multiple characters on conversion to this case." } ;
|
||||
|
||||
HELP: special-case
|
||||
{ $values { "ch" "a code point" } { "casing-tuple" { "a tuple, or " { $link f } } } }
|
||||
{ $description "If a code point has special casing behavior, returns a tuple which represents that information." } ;
|
||||
|
|
|
@ -58,7 +58,7 @@ CONSTANT: num-chars HEX: 2FA1E
|
|||
|
||||
PRIVATE>
|
||||
|
||||
: category# ( char -- category )
|
||||
: category# ( char -- n )
|
||||
! There are a few characters that should be Cn
|
||||
! that this gives Cf or Mn
|
||||
! Cf = 26; Mn = 5; Cn = 29
|
||||
|
@ -219,27 +219,3 @@ load-properties to: properties
|
|||
|
||||
[ name>char [ "Invalid character" throw ] unless* ]
|
||||
name>char-hook set-global
|
||||
|
||||
SYMBOL: interned
|
||||
|
||||
: range, ( value key -- )
|
||||
swap interned get
|
||||
[ = ] with find nip 2array , ;
|
||||
|
||||
: expand-ranges ( assoc -- interval-map )
|
||||
[
|
||||
[
|
||||
swap CHAR: . over member? [
|
||||
".." split1 [ hex> ] bi@ 2array
|
||||
] [ hex> ] if range,
|
||||
] assoc-each
|
||||
] { } make <interval-map> ;
|
||||
|
||||
: process-key-value ( ranges -- table )
|
||||
dup values prune interned
|
||||
[ expand-ranges ] with-variable ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: load-key-value ( filename -- table )
|
||||
data process-key-value ;
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: ascii sequences namespaces make unicode.data kernel math arrays
|
||||
locals sorting.insertion accessors assocs math.order combinators
|
||||
unicode.syntax strings sbufs hints combinators.short-circuit vectors ;
|
||||
strings sbufs hints combinators.short-circuit vectors ;
|
||||
IN: unicode.normalize
|
||||
|
||||
<PRIVATE
|
||||
|
|
|
@ -1,17 +1,13 @@
|
|||
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors values kernel sequences assocs io.files
|
||||
io.encodings ascii math.ranges io splitting math.parser
|
||||
namespaces make byte-arrays locals math sets io.encodings.ascii
|
||||
words words.symbol compiler.units arrays interval-maps
|
||||
unicode.data ;
|
||||
USING: values interval-maps simple-flat-file ;
|
||||
IN: unicode.script
|
||||
|
||||
<PRIVATE
|
||||
|
||||
VALUE: script-table
|
||||
|
||||
"vocab:unicode/script/Scripts.txt" load-key-value
|
||||
"vocab:unicode/script/Scripts.txt" load-interval-file
|
||||
to: script-table
|
||||
|
||||
PRIVATE>
|
||||
|
|
|
@ -1,38 +0,0 @@
|
|||
! Copyright (C) 2008 Daniel Ehrenberg.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: unicode.data kernel math sequences parser lexer
|
||||
bit-arrays namespaces make sequences.private arrays quotations
|
||||
assocs classes.predicate math.order strings.parser ;
|
||||
IN: unicode.syntax
|
||||
|
||||
<PRIVATE
|
||||
|
||||
: >category-array ( categories -- bitarray )
|
||||
categories [ swap member? ] with map >bit-array ;
|
||||
|
||||
: as-string ( strings -- bit-array )
|
||||
concat unescape-string ;
|
||||
|
||||
: [category] ( categories -- quot )
|
||||
[
|
||||
[ [ categories member? not ] filter as-string ] keep
|
||||
[ categories member? ] filter >category-array
|
||||
[ dup category# ] % , [ nth-unsafe [ drop t ] ] %
|
||||
\ member? 2array >quotation ,
|
||||
\ if ,
|
||||
] [ ] make ;
|
||||
|
||||
: define-category ( word categories -- )
|
||||
[category] integer swap define-predicate-class ;
|
||||
|
||||
PRIVATE>
|
||||
|
||||
: CATEGORY:
|
||||
CREATE ";" parse-tokens define-category ; parsing
|
||||
|
||||
: seq-minus ( seq1 seq2 -- diff )
|
||||
[ member? not ] curry filter ;
|
||||
|
||||
: CATEGORY-NOT:
|
||||
CREATE ";" parse-tokens
|
||||
categories swap seq-minus define-category ; parsing
|
|
@ -15,7 +15,7 @@ $nl
|
|||
{ $vocab-subsection "Word and grapheme breaks" "unicode.breaks" }
|
||||
{ $vocab-subsection "Unicode normalization" "unicode.normalize" }
|
||||
"The following are mostly for internal use:"
|
||||
{ $vocab-subsection "Unicode syntax" "unicode.syntax" }
|
||||
{ $vocab-subsection "Unicode category syntax" "unicode.categories.syntax" }
|
||||
{ $vocab-subsection "Unicode data tables" "unicode.data" }
|
||||
{ $see-also "ascii" "io.encodings" } ;
|
||||
|
||||
|
|
|
@ -179,7 +179,7 @@ PRIVATE>
|
|||
dup protocol>> '[ _ protocol-port or ] change-port ;
|
||||
|
||||
! Literal syntax
|
||||
: URL" lexer get skip-blank parse-string >url parsed ; parsing
|
||||
SYNTAX: URL" lexer get skip-blank parse-string >url parsed ;
|
||||
|
||||
USING: vocabs vocabs.loader ;
|
||||
|
||||
|
|
|
@ -30,11 +30,11 @@ PREDICATE: value-word < word
|
|||
[ second \ obj>> = ]
|
||||
} 1&& ;
|
||||
|
||||
: VALUE:
|
||||
SYNTAX: VALUE:
|
||||
CREATE-WORD
|
||||
dup t "no-def-strip" set-word-prop
|
||||
T{ value-holder } clone [ obj>> ] curry
|
||||
(( -- value )) define-declared ; parsing
|
||||
(( -- value )) define-declared ;
|
||||
|
||||
M: value-word definer drop \ VALUE: f ;
|
||||
|
||||
|
@ -43,9 +43,9 @@ M: value-word definition drop f ;
|
|||
: set-value ( value word -- )
|
||||
def>> first (>>obj) ;
|
||||
|
||||
: to:
|
||||
SYNTAX: to:
|
||||
scan-word literalize parsed
|
||||
\ set-value parsed ; parsing
|
||||
\ set-value parsed ;
|
||||
|
||||
: get-value ( word -- value )
|
||||
def>> first obj>> ;
|
||||
|
|
|
@ -50,7 +50,7 @@ M: vlist like
|
|||
|
||||
INSTANCE: vlist immutable-sequence
|
||||
|
||||
: VL{ \ } [ >vlist ] parse-literal ; parsing
|
||||
SYNTAX: VL{ \ } [ >vlist ] parse-literal ;
|
||||
|
||||
M: vlist pprint-delims drop \ VL{ \ } ;
|
||||
M: vlist >pprint-sequence ;
|
||||
|
@ -87,7 +87,7 @@ M: valist assoc-like
|
|||
|
||||
INSTANCE: valist assoc
|
||||
|
||||
: VA{ \ } [ >valist ] parse-literal ; parsing
|
||||
SYNTAX: VA{ \ } [ >valist ] parse-literal ;
|
||||
|
||||
M: valist pprint-delims drop \ VA{ \ } ;
|
||||
M: valist >pprint-sequence >alist ;
|
||||
|
|
|
@ -90,14 +90,13 @@ unless
|
|||
|
||||
PRIVATE>
|
||||
|
||||
: COM-INTERFACE:
|
||||
SYNTAX: COM-INTERFACE:
|
||||
scan
|
||||
scan find-com-interface-definition
|
||||
scan string>guid
|
||||
parse-com-functions
|
||||
<com-interface-definition>
|
||||
dup save-com-interface-definition
|
||||
define-words-for-com-interface
|
||||
; parsing
|
||||
define-words-for-com-interface ;
|
||||
|
||||
: GUID: scan string>guid parsed ; parsing
|
||||
SYNTAX: GUID: scan string>guid parsed ;
|
||||
|
|
|
@ -1,19 +1,26 @@
|
|||
! Copyright (C) 2005, 2009 Daniel Ehrenberg
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: kernel sequences unicode.syntax math math.order combinators
|
||||
hints ;
|
||||
USING: kernel sequences unicode.categories.syntax math math.order
|
||||
combinators hints combinators.short-circuit ;
|
||||
IN: xml.char-classes
|
||||
|
||||
CATEGORY: 1.0name-start* Ll Lu Lo Lt Nl \u000559\u0006E5\u0006E6_: ;
|
||||
: 1.0name-start? ( char -- ? )
|
||||
dup 1.0name-start*? [ drop t ]
|
||||
[ HEX: 2BB HEX: 2C1 between? ] if ;
|
||||
CATEGORY: 1.0name-start
|
||||
Ll Lu Lo Lt Nl | {
|
||||
[ HEX: 2BB HEX: 2C1 between? ]
|
||||
[ "\u000559\u0006E5\u0006E6_:" member? ]
|
||||
} 1|| ;
|
||||
|
||||
CATEGORY: 1.0name-char Ll Lu Lo Lt Nl Mc Me Mn Lm Nd _-.\u000387: ;
|
||||
CATEGORY: 1.0name-char
|
||||
Ll Lu Lo Lt Nl Mc Me Mn Lm Nd |
|
||||
"_-.\u000387:" member? ;
|
||||
|
||||
CATEGORY: 1.1name-start Ll Lu Lo Lm Ln Nl _: ;
|
||||
CATEGORY: 1.1name-start
|
||||
Ll Lu Lo Lm Nl |
|
||||
"_:" member? ;
|
||||
|
||||
CATEGORY: 1.1name-char Ll Lu Lo Lm Ln Nl Mc Mn Nd Pc Cf _-.\u0000b7: ;
|
||||
CATEGORY: 1.1name-char
|
||||
Ll Lu Lo Lm Nl Mc Mn Nd Pc Cf |
|
||||
"_-.\u0000b7:" member? ;
|
||||
|
||||
: name-start? ( 1.0? char -- ? )
|
||||
swap [ 1.0name-start? ] [ 1.1name-start? ] if ;
|
||||
|
|
|
@ -26,17 +26,17 @@ M: no-tag summary
|
|||
|
||||
PRIVATE>
|
||||
|
||||
: TAGS:
|
||||
SYNTAX: TAGS:
|
||||
CREATE
|
||||
[ H{ } clone "xtable" set-word-prop ]
|
||||
[ define-tags ] bi ; parsing
|
||||
[ define-tags ] bi ;
|
||||
|
||||
: TAG:
|
||||
scan scan-word parse-definition define-tag ; parsing
|
||||
SYNTAX: TAG:
|
||||
scan scan-word parse-definition define-tag ;
|
||||
|
||||
: XML-NS:
|
||||
SYNTAX: XML-NS:
|
||||
CREATE-WORD (( string -- name )) over set-stack-effect
|
||||
scan '[ f swap _ <name> ] define-memoized ; parsing
|
||||
scan '[ f swap _ <name> ] define-memoized ;
|
||||
|
||||
<PRIVATE
|
||||
|
||||
|
@ -168,11 +168,11 @@ MACRO: interpolate-xml ( xml -- quot )
|
|||
|
||||
PRIVATE>
|
||||
|
||||
: <XML
|
||||
"XML>" [ string>doc ] parse-def ; parsing
|
||||
SYNTAX: <XML
|
||||
"XML>" [ string>doc ] parse-def ;
|
||||
|
||||
: [XML
|
||||
"XML]" [ string>chunk ] parse-def ; parsing
|
||||
SYNTAX: [XML
|
||||
"XML]" [ string>chunk ] parse-def ;
|
||||
|
||||
<PRIVATE
|
||||
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue