Merge branch 'master' of git://factorcode.org/git/factor

db4
Joe Groff 2010-02-03 21:50:36 -08:00
commit 4ba8c6e0b5
162 changed files with 177974 additions and 157514 deletions

1
.gitignore vendored
View File

@ -8,6 +8,7 @@ Factor/factor
*.a
*.dll
*.lib
*.exp
*.res
*.image
*.dylib

View File

@ -61,7 +61,7 @@ DLL_OBJS = vm\os-windows-nt.obj \
.rs.res:
rc $<
all: factor.com factor.exe
all: factor.com factor.exe libfactor-ffi-test.dll
# Link the FFI test library as a DLL from its single object file.
libfactor-ffi-test.dll: vm/ffi_test.obj
link $(LINK_FLAGS) /out:libfactor-ffi-test.dll /dll vm/ffi_test.obj

View File

@ -8,7 +8,21 @@ $nl
"If the sequence is non-empty, outputs the index and value of the closest match, which is either an element for which the quotation output " { $link +eq+ } ", or failing that, the least element for which the quotation output " { $link +lt+ } "."
$nl
"If the sequence is empty, outputs " { $link f } " " { $link f } "." }
{ $notes "If the sequence has at least one element, this word always outputs a valid index, because it finds the closest match, not necessarily an exact one. In this respect its behavior differs from " { $link find } "." } ;
{ $notes "If the sequence has at least one element, this word always outputs a valid index, because it finds the closest match, not necessarily an exact one. In this respect its behavior differs from " { $link find } "." }
{ $examples
"Searching for an integer in a sorted array:"
{ $example
"USING: binary-search math.order prettyprint ;"
"{ -13 -4 1 9 16 17 28 } [ 5 >=< ] search . ."
"1\n2"
}
"Frequently, the quotation passed to " { $link search } " is constructed by " { $link curry } " or " { $link with } " in order to make the search key a parameter:"
{ $example
"USING: binary-search kernel math.order prettyprint ;"
"5 { -13 -4 1 9 16 17 28 } [ <=> ] with search . ."
"1\n2"
}
} ;
{ find find-from find-last find-last find-last-from search } related-words

View File

@ -1,4 +1,4 @@
! Copyright (C) 2007, 2009 Slava Pestov.
! Copyright (C) 2007, 2010 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors cpu.architecture vocabs.loader system
sequences namespaces parser kernel kernel.private classes
@ -33,6 +33,7 @@ enable-optimizer
gc
: compile-unoptimized ( words -- )
[ [ subwords ] map ] keep suffix concat
[ optimized? not ] filter compile ;
"debug-compiler" get [
@ -102,7 +103,7 @@ gc
"." write flush
{
lines prefix suffix unclip new-assoc update
lines prefix suffix unclip new-assoc assoc-union!
word-prop set-word-prop 1array 2array 3array ?nth
} compile-unoptimized

View File

@ -545,7 +545,7 @@ M: quotation '
\ c-to-factor c-to-factor-word set
\ lazy-jit-compile lazy-jit-compile-word set
\ unwind-native-frames unwind-native-frames-word set
[ undefined ] undefined-quot set ;
undefined-def undefined-quot set ;
: emit-special-objects ( -- )
special-objects get keys [ emit-special-object ] each ;

48
basis/compiler/codegen/codegen.factor Normal file → Executable file
View File

@ -5,7 +5,7 @@ kernel kernel.private layouts assocs words summary arrays
combinators classes.algebra alien alien.c-types
alien.strings alien.arrays alien.complex alien.libraries sets libc
continuations.private fry cpu.architecture classes classes.struct locals
source-files.errors slots parser generic.parser
source-files.errors slots parser generic.parser strings
compiler.errors
compiler.alien
compiler.constants
@ -24,24 +24,12 @@ H{ } clone insn-counts set-global
GENERIC: generate-insn ( insn -- )
TUPLE: asm label code calls ;
SYMBOL: calls
: add-call ( word -- )
#! Compile this word later.
calls get push ;
! Mapping _label IDs to label instances
SYMBOL: labels
: init-generator ( -- )
H{ } clone labels set
V{ } clone calls set ;
: generate-insns ( asm -- code )
: generate ( mr -- code )
dup label>> [
init-generator
H{ } clone labels set
instructions>> [
[ class insn-counts get inc-at ]
[ generate-insn ]
@ -49,22 +37,12 @@ SYMBOL: labels
] each
] with-fixup ;
: generate ( mr -- asm )
[
[ label>> ] [ generate-insns ] bi calls get
asm boa
] with-scope ;
: lookup-label ( id -- label )
labels get [ drop <label> ] cache ;
! Special cases
M: ##no-tco generate-insn drop ;
M: ##call generate-insn word>> [ add-call ] [ %call ] bi ;
M: ##jump generate-insn word>> [ add-call ] [ %jump ] bi ;
M: _dispatch-label generate-insn
label>> lookup-label
cell 0 <repetition> %
@ -104,6 +82,8 @@ CODEGEN: ##peek %peek
CODEGEN: ##replace %replace
CODEGEN: ##inc-d %inc-d
CODEGEN: ##inc-r %inc-r
CODEGEN: ##call %call
CODEGEN: ##jump %jump
CODEGEN: ##return %return
CODEGEN: ##slot %slot
CODEGEN: ##slot-imm %slot-imm
@ -409,20 +389,28 @@ M: c-type-name flatten-value-type c-type flatten-value-type ;
: box-return* ( node -- )
return>> [ ] [ box-return %push-stack ] if-void ;
! Checks whether the given symbol(s) can be resolved in dll with dlsym.
! The methods dispatch on the symbols argument, not on dll:
!  - string: a single symbol name; outputs whatever dlsym outputs.
!  - array: a list of candidate names; true if any one of them resolves.
GENERIC# dlsym-valid? 1 ( symbols dll -- ? )
M: string dlsym-valid? dlsym ;
M: array dlsym-valid? '[ _ dlsym ] any? ;
: check-dlsym ( symbols dll -- )
dup dll-valid? [
dupd '[ _ dlsym ] any?
dupd dlsym-valid?
[ drop ] [ compiling-word get no-such-symbol ] if
] [
dll-path compiling-word get no-such-library drop
] if ;
: stdcall-mangle ( symbol params -- symbol )
parameters>> parameter-offsets drop number>string "@" glue ;
: stdcall-mangle ( params -- symbols )
#! Outputs a 3-element array of candidate symbol names built from
#! the function name: the plain name, "name@n" and "_name@n".
#! n is the number left by "parameter-offsets drop", converted to
#! a string (presumably the total parameter size -- TODO confirm).
[ function>> ] [ parameters>> parameter-offsets drop number>string ] bi
[ drop ] [ "@" glue ] [ "@" glue "_" prepend ] 2tri
3array ;
: alien-invoke-dlsym ( params -- symbols dll )
[ [ function>> dup ] keep stdcall-mangle 2array ]
[ library>> library dup [ dll>> ] when ]
[ dup abi>> "stdcall" = [ stdcall-mangle ] [ function>> ] if ]
[ library>> load-library ]
bi 2dup check-dlsym ;
M: ##alien-invoke generate-insn

View File

@ -1,7 +1,7 @@
USING: assocs compiler.cfg.builder compiler.cfg.optimizer
compiler.errors compiler.tree.builder compiler.tree.optimizer
compiler.units help.markup help.syntax io parser quotations
sequences words ;
compiler.units compiler.codegen help.markup help.syntax io
parser quotations sequences words ;
IN: compiler
HELP: enable-optimizer
@ -21,8 +21,6 @@ ARTICLE: "compiler-usage" "Calling the optimizing compiler"
ARTICLE: "compiler-impl" "Compiler implementation"
"The " { $vocab-link "compiler" } " vocabulary, in addition to providing the user-visible words of the compiler, implements the main compilation loop."
$nl
"Words are added to the " { $link compile-queue } " variable as needed and compiled."
{ $subsections compile-queue }
"Once compiled, a word is added to the assoc stored in the " { $link compiled } " variable. When compilation is complete, this assoc is passed to " { $link modify-code-heap } "."
$nl
"The " { $link compile-word } " word performs the actual task of compiling an individual word. The process proceeds as follows:"
@ -30,7 +28,7 @@ $nl
{ "The " { $link frontend } " word calls " { $link build-tree } ". If this fails, the error is passed to " { $link deoptimize } ". The logic for ignoring certain compile errors generated for inline words and macros is located here. If the error is not ignorable, it is added to the global " { $link compiler-errors } " assoc (see " { $link "compiler-errors" } ")." }
{ "If the word contains a breakpoint, compilation ends here. Otherwise, all remaining steps execute until machine code is generated. Any further errors thrown by the compiler are not reported as compile errors, but instead are ordinary exceptions. This is because they indicate bugs in the compiler, not errors in user code." }
{ "The " { $link frontend } " word then calls " { $link optimize-tree } ". This produces the final optimized tree IR, and this stage of the compiler is complete." }
{ "The " { $link backend } " word calls " { $link build-cfg } " followed by " { $link optimize-cfg } " and a few other stages. Finally, it calls " { $link save-asm } ", and adds any uncompiled words called by this word to the compilation queue with " { $link compile-dependency } "." }
{ "The " { $link backend } " word calls " { $link build-cfg } " followed by " { $link optimize-cfg } " and a few other stages. Finally, it calls " { $link generate } "." }
}
"If compilation fails, the word is stored in the " { $link compiled } " assoc with a value of " { $link f } ". This causes the VM to compile the word with the non-optimizing compiler."
$nl

View File

@ -1,8 +1,8 @@
! Copyright (C) 2004, 2010 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors kernel namespaces arrays sequences io words fry
continuations vocabs assocs dlists definitions math graphs generic
generic.single combinators deques search-deques macros
continuations vocabs assocs definitions math graphs generic
generic.single combinators combinators.smart macros
source-files.errors combinators.short-circuit classes.algebra
stack-checker stack-checker.dependencies stack-checker.inlining
@ -21,29 +21,15 @@ compiler.cfg.mr
compiler.codegen ;
IN: compiler
SYMBOL: compile-queue
SYMBOL: compiled
: compile? ( word -- ? )
#! Don't attempt to compile certain words.
{
[ "forgotten" word-prop ]
[ compiled get key? ]
[ inlined-block? ]
} 1|| not ;
: queue-compile ( word -- )
dup compile? [ compile-queue get push-front ] [ drop ] if ;
: recompile-callers? ( word -- ? )
changed-effects get key? ;
: recompile-callers ( word -- )
#! If a word's stack effect changed, recompile all words
#! that have compiled calls to it.
dup recompile-callers?
[ effect-dependencies-of keys [ queue-compile ] each ] [ drop ] if ;
: compiler-message ( string -- )
"trace-compilation" get [ global [ print flush ] bind ] [ drop ] if ;
@ -54,7 +40,7 @@ SYMBOL: compiled
GENERIC: no-compile? ( word -- ? )
M: method-body no-compile? "method-generic" word-prop no-compile? ;
M: method no-compile? "method-generic" word-prop no-compile? ;
M: predicate-engine-word no-compile? "owner-generic" word-prop no-compile? ;
@ -63,7 +49,7 @@ M: word no-compile?
GENERIC: combinator? ( word -- ? )
M: method-body combinator? "method-generic" word-prop combinator? ;
M: method combinator? "method-generic" word-prop combinator? ;
M: predicate-engine-word combinator? "owner-generic" word-prop combinator? ;
@ -81,7 +67,6 @@ M: word combinator? inline? ;
#! Recompile callers if the word's stack effect changed, then
#! save the word's dependencies so that if they change, the
#! word can get recompiled too.
[ recompile-callers ]
[ compiled-unxref ]
[
dup crossref? [
@ -89,7 +74,7 @@ M: word combinator? inline? ;
[ conditional-dependencies get set-dependency-checks ]
bi
] [ drop ] if
] tri ;
] bi ;
: deoptimize-with ( word def -- * )
#! If the word failed to infer, compile it with the
@ -138,29 +123,10 @@ M: word combinator? inline? ;
contains-breakpoints? [ nip deoptimize* ] [ drop ] if
] [ deoptimize* ] if ;
: compile-dependency ( word -- )
#! If a word calls an unoptimized word, try to compile the callee.
dup optimized? [ drop ] [ queue-compile ] if ;
! Only switch this off for debugging.
SYMBOL: compile-dependencies?
t compile-dependencies? set-global
: compile-dependencies ( asm -- )
compile-dependencies? get
[ calls>> [ compile-dependency ] each ] [ drop ] if ;
: save-asm ( asm -- )
[ [ code>> ] [ label>> ] bi compiled get set-at ]
[ compile-dependencies ]
bi ;
: backend ( tree word -- )
build-cfg [
[ optimize-cfg build-mr ] with-cfg
generate
save-asm
[ generate ] [ label>> ] bi compiled get set-at
] each ;
: compile-word ( word -- )
@ -175,9 +141,6 @@ t compile-dependencies? set-global
} cleave
] with-return ;
: compile-loop ( deque -- )
[ compile-word yield-hook get call( -- ) ] slurp-deque ;
SINGLETON: optimizing-compiler
M: optimizing-compiler update-call-sites ( class generic -- words )
@ -189,22 +152,20 @@ M: optimizing-compiler update-call-sites ( class generic -- words )
] assoc-filter keys ;
M: optimizing-compiler recompile ( words -- alist )
[
<hashed-dlist> compile-queue set
H{ } clone compiled set
[
[ queue-compile ]
[ subwords [ compile-dependency ] each ] bi
] each
compile-queue get compile-loop
H{ } clone compiled [
[ compile? ] filter
[ compile-word yield-hook get call( -- ) ] each
compiled get >alist
] with-scope
] with-variable
"--- compile done" compiler-message ;
M: optimizing-compiler to-recompile ( -- words )
changed-definitions get compiled-usages
maybe-changed get outdated-conditional-usages
append assoc-combine keys ;
[
changed-effects get new-words get assoc-diff outdated-effect-usages
changed-definitions get new-words get assoc-diff outdated-definition-usages
maybe-changed get new-words get assoc-diff outdated-conditional-usages
changed-definitions get [ drop word? ] assoc-filter 1array
] append-outputs assoc-combine keys ;
M: optimizing-compiler process-forgotten-words
[ delete-compiled-xref ] each ;

View File

@ -22,9 +22,13 @@ generic-call-site-crossref [ H{ } clone ] initialize
: conditional-dependencies-of ( word -- assoc )
effect-dependencies-of [ nip conditional-dependency dependency>= ] assoc-filter ;
: compiled-usages ( assoc -- assocs )
: outdated-definition-usages ( assoc -- assocs )
[ drop word? ] assoc-filter
[ [ drop definition-dependencies-of ] { } assoc>map ] keep suffix ;
[ drop definition-dependencies-of ] { } assoc>map ;
: outdated-effect-usages ( assoc -- assocs )
#! Keeps only the entries whose key is a word, then maps each
#! remaining key to its effect-dependencies-of assoc and collects
#! the results into an array (one assoc per word).
[ drop word? ] assoc-filter
[ drop effect-dependencies-of ] { } assoc>map ;
: dependencies-satisfied? ( word cache -- ? )
[ "dependency-checks" word-prop ] dip

View File

@ -5,7 +5,7 @@ sequences vocabs words tools.test tools.test.private ;
IN: compiler.test
: decompile ( word -- )
dup def>> 2array 1array modify-code-heap ;
dup def>> 2array 1array t t modify-code-heap ;
: recompile-all ( -- )
all-words compile ;

5
basis/compiler/tests/alien.factor Normal file → Executable file
View File

@ -556,6 +556,9 @@ FUNCTION: test_struct_14 ffi_test_44 ( ) ; inline
[ ] [ stack-frame-bustage 2drop ] unit-test
! C99 tests
os windows? [
FUNCTION: complex-float ffi_test_45 ( int x ) ;
[ C{ 3.0 0.0 } ] [ 3 ffi_test_45 ] unit-test
@ -585,6 +588,8 @@ FUNCTION: short ffi_test_48 ( bool-field-test x ) ;
ffi_test_48
] unit-test
] unless
! Regression: calling an undefined function would raise a protection fault
FUNCTION: void this_does_not_exist ( ) ;

View File

@ -8,8 +8,8 @@ IN: compiler.tests.low-level-ir
: compile-cfg ( cfg -- word )
gensym
[ build-mr generate code>> ] dip
[ associate >alist modify-code-heap ] keep ;
[ build-mr generate ] dip
[ associate >alist t t modify-code-heap ] keep ;
: compile-test-cfg ( -- word )
cfg new 0 get >>entry

View File

@ -77,8 +77,8 @@ M: integer test-7 + ;
! Indirect dependency on an unoptimized word
: test-9 ( -- ) ;
<< SYMBOL: quot
[ test-9 ] quot set-global >>
MACRO: test-10 ( -- quot ) quot get ;
[ test-9 ] quot set-global
MACRO: test-10 ( -- quot ) quot get ; >>
: test-11 ( -- ) test-10 ;
[ ] [ test-11 ] unit-test

View File

@ -3,7 +3,7 @@ IN: compiler.tests.redefine13
: breakage-word ( a b -- c ) + ;
MACRO: breakage-macro ( a -- ) '[ _ breakage-word ] ;
<< MACRO: breakage-macro ( a -- ) '[ _ breakage-word ] ; >>
GENERIC: breakage-caller ( a -- c )

View File

@ -0,0 +1,10 @@
USING: kernel tools.test definitions compiler.units ;
IN: compiler.tests.redefine21
[ ] [ : a ( -- ) ; << : b ( quot -- ) call a ; inline >> [ ] b ] unit-test
[ ] [ [ { a b } forget-all ] with-compilation-unit ] unit-test
[ ] [ : A ( -- ) ; << : B ( -- ) A ; inline >> B ] unit-test
[ ] [ [ { A B } forget-all ] with-compilation-unit ] unit-test

View File

@ -5,7 +5,7 @@ IN: compiler.tests.stack-trace
: symbolic-stack-trace ( -- newseq )
error-continuation get call>> callstack>array
2 group flip first ;
3 group flip first ;
: foo ( -- * ) 3 throw 7 ;
: bar ( -- * ) foo 4 ;

View File

@ -162,7 +162,7 @@ SYMBOL: node-count
word>> {
{ [ dup "intrinsic" word-prop ] [ intrinsics-called ] }
{ [ dup generic? ] [ generics-called ] }
{ [ dup method-body? ] [ methods-called ] }
{ [ dup method? ] [ methods-called ] }
[ words-called ]
} cond get inc-at
] [ drop ] if

View File

@ -78,7 +78,7 @@ TUPLE: a-tuple x ;
[ ] [ "IN: compiler.tree.propagation.call-effect.tests USE: math : call(-redefine-test ( a -- c ) 1 + ;" eval( -- ) ] unit-test
[ 1 3 test-quotatation inline-cache-invalidation-test ] [ T{ wrong-values f (( a b -- c )) } = ] must-fail-with
[ 1 3 test-quotatation inline-cache-invalidation-test ] [ T{ wrong-values f [ call(-redefine-test ] (( a b -- c )) } = ] must-fail-with
! See if redefining a tuple class bumps effect counter
TUPLE: my-tuple a b c ;

View File

@ -1,7 +1,7 @@
! Copyright (C) 2009, 2010 Slava Pestov, Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays combinators combinators.private effects
fry kernel kernel.private make sequences continuations
fry kernel kernel.private make namespaces sequences continuations
quotations words math stack-checker stack-checker.dependencies
combinators.short-circuit stack-checker.transforms
compiler.tree.propagation.info
@ -63,7 +63,11 @@ M: compose cached-effect
[ first>> ] [ second>> ] bi [ cached-effect ] bi@ compose-effects* ;
: safe-infer ( quot -- effect )
[ [ infer ] [ 2drop +unknown+ ] recover ] without-dependencies ;
! Save and restore error variables here, so that we don't
! pollute words such as :error and :c for the user.
error get-global error-continuation get-global
[ [ [ infer ] [ 2drop +unknown+ ] recover ] without-dependencies ] 2dip
[ error set-global ] [ error-continuation set-global ] bi* ;
: cached-effect-valid? ( quot -- ? )
cache-counter>> effect-counter eq? ; inline
@ -81,17 +85,9 @@ M: quotation cached-effect
over +unknown+ eq?
[ 2drop f ] [ [ { effect } declare ] dip effect<= ] if ; inline
: (call-effect-slow>quot) ( in out effect -- quot )
[
[ [ datastack ] dip dip ] %
[ [ , ] bi@ \ check-datastack , ] dip
'[ _ wrong-values ] , \ unless ,
] [ ] make ;
: call-effect-slow>quot ( effect -- quot )
[ in>> length ] [ out>> length ] [ ] tri
[ (call-effect-slow>quot) ] keep add-effect-input
[ call-effect-unsafe ] 2curry ;
[ \ call-effect def>> curry ] [ add-effect-input ] bi
'[ _ _ call-effect-unsafe ] ;
: call-effect-slow ( quot effect -- ) drop call ;
@ -118,7 +114,10 @@ M: quotation cached-effect
[ '[ _ execute ] ] dip call-effect-slow ; inline
: execute-effect-unsafe? ( word effect -- ? )
over optimized? [ [ stack-effect ] dip effect<= ] [ 2drop f ] if ; inline
over optimized?
[ [ stack-effect { effect } declare ] dip effect<= ]
[ 2drop f ]
if ; inline
: execute-effect-fast ( word effect inline-cache -- )
2over execute-effect-unsafe?

View File

@ -434,6 +434,7 @@ HOOK: %set-alien-double cpu ( ptr offset value -- )
HOOK: %set-alien-vector cpu ( ptr offset value rep -- )
HOOK: %alien-global cpu ( dst symbol library -- )
HOOK: %vm-field cpu ( dst fieldname -- )
HOOK: %vm-field-ptr cpu ( dst fieldname -- )
HOOK: %allot cpu ( dst size class temp -- )

View File

@ -1,4 +1,4 @@
! Copyright (C) 2005, 2009 Slava Pestov.
! Copyright (C) 2005, 2010 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors assocs sequences kernel combinators make math
math.order math.ranges system namespaces locals layouts words
@ -57,10 +57,11 @@ CONSTANT: vm-reg 15
: %load-vm-addr ( reg -- ) vm-reg MR ;
: %load-vm-field-addr ( reg symbol -- )
[ vm-reg ] dip vm-field-offset ADDI ;
M: ppc %vm-field ( dst field -- )
[ vm-reg ] dip vm-field-offset LWZ ;
M: ppc %vm-field-ptr ( dst field -- ) %load-vm-field-addr ;
M: ppc %vm-field-ptr ( dst field -- )
[ vm-reg ] dip vm-field-offset ADDI ;
GENERIC: loc-reg ( loc -- reg )
@ -383,7 +384,7 @@ M: ppc %set-alien-float -rot STFS ;
M: ppc %set-alien-double -rot STFD ;
: load-zone-ptr ( reg -- )
"nursery" %load-vm-field-addr ;
"nursery" %vm-field-ptr ;
: load-allot-ptr ( nursery-ptr allot-ptr -- )
[ drop load-zone-ptr ] [ swap 0 LWZ ] 2bi ;
@ -601,26 +602,19 @@ M: ppc %push-stack ( -- )
ds-reg ds-reg 4 ADDI
int-regs return-reg ds-reg 0 STW ;
:: %load-context-datastack ( dst -- )
! Load context struct
dst "ctx" %vm-field-ptr
dst dst 0 LWZ
! Load context datastack pointer
dst dst "datastack" context-field-offset ADDI ;
M: ppc %push-context-stack ( -- )
11 %load-context-datastack
12 11 0 LWZ
11 "ctx" %vm-field
12 11 "datastack" context-field-offset LWZ
12 12 4 ADDI
12 11 0 STW
12 11 "datastack" context-field-offset STW
int-regs return-reg 12 0 STW ;
M: ppc %pop-context-stack ( -- )
11 %load-context-datastack
12 11 0 LWZ
11 "ctx" %vm-field
12 11 "datastack" context-field-offset LWZ
int-regs return-reg 12 0 LWZ
12 12 4 SUBI
12 11 0 STW ;
12 11 "datastack" context-field-offset STW ;
M: ppc %unbox ( n rep func -- )
! Value must be in r3
@ -682,19 +676,17 @@ M: ppc %box-large-struct ( n c-type -- )
"from_value_struct" f %alien-invoke ;
M:: ppc %restore-context ( temp1 temp2 -- )
temp1 "ctx" %load-vm-field-addr
temp1 temp1 0 LWZ
temp1 "ctx" %vm-field
temp2 1 stack-frame get total-size>> ADDI
temp2 temp1 "callstack-bottom" context-field-offset STW
ds-reg temp1 8 LWZ
rs-reg temp1 12 LWZ ;
ds-reg temp1 "datastack" context-field-offset LWZ
rs-reg temp1 "retainstack" context-field-offset LWZ ;
M:: ppc %save-context ( temp1 temp2 -- )
temp1 "ctx" %load-vm-field-addr
temp1 temp1 0 LWZ
1 temp1 0 STW
ds-reg temp1 8 STW
rs-reg temp1 12 STW ;
temp1 "ctx" %vm-field
1 temp1 "callstack-top" context-field-offset STW
ds-reg temp1 "datastack" context-field-offset STW
rs-reg temp1 "retainstack" context-field-offset STW ;
M: ppc %alien-invoke ( symbol dll -- )
[ 11 ] 2dip %alien-global 11 MTLR BLRL ;

37
basis/cpu/x86/32/32.factor Normal file → Executable file
View File

@ -27,6 +27,9 @@ M: x86.32 temp-reg ECX ;
M: x86.32 %mov-vm-ptr ( reg -- )
0 MOV 0 rc-absolute-cell rel-vm ;
M: x86.32 %vm-field ( dst field -- )
[ 0 [] MOV ] dip vm-field-offset rc-absolute-cell rel-vm ;
M: x86.32 %vm-field-ptr ( dst field -- )
[ 0 MOV ] dip vm-field-offset rc-absolute-cell rel-vm ;
@ -102,6 +105,9 @@ M: x86.32 %prologue ( n -- )
0 PUSH rc-absolute-cell rel-this
3 cells - decr-stack-reg ;
M: x86.32 %prepare-jump
pic-tail-reg 0 MOV xt-tail-pic-offset rc-absolute-cell rel-here ;
M: x86.32 %load-param-reg
stack-params assert=
[ [ EAX ] dip local@ MOV ] dip
@ -160,10 +166,10 @@ M: x86.32 %pop-stack ( n -- )
EAX swap ds-reg reg-stack MOV ;
M: x86.32 %pop-context-stack ( -- )
temp-reg %load-context-datastack
EAX temp-reg [] MOV
temp-reg "ctx" %vm-field
EAX temp-reg "datastack" context-field-offset [+] MOV
EAX EAX [] MOV
temp-reg [] bootstrap-cell SUB ;
temp-reg "datastack" context-field-offset [+] bootstrap-cell SUB ;
: call-unbox-func ( func -- )
4 save-vm-ptr
@ -287,6 +293,15 @@ M:: x86.32 %binary-float-function ( dst src1 src2 func -- )
func "libm" load-library %alien-invoke
dst float-function-return ;
: stdcall? ( params -- ? )
#! True if the abi slot of params is the string "stdcall".
abi>> "stdcall" = ;
: funny-large-struct-return? ( params -- ? )
#! MINGW ABI incompatibility disaster
#! True only when both conditions hold:
#!  - the return type is a large struct, and
#!  - the abi is "mingw", or "os windows?" is false.
[ return>> large-struct? ]
[ abi>> "mingw" = os windows? not or ]
bi and ;
M: x86.32 %cleanup ( params -- )
#! a) If we just called an stdcall function in Windows, it
#! cleaned up the stack frame for us. But we don't want that
@ -294,13 +309,8 @@ M: x86.32 %cleanup ( params -- )
#! b) If we just called a function returning a struct, we
#! have to fix ESP.
{
{
[ dup abi>> "stdcall" = ]
[ drop ESP stack-frame get params>> SUB ]
} {
[ dup return>> large-struct? ]
[ drop EAX PUSH ]
}
{ [ dup stdcall? ] [ drop ESP stack-frame get params>> SUB ] }
{ [ dup funny-large-struct-return? ] [ drop EAX PUSH ] }
[ drop ]
} cond ;
@ -323,11 +333,8 @@ M: x86.32 callback-return-rewind ( params -- n )
#! b) If the callback is returning a large struct, we have
#! to fix ESP.
{
{ [ dup abi>> "stdcall" = ] [
<alien-stack-frame>
[ params>> ] [ return>> ] bi +
] }
{ [ dup return>> large-struct? ] [ drop 4 ] }
{ [ dup stdcall? ] [ <alien-stack-frame> [ params>> ] [ return>> ] bi + ] }
{ [ dup funny-large-struct-return? ] [ drop 4 ] }
[ drop 0 ]
} cond ;

View File

@ -36,6 +36,11 @@ IN: bootstrap.x86
ESP stack-frame-size 3 bootstrap-cells - SUB
] jit-prolog jit-define
[
temp3 0 MOV rc-absolute-cell rt-here jit-rel
0 JMP rc-relative rt-entry-point-pic-tail jit-rel
] jit-word-jump jit-define
: jit-load-vm ( -- )
vm-reg 0 MOV 0 rc-absolute-cell jit-vm ;

View File

@ -42,17 +42,23 @@ M: x86.64 machine-registers
M: x86.64 %mov-vm-ptr ( reg -- )
vm-reg MOV ;
M: x86.64 %vm-field ( dst field -- )
#! Emits a MOV that loads into dst the value stored at the
#! field's vm-field-offset relative to vm-reg.
[ vm-reg ] dip vm-field-offset [+] MOV ;
M: x86.64 %vm-field-ptr ( dst field -- )
#! Emits a LEA that puts the address of the field (vm-reg plus
#! the field's vm-field-offset) into dst; unlike %vm-field, the
#! field's contents are not loaded.
[ vm-reg ] dip vm-field-offset [+] LEA ;
: param@ ( n -- op ) reserved-stack-space + stack@ ;
M: x86.64 %prologue ( n -- )
temp-reg 0 MOV rc-absolute-cell rel-this
temp-reg -7 [] LEA
dup PUSH
temp-reg PUSH
stack-reg swap 3 cells - SUB ;
M: x86.64 %prepare-jump
pic-tail-reg xt-tail-pic-offset [] LEA ;
: load-cards-offset ( dst -- )
0 MOV rc-absolute-cell rel-cards-offset ;
@ -104,10 +110,10 @@ M: x86.64 %pop-stack ( n -- )
param-reg-0 swap ds-reg reg-stack MOV ;
M: x86.64 %pop-context-stack ( -- )
temp-reg %load-context-datastack
param-reg-0 temp-reg [] MOV
temp-reg "ctx" %vm-field
param-reg-0 temp-reg "datastack" context-field-offset [+] MOV
param-reg-0 param-reg-0 [] MOV
temp-reg [] bootstrap-cell SUB ;
temp-reg "datastack" context-field-offset [+] bootstrap-cell SUB ;
M:: x86.64 %unbox ( n rep func -- )
param-reg-1 %mov-vm-ptr

View File

@ -28,7 +28,7 @@ IN: bootstrap.x86
[
! load entry point
safe-reg 0 MOV rc-absolute-cell rt-this jit-rel
safe-reg -7 [] LEA
! save stack frame size
stack-frame-size PUSH
! push entry point
@ -37,6 +37,11 @@ IN: bootstrap.x86
RSP stack-frame-size 3 bootstrap-cells - SUB
] jit-prolog jit-define
[
temp3 5 [] LEA
0 JMP rc-relative rt-entry-point-pic-tail jit-rel
] jit-word-jump jit-define
: jit-load-context ( -- )
ctx-reg vm-reg vm-context-offset [+] MOV ;

View File

@ -76,11 +76,6 @@ big-endian off
ds-reg [] temp0 MOV
] jit-push jit-define
[
temp3 0 MOV rc-absolute-cell rt-here jit-rel
0 JMP rc-relative rt-entry-point-pic-tail jit-rel
] jit-word-jump jit-define
[
0 CALL rc-relative rt-entry-point-pic jit-rel
] jit-word-call jit-define

View File

@ -88,8 +88,10 @@ M: x86 %call ( word -- ) 0 CALL rc-relative rel-word-pic ;
#! See the comment in vm/cpu-x86.hpp
4 1 + ; inline
HOOK: %prepare-jump cpu ( -- )
M: x86 %jump ( word -- )
pic-tail-reg 0 MOV xt-tail-pic-offset rc-absolute-cell rel-here
%prepare-jump
0 JMP rc-relative rel-word-pic-tail ;
M: x86 %jump-label ( label -- ) 0 JMP rc-relative label-fixup ;
@ -474,17 +476,10 @@ M: x86 %push-stack ( -- )
ds-reg cell ADD
ds-reg [] int-regs return-reg MOV ;
:: %load-context-datastack ( dst -- )
! Load context struct
dst "ctx" %vm-field-ptr
dst dst [] MOV
! Load context datastack pointer
dst "datastack" context-field-offset ADD ;
M: x86 %push-context-stack ( -- )
temp-reg %load-context-datastack
temp-reg [] bootstrap-cell ADD
temp-reg temp-reg [] MOV
temp-reg "ctx" %vm-field
temp-reg "datastack" context-field-offset [+] bootstrap-cell ADD
temp-reg temp-reg "datastack" context-field-offset [+] MOV
temp-reg [] int-regs return-reg MOV ;
M: x86 %epilogue ( n -- ) cell - incr-stack-reg ;
@ -1409,8 +1404,7 @@ M: x86 %loop-entry 16 code-alignment [ NOP ] times ;
M:: x86 %restore-context ( temp1 temp2 -- )
#! Load Factor stack pointers on entry from C to Factor.
#! Also save callstack bottom!
temp1 "ctx" %vm-field-ptr
temp1 temp1 [] MOV
temp1 "ctx" %vm-field
temp2 stack-reg stack-frame get total-size>> cell - [+] LEA
temp1 "callstack-bottom" context-field-offset [+] temp2 MOV
ds-reg temp1 "datastack" context-field-offset [+] MOV
@ -1420,8 +1414,7 @@ M:: x86 %save-context ( temp1 temp2 -- )
#! Save Factor stack pointers in case the C code calls a
#! callback which does a GC, which must reliably trace
#! all roots.
temp1 "ctx" %vm-field-ptr
temp1 temp1 [] MOV
temp1 "ctx" %vm-field
temp2 stack-reg cell neg [+] LEA
temp1 "callstack-top" context-field-offset [+] temp2 MOV
temp1 "datastack" context-field-offset [+] ds-reg MOV

View File

@ -4,36 +4,36 @@ USING: accessors kernel continuations fry words ;
IN: db.errors
ERROR: db-error ;
ERROR: sql-error location ;
TUPLE: sql-error location ;
ERROR: bad-schema ;
ERROR: sql-unknown-error < sql-error message ;
TUPLE: sql-unknown-error < sql-error message ;
: <sql-unknown-error> ( message -- error )
\ sql-unknown-error new
swap >>message ;
ERROR: sql-table-exists < sql-error table ;
TUPLE: sql-table-exists < sql-error table ;
: <sql-table-exists> ( table -- error )
\ sql-table-exists new
swap >>table ;
ERROR: sql-table-missing < sql-error table ;
TUPLE: sql-table-missing < sql-error table ;
: <sql-table-missing> ( table -- error )
\ sql-table-missing new
swap >>table ;
ERROR: sql-syntax-error < sql-error message ;
TUPLE: sql-syntax-error < sql-error message ;
: <sql-syntax-error> ( message -- error )
\ sql-syntax-error new
swap >>message ;
ERROR: sql-function-exists < sql-error message ;
TUPLE: sql-function-exists < sql-error message ;
: <sql-function-exists> ( message -- error )
\ sql-function-exists new
swap >>message ;
ERROR: sql-function-missing < sql-error message ;
TUPLE: sql-function-missing < sql-error message ;
: <sql-function-missing> ( message -- error )
\ sql-function-missing new
swap >>message ;

View File

@ -34,7 +34,7 @@ PostgresqlSqlError = (TableError | FunctionError | SyntaxError | UnknownError)
;EBNF
ERROR: parse-postgresql-location column line text ;
TUPLE: parse-postgresql-location column line text ;
C: <parse-postgresql-location> parse-postgresql-location
EBNF: parse-postgresql-line-error

View File

@ -11,17 +11,12 @@ IN: db.sqlite.lib
ERROR: sqlite-error < db-error n string ;
ERROR: sqlite-sql-error < sql-error n string ;
: <sqlite-sql-error> ( n string -- error )
\ sqlite-sql-error new
swap >>string
swap >>n ;
: throw-sqlite-error ( n -- * )
dup sqlite-error-messages nth sqlite-error ;
: sqlite-statement-error ( -- * )
SQLITE_ERROR
db-connection get handle>> sqlite3_errmsg <sqlite-sql-error> throw ;
db-connection get handle>> sqlite3_errmsg sqlite-sql-error ;
: sqlite-check-result ( n -- )
{

View File

@ -236,7 +236,10 @@ M: redefine-error error.
def>> . ;
M: undefined summary
drop "Calling a deferred word before it has been defined" ;
word>> undefined?
"Cannot execute a deferred word before it has been defined"
"Cannot execute a word before it has been compiled"
? ;
M: no-compilation-unit error.
"Attempting to define " write
@ -336,7 +339,7 @@ M: check-mixin-class summary drop "Not a mixin class" ;
M: not-found-in-roots summary drop "Cannot resolve vocab: path" ;
M: wrong-values summary drop "Quotation called with wrong stack effect" ;
M: wrong-values summary drop "Quotation's stack effect does not match call site" ;
M: stack-effect-omits-dashes summary drop "Stack effect must contain “--”" ;

View File

@ -39,7 +39,7 @@ TUPLE: consultation group class quot loc ;
[ class>> swap first create-method dup fake-definition ] keep
[ drop ] [ "consultation" set-word-prop ] 2bi ;
PREDICATE: consult-method < method-body "consultation" word-prop ;
PREDICATE: consult-method < method "consultation" word-prop ;
M: consult-method reset-word
[ call-next-method ] [ f "consultation" set-word-prop ] bi ;

View File

@ -37,7 +37,7 @@ ARTICLE: "eval-vocabs" "Evaluating strings with a different vocabulary search pa
(eval)
with-file-vocabs
}
"Code in the listener tool starts out with a different initial search path, with more vocabularies are available by default. Strings of code can be evaluated in this search path by using " { $link (eval) } " with a different combinator:"
"Code in the listener tool starts out with a different initial search path, with more vocabularies available by default. Strings of code can be evaluated in this search path by using " { $link (eval) } " with a different combinator:"
{ $subsections
with-interactive-vocabs
}

View File

@ -58,7 +58,7 @@ C: <ftp-disconnect> ftp-disconnect
send-response ;
: serving? ( path -- ? )
normalize-path server get serving-directory>> head? ;
resolve-symlinks server get serving-directory>> head? ;
: can-serve-directory? ( path -- ? )
{ [ exists? ] [ file-info directory? ] [ serving? ] } 1&& ;
@ -343,7 +343,7 @@ M: ftp-server handle-client* ( server -- )
: <ftp-server> ( directory port -- server )
latin1 ftp-server new-threaded-server
swap >>insecure
swap normalize-path >>serving-directory
swap resolve-symlinks >>serving-directory
"ftp.server" >>name
5 minutes >>timeout ;

View File

@ -37,7 +37,7 @@ M: array (fake-quotations>)
[ [ (fake-quotations>) ] each ] { } make , ;
M: fake-call-next-method (fake-quotations>)
drop method-body get literalize , \ (call-next-method) , ;
drop \ method get literalize , \ (call-next-method) , ;
M: object (fake-quotations>) , ;
@ -74,7 +74,7 @@ FUNCTOR-SYNTAX: MIXIN:
FUNCTOR-SYNTAX: M:
scan-param suffix!
scan-param suffix!
[ create-method-in dup method-body set ] append!
[ create-method-in dup \ method set ] append!
parse-definition*
\ define* suffix! ;

View File

@ -28,10 +28,10 @@ TUPLE: action rest init authorize display validate submit ;
action new-action ;
: merge-forms ( form -- )
form get
[ [ errors>> ] bi@ push-all ]
[ [ values>> ] bi@ swap update ]
[ swap validation-failed>> >>validation-failed drop ]
[ form get ] dip
[ [ errors>> ] bi@ append! drop ]
[ [ values>> ] bi@ assoc-union! drop ]
[ validation-failed>> >>validation-failed drop ]
2tri ;
: set-nested-form ( form name -- )

View File

@ -136,7 +136,7 @@ CHLOE: form
XML> body>> clone ;
: add-tag-attrs ( attrs tag -- )
attrs>> swap update ;
attrs>> swap assoc-union! drop ;
CHLOE: button
button-tag-markup

View File

@ -51,6 +51,7 @@ $nl
{ $table
{ "General form" "Description" "Examples" }
{ { $snippet { $emphasis "foo" } "?" } "outputs a boolean" { { $link empty? } } }
{ { $snippet { $emphasis "foo" } "!" } { "a variant of " { $snippet "foo" } " which mutates one of its arguments" } { { $link append! } } }
{ { $snippet "?" { $emphasis "foo" } } { "conditionally performs " { $snippet { $emphasis "foo" } } } { { $links ?nth } } }
{ { $snippet "<" { $emphasis "foo" } ">" } { "creates a new " { $snippet "foo" } } { { $link <array> } } }
{ { $snippet ">" { $emphasis "foo" } } { "converts the top of the stack into a " { $snippet "foo" } } { { $link >array } } }

View File

@ -52,7 +52,7 @@ M: object specializer-declaration class ;
specializer [ specialize-quot ] when* ;
: standard-method? ( method -- ? )
dup method-body? [
dup method? [
"method-generic" word-prop standard-generic?
] [ drop f ] if ;

View File

@ -35,10 +35,10 @@ M: form clone
[ [ value ] keep ] dip ; inline
: from-object ( object -- )
[ values ] [ make-mirror ] bi* update ;
[ values ] [ make-mirror ] bi* assoc-union! drop ;
: to-object ( destination names -- )
[ make-mirror ] [ values extract-keys ] bi* update ;
[ make-mirror ] [ values extract-keys ] bi* assoc-union! drop ;
: with-each-value ( name quot -- )
[ value ] dip '[

View File

@ -142,11 +142,6 @@ ARTICLE: "io.directories.create" "Creating directories"
} ;
ARTICLE: "delete-move-copy" "Deleting, moving, and copying files"
"Operations for deleting and copying files come in two forms:"
{ $list
{ "Words named " { $snippet { $emphasis "operation" } "-file" } " which work on regular files only." }
{ "Words named " { $snippet { $emphasis "operation" } "-tree" } " works on directory trees recursively, and also accepts regular files." }
}
"The operations for moving and copying files come in three flavors:"
{ $list
{ "A word named " { $snippet { $emphasis "operation" } } " which takes a source and destination path." }
@ -175,7 +170,7 @@ $nl
"On most operating systems, files can only be moved within the same file system. To move files between file systems, use " { $link copy-file } " followed by " { $link delete-file } " on the old name." ;
ARTICLE: "io.directories" "Directory manipulation"
"The " { $vocab-link "io.directories" } " vocabulary defines words for inspecting and manipulating directory trees."
"The " { $vocab-link "io.directories" } " vocabulary defines words for inspecting and manipulating directories."
{ $subsections
home
"current-directory"

View File

@ -26,6 +26,11 @@ HELP: copy-trees-into
ARTICLE: "io.directories.hierarchy" "Directory hierarchy manipulation"
"The " { $vocab-link "io.directories.hierarchy" } " vocabulary defines words for operating on directory hierarchies recursively."
$nl
"There is a naming scheme used by " { $vocab-link "io.directories" } " and " { $vocab-link "io.directories.hierarchy" } ". Operations for deleting and copying files come in two forms:"
{ $list
{ "Words named " { $snippet { $emphasis "operation" } "-file" } " which work on regular files only." }
{ "Words named " { $snippet { $emphasis "operation" } "-tree" } " which work on directory trees recursively, and also accept regular files." }
}
"Deleting directory trees recursively:"
{ $subsections delete-tree }
"Copying directory trees recursively:"

View File

@ -204,7 +204,7 @@ HELP: foreground
{ $description "Character style. An instance of " { $link color } ". See " { $link "colors" } "." }
{ $examples
{ $code
"10 ["
"10 iota ["
" \"Hello world\\n\""
" swap 10 / 1 <gray> foreground associate format"
"] each"
@ -215,9 +215,9 @@ HELP: background
{ $description "Character style. An instance of " { $link color } ". See " { $link "colors" } "." }
{ $examples
{ $code
"10 ["
"10 iota ["
" \"Hello world\\n\""
" swap 10 / 1 1 over - over 1 <rgba>"
" swap 10 / 1 over - over 1 <rgba>"
" background associate format nl"
"] each"
}

View File

@ -131,7 +131,6 @@ SYMBOL: interactive-vocabs
"arrays"
"assocs"
"combinators"
"compiler"
"compiler.errors"
"compiler.units"
"continuations"
@ -173,6 +172,7 @@ SYMBOL: interactive-vocabs
"tools.test"
"tools.threads"
"tools.time"
"tools.walker"
"vocabs"
"vocabs.loader"
"vocabs.refresh"

View File

@ -24,7 +24,7 @@ M: lambda-macro definition
M: lambda-macro reset-word
[ call-next-method ] [ f "lambda" set-word-prop ] bi ;
INTERSECTION: lambda-method method-body lambda-word ;
INTERSECTION: lambda-method method lambda-word ;
M: lambda-method definer drop \ M:: \ ; ;

View File

@ -14,9 +14,9 @@ HELP: [let
HELP: :>
{ $syntax ":> var" ":> var!" ":> ( var-1 var-2 ... )" }
{ $description "Binds one or more new lexical variables. In the " { $snippet ":> var" } " form, the value on the top of the datastack to a new lexical variable named " { $snippet "var" } " and scoped to the enclosing quotation, " { $link POSTPONE: [let } " form, or " { $link POSTPONE: :: } " definition."
{ $description "Binds one or more new lexical variables. In the " { $snippet ":> var" } " form, the value on the top of the datastack is bound to a new lexical variable named " { $snippet "var" } " and is scoped to the enclosing quotation, " { $link POSTPONE: [let } " form, or " { $link POSTPONE: :: } " definition."
$nl
"The " { $snippet ":> ( var-1 ... )" } " form binds multiple variables to the top values off the datastack in left to right order. These two snippets have the same effect:"
"The " { $snippet ":> ( var-1 ... )" } " form binds multiple variables to the top values of the datastack in right to left order, with the last variable bound to the top of the datastack. These two snippets have the same effect:"
{ $code ":> c :> b :> a" }
{ $code ":> ( a b c )" }
$nl
@ -112,7 +112,7 @@ $nl
$nl
{ $heading "Mutable bindings" }
"This next example demonstrates closures and mutable variable bindings. The " { $snippet "make-counter" } " word outputs a tuple containing a pair of quotations that respectively increment and decrement an internal counter in the mutable " { $snippet "value" } " variable and then return the new value. The quotations close over the counter, so each invocation of the word gives new quotations with a new internal counter."
"This next example demonstrates closures and mutable variable bindings. The " { $snippet "<counter>" } " word outputs a tuple containing a pair of quotations that respectively increment and decrement an internal counter in the mutable " { $snippet "value" } " variable and then return the new value. The quotations close over the counter, so each invocation of the word gives new quotations with a new internal counter."
{ $example
"""USING: locals kernel math ;
IN: scratchpad

View File

@ -1,6 +1,7 @@
IN: macros.tests
USING: tools.test macros math kernel arrays
vectors io.streams.string prettyprint parser eval see ;
vectors io.streams.string prettyprint parser eval see
stack-checker compiler.units definitions vocabs ;
IN: macros.tests
MACRO: see-test ( a b -- quot ) + ;
@ -19,7 +20,21 @@ unit-test
[ f ] [ \ see-test macro? ] unit-test
[ ] [ "USING: macros stack-checker kernel ; IN: hanging-macro MACRO: c ( quot -- ) infer drop [ ] ; : a ( -- ) [ a ] c ;" eval( -- ) ] unit-test
[ ] [ "USING: macros stack-checker kernel ; IN: hanging-macro MACRO: c ( quot -- ) infer drop [ ] ;" eval( -- ) ] unit-test
[ ] [ "USING: macros kernel ; IN: hanging-macro : a ( -- ) [ a ] c ;" eval( -- ) ] unit-test
[ ] [ [ "hanging-macro" forget-vocab ] with-compilation-unit ] unit-test
[ ] [ "IN: macros.tests USE: macros MACRO: foo ( -- x ) [ ] ;" eval( -- ) ] unit-test
[ "IN: macros.tests USE: macros MACRO: foo ( -- x ) [ ] ; inline" eval( -- ) ] must-fail
! The macro expander code should infer
MACRO: bad-macro ( a -- b ) 1 2 3 [ ] ;
! Must fail twice, and not memoize a bad result
[ [ 0 bad-macro ] call ] must-fail
[ [ 0 bad-macro ] call ] must-fail
[ [ 0 bad-macro ] infer ] must-fail
[ ] [ [ \ bad-macro forget ] with-compilation-unit ] unit-test

View File

@ -1,7 +1,7 @@
! Copyright (C) 2007, 2010 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: parser kernel sequences words effects combinators assocs
definitions quotations namespaces memoize accessors
definitions quotations namespaces memoize accessors fry
compiler.units ;
IN: macros
@ -14,7 +14,11 @@ PRIVATE>
: define-macro ( word definition effect -- )
real-macro-effect {
[ [ memoize-quot [ call ] append ] keep define-declared ]
[
[ '[ _ _ call-effect ] ] keep
[ memoize-quot '[ @ call ] ] keep
define-declared
]
[ drop "macro" set-word-prop ]
[ 2drop changed-effect ]
} 3cleave ;

View File

@ -84,7 +84,7 @@ HELP: histogram
}
{ $description "Returns a hashtable where the keys are the elements of the sequence and the values are the number of times they appeared in that sequence." } ;
HELP: histogram*
HELP: histogram!
{ $values
{ "hashtable" hashtable } { "seq" sequence }
{ "hashtable" hashtable }
@ -92,7 +92,7 @@ HELP: histogram*
{ $examples
{ $example "! Count the number of times the elements of two sequences appear."
"USING: prettyprint math.statistics ;"
"\"aaabc\" histogram \"aaaaaabc\" histogram* ."
"\"aaabc\" histogram \"aaaaaabc\" histogram! ."
"H{ { 97 9 } { 98 2 } { 99 2 } }"
}
}
@ -125,7 +125,7 @@ HELP: sequence>assoc
}
{ $description "Iterates over a sequence, allowing elements of the sequence to be added to a newly created " { $snippet "assoc" } " according to the passed quotation." } ;
HELP: sequence>assoc*
HELP: sequence>assoc!
{ $values
{ "assoc" assoc } { "seq" sequence } { "quot" quotation }
{ "assoc" assoc }
@ -133,7 +133,7 @@ HELP: sequence>assoc*
{ $examples
{ $example "! Iterate over a sequence and add the counts to an existing assoc"
"USING: assocs prettyprint math.statistics kernel ;"
"H{ { 97 2 } { 98 1 } } clone \"aaabc\" [ inc-at ] sequence>assoc* ."
"H{ { 97 2 } { 98 1 } } clone \"aaabc\" [ inc-at ] sequence>assoc! ."
"H{ { 97 5 } { 98 2 } { 99 1 } }"
}
}
@ -157,13 +157,13 @@ ARTICLE: "histogram" "Computing histograms"
"Counting elements in a sequence:"
{ $subsections
histogram
histogram*
histogram!
sorted-histogram
}
"Combinators for implementing histogram:"
{ $subsections
sequence>assoc
sequence>assoc*
sequence>assoc!
sequence>hashtable
} ;

View File

@ -64,7 +64,7 @@ IN: math.statistics
PRIVATE>
: sequence>assoc* ( assoc seq quot: ( obj assoc -- ) -- assoc )
: sequence>assoc! ( assoc seq quot: ( obj assoc -- ) -- assoc )
rot (sequence>assoc) ; inline
: sequence>assoc ( seq quot: ( obj assoc -- ) exemplar -- assoc )
@ -73,8 +73,8 @@ PRIVATE>
: sequence>hashtable ( seq quot: ( obj hashtable -- ) -- hashtable )
H{ } sequence>assoc ; inline
: histogram* ( hashtable seq -- hashtable )
[ inc-at ] sequence>assoc* ;
: histogram! ( hashtable seq -- hashtable )
[ inc-at ] sequence>assoc! ;
: histogram ( seq -- hashtable )
[ inc-at ] sequence>hashtable ;

View File

@ -37,7 +37,7 @@ M: parsing-word pprint*
M: word pprint*
[ pprint-word ] [ ?start-group ] [ ?end-group ] tri ;
M: method-body pprint*
M: method pprint*
[
[
[ "M\\ " % "method-class" word-prop word-name* % ]
@ -229,7 +229,7 @@ M: compose pprint* pprint-object ;
M: wrapper pprint*
{
{ [ dup wrapped>> method-body? ] [ wrapped>> pprint* ] }
{ [ dup wrapped>> method? ] [ wrapped>> pprint* ] }
{ [ dup wrapped>> word? ] [ <block \ \ pprint-word wrapped>> pprint-word block> ] }
[ pprint-object ]
} cond ;

View File

@ -1,10 +1,10 @@
! Copyright (C) 2003, 2009 Slava Pestov.
! Copyright (C) 2003, 2010 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: arrays accessors assocs colors combinators grouping io
io.streams.string io.styles kernel make math math.parser namespaces
parser prettyprint.backend prettyprint.config prettyprint.custom
prettyprint.sections quotations sequences sorting strings vocabs
vocabs.prettyprint words sets ;
vocabs.prettyprint words sets generic ;
IN: prettyprint
: with-use ( obj quot -- )
@ -72,24 +72,55 @@ SYMBOL: ->
] [ ] make ;
: remove-breakpoints ( quot pos -- quot' )
over quotation? [
1 + short cut [ (remove-breakpoints) ] bi@
[ -> ] glue
] [
drop
] if ;
1 + short cut [ (remove-breakpoints) ] bi@ [ -> ] glue ;
! True when the second element of a call frame triple is a word.
! callframe. uses this to decide how the frame is printed.
: optimized-frame? ( triple -- ? ) second word? ;
! True when the first element of a call frame triple is a word.
: frame-word? ( triple -- ? )
first word? ;
! Prints the first element of a call frame triple with a label:
! "Method: " when it satisfies method?, "Word: " when it satisfies word?.
! Anything else is dropped and nothing is printed.
! The method? clause is tried before the word? clause.
: frame-word. ( triple -- )
first {
{ [ dup method? ] [ "Method: " write pprint ] }
{ [ dup word? ] [ "Word: " write pprint ] }
[ drop ]
} cond ;
! Emits one table row for an optimized frame: a cell containing the "(O)"
! marker, then a cell in which frame-word. describes the frame.
: optimized-frame. ( triple -- )
[
[ "(O)" write ] with-cell
[ frame-word. ] with-cell
] with-row ;
! Emits the table row(s) for an unoptimized frame.
!
! First row: a "(U)" marker cell, then a cell holding "Quotation: " followed
! by the result of calling remove-breakpoints on the triple's second and
! third elements. That result is printed with nesting-limit 3 and
! length-limit 100; both are set inside with-scope.
!
! Second row (only when frame-word? holds): an empty first cell, then the
! frame-word. description. Otherwise the triple is simply dropped.
: unoptimized-frame. ( triple -- )
[
[ "(U)" write ] with-cell
[
"Quotation: " write
dup [ second ] [ third ] bi remove-breakpoints
[
3 nesting-limit set
100 length-limit set
pprint
] with-scope
] with-cell
] with-row
dup frame-word? [
[
[ ] with-cell
[ frame-word. ] with-cell
] with-row
] [ drop ] if ;
! Prints a single call frame triple, choosing optimized-frame. or
! unoptimized-frame. according to optimized-frame?.
: callframe. ( triple -- )
dup optimized-frame?
[ optimized-frame. ] [ unoptimized-frame. ] if ;
PRIVATE>
: callstack. ( callstack -- )
callstack>array 2 <groups> [
remove-breakpoints
[
3 nesting-limit set
100 length-limit set
.
] with-scope
] assoc-each ;
callstack>array 3 <groups>
{ { table-gap { 5 5 } } } [ [ callframe. ] each ] tabular-output nl ;
: .c ( -- ) callstack callstack. ;

View File

@ -44,7 +44,7 @@ CONSTANT: fail-state -1
unify-final-state renumber-states box-transitions
[ start-state>> ]
[ final-states>> keys first ]
[ nfa-table get [ transitions>> ] bi@ swap update ] tri ;
[ nfa-table get [ transitions>> ] bi@ swap assoc-union! drop ] tri ;
: ast>dfa ( parse-tree -- minimal-dfa )
construct-nfa disambiguate construct-dfa minimize ;

View File

@ -76,7 +76,7 @@ M: hook-generic synopsis*
[ stack-effect. ]
} cleave ;
M: method-body synopsis*
M: method synopsis*
[ definer. ]
[ "method-class" word-prop pprint-word ]
[ "method-generic" word-prop pprint-word ] tri ;

View File

@ -236,7 +236,7 @@ SYMBOL: deserialized
: deserialize-hashtable ( -- hashtable )
H{ } clone
[ intern-object ]
[ (deserialize) update ]
[ (deserialize) assoc-union! drop ]
[ ] tri ;
: copy-seq-to-tuple ( seq tuple -- )

View File

@ -523,6 +523,9 @@ M: bad-executable summary
\ data-room { } { byte-array } define-primitive
\ data-room make-flushable
\ (code-blocks) { } { array } define-primitive
\ (code-blocks) make-flushable
\ code-room { } { byte-array } define-primitive
\ code-room make-flushable
@ -711,7 +714,7 @@ M: bad-executable summary
\ dll-valid? { object } { object } define-primitive
\ modify-code-heap { array } { } define-primitive
\ modify-code-heap { array object object } { } define-primitive
\ unimplemented { } { } define-primitive

View File

@ -40,7 +40,7 @@ ARTICLE: "inference-combinators" "Combinator stack effects"
"The following code now passes the stack checker; it would fail were " { $snippet "twice" } " not declared " { $link POSTPONE: inline } ":"
{ $unchecked-example "USE: math.functions" "[ [ sqrt ] twice ] infer." "( x -- x )" }
{ $subheading "Defining a combinator for unknown quotations" }
"In the next example, " { $link POSTPONE: call( } " must be used because the quotation the result of calling a runtime accessor, and the compiler cannot make any static assumptions about this quotation at all:"
"In the next example, " { $link POSTPONE: call( } " must be used because the quotation is the result of calling a runtime accessor, and the compiler cannot make any static assumptions about this quotation at all:"
{ $code
"TUPLE: action name quot ;"
": perform ( value action -- result ) quot>> call( value -- result ) ;"

View File

@ -3,8 +3,7 @@ USING: help.markup help.syntax combinators words kernel ;
HELP: define-transform
{ $values { "word" word } { "quot" "a quotation taking " { $snippet "n" } " inputs from the stack and producing another quotation as output" } { "n" "a non-negative integer" } }
{ $description "Defines a compiler transform for the optimizing compiler."
"When a call to " { $snippet "word" } " is being compiled, the compiler first checks that the top " { $snippet "n" } " stack values are literal, and if so, calls the quotation with those inputs at compile time. The quotation can output a new quotation, or " { $link f } "."
{ $description "Defines a compiler transform for the optimizing compiler. When a call to " { $snippet "word" } " is being compiled, the compiler first checks that the top " { $snippet "n" } " stack values are literal, and if so, calls the quotation with those inputs at compile time. The quotation can output a new quotation, or " { $link f } "."
$nl
"If the quotation outputs " { $link f } ", or if not all inputs are literal, a call to the word is compiled as usual, or compilation fails if the word does not have a static stack effect."
$nl

View File

@ -78,7 +78,7 @@ MACRO: curry-folding-test ( quot -- )
\ bad-macro [ "OOPS" throw ] 0 define-transform
[ [ bad-macro ] infer ] [ f >>continuation T{ transform-expansion-error f "OOPS" f bad-macro } = ] must-fail-with
[ [ bad-macro ] infer ] [ [ transform-expansion-error? ] [ error>> "OOPS" = ] [ word>> \ bad-macro = ] tri and and ] must-fail-with
MACRO: two-params ( a b -- c ) + 1quotation ;

View File

@ -5,4 +5,8 @@ IN: strings.tables.tests
[ { "A BB" "CC D" } ] [ { { "A" "BB" } { "CC" "D" } } format-table ] unit-test
[ { "A C" "B " "D E" } ] [ { { "A\nB" "C" } { "D" "E" } } format-table ] unit-test
[ { "A C" "B " "D E" } ] [ { { "A\nB" "C" } { "D" "E" } } format-table ] unit-test
[ { "A B" " C" "D E" } ] [ { { "A" "B\nC" } { "D" "E" } } format-table ] unit-test
[ { "A B" "C D" " E" } ] [ { { "A" "B" } { "C" "D\nE" } } format-table ] unit-test

View File

@ -11,11 +11,9 @@ IN: strings.tables
: max-length ( seq -- n )
[ length ] [ max ] map-reduce ;
: format-row ( seq ? -- seq )
[
dup max-length
'[ _ "" pad-tail ] map
] unless ;
! Pads every element of seq at its tail with "" so that all elements end up
! as long as the longest one (as computed by max-length).
: format-row ( seq -- seq )
dup max-length
'[ _ "" pad-tail ] map ;
: format-column ( seq ? -- seq )
[
@ -26,5 +24,5 @@ IN: strings.tables
PRIVATE>
: format-table ( table -- seq )
[ [ [ string-lines ] map ] dip format-row flip ] map-last concat
[ [ string-lines ] map format-row flip ] map concat
flip [ format-column ] map-last flip [ " " join ] map ;

View File

@ -103,7 +103,7 @@ GENERIC: smart-usage ( defspec -- seq )
M: object smart-usage usage [ irrelevant? not ] filter ;
M: method-body smart-usage "method-generic" word-prop smart-usage ;
M: method smart-usage "method-generic" word-prop smart-usage ;
M: f smart-usage drop \ f smart-usage ;
@ -124,7 +124,7 @@ M: f smart-usage drop \ f smart-usage ;
[ [ vocab-name ] [ words [ generic? not ] filter ] bi ] dip map
[
[ [ word? ] [ generic? not ] bi and ] filter [
dup method-body?
dup method?
[ "method-generic" word-prop ] when
vocabulary>>
] map

View File

@ -106,18 +106,12 @@ IN: tools.deploy.shaker
: strip-word-props ( stripped-props words -- )
"Stripping word properties" show
[
swap '[
[
[ drop _ member? not ] assoc-filter sift-assoc
>alist f like
] change-props drop
] each
] [
H{ } clone '[
[ [ _ [ ] cache ] map ] change-props drop
] each
] bi ;
swap '[
[
[ drop _ member? not ] assoc-filter sift-assoc
>alist f like
] change-props drop
] each ;
: stripped-word-props ( -- seq )
[

View File

@ -1,11 +1,10 @@
! Copyright (C) 2008, 2010 Slava Pestov, Jorge Acereda Macia.
! See http://factorcode.org/license.txt for BSD license.
USING: tools.disassembler namespaces combinators
alien alien.syntax alien.c-types lexer parser kernel
sequences layouts math math.order alien.libraries
math.parser system make fry arrays libc destructors
tools.disassembler.utils tools.disassembler.private splitting
alien.data classes.struct ;
USING: tools.disassembler namespaces combinators alien
alien.syntax alien.c-types lexer parser kernel sequences layouts
math math.order alien.libraries math.parser system make fry
arrays libc destructors tools.memory tools.disassembler.utils
tools.disassembler.private splitting alien.data classes.struct ;
IN: tools.disassembler.udis
<<
@ -105,7 +104,7 @@ FUNCTION: char* ud_lookup_mnemonic ( int c ) ;
dup UD_SYN_INTEL ud_set_syntax ;
: with-ud ( quot: ( ud -- ) -- )
[ [ [ <ud> ] dip call ] with-destructors ] with-word-entry-points ; inline
[ [ [ <ud> ] dip call ] with-destructors ] with-code-blocks ; inline
SINGLETON: udis-disassembler

View File

@ -1,43 +1,20 @@
USING: accessors arrays binary-search kernel math math.order
math.parser namespaces sequences sorting splitting vectors vocabs words ;
USING: accessors kernel math math.parser prettyprint sequences
splitting tools.memory ;
IN: tools.disassembler.utils
SYMBOL: word-entry-points
SYMBOL: smallest-xt
SYMBOL: greatest-xt
: (word-entry-points) ( -- assoc )
vocabs [ words ] map concat [ [ word-code ] keep 3array ] map
[ first ] sort-with ;
! Prefixes a string with "0x".
: 0x ( str -- str' ) "0x" prepend ;
: complete-address ( n seq -- str )
[ first - ] [ third name>> ] bi
over zero? [ nip ] [ swap 16 >base "0x" prepend "+" glue ] if ;
[ nip owner>> unparse-short ] [ entry-point>> - ] 2bi
[ 16 >base 0x " + " glue ] unless-zero ;
: search-xt ( n -- str/f )
dup [ smallest-xt get < ] [ greatest-xt get > ] bi or [
drop f
] [
word-entry-points get over [ swap first <=> ] curry search nip
2dup second <= [
[ complete-address ] [ drop f ] if*
] [
2drop f
] if
] if ;
! Describes addr in terms of the code block returned by
! lookup-return-address: when a block is returned, the address and the block
! are passed to complete-address; when the lookup yields f, the result is f.
: search-xt ( addr -- str/f )
dup lookup-return-address
dup [ complete-address ] [ 2drop f ] if ;
: resolve-xt ( str -- str' )
[ "0x" prepend ] [ 16 base> ] bi
[ 0x ] [ 16 base> ] bi
[ search-xt [ " (" ")" surround append ] when* ] when* ;
: resolve-call ( str -- str' )
"0x" split1-last [ resolve-xt "0x" glue ] when* ;
: with-word-entry-points ( quot -- )
[
(word-entry-points)
[ word-entry-points set ]
[ first first smallest-xt set ]
[ last second greatest-xt set ] tri
call
] with-scope ; inline

View File

@ -1,10 +1,11 @@
! Copyright (C) 2005, 2009 Slava Pestov.
! Copyright (C) 2005, 2010 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays assocs classes classes.struct
combinators combinators.smart continuations fry generalizations
generic grouping io io.styles kernel make math math.parser
math.statistics memory namespaces parser prettyprint sequences
sorting splitting strings system vm words ;
USING: accessors arrays assocs binary-search classes
classes.struct combinators combinators.smart continuations fry
generalizations generic grouping io io.styles kernel make math
math.order math.parser math.statistics memory memory.private
layouts namespaces parser prettyprint sequences sorting
splitting strings system vm words hints hashtables ;
IN: tools.memory
<PRIVATE
@ -54,6 +55,8 @@ IN: tools.memory
{ "Mark stack:" [ mark-stack>> kilobytes ] }
} object-table. ;
PRIVATE>
: data-room. ( -- )
"== Data heap ==" print nl
data-room data-heap-room memory>struct {
@ -63,14 +66,6 @@ IN: tools.memory
[ misc-room. ]
} cleave ;
: code-room. ( -- )
"== Code heap ==" print nl
code-room mark-sweep-sizes memory>struct mark-sweep-table. ;
PRIVATE>
: room. ( -- ) data-room. nl code-room. ;
<PRIVATE
: heap-stat-step ( obj counts sizes -- )
@ -195,3 +190,105 @@ PRIVATE>
{ "Code heap sweep time:" [ [ code-sweep-time>> ] map-sum nanos>string ] }
{ "Compaction time:" [ [ compaction-time>> ] map-sum nanos>string ] }
} object-table. ;
SINGLETONS: +unoptimized+ +optimized+ +profiling+ +pic+ ;
! One block of the code heap; all slots are read-only.
! Instances are built positionally (boa) by <code-block> from a group of six
! values, so the slot order here has to match that group's layout.
! type holds one of the singletons produced by code-block-type.
! entry-point and size are added together by with-code-blocks to find the
! end of the last block.
! NOTE(review): the meaning of parameters and relocation is not visible in
! this file section - confirm against the VM primitive that supplies them.
TUPLE: code-block
{ owner read-only }
{ parameters read-only }
{ relocation read-only }
{ type read-only }
{ size read-only }
{ entry-point read-only } ;
TUPLE: code-blocks { blocks sliced-groups } { cache hashtable } ;
<PRIVATE
! Maps a numeric type index to its singleton:
! 0 +unoptimized+, 1 +optimized+, 2 +profiling+, 3 +pic+.
: code-block-type ( n -- type )
{ +unoptimized+ +optimized+ +profiling+ +pic+ } nth ;
! Builds a code-block tuple from a six-element sequence.
! Elements 1, 2, 3 and 5 are stored unchanged; element 4 is converted with
! code-block-type; element 6 is shifted by the value of tag-bits before being
! stored as the entry point.
! NOTE(review): presumably the raw entry point arrives with its low tag bits
! removed and the shift restores the real address - confirm against the VM.
: <code-block> ( seq -- code-block )
6 firstn-unsafe {
[ ]
[ ]
[ ]
[ code-block-type ]
[ ]
[ tag-bits get shift ]
} spread code-block boa ; inline
! Wraps a flat sequence in a code-blocks tuple: the sequence is viewed as
! sliced groups of six elements, and a fresh empty hashtable is installed as
! the cache.
: <code-blocks> ( seq -- code-blocks )
6 <sliced-groups> H{ } clone \ code-blocks boa ;
SYMBOL: code-heap-start
SYMBOL: code-heap-end
! Tests address against the current values of the code-heap-start and
! code-heap-end variables (both are set by with-code-blocks).
: in-code-heap? ( address -- ? )
code-heap-start get code-heap-end get between? ;
! Binary-searches seq for addr, ordering by each element's entry-point.
! search yields an index and the closest-matching element; nip discards the
! index, so the result is the closest block and not necessarily an exact hit.
: (lookup-return-address) ( addr seq -- code-block )
[ entry-point>> <=> ] with search nip ;
! Compiler specialization hint for the case where seq is a code-blocks.
HINTS: (lookup-return-address) code-blocks ;
PRIVATE>
! The length of a code-blocks is the number of six-element groups it wraps.
M: code-blocks length blocks>> length ; inline
FROM: sequences.private => nth-unsafe ;
! Element access goes through the cache hashtable, keyed by index: on a miss
! the raw group at that index is converted with <code-block> and the result
! is stored, so each block is converted at most once per code-blocks.
M: code-blocks nth-unsafe
[ cache>> ] [ blocks>> ] bi
'[ _ nth-unsafe <code-block> ] cache ; inline
INSTANCE: code-blocks immutable-sequence
! Calls the (code-blocks) primitive and wraps its output with <code-blocks>.
: code-blocks ( -- blocks )
(code-blocks) <code-blocks> ;
! Calls quot inside a new variable scope in which:
!   \ code-blocks   holds the sequence returned by code-blocks,
!   code-heap-start is the entry point of its first block,
!   code-heap-end   is the entry point of its last block plus that block's size.
! in-code-heap? and lookup-return-address read these variables.
: with-code-blocks ( quot -- )
[
code-blocks
[ \ code-blocks set ]
[ first entry-point>> code-heap-start set ]
[ last [ entry-point>> ] [ size>> ] bi + code-heap-end set ] tri
call
] with-scope ; inline
! Finds the code block closest to addr in the sequence stored in the
! \ code-blocks variable, or outputs f when in-code-heap? rejects addr.
! Reads the variables that with-code-blocks sets.
: lookup-return-address ( addr -- code-block )
dup in-code-heap?
[ \ code-blocks get (lookup-return-address) ] [ drop f ] if ;
<PRIVATE
! Tallies a sequence of code blocks by type into two fresh hashtables:
!   counts - type -> number of blocks of that type (inc-at),
!   sizes  - type -> sum of the size slots of those blocks (at+).
! 2keep leaves both hashtables on the stack after the each loop has run.
: code-block-stats ( code-blocks -- counts sizes )
H{ } clone H{ } clone
[ '[ [ size>> ] [ type>> ] bi [ nip _ inc-at ] [ _ at+ ] 2bi ] each ]
2keep ;
! Formats a count as a string followed by " blocks".
: blocks ( n -- str ) number>string " blocks" append ;
! Builds one table row as a three-element array: the label string, the block
! count for type formatted by blocks, and the size total for type formatted
! by kilobytes. A type missing from either assoc is treated as 0.
: code-block-table-row ( string type counts sizes -- triple )
[ at 0 or blocks ] [ at 0 or kilobytes ] bi-curry* bi 3array ;
! Prints a table with one row per code block type, in the fixed order
! optimized, unoptimized, inline caches, profiling stubs. Each row is built
! by code-block-table-row from the given counts and sizes.
: code-block-table. ( counts sizes -- )
[
{
{ "Optimized code:" +optimized+ }
{ "Unoptimized code:" +unoptimized+ }
{ "Inline caches:" +pic+ }
{ "Profiling stubs:" +profiling+ }
}
] 2dip '[ _ _ code-block-table-row ] { } assoc>map
simple-table. ;
PRIVATE>
! Prints the code heap report: a heading, the mark-sweep size table built
! from code-room, a blank line, then the per-type block count/size table
! computed from a fresh code-blocks snapshot.
: code-room. ( -- )
"== Code heap ==" print nl
code-room mark-sweep-sizes memory>struct mark-sweep-table. nl
code-blocks code-block-stats code-block-table. ;
! Prints the data heap report, a blank line, then the code heap report.
: room. ( -- )
data-room. nl code-room. ;

View File

@ -1 +1 @@
Heap introspection tools
Data and code heap introspection tools

View File

@ -58,7 +58,7 @@ IN: tools.profiler.tests
[ ] [ [ [ ] compile-call ] profile ] unit-test
[ [ gensym execute ] profile ] [ T{ undefined } = ] must-fail-with
[ [ gensym execute ] profile ] [ undefined? ] must-fail-with
: crash-bug-1 ( -- x ) "hi" <uninterned-word> ;
: crash-bug-2 ( -- ) 100000 [ crash-bug-1 drop ] times ;

View File

@ -10,9 +10,7 @@ IN: tuple-arrays
MACRO: boa-unsafe ( class -- quot ) tuple-layout '[ _ <tuple-boa> ] ;
MACRO: infer-in ( class -- quot ) inputs '[ _ ] ;
: tuple-arity ( class -- quot ) '[ _ boa ] infer-in ; inline
: tuple-arity ( class -- quot ) '[ _ boa ] inputs ; inline
: smart-tuple>array ( tuple class -- array )
'[ [ _ boa ] undo ] output>array ; inline

View File

@ -67,7 +67,8 @@ M: word command-description ( word -- str )
H{ { +nullary+ f } { +listener+ f } { +description+ f } } ;
: define-command ( word hash -- )
[ props>> ] [ default-flags swap assoc-union ] bi* update ;
default-flags swap assoc-union
'[ _ assoc-union ] change-props drop ;
: command-quot ( target command -- quot )
[ 1quotation ] [ +nullary+ word-prop ] bi

View File

@ -119,7 +119,7 @@ M: object completion-string present ;
: method-completion-string ( word -- string )
"method-generic" word-prop present ;
M: method-body completion-string method-completion-string ;
M: method completion-string method-completion-string ;
GENERIC# accept-completion-hook 1 ( item popup -- )

View File

@ -60,7 +60,7 @@ SINGLETON: method-renderer
M: method-renderer column-alignment drop { 0 0 1 } ;
M: method-renderer filled-column drop 1 ;
! Value is a { method-body count } pair
! Value is a { method count } pair
M: method-renderer row-columns
drop [
[ [ definition-icon <image-name> ] [ synopsis ] bi ]

View File

@ -1,10 +1,10 @@
# GraphemeBreakTest-5.1.0.txt
# Date: 2008-03-11, 02:19:22 GMT [MD]
# GraphemeBreakTest-5.2.0.txt
# Date: 2009-09-19, 00:42:12 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2008 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Default Grapheme Break Test
#
@ -15,7 +15,7 @@
# × wherever there is not.
# <comment> the format can change, but currently it shows:
# - the sample character name
# - (x) the line_break property* for the sample character
# - (x) the Grapheme_Break property* for the sample character
# - [x] the rule that determines whether there is a break or not
#
# These samples may be extended or changed in the future.

View File

@ -1,10 +1,10 @@
# WordBreakTest-5.1.0.txt
# Date: 2008-03-11, 02:19:28 GMT [MD]
# WordBreakTest-5.2.0.txt
# Date: 2009-09-19, 00:42:16 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2008 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
# Default Word Break Test
#
@ -15,7 +15,7 @@
# × wherever there is not.
# <comment> the format can change, but currently it shows:
# - the sample character name
# - (x) the line_break property* for the sample character
# - (x) the Word_Break property* for the sample character
# - [x] the rule that determines whether there is a break or not
#
# These samples may be extended or changed in the future.

291149
basis/unicode/collation/CollationTest_SHIFTED.txt Executable file → Normal file

File diff suppressed because it is too large Load Diff

38395
basis/unicode/collation/allkeys.txt Executable file → Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
# CompositionExclusions-5.1.0.txt
# Date: 2008-03-20, 17:45:00 PDT [KW]
# CompositionExclusions-5.2.0.txt
# Date: 2009-05-22, 12:52:00 PDT [KW]
#
# This file lists the characters for the Composition Exclusion Table
# defined in UAX #15, Unicode Normalization Forms.
@ -7,7 +7,7 @@
# This file is a normative contributory data file in the
# Unicode Character Database.
#
# Copyright (c) 1991-2008 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
#
# For more information, see
@ -170,11 +170,11 @@ FB4E # HEBREW LETTER PE WITH RAFE
# FA22 CJK COMPATIBILITY IDEOGRAPH-FA22
# FA25..FA26 [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26
# FA2A..FA2D [4] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D
# FA30..FA6A [59] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6A
# FA30..FA6D [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D
# FA70..FAD9 [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
# 2F800..2FA1D [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
# Total code points: 1030
# Total code points: 1033
# ================================================
# (4) Non-Starter Decompositions

View File

@ -1,10 +1,10 @@
# PropList-5.1.0.txt
# Date: 2008-03-20, 17:55:27 GMT [MD]
# PropList-5.2.0.txt
# Date: 2009-08-22, 04:58:40 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2008 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# For documentation, see http://www.unicode.org/reports/tr44/
# ================================================
@ -41,6 +41,7 @@
002D ; Dash # Pd HYPHEN-MINUS
058A ; Dash # Pd ARMENIAN HYPHEN
05BE ; Dash # Pd HEBREW PUNCTUATION MAQAF
1400 ; Dash # Pd CANADIAN SYLLABICS HYPHEN
1806 ; Dash # Pd MONGOLIAN TODO SOFT HYPHEN
2010..2015 ; Dash # Pd [6] HYPHEN..HORIZONTAL BAR
2053 ; Dash # Po SWUNG DASH
@ -57,7 +58,7 @@ FE58 ; Dash # Pd SMALL EM DASH
FE63 ; Dash # Pd SMALL HYPHEN-MINUS
FF0D ; Dash # Pd FULLWIDTH HYPHEN-MINUS
# Total code points: 24
# Total code points: 25
# ================================================
@ -124,6 +125,7 @@ FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET
0700..070A ; Terminal_Punctuation # Po [11] SYRIAC END OF PARAGRAPH..SYRIAC CONTRACTION
070C ; Terminal_Punctuation # Po SYRIAC HARKLEAN METOBELUS
07F8..07F9 ; Terminal_Punctuation # Po [2] NKO COMMA..NKO EXCLAMATION MARK
0830..083E ; Terminal_Punctuation # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU
0964..0965 ; Terminal_Punctuation # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
0E5A..0E5B ; Terminal_Punctuation # Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT
0F08 ; Terminal_Punctuation # Po TIBETAN MARK SBRUL SHAD
@ -137,6 +139,7 @@ FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET
1802..1805 ; Terminal_Punctuation # Po [4] MONGOLIAN COMMA..MONGOLIAN FOUR DOTS
1808..1809 ; Terminal_Punctuation # Po [2] MONGOLIAN MANCHU COMMA..MONGOLIAN MANCHU FULL STOP
1944..1945 ; Terminal_Punctuation # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK
1AA8..1AAB ; Terminal_Punctuation # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU
1B5A..1B5B ; Terminal_Punctuation # Po [2] BALINESE PANTI..BALINESE PAMADA
1B5D..1B5F ; Terminal_Punctuation # Po [3] BALINESE CARIK PAMUNGKAH..BALINESE CARIK PAREREN
1C3B..1C3F ; Terminal_Punctuation # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK
@ -145,11 +148,16 @@ FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET
2047..2049 ; Terminal_Punctuation # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK
2E2E ; Terminal_Punctuation # Po REVERSED QUESTION MARK
3001..3002 ; Terminal_Punctuation # Po [2] IDEOGRAPHIC COMMA..IDEOGRAPHIC FULL STOP
A4FE..A4FF ; Terminal_Punctuation # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP
A60D..A60F ; Terminal_Punctuation # Po [3] VAI COMMA..VAI QUESTION MARK
A6F3..A6F7 ; Terminal_Punctuation # Po [5] BAMUM FULL STOP..BAMUM QUESTION MARK
A876..A877 ; Terminal_Punctuation # Po [2] PHAGS-PA MARK SHAD..PHAGS-PA MARK DOUBLE SHAD
A8CE..A8CF ; Terminal_Punctuation # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA
A92F ; Terminal_Punctuation # Po KAYAH LI SIGN SHYA
A9C7..A9C9 ; Terminal_Punctuation # Po [3] JAVANESE PADA PANGKAT..JAVANESE PADA LUNGSI
AA5D..AA5F ; Terminal_Punctuation # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA
AADF ; Terminal_Punctuation # Po TAI VIET SYMBOL KOI KOI
ABEB ; Terminal_Punctuation # Po MEETEI MAYEK CHEIKHEI
FE50..FE52 ; Terminal_Punctuation # Po [3] SMALL COMMA..SMALL FULL STOP
FE54..FE57 ; Terminal_Punctuation # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK
FF01 ; Terminal_Punctuation # Po FULLWIDTH EXCLAMATION MARK
@ -161,10 +169,13 @@ FF61 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC FULL STOP
FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA
1039F ; Terminal_Punctuation # Po UGARITIC WORD DIVIDER
103D0 ; Terminal_Punctuation # Po OLD PERSIAN WORD DIVIDER
10857 ; Terminal_Punctuation # Po IMPERIAL ARAMAIC SECTION SIGN
1091F ; Terminal_Punctuation # Po PHOENICIAN WORD SEPARATOR
10B3A..10B3F ; Terminal_Punctuation # Po [6] TINY TWO DOTS OVER ONE DOT PUNCTUATION..LARGE ONE RING OVER TWO RINGS PUNCTUATION
110BE..110C1 ; Terminal_Punctuation # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA
12470..12473 ; Terminal_Punctuation # Po [4] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON
# Total code points: 119
# Total code points: 161
# ================================================
@ -347,11 +358,17 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
0711 ; Other_Alphabetic # Mn SYRIAC LETTER SUPERSCRIPT ALAPH
0730..073F ; Other_Alphabetic # Mn [16] SYRIAC PTHAHA ABOVE..SYRIAC RWAHA
07A6..07B0 ; Other_Alphabetic # Mn [11] THAANA ABAFILI..THAANA SUKUN
0901..0902 ; Other_Alphabetic # Mn [2] DEVANAGARI SIGN CANDRABINDU..DEVANAGARI SIGN ANUSVARA
0816..0817 ; Other_Alphabetic # Mn [2] SAMARITAN MARK IN..SAMARITAN MARK IN-ALAF
081B..0823 ; Other_Alphabetic # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
0825..0827 ; Other_Alphabetic # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
0829..082C ; Other_Alphabetic # Mn [4] SAMARITAN VOWEL SIGN LONG I..SAMARITAN VOWEL SIGN SUKUN
0900..0902 ; Other_Alphabetic # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA
0903 ; Other_Alphabetic # Mc DEVANAGARI SIGN VISARGA
093E..0940 ; Other_Alphabetic # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
0941..0948 ; Other_Alphabetic # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
0949..094C ; Other_Alphabetic # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU
094E ; Other_Alphabetic # Mc DEVANAGARI VOWEL SIGN PRISHTHAMATRA E
0955 ; Other_Alphabetic # Mn DEVANAGARI VOWEL SIGN CANDRA LONG E
0962..0963 ; Other_Alphabetic # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
0981 ; Other_Alphabetic # Mn BENGALI SIGN CANDRABINDU
0982..0983 ; Other_Alphabetic # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA
@ -454,6 +471,8 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
1082 ; Other_Alphabetic # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA
1083..1084 ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E
1085..1086 ; Other_Alphabetic # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y
109C ; Other_Alphabetic # Mc MYANMAR VOWEL SIGN AITON A
109D ; Other_Alphabetic # Mn MYANMAR VOWEL SIGN AITON AI
135F ; Other_Alphabetic # Mn ETHIOPIC COMBINING GEMINATION MARK
1712..1713 ; Other_Alphabetic # Mn [2] TAGALOG VOWEL SIGN I..TAGALOG VOWEL SIGN U
1732..1733 ; Other_Alphabetic # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U
@ -476,6 +495,16 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
19C8..19C9 ; Other_Alphabetic # Mc [2] NEW TAI LUE TONE MARK-1..NEW TAI LUE TONE MARK-2
1A17..1A18 ; Other_Alphabetic # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
1A19..1A1B ; Other_Alphabetic # Mc [3] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN AE
1A55 ; Other_Alphabetic # Mc TAI THAM CONSONANT SIGN MEDIAL RA
1A56 ; Other_Alphabetic # Mn TAI THAM CONSONANT SIGN MEDIAL LA
1A57 ; Other_Alphabetic # Mc TAI THAM CONSONANT SIGN LA TANG LAI
1A58..1A5E ; Other_Alphabetic # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA
1A61 ; Other_Alphabetic # Mc TAI THAM VOWEL SIGN A
1A62 ; Other_Alphabetic # Mn TAI THAM VOWEL SIGN MAI SAT
1A63..1A64 ; Other_Alphabetic # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA
1A65..1A6C ; Other_Alphabetic # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW
1A6D..1A72 ; Other_Alphabetic # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI
1A73..1A74 ; Other_Alphabetic # Mn [2] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN MAI KANG
1B00..1B03 ; Other_Alphabetic # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
1B04 ; Other_Alphabetic # Mc BALINESE SIGN BISAH
1B35 ; Other_Alphabetic # Mc BALINESE VOWEL SIGN TEDUNG
@ -494,6 +523,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
1C24..1C2B ; Other_Alphabetic # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU
1C2C..1C33 ; Other_Alphabetic # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T
1C34..1C35 ; Other_Alphabetic # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG
1CF2 ; Other_Alphabetic # Mc VEDIC SIGN ARDHAVISARGA
24B6..24E9 ; Other_Alphabetic # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z
2DE0..2DFF ; Other_Alphabetic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
A823..A824 ; Other_Alphabetic # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I
@ -504,6 +534,14 @@ A8B4..A8C3 ; Other_Alphabetic # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAU
A926..A92A ; Other_Alphabetic # Mn [5] KAYAH LI VOWEL UE..KAYAH LI VOWEL O
A947..A951 ; Other_Alphabetic # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
A952 ; Other_Alphabetic # Mc REJANG CONSONANT SIGN H
A980..A982 ; Other_Alphabetic # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR
A983 ; Other_Alphabetic # Mc JAVANESE SIGN WIGNYAN
A9B3 ; Other_Alphabetic # Mn JAVANESE SIGN CECAK TELU
A9B4..A9B5 ; Other_Alphabetic # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG
A9B6..A9B9 ; Other_Alphabetic # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT
A9BA..A9BB ; Other_Alphabetic # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE
A9BC ; Other_Alphabetic # Mn JAVANESE VOWEL SIGN PEPET
A9BD..A9BF ; Other_Alphabetic # Mc [3] JAVANESE CONSONANT SIGN KERET..JAVANESE CONSONANT SIGN CAKRA
AA29..AA2E ; Other_Alphabetic # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE
AA2F..AA30 ; Other_Alphabetic # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI
AA31..AA32 ; Other_Alphabetic # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE
@ -512,12 +550,25 @@ AA35..AA36 ; Other_Alphabetic # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONA
AA43 ; Other_Alphabetic # Mn CHAM CONSONANT SIGN FINAL NG
AA4C ; Other_Alphabetic # Mn CHAM CONSONANT SIGN FINAL M
AA4D ; Other_Alphabetic # Mc CHAM CONSONANT SIGN FINAL H
AAB0 ; Other_Alphabetic # Mn TAI VIET MAI KANG
AAB2..AAB4 ; Other_Alphabetic # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U
AAB7..AAB8 ; Other_Alphabetic # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA
AABE ; Other_Alphabetic # Mn TAI VIET VOWEL AM
ABE3..ABE4 ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP
ABE5 ; Other_Alphabetic # Mn MEETEI MAYEK VOWEL SIGN ANAP
ABE6..ABE7 ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP
ABE8 ; Other_Alphabetic # Mn MEETEI MAYEK VOWEL SIGN UNAP
ABE9..ABEA ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG
FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
10A01..10A03 ; Other_Alphabetic # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R
10A05..10A06 ; Other_Alphabetic # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O
10A0C..10A0F ; Other_Alphabetic # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
11082 ; Other_Alphabetic # Mc KAITHI SIGN VISARGA
110B0..110B2 ; Other_Alphabetic # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II
110B3..110B6 ; Other_Alphabetic # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI
110B7..110B8 ; Other_Alphabetic # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU
# Total code points: 663
# Total code points: 759
# ================================================
@ -526,14 +577,15 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
3021..3029 ; Ideographic # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE
3038..303A ; Ideographic # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
3400..4DB5 ; Ideographic # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
4E00..9FC3 ; Ideographic # Lo [20932] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FC3
4E00..9FCB ; Ideographic # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB
F900..FA2D ; Ideographic # Lo [302] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA2D
FA30..FA6A ; Ideographic # Lo [59] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6A
FA30..FA6D ; Ideographic # Lo [62] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D
FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
20000..2A6D6 ; Ideographic # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
2A700..2B734 ; Ideographic # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
2F800..2FA1D ; Ideographic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
# Total code points: 71248
# Total code points: 75408
# ================================================
@ -577,6 +629,7 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM
07A6..07B0 ; Diacritic # Mn [11] THAANA ABAFILI..THAANA SUKUN
07EB..07F3 ; Diacritic # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
07F4..07F5 ; Diacritic # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE
0818..0819 ; Diacritic # Mn [2] SAMARITAN MARK OCCLUSION..SAMARITAN MARK DAGESH
093C ; Diacritic # Mn DEVANAGARI SIGN NUKTA
094D ; Diacritic # Mn DEVANAGARI SIGN VIRAMA
0951..0954 ; Diacritic # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT
@ -611,25 +664,35 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM
1087..108C ; Diacritic # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3
108D ; Diacritic # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE
108F ; Diacritic # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5
109A..109B ; Diacritic # Mc [2] MYANMAR SIGN KHAMTI TONE-1..MYANMAR SIGN KHAMTI TONE-3
17C9..17D3 ; Diacritic # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT
17DD ; Diacritic # Mn KHMER SIGN ATTHACAN
1939..193B ; Diacritic # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I
1A75..1A7C ; Diacritic # Mn [8] TAI THAM SIGN TONE-1..TAI THAM SIGN KHUEN-LUE KARAN
1A7F ; Diacritic # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
1B34 ; Diacritic # Mn BALINESE SIGN REREKAN
1B44 ; Diacritic # Mc BALINESE ADEG ADEG
1B6B..1B73 ; Diacritic # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
1BAA ; Diacritic # Mc SUNDANESE SIGN PAMAAEH
1C36..1C37 ; Diacritic # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA
1C78..1C7D ; Diacritic # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
1CD0..1CD2 ; Diacritic # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
1CD3 ; Diacritic # Po VEDIC SIGN NIHSHVASA
1CD4..1CE0 ; Diacritic # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
1CE1 ; Diacritic # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA
1CE2..1CE8 ; Diacritic # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
1CED ; Diacritic # Mn VEDIC SIGN TIRYAK
1D2C..1D61 ; Diacritic # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI
1D62..1D6A ; Diacritic # L& [9] LATIN SUBSCRIPT SMALL LETTER I..GREEK SUBSCRIPT SMALL LETTER CHI
1DC4..1DCF ; Diacritic # Mn [12] COMBINING MACRON-ACUTE..COMBINING ZIGZAG BELOW
1DFE..1DFF ; Diacritic # Mn [2] COMBINING LEFT ARROWHEAD ABOVE..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
1DFD..1DFF ; Diacritic # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
1FBD ; Diacritic # Sk GREEK KORONIS
1FBF..1FC1 ; Diacritic # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI
1FCD..1FCF ; Diacritic # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI
1FDD..1FDF ; Diacritic # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI
1FED..1FEF ; Diacritic # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA
1FFD..1FFE ; Diacritic # Sk [2] GREEK OXIA..GREEK DASIA
2CEF..2CF1 ; Diacritic # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS
2E2F ; Diacritic # Lm VERTICAL TILDE
302A..302F ; Diacritic # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK
3099..309A ; Diacritic # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
@ -638,13 +701,24 @@ FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COM
A66F ; Diacritic # Mn COMBINING CYRILLIC VZMET
A67C..A67D ; Diacritic # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK
A67F ; Diacritic # Lm CYRILLIC PAYEROK
A6F0..A6F1 ; Diacritic # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
A717..A71F ; Diacritic # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK
A720..A721 ; Diacritic # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE
A788 ; Diacritic # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT
A8C4 ; Diacritic # Mn SAURASHTRA SIGN VIRAMA
A8E0..A8F1 ; Diacritic # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
A92B..A92D ; Diacritic # Mn [3] KAYAH LI TONE PLOPHU..KAYAH LI TONE CALYA PLOPHU
A92E ; Diacritic # Po KAYAH LI SIGN CWI
A953 ; Diacritic # Mc REJANG VIRAMA
A9B3 ; Diacritic # Mn JAVANESE SIGN CECAK TELU
A9C0 ; Diacritic # Mc JAVANESE PANGKON
AA7B ; Diacritic # Mc MYANMAR SIGN PAO KAREN TONE
AABF ; Diacritic # Mn TAI VIET TONE MAI EK
AAC0 ; Diacritic # Lo TAI VIET TONE MAI NUENG
AAC1 ; Diacritic # Mn TAI VIET TONE MAI THO
AAC2 ; Diacritic # Lo TAI VIET TONE MAI SONG
ABEC ; Diacritic # Mc MEETEI MAYEK LUM IYEK
ABED ; Diacritic # Mn MEETEI MAYEK APUN IYEK
FB1E ; Diacritic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
FE20..FE26 ; Diacritic # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON
FF3E ; Diacritic # Sk FULLWIDTH CIRCUMFLEX ACCENT
@ -652,13 +726,14 @@ FF40 ; Diacritic # Sk FULLWIDTH GRAVE ACCENT
FF70 ; Diacritic # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
FF9E..FF9F ; Diacritic # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
FFE3 ; Diacritic # Sk FULLWIDTH MACRON
110B9..110BA ; Diacritic # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA
1D167..1D169 ; Diacritic # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
1D16D..1D172 ; Diacritic # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5
1D17B..1D182 ; Diacritic # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE
1D185..1D18B ; Diacritic # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE
1D1AA..1D1AD ; Diacritic # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO
# Total code points: 565
# Total code points: 639
# ================================================
@ -669,6 +744,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON
0E46 ; Extender # Lm THAI CHARACTER MAIYAMOK
0EC6 ; Extender # Lm LAO KO LA
1843 ; Extender # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN
1AA7 ; Extender # Lm TAI THAM SIGN MAI YAMOK
1C36 ; Extender # Mn LEPCHA SIGN RAN
1C7B ; Extender # Lm OL CHIKI RELAA
3005 ; Extender # Lm IDEOGRAPHIC ITERATION MARK
@ -677,9 +753,12 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON
30FC..30FE ; Extender # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK
A015 ; Extender # Lm YI SYLLABLE WU
A60C ; Extender # Lm VAI SYLLABLE LENGTHENER
A9CF ; Extender # Lm JAVANESE PANGRANGKEP
AA70 ; Extender # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION
AADD ; Extender # Lm TAI VIET SYMBOL SAM
FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
# Total code points: 24
# Total code points: 28
# ================================================
@ -774,7 +853,7 @@ FF9E..FF9F ; Other_Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND
# ================================================
3400..4DB5 ; Unified_Ideograph # Lo [6582] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DB5
4E00..9FC3 ; Unified_Ideograph # Lo [20932] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FC3
4E00..9FCB ; Unified_Ideograph # Lo [20940] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FCB
FA0E..FA0F ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA0E..CJK COMPATIBILITY IDEOGRAPH-FA0F
FA11 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA11
FA13..FA14 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA13..CJK COMPATIBILITY IDEOGRAPH-FA14
@ -783,8 +862,9 @@ FA21 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA21
FA23..FA24 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA23..CJK COMPATIBILITY IDEOGRAPH-FA24
FA27..FA29 ; Unified_Ideograph # Lo [3] CJK COMPATIBILITY IDEOGRAPH-FA27..CJK COMPATIBILITY IDEOGRAPH-FA29
20000..2A6D6 ; Unified_Ideograph # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6
2A700..2B734 ; Unified_Ideograph # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734
# Total code points: 70237
# Total code points: 74394
# ================================================
@ -803,14 +883,17 @@ E01F0..E0FFF ; Other_Default_Ignorable_Code_Point # Cn [3600] <reserved-E01F0>.
# ================================================
0340..0341 ; Deprecated # Mn [2] COMBINING GRAVE TONE MARK..COMBINING ACUTE TONE MARK
17A3 ; Deprecated # Lo KHMER INDEPENDENT VOWEL QAQ
17D3 ; Deprecated # Mn KHMER SIGN BATHAMASAT
0149 ; Deprecated # L& LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
0F77 ; Deprecated # Mn TIBETAN VOWEL SIGN VOCALIC RR
0F79 ; Deprecated # Mn TIBETAN VOWEL SIGN VOCALIC LL
17A3..17A4 ; Deprecated # Lo [2] KHMER INDEPENDENT VOWEL QAQ..KHMER INDEPENDENT VOWEL QAA
206A..206F ; Deprecated # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES
2329 ; Deprecated # Ps LEFT-POINTING ANGLE BRACKET
232A ; Deprecated # Pe RIGHT-POINTING ANGLE BRACKET
E0001 ; Deprecated # Cf LANGUAGE TAG
E0020..E007F ; Deprecated # Cf [96] TAG SPACE..CANCEL TAG
# Total code points: 107
# Total code points: 110
# ================================================
@ -829,7 +912,7 @@ E0020..E007F ; Deprecated # Cf [96] TAG SPACE..CANCEL TAG
1DA8 ; Soft_Dotted # Lm MODIFIER LETTER SMALL J WITH CROSSED-TAIL
1E2D ; Soft_Dotted # L& LATIN SMALL LETTER I WITH TILDE BELOW
1ECB ; Soft_Dotted # L& LATIN SMALL LETTER I WITH DOT BELOW
2071 ; Soft_Dotted # L& SUPERSCRIPT LATIN SMALL LETTER I
2071 ; Soft_Dotted # Lm SUPERSCRIPT LATIN SMALL LETTER I
2148..2149 ; Soft_Dotted # L& [2] DOUBLE-STRUCK ITALIC SMALL I..DOUBLE-STRUCK ITALIC SMALL J
2C7C ; Soft_Dotted # L& LATIN SUBSCRIPT SMALL LETTER J
1D422..1D423 ; Soft_Dotted # L& [2] MATHEMATICAL BOLD SMALL I..MATHEMATICAL BOLD SMALL J
@ -852,8 +935,11 @@ E0020..E007F ; Deprecated # Cf [96] TAG SPACE..CANCEL TAG
0E40..0E44 ; Logical_Order_Exception # Lo [5] THAI CHARACTER SARA E..THAI CHARACTER SARA AI MAIMALAI
0EC0..0EC4 ; Logical_Order_Exception # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI
AAB5..AAB6 ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O
AAB9 ; Logical_Order_Exception # Lo TAI VIET VOWEL UEA
AABB..AABC ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL AUE..TAI VIET VOWEL AY
# Total code points: 10
# Total code points: 15
# ================================================
@ -899,19 +985,25 @@ E0020..E007F ; Deprecated # Cf [96] TAG SPACE..CANCEL TAG
2047..2049 ; STerm # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK
2E2E ; STerm # Po REVERSED QUESTION MARK
3002 ; STerm # Po IDEOGRAPHIC FULL STOP
A4FF ; STerm # Po LISU PUNCTUATION FULL STOP
A60E..A60F ; STerm # Po [2] VAI FULL STOP..VAI QUESTION MARK
A6F3 ; STerm # Po BAMUM FULL STOP
A6F7 ; STerm # Po BAMUM QUESTION MARK
A876..A877 ; STerm # Po [2] PHAGS-PA MARK SHAD..PHAGS-PA MARK DOUBLE SHAD
A8CE..A8CF ; STerm # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA
A92F ; STerm # Po KAYAH LI SIGN SHYA
A9C8..A9C9 ; STerm # Po [2] JAVANESE PADA LINGSA..JAVANESE PADA LUNGSI
AA5D..AA5F ; STerm # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA
ABEB ; STerm # Po MEETEI MAYEK CHEIKHEI
FE52 ; STerm # Po SMALL FULL STOP
FE56..FE57 ; STerm # Po [2] SMALL QUESTION MARK..SMALL EXCLAMATION MARK
FF01 ; STerm # Po FULLWIDTH EXCLAMATION MARK
FF0E ; STerm # Po FULLWIDTH FULL STOP
FF1F ; STerm # Po FULLWIDTH QUESTION MARK
FF61 ; STerm # Po HALFWIDTH IDEOGRAPHIC FULL STOP
110BE..110C1 ; STerm # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA
# Total code points: 56
# Total code points: 66
# ================================================
@ -1024,8 +1116,8 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
239B..23B3 ; Pattern_Syntax # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM
23B4..23DB ; Pattern_Syntax # So [40] TOP SQUARE BRACKET..FUSE
23DC..23E1 ; Pattern_Syntax # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET
23E2..23E7 ; Pattern_Syntax # So [6] WHITE TRAPEZIUM..ELECTRICAL INTERSECTION
23E8..23FF ; Pattern_Syntax # Cn [24] <reserved-23E8>..<reserved-23FF>
23E2..23E8 ; Pattern_Syntax # So [7] WHITE TRAPEZIUM..DECIMAL EXPONENT SYMBOL
23E9..23FF ; Pattern_Syntax # Cn [23] <reserved-23E9>..<reserved-23FF>
2400..2426 ; Pattern_Syntax # So [39] SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO
2427..243F ; Pattern_Syntax # Cn [25] <reserved-2427>..<reserved-243F>
2440..244A ; Pattern_Syntax # So [11] OCR HOOK..OCR DOUBLE BACKSLASH
@ -1038,12 +1130,14 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
25F8..25FF ; Pattern_Syntax # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE
2600..266E ; Pattern_Syntax # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN
266F ; Pattern_Syntax # Sm MUSIC SHARP SIGN
2670..269D ; Pattern_Syntax # So [46] WEST SYRIAC CROSS..OUTLINED WHITE STAR
269E..269F ; Pattern_Syntax # Cn [2] <reserved-269E>..<reserved-269F>
26A0..26BC ; Pattern_Syntax # So [29] WARNING SIGN..SESQUIQUADRATE
26BD..26BF ; Pattern_Syntax # Cn [3] <reserved-26BD>..<reserved-26BF>
26C0..26C3 ; Pattern_Syntax # So [4] WHITE DRAUGHTS MAN..BLACK DRAUGHTS KING
26C4..2700 ; Pattern_Syntax # Cn [61] <reserved-26C4>..<reserved-2700>
2670..26CD ; Pattern_Syntax # So [94] WEST SYRIAC CROSS..DISABLED CAR
26CE ; Pattern_Syntax # Cn <reserved-26CE>
26CF..26E1 ; Pattern_Syntax # So [19] PICK..RESTRICTED LEFT ENTRY-2
26E2 ; Pattern_Syntax # Cn <reserved-26E2>
26E3 ; Pattern_Syntax # So HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE
26E4..26E7 ; Pattern_Syntax # Cn [4] <reserved-26E4>..<reserved-26E7>
26E8..26FF ; Pattern_Syntax # So [24] BLACK CROSS ON SHIELD..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
2700 ; Pattern_Syntax # Cn <reserved-2700>
2701..2704 ; Pattern_Syntax # So [4] UPPER BLADE SCISSORS..WHITE SCISSORS
2705 ; Pattern_Syntax # Cn <reserved-2705>
2706..2709 ; Pattern_Syntax # So [4] TELEPHONE LOCATION SIGN..ENVELOPE
@ -1056,9 +1150,7 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
274E ; Pattern_Syntax # Cn <reserved-274E>
274F..2752 ; Pattern_Syntax # So [4] LOWER RIGHT DROP-SHADOWED WHITE SQUARE..UPPER RIGHT SHADOWED WHITE SQUARE
2753..2755 ; Pattern_Syntax # Cn [3] <reserved-2753>..<reserved-2755>
2756 ; Pattern_Syntax # So BLACK DIAMOND MINUS WHITE X
2757 ; Pattern_Syntax # Cn <reserved-2757>
2758..275E ; Pattern_Syntax # So [7] LIGHT VERTICAL BAR..HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT
2756..275E ; Pattern_Syntax # So [9] BLACK DIAMOND MINUS WHITE X..HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT
275F..2760 ; Pattern_Syntax # Cn [2] <reserved-275F>..<reserved-2760>
2761..2767 ; Pattern_Syntax # So [7] CURVED STEM PARAGRAPH SIGN ORNAMENT..ROTATED FLORAL HEART BULLET
2768 ; Pattern_Syntax # Ps MEDIUM LEFT PARENTHESIS ORNAMENT
@ -1138,8 +1230,8 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
2B45..2B46 ; Pattern_Syntax # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW
2B47..2B4C ; Pattern_Syntax # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR
2B4D..2B4F ; Pattern_Syntax # Cn [3] <reserved-2B4D>..<reserved-2B4F>
2B50..2B54 ; Pattern_Syntax # So [5] WHITE MEDIUM STAR..WHITE RIGHT-POINTING PENTAGON
2B55..2BFF ; Pattern_Syntax # Cn [171] <reserved-2B55>..<reserved-2BFF>
2B50..2B59 ; Pattern_Syntax # So [10] WHITE MEDIUM STAR..HEAVY CIRCLED SALTIRE
2B5A..2BFF ; Pattern_Syntax # Cn [166] <reserved-2B5A>..<reserved-2BFF>
2E00..2E01 ; Pattern_Syntax # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER
2E02 ; Pattern_Syntax # Pi LEFT SUBSTITUTION BRACKET
2E03 ; Pattern_Syntax # Pf RIGHT SUBSTITUTION BRACKET
@ -1171,8 +1263,8 @@ E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION S
2E29 ; Pattern_Syntax # Pe RIGHT DOUBLE PARENTHESIS
2E2A..2E2E ; Pattern_Syntax # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK
2E2F ; Pattern_Syntax # Lm VERTICAL TILDE
2E30 ; Pattern_Syntax # Po RING POINT
2E31..2E7F ; Pattern_Syntax # Cn [79] <reserved-2E31>..<reserved-2E7F>
2E30..2E31 ; Pattern_Syntax # Po [2] RING POINT..WORD SEPARATOR MIDDLE DOT
2E32..2E7F ; Pattern_Syntax # Cn [78] <reserved-2E32>..<reserved-2E7F>
3001..3003 ; Pattern_Syntax # Po [3] IDEOGRAPHIC COMMA..DITTO MARK
3008 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET
3009 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET

File diff suppressed because it is too large Load Diff

View File

@ -1,10 +1,10 @@
# WordBreakProperty-5.1.0.txt
# Date: 2008-03-20, 17:55:36 GMT [MD]
# WordBreakProperty-5.2.0.txt
# Date: 2009-07-12, 04:17:35 GMT [MD]
#
# Unicode Character Database
# Copyright (c) 1991-2008 Unicode, Inc.
# Copyright (c) 1991-2009 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see UCD.html
# For documentation, see http://www.unicode.org/reports/tr44/
# ================================================
@ -58,14 +58,19 @@
0730..074A ; Extend # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH
07A6..07B0 ; Extend # Mn [11] THAANA ABAFILI..THAANA SUKUN
07EB..07F3 ; Extend # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
0901..0902 ; Extend # Mn [2] DEVANAGARI SIGN CANDRABINDU..DEVANAGARI SIGN ANUSVARA
0816..0819 ; Extend # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH
081B..0823 ; Extend # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
0900..0902 ; Extend # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA
0903 ; Extend # Mc DEVANAGARI SIGN VISARGA
093C ; Extend # Mn DEVANAGARI SIGN NUKTA
093E..0940 ; Extend # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II
0941..0948 ; Extend # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI
0949..094C ; Extend # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU
094D ; Extend # Mn DEVANAGARI SIGN VIRAMA
0951..0954 ; Extend # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT
094E ; Extend # Mc DEVANAGARI VOWEL SIGN PRISHTHAMATRA E
0951..0955 ; Extend # Mn [5] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN CANDRA LONG E
0962..0963 ; Extend # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL
0981 ; Extend # Mn BENGALI SIGN CANDRABINDU
0982..0983 ; Extend # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA
@ -190,6 +195,8 @@
1087..108C ; Extend # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3
108D ; Extend # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE
108F ; Extend # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5
109A..109C ; Extend # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A
109D ; Extend # Mn MYANMAR VOWEL SIGN AITON AI
135F ; Extend # Mn ETHIOPIC COMBINING GEMINATION MARK
1712..1714 ; Extend # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA
1732..1734 ; Extend # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD
@ -216,6 +223,18 @@
19C8..19C9 ; Extend # Mc [2] NEW TAI LUE TONE MARK-1..NEW TAI LUE TONE MARK-2
1A17..1A18 ; Extend # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U
1A19..1A1B ; Extend # Mc [3] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN AE
1A55 ; Extend # Mc TAI THAM CONSONANT SIGN MEDIAL RA
1A56 ; Extend # Mn TAI THAM CONSONANT SIGN MEDIAL LA
1A57 ; Extend # Mc TAI THAM CONSONANT SIGN LA TANG LAI
1A58..1A5E ; Extend # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA
1A60 ; Extend # Mn TAI THAM SIGN SAKOT
1A61 ; Extend # Mc TAI THAM VOWEL SIGN A
1A62 ; Extend # Mn TAI THAM VOWEL SIGN MAI SAT
1A63..1A64 ; Extend # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA
1A65..1A6C ; Extend # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW
1A6D..1A72 ; Extend # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI
1A73..1A7C ; Extend # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN
1A7F ; Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT
1B00..1B03 ; Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG
1B04 ; Extend # Mc BALINESE SIGN BISAH
1B34 ; Extend # Mn BALINESE SIGN REREKAN
@ -238,20 +257,28 @@
1C2C..1C33 ; Extend # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T
1C34..1C35 ; Extend # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG
1C36..1C37 ; Extend # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA
1CD0..1CD2 ; Extend # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
1CD4..1CE0 ; Extend # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA
1CE1 ; Extend # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA
1CE2..1CE8 ; Extend # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL
1CED ; Extend # Mn VEDIC SIGN TIRYAK
1CF2 ; Extend # Mc VEDIC SIGN ARDHAVISARGA
1DC0..1DE6 ; Extend # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z
1DFE..1DFF ; Extend # Mn [2] COMBINING LEFT ARROWHEAD ABOVE..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
1DFD..1DFF ; Extend # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
200C..200D ; Extend # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
20D0..20DC ; Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE
20DD..20E0 ; Extend # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH
20E1 ; Extend # Mn COMBINING LEFT RIGHT ARROW ABOVE
20E2..20E4 ; Extend # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE
20E5..20F0 ; Extend # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE
2CEF..2CF1 ; Extend # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS
2DE0..2DFF ; Extend # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
302A..302F ; Extend # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK
3099..309A ; Extend # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
A66F ; Extend # Mn COMBINING CYRILLIC VZMET
A670..A672 ; Extend # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN
A67C..A67D ; Extend # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK
A6F0..A6F1 ; Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
A802 ; Extend # Mn SYLOTI NAGRI SIGN DVISVARA
A806 ; Extend # Mn SYLOTI NAGRI SIGN HASANTA
A80B ; Extend # Mn SYLOTI NAGRI SIGN ANUSVARA
@ -261,9 +288,18 @@ A827 ; Extend # Mc SYLOTI NAGRI VOWEL SIGN OO
A880..A881 ; Extend # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA
A8B4..A8C3 ; Extend # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU
A8C4 ; Extend # Mn SAURASHTRA SIGN VIRAMA
A8E0..A8F1 ; Extend # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA
A926..A92D ; Extend # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU
A947..A951 ; Extend # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R
A952..A953 ; Extend # Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA
A980..A982 ; Extend # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR
A983 ; Extend # Mc JAVANESE SIGN WIGNYAN
A9B3 ; Extend # Mn JAVANESE SIGN CECAK TELU
A9B4..A9B5 ; Extend # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG
A9B6..A9B9 ; Extend # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT
A9BA..A9BB ; Extend # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE
A9BC ; Extend # Mn JAVANESE VOWEL SIGN PEPET
A9BD..A9C0 ; Extend # Mc [4] JAVANESE CONSONANT SIGN KERET..JAVANESE PANGKON
AA29..AA2E ; Extend # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE
AA2F..AA30 ; Extend # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI
AA31..AA32 ; Extend # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE
@ -272,6 +308,19 @@ AA35..AA36 ; Extend # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA
AA43 ; Extend # Mn CHAM CONSONANT SIGN FINAL NG
AA4C ; Extend # Mn CHAM CONSONANT SIGN FINAL M
AA4D ; Extend # Mc CHAM CONSONANT SIGN FINAL H
AA7B ; Extend # Mc MYANMAR SIGN PAO KAREN TONE
AAB0 ; Extend # Mn TAI VIET MAI KANG
AAB2..AAB4 ; Extend # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U
AAB7..AAB8 ; Extend # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA
AABE..AABF ; Extend # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK
AAC1 ; Extend # Mn TAI VIET TONE MAI THO
ABE3..ABE4 ; Extend # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP
ABE5 ; Extend # Mn MEETEI MAYEK VOWEL SIGN ANAP
ABE6..ABE7 ; Extend # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP
ABE8 ; Extend # Mn MEETEI MAYEK VOWEL SIGN UNAP
ABE9..ABEA ; Extend # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG
ABEC ; Extend # Mc MEETEI MAYEK LUM IYEK
ABED ; Extend # Mn MEETEI MAYEK APUN IYEK
FB1E ; Extend # Mn HEBREW POINT JUDEO-SPANISH VARIKA
FE00..FE0F ; Extend # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16
FE20..FE26 ; Extend # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON
@ -282,6 +331,12 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
10A0C..10A0F ; Extend # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
10A38..10A3A ; Extend # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
10A3F ; Extend # Mn KHAROSHTHI VIRAMA
11080..11081 ; Extend # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA
11082 ; Extend # Mc KAITHI SIGN VISARGA
110B0..110B2 ; Extend # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II
110B3..110B6 ; Extend # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI
110B7..110B8 ; Extend # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU
110B9..110BA ; Extend # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA
1D165..1D166 ; Extend # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM
1D167..1D169 ; Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3
1D16D..1D172 ; Extend # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5
@ -291,7 +346,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
1D242..1D244 ; Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
# Total code points: 1285
# Total code points: 1455
# ================================================
@ -300,13 +355,13 @@ E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
06DD ; Format # Cf ARABIC END OF AYAH
070F ; Format # Cf SYRIAC ABBREVIATION MARK
17B4..17B5 ; Format # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
200B ; Format # Cf ZERO WIDTH SPACE
200E..200F ; Format # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK
202A..202E ; Format # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
2060..2064 ; Format # Cf [5] WORD JOINER..INVISIBLE PLUS
206A..206F ; Format # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES
FEFF ; Format # Cf ZERO WIDTH NO-BREAK SPACE
FFF9..FFFB ; Format # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR
110BD ; Format # Cf KAITHI NUMBER SIGN
1D173..1D17A ; Format # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE
E0001 ; Format # Cf LANGUAGE TAG
E0020..E007F ; Format # Cf [96] TAG SPACE..CANCEL TAG
@ -362,7 +417,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
038E..03A1 ; ALetter # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO
03A3..03F5 ; ALetter # L& [83] GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL
03F7..0481 ; ALetter # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA
048A..0523 ; ALetter # L& [154] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EN WITH MIDDLE HOOK
048A..0525 ; ALetter # L& [156] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER PE WITH DESCENDER
0531..0556 ; ALetter # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH
0559 ; ALetter # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING
0561..0587 ; ALetter # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN
@ -386,13 +441,17 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
07CA..07EA ; ALetter # Lo [33] NKO LETTER A..NKO LETTER JONA RA
07F4..07F5 ; ALetter # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE
07FA ; ALetter # Lm NKO LAJANYALAN
0800..0815 ; ALetter # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF
081A ; ALetter # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT
0824 ; ALetter # Lm SAMARITAN MODIFIER LETTER SHORT A
0828 ; ALetter # Lm SAMARITAN MODIFIER LETTER I
0904..0939 ; ALetter # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA
093D ; ALetter # Lo DEVANAGARI SIGN AVAGRAHA
0950 ; ALetter # Lo DEVANAGARI OM
0958..0961 ; ALetter # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL
0971 ; ALetter # Lm DEVANAGARI SIGN HIGH SPACING DOT
0972 ; ALetter # Lo DEVANAGARI LETTER CANDRA A
097B..097F ; ALetter # Lo [5] DEVANAGARI LETTER GGA..DEVANAGARI LETTER BBA
0979..097F ; ALetter # Lo [7] DEVANAGARI LETTER ZHA..DEVANAGARI LETTER BBA
0985..098C ; ALetter # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L
098F..0990 ; ALetter # Lo [2] BENGALI LETTER E..BENGALI LETTER AI
0993..09A8 ; ALetter # Lo [22] BENGALI LETTER O..BENGALI LETTER NA
@ -479,10 +538,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
10A0..10C5 ; ALetter # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE
10D0..10FA ; ALetter # Lo [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN
10FC ; ALetter # Lm MODIFIER LETTER GEORGIAN NAR
1100..1159 ; ALetter # Lo [90] HANGUL CHOSEONG KIYEOK..HANGUL CHOSEONG YEORINHIEUH
115F..11A2 ; ALetter # Lo [68] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG SSANGARAEA
11A8..11F9 ; ALetter # Lo [82] HANGUL JONGSEONG KIYEOK..HANGUL JONGSEONG YEORINHIEUH
1200..1248 ; ALetter # Lo [73] ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA
1100..1248 ; ALetter # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA
124A..124D ; ALetter # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE
1250..1256 ; ALetter # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO
1258 ; ALetter # Lo ETHIOPIC SYLLABLE QHWA
@ -501,7 +557,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
1380..138F ; ALetter # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE
13A0..13F4 ; ALetter # Lo [85] CHEROKEE LETTER A..CHEROKEE LETTER YV
1401..166C ; ALetter # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA
166F..1676 ; ALetter # Lo [8] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS NNGAA
166F..167F ; ALetter # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W
1681..169A ; ALetter # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH
16A0..16EA ; ALetter # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X
16EE..16F0 ; ALetter # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL
@ -516,6 +572,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
1844..1877 ; ALetter # Lo [52] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER MANCHU ZHA
1880..18A8 ; ALetter # Lo [41] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI BHA
18AA ; ALetter # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA
18B0..18F5 ; ALetter # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S
1900..191C ; ALetter # Lo [29] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER HA
1A00..1A16 ; ALetter # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA
1B05..1B33 ; ALetter # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA
@ -526,6 +583,8 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
1C4D..1C4F ; ALetter # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA
1C5A..1C77 ; ALetter # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
1C78..1C7D ; ALetter # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
1CE9..1CEC ; ALetter # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL
1CEE..1CF1 ; ALetter # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA
1D00..1D2B ; ALetter # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL
1D2C..1D61 ; ALetter # Lm [54] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL CHI
1D62..1D77 ; ALetter # L& [22] LATIN SUBSCRIPT SMALL LETTER I..LATIN SMALL LETTER TURNED G
@ -551,8 +610,8 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
1FE0..1FEC ; ALetter # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA
1FF2..1FF4 ; ALetter # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
1FF6..1FFC ; ALetter # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
2071 ; ALetter # L& SUPERSCRIPT LATIN SMALL LETTER I
207F ; ALetter # L& SUPERSCRIPT LATIN SMALL LETTER N
2071 ; ALetter # Lm SUPERSCRIPT LATIN SMALL LETTER I
207F ; ALetter # Lm SUPERSCRIPT LATIN SMALL LETTER N
2090..2094 ; ALetter # Lm [5] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA
2102 ; ALetter # L& DOUBLE-STRUCK CAPITAL C
2107 ; ALetter # L& EULER CONSTANT
@ -575,10 +634,10 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
24B6..24E9 ; ALetter # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z
2C00..2C2E ; ALetter # L& [47] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
2C30..2C5E ; ALetter # L& [47] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE
2C60..2C6F ; ALetter # L& [16] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN CAPITAL LETTER TURNED A
2C71..2C7C ; ALetter # L& [12] LATIN SMALL LETTER V WITH RIGHT HOOK..LATIN SUBSCRIPT SMALL LETTER J
2C60..2C7C ; ALetter # L& [29] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN SUBSCRIPT SMALL LETTER J
2C7D ; ALetter # Lm MODIFIER LETTER CAPITAL V
2C80..2CE4 ; ALetter # L& [101] COPTIC CAPITAL LETTER ALFA..COPTIC SYMBOL KAI
2C7E..2CE4 ; ALetter # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI
2CEB..2CEE ; ALetter # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA
2D00..2D25 ; ALetter # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE
2D30..2D65 ; ALetter # Lo [54] TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ
2D6F ; ALetter # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK
@ -601,6 +660,8 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
A000..A014 ; ALetter # Lo [21] YI SYLLABLE IT..YI SYLLABLE E
A015 ; ALetter # Lm YI SYLLABLE WU
A016..A48C ; ALetter # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR
A4D0..A4F7 ; ALetter # Lo [40] LISU LETTER BA..LISU LETTER OE
A4F8..A4FD ; ALetter # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU
A500..A60B ; ALetter # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG
A60C ; ALetter # Lm VAI SYLLABLE LENGTHENER
A610..A61F ; ALetter # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG
@ -610,6 +671,8 @@ A662..A66D ; ALetter # L& [12] CYRILLIC CAPITAL LETTER SOFT DE..CYRILLIC SMA
A66E ; ALetter # Lo CYRILLIC LETTER MULTIOCULAR O
A67F ; ALetter # Lm CYRILLIC PAYEROK
A680..A697 ; ALetter # L& [24] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER SHWE
A6A0..A6E5 ; ALetter # Lo [70] BAMUM LETTER A..BAMUM LETTER KI
A6E6..A6EF ; ALetter # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM
A717..A71F ; ALetter # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK
A722..A76F ; ALetter # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON
A770 ; ALetter # Lm MODIFIER LETTER US
@ -622,12 +685,20 @@ A807..A80A ; ALetter # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER G
A80C..A822 ; ALetter # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO
A840..A873 ; ALetter # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU
A882..A8B3 ; ALetter # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA
A8F2..A8F7 ; ALetter # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA
A8FB ; ALetter # Lo DEVANAGARI HEADSTROKE
A90A..A925 ; ALetter # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO
A930..A946 ; ALetter # Lo [23] REJANG LETTER KA..REJANG LETTER A
A960..A97C ; ALetter # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH
A984..A9B2 ; ALetter # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA
A9CF ; ALetter # Lm JAVANESE PANGRANGKEP
AA00..AA28 ; ALetter # Lo [41] CHAM LETTER A..CHAM LETTER HA
AA40..AA42 ; ALetter # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG
AA44..AA4B ; ALetter # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS
ABC0..ABE2 ; ALetter # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM
AC00..D7A3 ; ALetter # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH
D7B0..D7C6 ; ALetter # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E
D7CB..D7FB ; ALetter # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH
FB00..FB06 ; ALetter # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
FB13..FB17 ; ALetter # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
FB1D ; ALetter # Lo HEBREW LETTER YOD WITH HIRIQ
@ -677,15 +748,22 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
1080A..10835 ; ALetter # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO
10837..10838 ; ALetter # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE
1083C ; ALetter # Lo CYPRIOT SYLLABLE ZA
1083F ; ALetter # Lo CYPRIOT SYLLABLE ZO
1083F..10855 ; ALetter # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW
10900..10915 ; ALetter # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU
10920..10939 ; ALetter # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C
10A00 ; ALetter # Lo KHAROSHTHI LETTER A
10A10..10A13 ; ALetter # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA
10A15..10A17 ; ALetter # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA
10A19..10A33 ; ALetter # Lo [27] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER TTTHA
10A60..10A7C ; ALetter # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH
10B00..10B35 ; ALetter # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE
10B40..10B55 ; ALetter # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW
10B60..10B72 ; ALetter # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW
10C00..10C48 ; ALetter # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH
11083..110AF ; ALetter # Lo [45] KAITHI LETTER A..KAITHI LETTER HA
12000..1236E ; ALetter # Lo [879] CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM
12400..12462 ; ALetter # Nl [99] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER
13000..1342E ; ALetter # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032
1D400..1D454 ; ALetter # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G
1D456..1D49C ; ALetter # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A
1D49E..1D49F ; ALetter # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D
@ -717,7 +795,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
1D7AA..1D7C2 ; ALetter # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA
1D7C4..1D7CB ; ALetter # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA
# Total code points: 21903
# Total code points: 23694
# ================================================
@ -788,7 +866,9 @@ FF0E ; MidNumLet # Po FULLWIDTH FULL STOP
17E0..17E9 ; Numeric # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE
1810..1819 ; Numeric # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE
1946..194F ; Numeric # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE
19D0..19D9 ; Numeric # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE
19D0..19DA ; Numeric # Nd [11] NEW TAI LUE DIGIT ZERO..NEW TAI LUE THAM DIGIT ONE
1A80..1A89 ; Numeric # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE
1A90..1A99 ; Numeric # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE
1B50..1B59 ; Numeric # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE
1BB0..1BB9 ; Numeric # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE
1C40..1C49 ; Numeric # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE
@ -796,11 +876,13 @@ FF0E ; MidNumLet # Po FULLWIDTH FULL STOP
A620..A629 ; Numeric # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE
A8D0..A8D9 ; Numeric # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE
A900..A909 ; Numeric # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE
A9D0..A9D9 ; Numeric # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE
AA50..AA59 ; Numeric # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE
ABF0..ABF9 ; Numeric # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE
104A0..104A9 ; Numeric # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE
1D7CE..1D7FF ; Numeric # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
# Total code points: 361
# Total code points: 402
# ================================================

View File

@ -1 +1 @@
Unicode 5.1 support
Unicode 5.2 support

View File

@ -2,7 +2,7 @@ USING: help.markup help.syntax strings ;
IN: unicode
ARTICLE: "unicode" "Unicode support"
"The " { $vocab-link "unicode" } " vocabulary and its sub-vocabularies implement support for the Unicode 5.1 character set."
"The " { $vocab-link "unicode" } " vocabulary and its sub-vocabularies implement support for the Unicode 5.2 character set."
$nl
"The Unicode character set contains most of the world's writing systems. Unicode is intended as a replacement for, and is a superset of, such legacy character sets as ASCII, Latin1, MacRoman, and so on. Unicode characters are called " { $emphasis "code points" } "; Factor's " { $link "strings" } " are sequences of code points."
$nl

View File

@ -67,7 +67,7 @@ DEFER: finalize-rule-set
[ resolve-delegate ] each-rule ;
: ?update ( keyword-map/f keyword-map -- keyword-map )
over [ dupd update ] [ nip clone ] if ;
over [ assoc-union! ] [ nip clone ] if ;
: import-keywords ( parent child -- )
over [ [ keywords>> ] bi@ ?update ] dip (>>keywords) ;

View File

@ -8,7 +8,7 @@ f <keyword-map> dup "k" set
{ "int" KEYWORD1 }
{ "void" KEYWORD2 }
{ "size_t" KEYWORD3 }
} update
} assoc-union! drop
[ 3 ] [ "k" get assoc-size ] unit-test
[ KEYWORD1 ] [ "int" "k" get at ] unit-test
@ -21,7 +21,7 @@ t <keyword-map> dup "k" set
{ "Foo" KEYWORD1 }
{ "bbar" KEYWORD2 }
{ "BAZ" KEYWORD3 }
} update
} assoc-union! drop
[ KEYWORD1 ] [ "fOo" "k" get at ] unit-test

View File

@ -80,7 +80,7 @@ ARTICLE: "assocs-lookup" "Lookup and querying of assocs"
{ $see-also at* assoc-size } ;
ARTICLE: "assocs-values" "Transposed assoc operations"
"default Most assoc words take a key and find the corresponding value. The following words take a value and find the corresponding key:"
"Most assoc words take a key and find the corresponding value. The following words take a value and find the corresponding key:"
{ $subsections
value-at
value-at*
@ -93,12 +93,16 @@ ARTICLE: "assocs-sets" "Set-theoretic operations on assocs"
{ $subsections
assoc-subset?
assoc-intersect
update
assoc-union
assoc-diff
substitute
extract-keys
}
"Destructive operations:"
{ $subsections
assoc-union!
assoc-diff!
}
{ $see-also key? assoc-any? assoc-all? "sets" } ;
ARTICLE: "assocs-mutation" "Storing keys and values in assocs"
@ -135,17 +139,21 @@ $nl
assoc-map
assoc-filter
assoc-filter-as
assoc-partition
assoc-any?
assoc-all?
}
"Additional combinators:"
"Mapping between assocs and sequences:"
{ $subsections
assoc-partition
cache
2cache
map>assoc
assoc>map
assoc-map-as
}
"Destructive combinators:"
{ $subsections
assoc-filter!
cache
2cache
} ;
ARTICLE: "assocs" "Associative mapping operations"
@ -260,7 +268,12 @@ HELP: assoc-filter-as
{ $values { "assoc" assoc } { "quot" { $quotation "( key value -- ? )" } } { "exemplar" assoc } { "subassoc" "a new assoc" } }
{ $description "Outputs an assoc of the same type as " { $snippet "exemplar" } " consisting of all entries for which the predicate quotation yields true." } ;
{ assoc-filter assoc-filter-as } related-words
HELP: assoc-filter!
{ $values { "assoc" assoc } { "quot" { $quotation "( key value -- ? )" } } }
{ $description "Removes all entries for which the predicate quotation yields false, keeping only those for which it yields true." }
{ $side-effects "assoc" } ;
{ assoc-filter assoc-filter-as assoc-filter! } related-words
HELP: assoc-partition
{ $values
@ -333,7 +346,7 @@ HELP: assoc-intersect
{ $description "Outputs an assoc consisting of all entries from " { $snippet "assoc2" } " such that the key is also present in " { $snippet "assoc1" } "." }
{ $notes "The values of the keys in " { $snippet "assoc1" } " are disregarded, so this word is usually used for set-theoretic calculations where the assoc in question either has dummy sentinels as values, or the values equal the keys." } ;
HELP: update
HELP: assoc-union!
{ $values { "assoc1" assoc } { "assoc2" assoc } }
{ $description "Adds all entries from " { $snippet "assoc2" } " to " { $snippet "assoc1" } "." }
{ $side-effects "assoc1" } ;
@ -347,6 +360,11 @@ HELP: assoc-diff
{ $description "Outputs an assoc consisting of all entries from " { $snippet "assoc1" } " whose key is not contained in " { $snippet "assoc2" } "." }
;
HELP: assoc-diff!
{ $values { "assoc1" assoc } { "assoc2" assoc } }
{ $description "Removes all entries from " { $snippet "assoc1" } " whose key is contained in " { $snippet "assoc2" } "." }
{ $side-effects "assoc1" } ;
HELP: substitute
{ $values { "seq" sequence } { "assoc" assoc } { "newseq" sequence } }
{ $description "Creates a new sequence where elements of " { $snippet "seq" } " which appear as keys in " { $snippet "assoc" } " are replaced by the corresponding values, and all other elements are unchanged." } ;

View File

@ -32,11 +32,24 @@ IN: assocs.tests
[ f ] [ H{ { 1 2 } { 2 2 } } [ = ] assoc-all? ] unit-test
[ H{ } ] [ H{ { t f } { f t } } [ 2drop f ] assoc-filter ] unit-test
[ H{ } ] [ H{ { t f } { f t } } clone dup [ 2drop f ] assoc-filter! drop ] unit-test
[ H{ } ] [ H{ { t f } { f t } } clone [ 2drop f ] assoc-filter! ] unit-test
[ H{ { 3 4 } { 4 5 } { 6 7 } } ] [
H{ { 1 2 } { 2 3 } { 3 4 } { 4 5 } { 6 7 } }
[ drop 3 >= ] assoc-filter
] unit-test
[ H{ { 3 4 } { 4 5 } { 6 7 } } ] [
H{ { 1 2 } { 2 3 } { 3 4 } { 4 5 } { 6 7 } } clone
[ drop 3 >= ] assoc-filter!
] unit-test
[ H{ { 3 4 } { 4 5 } { 6 7 } } ] [
H{ { 1 2 } { 2 3 } { 3 4 } { 4 5 } { 6 7 } } clone dup
[ drop 3 >= ] assoc-filter! drop
] unit-test
[ 21 ] [
0 H{
{ 1 2 }
@ -69,6 +82,20 @@ H{ } clone "cache-test" set
assoc-union
] unit-test
[
H{ { 1 2 } { 2 3 } { 6 5 } }
] [
H{ { 2 4 } { 6 5 } } clone dup H{ { 1 2 } { 2 3 } }
assoc-union! drop
] unit-test
[
H{ { 1 2 } { 2 3 } { 6 5 } }
] [
H{ { 2 4 } { 6 5 } } clone H{ { 1 2 } { 2 3 } }
assoc-union!
] unit-test
[ H{ { 1 2 } { 2 3 } } t ] [
f H{ { 1 2 } { 2 3 } } [ assoc-union ] 2keep swap assoc-union dupd =
] unit-test
@ -79,6 +106,24 @@ H{ } clone "cache-test" set
H{ { 1 f } } H{ { 1 f } } assoc-intersect
] unit-test
[
H{ { 3 4 } }
] [
H{ { 1 2 } { 3 4 } } H{ { 1 3 } } assoc-diff
] unit-test
[
H{ { 3 4 } }
] [
H{ { 1 2 } { 3 4 } } clone dup H{ { 1 3 } } assoc-diff! drop
] unit-test
[
H{ { 3 4 } }
] [
H{ { 1 2 } { 3 4 } } clone H{ { 1 3 } } assoc-diff!
] unit-test
[ H{ { "hi" 2 } { 3 4 } } ]
[ "hi" 1 H{ { 1 2 } { 3 4 } } clone [ rename-at ] keep ]
unit-test

View File

@ -72,6 +72,12 @@ PRIVATE>
: assoc-filter ( assoc quot -- subassoc )
over assoc-filter-as ; inline
! Destructive counterpart of assoc-filter. Visits every entry of assoc with
! assoc-each and calls delete-at on each entry for which quot yields f, so
! only the entries for which quot yields true remain. The outer
! [ ... ] [ drop ] 2bi runs the deletion pass and then leaves the same
! (mutated) assoc on the stack as the result.
! NOTE(review): entries are deleted while assoc-each is iterating over the
! same assoc -- confirm this is safe for every assoc implementation.
: assoc-filter! ( assoc quot -- assoc )
[
! Per entry ( key value ): run quot on the pair, keep the key, then
! delete-at that key from assoc unless quot's result was true.
over [ [ [ drop ] 2bi ] dip [ delete-at ] 2curry unless ] 2curry
assoc-each
] [ drop ] 2bi ; inline
: assoc-partition ( assoc quot -- true-assoc false-assoc )
[ (assoc-each) partition ] [ drop ] 2bi
[ assoc-like ] curry bi@ ; inline
@ -119,21 +125,27 @@ M: assoc assoc-clone-like ( assoc exemplar -- newassoc )
: assoc-intersect ( assoc1 assoc2 -- intersection )
swap [ nip key? ] curry assoc-filter ;
: update ( assoc1 assoc2 -- )
swap [ set-at ] with-assoc assoc-each ;
! Destructive union: walks assoc2 with assoc-each and stores each of its
! entries into assoc1 with set-at, so a key present in both assocs ends up
! with assoc2's value. "over" keeps a copy of assoc1 underneath, which is what
! remains on the stack as the result.
! with-assoc is defined elsewhere; presumably it adapts set-at to the
! ( key value ) order supplied by assoc-each -- verify against its definition.
: assoc-union! ( assoc1 assoc2 -- assoc1 )
over [ set-at ] with-assoc assoc-each ;
: assoc-union ( assoc1 assoc2 -- union )
[ [ [ assoc-size ] bi@ + ] [ drop ] 2bi new-assoc ] 2keep
[ dupd update ] bi@ ;
[ assoc-union! ] bi@ ;
: assoc-combine ( seq -- union )
H{ } clone [ dupd update ] reduce ;
H{ } clone [ assoc-union! ] reduce ;
: assoc-refine ( seq -- assoc )
[ f ] [ [ ] [ assoc-intersect ] map-reduce ] if-empty ;
! Builds the predicate shared by assoc-diff and assoc-diff!. The returned
! quotation takes ( key value ), discards the value (nip) and yields true when
! the key is NOT found by key? in the curried object.
! NOTE(review): the input is named "key" in the stack effect, but it is used
! as the second argument of key?, i.e. as an assoc; callers pass their assoc2.
: assoc-differ ( key -- quot )
[ nip key? not ] curry ; inline
: assoc-diff ( assoc1 assoc2 -- diff )
[ nip key? not ] curry assoc-filter ;
assoc-differ assoc-filter ;
! Destructive counterpart of assoc-diff: removes from assoc1 every entry whose
! key is also a key of assoc2 (predicate built by assoc-differ, applied with
! assoc-filter!) and leaves assoc1 on the stack.
: assoc-diff! ( assoc1 assoc2 -- assoc1 )
assoc-differ assoc-filter! ;
: substitute ( seq assoc -- newseq )
substituter map ;

View File

@ -422,7 +422,7 @@ tuple
{ "resize-byte-array" "byte-arrays" "primitive_resize_byte_array" (( n byte-array -- newbyte-array )) }
{ "<tuple-boa>" "classes.tuple.private" "primitive_tuple_boa" (( ... layout -- tuple )) }
{ "<tuple>" "classes.tuple.private" "primitive_tuple" (( layout -- tuple )) }
{ "modify-code-heap" "compiler.units" "primitive_modify_code_heap" (( alist -- )) }
{ "modify-code-heap" "compiler.units" "primitive_modify_code_heap" (( alist update-existing? reset-pics? -- )) }
{ "lookup-method" "generic.single.private" "primitive_lookup_method" (( object methods -- method )) }
{ "mega-cache-miss" "generic.single.private" "primitive_mega_cache_miss" (( methods index cache -- method )) }
{ "(exists?)" "io.files.private" "primitive_existsp" (( path -- ? )) }
@ -506,6 +506,7 @@ tuple
{ "float>bignum" "math.private" "primitive_float_to_bignum" (( x -- y )) }
{ "float>fixnum" "math.private" "primitive_float_to_fixnum" (( x -- y )) }
{ "all-instances" "memory" "primitive_all_instances" (( -- array )) }
{ "(code-blocks)" "memory.private" "primitive_code_blocks" (( -- array )) }
{ "code-room" "memory" "primitive_code_room" (( -- code-room )) }
{ "compact-gc" "memory" "primitive_compact_gc" (( -- )) }
{ "data-room" "memory" "primitive_data_room" (( -- data-room )) }

View File

@ -101,9 +101,9 @@ ERROR: bad-slot-name class slot ;
over [ slot-named* ] dip check-slot-exists drop ;
: assoc>object ( class slots values -- tuple )
[ [ [ initial>> ] map ] keep ] dip
[ [ [ initial>> ] map <enum> ] keep ] dip
swap [ [ slot-named-checked ] curry dip ] curry assoc-map
[ dup <enum> ] dip update boa>object ;
assoc-union! seq>> boa>object ;
: parse-tuple-literal-slots ( class slots -- tuple )
scan {

View File

@ -171,13 +171,13 @@ $nl
}
"The " { $vocab-link "delegate" } " library provides a language abstraction for expressing has-a relationships."
{ $heading "Anti-pattern #2: subclassing for implementation sharing only" }
"Tuple subclassing purely for sharing implementations of methods is not a good idea either. If a class " { $snippet "A" } " is a subclass of a class " { $snippet "B" } ", then instances of " { $snippet "A" } " should be usable anywhere that an instance of " { $snippet "B" } " is. If this properly does not hold, then subclassing should not be used."
"Tuple subclassing purely for sharing implementations of methods is not a good idea either. If a class " { $snippet "A" } " is a subclass of a class " { $snippet "B" } ", then instances of " { $snippet "A" } " should be usable anywhere that an instance of " { $snippet "B" } " is. If this property does not hold, then subclassing should not be used."
$nl
"There are two alternatives which are preferred to subclassing in this case. The first is " { $link "mixins" } "."
$nl
"The second is to use ad-hoc slot polymorphism. If two classes define a slot with the same name, then code which uses " { $link "accessors" } " can operate on instances of both objects, assuming the values stored in that slot implement a common protocol. This allows code to be shared without creating contrived relationships between classes."
{ $heading "Anti-pattern #3: subclassing to override a method definition" }
"While method overriding is a very powerful tool, improper use can cause tight coupling of code and lead to difficulty in testing and refactoring. Subclassing should not be used as a means of “monkey patching” methods to fix bugs and add features. Only subclass from classes which were designed to be inherited from, and when writing classes of your own which are intended to be subclassed, clearly document that subclasses may and may not do. This includes construction policy; document whether subclasses should use " { $link new } ", " { $link boa } ", or a custom parametrized constructor."
"While method overriding is a very powerful tool, improper use can cause tight coupling of code and lead to difficulty in testing and refactoring. Subclassing should not be used as a means of “monkey patching” methods to fix bugs and add features. Only subclass from classes which were designed to be inherited from, and when writing classes of your own which are intended to be subclassed, clearly document what subclasses may and may not do. This includes construction policy; document whether subclasses should use " { $link new } ", " { $link boa } ", or a custom parametrized constructor."
{ $see-also "parametrized-constructors" } ;
ARTICLE: "tuple-subclassing" "Tuple subclassing"
@ -425,7 +425,7 @@ HELP: <tuple-boa> ( ... layout -- tuple )
HELP: new
{ $values { "class" tuple-class } { "tuple" tuple } }
{ $description "Creates a new instance of " { $snippet "class" } " with all slots set to their initial values (see" { $link "tuple-declarations" } ")." }
{ $description "Creates a new instance of " { $snippet "class" } " with all slots set to their initial values (see " { $link "tuple-declarations" } ")." }
{ $examples
{ $example
"USING: kernel prettyprint ;"

View File

@ -706,14 +706,6 @@ ERROR: derived-error < base-error z ;
[ (( x y z -- * )) ] [ \ derived-error stack-effect ] unit-test
USE: classes.struct
[ { } ] [
classes
[ "prototype" word-prop ] map
[ '[ _ hashcode drop f ] [ drop t ] recover ] filter
] unit-test
! Make sure that tuple reshaping updates code heap roots
TUPLE: code-heap-ref ;

View File

@ -250,6 +250,13 @@ GENERIC# (define-tuple-class) 2 ( class superclass slots -- )
: thrower-effect ( slots -- effect )
[ name>> ] map { "*" } <effect> ;
: error-slots ( slots -- slots' )
[
dup string? [ 1array ] when
read-only swap remove
read-only suffix
] map ;
PRIVATE>
: define-tuple-class ( class superclass slots -- )
@ -265,6 +272,7 @@ M: tuple-class (define-tuple-class)
[ 2drop ?define-symbol ] [ redefine-tuple-class ] if ;
: define-error-class ( class superclass slots -- )
error-slots
[ define-tuple-class ]
[ 2drop reset-generic ]
[

View File

@ -58,12 +58,8 @@ $nl
"A generalization of the above combinators to any number of quotations can be found in " { $link "combinators" } "." ;
ARTICLE: "apply-combinators" "Apply combinators"
"The apply combinators apply a single quotation to multiple values. The asterisk (" { $snippet "@" } ") suffixed to these words' names signifies that they are apply combinators."
$nl
"Two quotations:"
{ $subsections bi@ 2bi@ }
"Three quotations:"
{ $subsections tri@ 2tri@ }
"The apply combinators apply a single quotation to multiple values. The at sign (" { $snippet "@" } ") suffixed to these words' names signifies that they are apply combinators."
{ $subsections bi@ 2bi@ tri@ 2tri@ }
"A pair of condition words built from " { $link bi@ } " to test two values:"
{ $subsections both? either? }
"All of the apply combinators are equivalent to using the corresponding " { $link "spread-combinators" } " with the same quotation supplied for every value." ;

View File

@ -1,4 +1,4 @@
! Copyright (C) 2006, 2009 Slava Pestov, Daniel Ehrenberg.
! Copyright (C) 2006, 2010 Slava Pestov, Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors arrays sequences sequences.private math.private
kernel kernel.private math assocs quotations vectors
@ -17,16 +17,22 @@ M: object throw
PRIVATE>
ERROR: wrong-values effect ;
ERROR: wrong-values quot effect ;
! We can't USE: effects here so we forward reference slots instead
SLOT: in
SLOT: out
: call-effect ( quot effect -- )
[ [ datastack ] dip dip ] dip
[ in>> length ] [ out>> length ] [ ] tri [ check-datastack ] dip
[ wrong-values ] curry unless ;
! Don't use fancy combinators here, since this word always
! runs unoptimized
[ datastack ] 2dip
2dup [
[ dip ] dip
dup in>> length swap out>> length
check-datastack
] 2dip rot
[ 2drop ] [ wrong-values ] if ;
: execute-effect ( word effect -- )
[ [ execute ] curry ] dip call-effect ;

View File

@ -1,14 +1,10 @@
USING: help.markup help.syntax words math source-files
parser quotations definitions ;
parser quotations definitions stack-checker.errors ;
IN: compiler.units
ARTICLE: "compilation-units" "Compilation units"
"A " { $emphasis "compilation unit" } " scopes a group of related definitions. They are compiled and entered into the system in one atomic operation."
ARTICLE: "compilation-units-internals" "Compilation units internals"
"These words do not need to be called directly, and only serve to support the implementation."
$nl
"Words defined in a compilation unit may not be called until the compilation unit is finished. The parser detects this case for parsing words and throws a " { $link staging-violation } "; calling any other word from within its own compilation unit throws an " { $link undefined } " error."
$nl
"The parser groups all definitions in a source file into one compilation unit, and parsing words do not need to concern themselves with compilation units. However, if definitions are being created at run time, a compilation unit must be created explicitly:"
{ $subsections with-compilation-unit }
"Compiling a set of words:"
{ $subsections compile }
"Words called to associate a definition with a compilation unit and a source file location:"
@ -23,6 +19,25 @@ $nl
"Low-level compiler interface exported by the Factor VM:"
{ $subsections modify-code-heap } ;
ARTICLE: "compilation-units" "Compilation units"
"A " { $emphasis "compilation unit" } " scopes a group of related definitions. They are compiled and entered into the system in one atomic operation."
$nl
"When a source file is being parsed, all definitions are part of a single compilation unit, unless the " { $link POSTPONE: << } " parsing word is used to create nested compilation units."
$nl
"Words defined in a compilation unit may not be called until the compilation unit is finished. The parser detects this case for parsing words and throws a " { $link staging-violation } ". Similarly, an attempt to use a macro from a word defined in the same compilation unit will throw a " { $link transform-expansion-error } ". Calling any other word from within its own compilation unit throws an " { $link undefined } " error."
$nl
"This means that parsing words and macros generally cannot be used in the same source file as they are defined. There are two means of getting around this:"
{ $list
{ "The simplest way is to split off the parsing words and macros into sub-vocabularies; perhaps suffixed by " { $snippet ".syntax" } " and " { $snippet ".macros" } "." }
{ "Alternatively, nested compilation units can be created using " { $link "syntax-immediate" } "." }
}
"Parsing words which create new definitions at parse time will implicitly add them to the compilation unit of the current source file."
$nl
"Code which creates new definitions at run time will need to explicitly create a compilation unit with a combinator. There is an additional combinator used by the parser to implement " { $link "syntax-immediate" } "."
{ $subsections with-compilation-unit with-nested-compilation-unit }
"Additional topics:"
{ $subsections "compilation-units-internals" } ;
ABOUT: "compilation-units"
HELP: redefine-error
@ -43,12 +58,17 @@ HELP: new-definitions
HELP: with-compilation-unit
{ $values { "quot" quotation } }
{ $description "Calls a quotation in a new compilation unit. The quotation can define new words and classes, as well as forget words. When the quotation returns, any changed words are recompiled, and changes are applied atomically." }
{ $notes "Compilation units may be nested."
{ $notes "Calls to " { $link with-compilation-unit } " may be nested."
$nl
"The parser wraps every source file in a compilation unit, so parsing words may define new words without having to perform extra work; to define new words at any other time, you must wrap your defining code with this combinator."
$nl
"Since compilation is relatively expensive, you should try to batch up as many definitions into one compilation unit as possible." } ;
HELP: with-nested-compilation-unit
{ $values { "quot" quotation } }
{ $description "Calls a quotation in a new compilation unit. The only difference between this word and " { $link with-compilation-unit } " is that variables used by the parser to associate definitions with source files are not rebound." }
{ $notes "This word is used by " { $link "syntax-immediate" } " to ensure that definitions in nested blocks are correctly recorded. User code should not depend on parser internals in such a way that calling this combinator is required." } ;
HELP: recompile
{ $values { "words" "a sequence of words" } { "alist" "an association list mapping words to compiled definitions" } }
{ $contract "Internal word which compiles words. Called at the end of " { $link with-compilation-unit } "." } ;
@ -58,13 +78,19 @@ HELP: no-compilation-unit
{ $description "Throws a " { $link no-compilation-unit } " error." }
{ $error-description "Thrown when an attempt is made to define a word outside of a " { $link with-compilation-unit } " combinator." } ;
HELP: modify-code-heap ( alist -- )
{ $values { "alist" "an alist" } }
{ $description "Stores compiled code definitions in the code heap. The alist maps words to the following:"
HELP: modify-code-heap ( alist update-existing? reset-pics? -- )
{ $values { "alist" "an association list with words as keys" } { "update-existing?" "a boolean" } { "reset-pics?" "a boolean" } }
{ $description "Lowest-level primitive for defining words. Associates words with code blocks in the code heap."
$nl
"The alist maps words to the following:"
{ $list
{ "a quotation - in this case, the quotation is compiled with the non-optimizing compiler and the word will call the quotation when executed." }
{ { $snippet "{ code labels rel words literals }" } " - in this case, a code heap block is allocated with the given data and the word will call the code block when executed." }
} }
{ "a 5-element array " { $snippet "{ parameters literals relocation labels code }" } " - in this case, a code heap block is allocated with the given data and the word will call the code block when executed. This is used by the optimizing compiler." }
}
"If any of the redefined words may already be referenced by other words in the code heap, from outside of the compilation unit, then a scan of the code heap must be performed to update all word call sites. Passing " { $link t } " as the " { $snippet "update-existing?" } " parameter enables this code path."
$nl
"If classes, methods or generic words were redefined, then inline cache call sites need to be updated as well. Passing " { $link t } " as the " { $snippet "reset-pics?" } " parameter enables this code path."
}
{ $notes "This word is called at the end of " { $link with-compilation-unit } "." } ;
HELP: compile

View File

@ -7,7 +7,7 @@ IN: compiler.units.tests
! Non-optimizing compiler bugs
[ 1 1 ] [
"A" <uninterned-word> [ [ [ 1 ] dip ] 2array 1array modify-code-heap ] keep
"A" <uninterned-word> [ [ [ 1 ] dip ] 2array 1array t t modify-code-heap ] keep
1 swap execute
] unit-test

View File

@ -62,7 +62,8 @@ HOOK: to-recompile compiler-impl ( -- words )
HOOK: process-forgotten-words compiler-impl ( words -- )
: compile ( words -- ) recompile modify-code-heap ;
: compile ( words -- )
recompile t f modify-code-heap ;
! Non-optimizing compiler
M: f update-call-sites
@ -119,12 +120,12 @@ M: object always-bump-effect-counter? drop f ;
: updated-definitions ( -- assoc )
H{ } clone
dup forgotten-definitions get update
dup new-definitions get first update
dup new-definitions get second update
dup changed-definitions get update
dup maybe-changed get update
dup dup changed-vocabs update ;
forgotten-definitions get assoc-union!
new-definitions get first assoc-union!
new-definitions get second assoc-union!
changed-definitions get assoc-union!
maybe-changed get assoc-union!
dup changed-vocabs assoc-union! ;
: process-forgotten-definitions ( -- )
forgotten-definitions get keys
@ -149,17 +150,35 @@ M: object always-bump-effect-counter? drop f ;
updated-definitions dup assoc-empty?
[ drop ] [ notify-definition-observers notify-error-observers ] if ;
: update-existing? ( defs -- ? )
new-words get keys diff empty? not ;
: reset-pics? ( -- ? )
outdated-generics get assoc-empty? not ;
: finish-compilation-unit ( -- )
[ ] [
remake-generics
to-recompile recompile
update-tuples
process-forgotten-definitions
modify-code-heap
to-recompile [
recompile
update-tuples
process-forgotten-definitions
] keep update-existing? reset-pics? modify-code-heap
bump-effect-counter
notify-observers
] if-bootstrapping ;
TUPLE: nesting-observer new-words ;
M: nesting-observer definitions-changed new-words>> swap assoc-diff! drop ;
: add-nesting-observer ( -- )
new-words get nesting-observer boa
[ nesting-observer set ] [ add-definition-observer ] bi ;
: remove-nesting-observer ( -- )
nesting-observer get remove-definition-observer ;
PRIVATE>
: with-nested-compilation-unit ( quot -- )
@ -170,19 +189,17 @@ PRIVATE>
H{ } clone outdated-generics set
H{ } clone outdated-tuples set
H{ } clone new-words set
[ finish-compilation-unit ] [ ] cleanup
add-nesting-observer
[
remove-nesting-observer
finish-compilation-unit
] [ ] cleanup
] with-scope ; inline
: with-compilation-unit ( quot -- )
[
H{ } clone changed-definitions set
H{ } clone maybe-changed set
H{ } clone changed-effects set
H{ } clone outdated-generics set
H{ } clone forgotten-definitions set
H{ } clone outdated-tuples set
H{ } clone new-words set
<definitions> new-definitions set
<definitions> old-definitions set
[ finish-compilation-unit ] [ ] cleanup
H{ } clone forgotten-definitions set
with-nested-compilation-unit
] with-scope ; inline

View File

@ -26,9 +26,9 @@ $nl
{ $code
"USING: io sequences ;"
"IN: a"
": hello \"Hello\" ;"
": world \"world\" ;"
": hello-world hello " " world 3append print ;"
": hello ( -- str ) \"Hello\" ;"
": world ( -- str ) \"world\" ;"
": hello-world ( -- ) hello \" \" world 3append print ;"
}
"The definitions for " { $snippet "hello" } ", " { $snippet "world" } ", and " { $snippet "hello-world" } " are in the dictionary."
$nl
@ -36,9 +36,9 @@ $nl
{ $code
"USING: namespaces ;"
"IN: a"
": hello \"Hello\" % ;"
": hello-world [ hello " " % world ] \"\" make ;"
": world \"world\" % ;"
": hello ( -- ) \"Hello\" % ;"
": hello-world ( -- str ) [ hello \" \" % world ] \"\" make ;"
": world ( -- ) \"world\" % ;"
}
"Note that the developer has made a mistake, placing the definition of " { $snippet "world" } " " { $emphasis "after" } " its usage in " { $snippet "hello-world" } "."
$nl

View File

@ -11,12 +11,12 @@ $nl
{ $code
"GENERIC: explain ( object -- )"
"M: object explain drop \"an object\" print ;"
"M: generic explain drop \"a class word\" print ;"
"M: class explain drop \"a generic word\" print ;"
"M: generic explain drop \"a generic word\" print ;"
"M: class explain drop \"a class word\" print ;"
}
"The linear order is the following, from least-specific to most-specific:"
{ $code "{ object generic class }" }
"Neither " { $link class } " nor " { $link generic } " are subclasses of each other, and their intersection is non-empty. Calling " { $snippet "explain" } " with a word on the stack that is both a class and a generic word will print " { $snippet "a generic word" } " because " { $link class } " precedes " { $link generic } " in the class linearization order. (One example of a word which is both a class and a generic word is the class of classes, " { $link class } ", which is also a word to get the class of an object.)"
"Neither " { $link class } " nor " { $link generic } " are subclasses of each other, and their intersection is non-empty. Calling " { $snippet "explain" } " with a word on the stack that is both a class and a generic word will print " { $snippet "a class word" } " because " { $link class } " is more specific than " { $link generic } " in the class linearization order. (One example of a word which is both a class and a generic word is the class of classes, " { $link class } ", which is also a word to get the class of an object.)"
$nl
"The " { $link order } " word can be useful to clarify method dispatch order:"
{ $subsections order } ;
@ -24,7 +24,7 @@ $nl
ARTICLE: "generic-introspection" "Generic word introspection"
"In most cases, generic words and methods are defined at parse time with " { $link POSTPONE: GENERIC: } " (or some other parsing word) and " { $link POSTPONE: M: } "."
$nl
"Sometimes, generic words need to be inspected defined at run time; words for performing these tasks are found in the " { $vocab-link "generic" } " vocabulary."
"Sometimes, generic words need to be inspected or defined at run time; words for performing these tasks are found in the " { $vocab-link "generic" } " vocabulary."
$nl
"The set of generic words is a class which implements the " { $link "definition-protocol" } ":"
{ $subsections
@ -86,7 +86,7 @@ $nl
} ;
ARTICLE: "generic" "Generic words and methods"
"A " { $emphasis "generic word" } " is composed of zero or more " { $emphasis "methods" } " together with a " { $emphasis "method combination" } ". A method " { $emphasis "specializes" } " on a class; when a generic word executed, the method combination chooses the most appropriate method and calls its definition."
"A " { $emphasis "generic word" } " is composed of zero or more " { $emphasis "methods" } " together with a " { $emphasis "method combination" } ". A method " { $emphasis "specializes" } " on a class; when a generic word is executed, the method combination chooses the most appropriate method and calls its definition."
$nl
"A generic word behaves roughly like a long series of class predicate conditionals in a " { $link cond } " form; however, methods can be defined in independent source files, reducing coupling and increasing extensibility. The method combination determines which object the generic word will " { $emphasis "dispatch" } " on; this could be the top of the stack, or some other value."
$nl
@ -131,12 +131,10 @@ HELP: M\
{ $class-description "Pushes a method on the stack." }
{ $examples { $code "M\\ fixnum + see" } { $code "USING: ui.gadgets ui.gadgets.editors ;" "M\\ editor draw-gadget* edit" } } ;
HELP: method-body
{ $class-description "The class of method bodies, which are words with special word properties set." } ;
HELP: method
{ $values { "class" class } { "generic" generic } { "method/f" { $maybe method-body } } }
{ $description "Looks up a method definition." } ;
{ $values { "class" class } { "generic" generic } { "method/f" { $maybe method } } }
{ $description "Looks up a method definition." }
{ $class-description "The class of method bodies, which are words with special word properties set." } ;
{ method create-method POSTPONE: M: } related-words
@ -159,14 +157,14 @@ HELP: with-methods
$low-level-note ;
HELP: create-method
{ $values { "class" class } { "generic" generic } { "method" method-body } }
{ $values { "class" class } { "generic" generic } { "method" method } }
{ $description "Creates a method or returns an existing one. This is the runtime equivalent of " { $link POSTPONE: M: } "." }
{ $notes "To define a method, pass the output value to " { $link define } "." } ;
{ sort-classes order } related-words
HELP: (call-next-method)
{ $values { "method" method-body } }
{ $values { "method" method } }
{ $description "Low-level word implementing " { $link POSTPONE: call-next-method } "." }
{ $notes "In most cases, " { $link POSTPONE: call-next-method } " should be used instead." } ;

View File

@ -207,7 +207,7 @@ M: integer forget-test 3 + ;
[ ] [ "IN: generic.tests USE: math FORGET: M\\ integer forget-test" eval( -- ) ] unit-test
[ { } ] [
\ + effect-dependencies-of keys [ method-body? ] filter
\ + effect-dependencies-of keys [ method? ] filter
[ "method-generic" word-prop \ forget-test eq? ] filter
] unit-test

Some files were not shown because too many files have changed in this diff Show More