Merge branch 'master' of git://factorcode.org/git/factor into clean-linux-x86-32

db4
Maxim Savchenko 2009-03-23 03:22:13 -04:00
commit 2c19aa1f25
130 changed files with 14201 additions and 618 deletions


@ -26,4 +26,4 @@ M: F-destructor dispose* alien>> F ;
;FUNCTOR
: DESTRUCTOR: scan-word define-destructor ; parsing
SYNTAX: DESTRUCTOR: scan-word define-destructor ;


@ -416,7 +416,7 @@ PRIVATE>
: define-fortran-record ( name vocab fields -- )
[ >lower ] [ ] [ fortran-record>c-struct ] tri* define-struct ;
: RECORD: scan in get parse-definition define-fortran-record ; parsing
SYNTAX: RECORD: scan in get parse-definition define-fortran-record ;
: set-fortran-abi ( library -- )
library-fortran-abis get-global at fortran-abi set ;
@ -437,16 +437,16 @@ MACRO: fortran-invoke ( return library function parameters -- )
return library function parameters return [ "void" ] unless* parse-arglist
[ \ fortran-invoke 5 [ ] nsequence ] dip define-declared ;
: SUBROUTINE:
SYNTAX: SUBROUTINE:
f "c-library" get scan ";" parse-tokens
[ "()" subseq? not ] filter define-fortran-function ; parsing
[ "()" subseq? not ] filter define-fortran-function ;
: FUNCTION:
SYNTAX: FUNCTION:
scan "c-library" get scan ";" parse-tokens
[ "()" subseq? not ] filter define-fortran-function ; parsing
[ "()" subseq? not ] filter define-fortran-function ;
: LIBRARY:
SYNTAX: LIBRARY:
scan
[ "c-library" set ]
[ set-fortran-abi ] bi ; parsing
[ set-fortran-abi ] bi ;


@ -7,35 +7,34 @@ effects assocs combinators lexer strings.parser alien.parser
fry vocabs.parser words.constant ;
IN: alien.syntax
: DLL" lexer get skip-blank parse-string dlopen parsed ; parsing
SYNTAX: DLL" lexer get skip-blank parse-string dlopen parsed ;
: ALIEN: scan string>number <alien> parsed ; parsing
SYNTAX: ALIEN: scan string>number <alien> parsed ;
: BAD-ALIEN <bad-alien> parsed ; parsing
SYNTAX: BAD-ALIEN <bad-alien> parsed ;
: LIBRARY: scan "c-library" set ; parsing
SYNTAX: LIBRARY: scan "c-library" set ;
: FUNCTION:
SYNTAX: FUNCTION:
scan "c-library" get scan ";" parse-tokens
[ "()" subseq? not ] filter
define-function ; parsing
define-function ;
: TYPEDEF:
scan scan typedef ; parsing
SYNTAX: TYPEDEF:
scan scan typedef ;
: C-STRUCT:
scan in get parse-definition define-struct ; parsing
SYNTAX: C-STRUCT:
scan in get parse-definition define-struct ;
: C-UNION:
scan parse-definition define-union ; parsing
SYNTAX: C-UNION:
scan parse-definition define-union ;
: C-ENUM:
SYNTAX: C-ENUM:
";" parse-tokens
[ [ create-in ] dip define-constant ] each-index ;
parsing
: address-of ( name library -- value )
load-library dlsym [ "No such symbol" throw ] unless* ;
: &:
scan "c-library" get '[ _ _ address-of ] over push-all ; parsing
SYNTAX: &:
scan "c-library" get '[ _ _ address-of ] over push-all ;


@ -68,7 +68,7 @@ M: bit-array resize
M: bit-array byte-length length 7 + -3 shift ;
: ?{ \ } [ >bit-array ] parse-literal ; parsing
SYNTAX: ?{ \ } [ >bit-array ] parse-literal ;
: integer>bit-array ( n -- bit-array )
dup 0 = [


@ -31,7 +31,7 @@ M: bit-array new-resizable drop <bit-vector> ;
INSTANCE: bit-vector growable
: ?V{ \ } [ >bit-vector ] parse-literal ; parsing
SYNTAX: ?V{ \ } [ >bit-vector ] parse-literal ;
M: bit-vector >pprint-sequence ;
M: bit-vector pprint-delims drop \ ?V{ \ } ;


@ -42,7 +42,7 @@ M: byte-array like
M: byte-array new-resizable drop <byte-vector> ;
: BV{ \ } [ >byte-vector ] parse-literal ; parsing
SYNTAX: BV{ \ } [ >byte-vector ] parse-literal ;
M: byte-vector pprint* pprint-object ;
M: byte-vector pprint-delims drop \ BV{ \ } ;


@ -14,18 +14,14 @@ SYMBOL: sent-messages
: remember-send ( selector -- )
sent-messages (remember-send) ;
: ->
scan dup remember-send parsed \ send parsed ;
parsing
SYNTAX: -> scan dup remember-send parsed \ send parsed ;
SYMBOL: super-sent-messages
: remember-super-send ( selector -- )
super-sent-messages (remember-send) ;
: SUPER->
scan dup remember-super-send parsed \ super-send parsed ;
parsing
SYNTAX: SUPER-> scan dup remember-super-send parsed \ super-send parsed ;
SYMBOL: frameworks
@ -33,9 +29,9 @@ frameworks [ V{ } clone ] initialize
[ frameworks get [ load-framework ] each ] "cocoa.messages" add-init-hook
: FRAMEWORK: scan [ load-framework ] [ frameworks get push ] bi ; parsing
SYNTAX: FRAMEWORK: scan [ load-framework ] [ frameworks get push ] bi ;
: IMPORT: scan [ ] import-objc-class ; parsing
SYNTAX: IMPORT: scan [ ] import-objc-class ;
"Compiling Objective C bridge..." print


@ -76,6 +76,6 @@ SYMBOL: +superclass+
import-objc-class
] bind ;
: CLASS:
SYNTAX: CLASS:
parse-definition unclip
>hashtable define-objc-class ; parsing
>hashtable define-objc-class ;


@ -30,4 +30,4 @@ ERROR: no-such-color name ;
: named-color ( name -- color )
dup rgb.txt at [ ] [ no-such-color ] ?if ;
: COLOR: scan named-color parsed ; parsing
SYNTAX: COLOR: scan named-color parsed ;


@ -13,10 +13,10 @@ IN: compiler.cfg.instructions.syntax
: insn-effect ( word -- effect )
boa-effect in>> but-last f <effect> ;
: INSN:
SYNTAX: INSN:
parse-tuple-definition "regs" suffix
[ dup tuple eq? [ drop insn-word ] when ] dip
[ define-tuple-class ]
[ 2drop save-location ]
[ 2drop [ ] [ '[ f _ boa , ] ] [ insn-effect ] tri define-inline ]
3tri ; parsing
3tri ;


@ -17,6 +17,6 @@ C: <ds-loc> ds-loc
TUPLE: rs-loc < loc ;
C: <rs-loc> rs-loc
: V scan-word scan-word vreg boa parsed ; parsing
: D scan-word <ds-loc> parsed ; parsing
: R scan-word <rs-loc> parsed ; parsing
SYNTAX: V scan-word scan-word vreg boa parsed ;
SYNTAX: D scan-word <ds-loc> parsed ;
SYNTAX: R scan-word <rs-loc> parsed ;


@ -16,8 +16,8 @@ MACRO: set-slots ( slots -- quot )
[ [ in>> '[ _ _ construct ] ] dip compose ] [ drop ] 2bi
define-declared ;
: CONSTRUCTOR:
SYNTAX: CONSTRUCTOR:
scan-word [ name>> "<" ">" surround create-in ] keep
"(" expect ")" parse-effect
parse-definition
define-constructor ; parsing
define-constructor ;


@ -3,7 +3,7 @@
USING: words parser alien alien.c-types kernel fry accessors ;
IN: core-text.utilities
: C-GLOBAL:
SYNTAX: C-GLOBAL:
CREATE-WORD
dup name>> '[ _ f dlsym *void* ]
(( -- value )) define-declared ; parsing
(( -- value )) define-declared ;


@ -21,7 +21,7 @@ IN: cpu.ppc.assembler.backend
: define-d-insn ( word opcode -- )
[ d-insn ] curry (( d a simm -- )) define-declared ;
: D: CREATE scan-word define-d-insn ; parsing
SYNTAX: D: CREATE scan-word define-d-insn ;
: sd-insn ( d a simm opcode -- )
[ s>u16 { 0 21 16 } bitfield ] dip insn ;
@ -29,7 +29,7 @@ IN: cpu.ppc.assembler.backend
: define-sd-insn ( word opcode -- )
[ sd-insn ] curry (( d a simm -- )) define-declared ;
: SD: CREATE scan-word define-sd-insn ; parsing
SYNTAX: SD: CREATE scan-word define-sd-insn ;
: i-insn ( li aa lk opcode -- )
[ { 0 1 0 } bitfield ] dip insn ;
@ -40,26 +40,26 @@ IN: cpu.ppc.assembler.backend
: (X) ( -- word quot )
CREATE scan-word scan-word scan-word [ x-insn ] 3curry ;
: X: (X) (( a s b -- )) define-declared ; parsing
SYNTAX: X: (X) (( a s b -- )) define-declared ;
: (1) ( quot -- quot' ) [ 0 ] prepose ;
: X1: (X) (1) (( a s -- )) define-declared ; parsing
SYNTAX: X1: (X) (1) (( a s -- )) define-declared ;
: xfx-insn ( d spr xo opcode -- )
[ { 1 11 21 } bitfield ] dip insn ;
: CREATE-MF ( -- word ) scan "MF" prepend create-in ;
: MFSPR:
SYNTAX: MFSPR:
CREATE-MF scan-word 5 shift [ 339 31 xfx-insn ] curry
(( d -- )) define-declared ; parsing
(( d -- )) define-declared ;
: CREATE-MT ( -- word ) scan "MT" prepend create-in ;
: MTSPR:
SYNTAX: MTSPR:
CREATE-MT scan-word 5 shift [ 467 31 xfx-insn ] curry
(( d -- )) define-declared ; parsing
(( d -- )) define-declared ;
: xo-insn ( d a b oe rc xo opcode -- )
[ { 1 0 10 11 16 21 } bitfield ] dip insn ;
@ -68,9 +68,9 @@ IN: cpu.ppc.assembler.backend
CREATE scan-word scan-word scan-word scan-word
[ xo-insn ] 2curry 2curry ;
: XO: (XO) (( a s b -- )) define-declared ; parsing
SYNTAX: XO: (XO) (( a s b -- )) define-declared ;
: XO1: (XO) (1) (( a s -- )) define-declared ; parsing
SYNTAX: XO1: (XO) (1) (( a s -- )) define-declared ;
GENERIC# (B) 2 ( dest aa lk -- )
M: integer (B) 18 i-insn ;
@ -84,11 +84,11 @@ M: label BC [ 0 BC ] dip rc-relative-ppc-2 label-fixup ;
: CREATE-B ( -- word ) scan "B" prepend create-in ;
: BC:
SYNTAX: BC:
CREATE-B scan-word scan-word
[ rot BC ] 2curry (( c -- )) define-declared ; parsing
[ rot BC ] 2curry (( c -- )) define-declared ;
: B:
SYNTAX: B:
CREATE-B scan-word scan-word scan-word scan-word scan-word
[ b-insn ] curry curry curry curry curry
(( bo -- )) define-declared ; parsing
(( bo -- )) define-declared ;


@ -11,5 +11,4 @@ IN: cpu.x86.assembler.syntax
: define-registers ( names size -- )
'[ _ define-register ] each-index ;
: REGISTERS: ( -- )
scan-word ";" parse-tokens swap define-registers ; parsing
SYNTAX: REGISTERS: scan-word ";" parse-tokens swap define-registers ;


@ -14,10 +14,10 @@ GENERIC: definition-icon ( definition -- path )
<<
: ICON:
SYNTAX: ICON:
scan-word \ definition-icon create-method
scan '[ drop _ definition-icon-path ]
define ; parsing
define ;
>>


@ -85,9 +85,9 @@ PRIVATE>
: define-consult ( consultation -- )
[ register-consult ] [ consult-methods ] bi ;
: CONSULT:
SYNTAX: CONSULT:
scan-word scan-word parse-definition <consultation>
[ save-location ] [ define-consult ] bi ; parsing
[ save-location ] [ define-consult ] bi ;
M: consultation where loc>> ;
@ -144,8 +144,8 @@ PRIVATE>
[ initialize-protocol-props ] 2tri
] 2bi ;
: PROTOCOL:
CREATE-WORD parse-definition define-protocol ; parsing
SYNTAX: PROTOCOL:
CREATE-WORD parse-definition define-protocol ;
PREDICATE: protocol < word protocol-words ; ! Subclass of symbol?
@ -159,7 +159,7 @@ M: protocol definer drop \ PROTOCOL: \ ; ;
M: protocol group-words protocol-words ;
: SLOT-PROTOCOL:
SYNTAX: SLOT-PROTOCOL:
CREATE-WORD ";" parse-tokens
[ [ reader-word ] [ writer-word ] bi 2array ] map concat
define-protocol ; parsing
define-protocol ;


@ -53,4 +53,4 @@ M: callable deep-fry
M: object deep-fry , ;
: '[ parse-quotation fry over push-all ; parsing
SYNTAX: '[ parse-quotation fry over push-all ;


@ -16,6 +16,8 @@ IN: functors
: define* ( word def effect -- ) pick set-word define-declared ;
: define-syntax* ( word def -- ) over set-word define-syntax ;
TUPLE: fake-quotation seq ;
GENERIC: >fake-quotations ( quot -- fake )
@ -41,7 +43,7 @@ M: object fake-quotations> ;
: DEFINE* ( accum -- accum ) effect get parsed \ define* parsed ;
: `TUPLE:
SYNTAX: `TUPLE:
scan-param parsed
scan {
{ ";" [ tuple parsed f parsed ] }
@ -52,40 +54,44 @@ M: object fake-quotations> ;
make parsed
]
} case
\ define-tuple-class parsed ; parsing
\ define-tuple-class parsed ;
: `M:
SYNTAX: `M:
effect off
scan-param parsed
scan-param parsed
\ create-method-in parsed
parse-definition*
DEFINE* ; parsing
DEFINE* ;
: `C:
SYNTAX: `C:
effect off
scan-param parsed
scan-param parsed
[ [ boa ] curry ] over push-all
DEFINE* ; parsing
DEFINE* ;
: `:
SYNTAX: `:
effect off
scan-param parsed
parse-definition*
DEFINE* ; parsing
DEFINE* ;
: `INSTANCE:
SYNTAX: `SYNTAX:
effect off
scan-param parsed
parse-definition*
\ define-syntax* parsed ;
SYNTAX: `INSTANCE:
scan-param parsed
scan-param parsed
\ add-mixin-instance parsed ; parsing
\ add-mixin-instance parsed ;
: `inline [ word make-inline ] over push-all ; parsing
SYNTAX: `inline [ word make-inline ] over push-all ;
: `parsing [ word make-parsing ] over push-all ; parsing
: `(
")" parse-effect effect set ; parsing
SYNTAX: `(
")" parse-effect effect set ;
: (INTERPOLATE) ( accum quot -- accum )
[ scan interpolate-locals ] dip
@ -93,11 +99,11 @@ M: object fake-quotations> ;
PRIVATE>
: IS [ dup search [ ] [ no-word ] ?if ] (INTERPOLATE) ; parsing
SYNTAX: IS [ dup search [ ] [ no-word ] ?if ] (INTERPOLATE) ;
: DEFINES [ create-in ] (INTERPOLATE) ; parsing
SYNTAX: DEFINES [ create-in ] (INTERPOLATE) ;
: DEFINES-CLASS [ create-class-in ] (INTERPOLATE) ; parsing
SYNTAX: DEFINES-CLASS [ create-class-in ] (INTERPOLATE) ;
DEFER: ;FUNCTOR delimiter
@ -110,8 +116,8 @@ DEFER: ;FUNCTOR delimiter
{ "C:" POSTPONE: `C: }
{ ":" POSTPONE: `: }
{ "INSTANCE:" POSTPONE: `INSTANCE: }
{ "SYNTAX:" POSTPONE: `SYNTAX: }
{ "inline" POSTPONE: `inline }
{ "parsing" POSTPONE: `parsing }
{ "(" POSTPONE: `( }
} ;
@ -132,4 +138,4 @@ DEFER: ;FUNCTOR delimiter
PRIVATE>
: FUNCTOR: (FUNCTOR:) define ; parsing
SYNTAX: FUNCTOR: (FUNCTOR:) define ;


@ -5,19 +5,19 @@ help.topics namespaces vocabs definitions compiler.units
vocabs.parser ;
IN: help.syntax
: HELP:
SYNTAX: HELP:
scan-word bootstrap-word
dup set-word
dup >link save-location
\ ; parse-until >array swap set-word-help ; parsing
\ ; parse-until >array swap set-word-help ;
: ARTICLE:
SYNTAX: ARTICLE:
location [
\ ; parse-until >array [ first2 ] keep 2 tail <article>
over add-article >link
] dip remember-definition ; parsing
] dip remember-definition ;
: ABOUT:
SYNTAX: ABOUT:
in get vocab
dup changed-definition
scan-object >>help drop ; parsing
scan-object >>help drop ;


@ -59,12 +59,11 @@ M: object specializer-declaration class ;
: specialized-length ( specializer -- n )
dup [ array? ] all? [ first ] when length ;
: HINTS:
SYNTAX: HINTS:
scan-object
dup method-spec? [ first2 method ] when
[ redefined ]
[ parse-definition "specializer" set-word-prop ] bi ;
parsing
! Default specializers
{ first first2 first3 first4 }


@ -25,8 +25,7 @@ M: tuple-class component-tag ( tag class -- )
[ compile-component-attrs ] 2bi
[ render ] [code] ;
: COMPONENT:
SYNTAX: COMPONENT:
scan-word
[ name>> ] [ '[ _ component-tag ] ] bi
define-chloe-tag ;
parsing


@ -15,8 +15,8 @@ tags [ H{ } clone ] initialize
: define-chloe-tag ( name quot -- ) swap tags get set-at ;
: CHLOE:
scan parse-definition define-chloe-tag ; parsing
SYNTAX: CHLOE:
scan parse-definition define-chloe-tag ;
CONSTANT: chloe-ns "http://factorcode.org/chloe/1.0"


@ -49,7 +49,7 @@ DEFER: <% delimiter
drop
] if ;
: %> lexer get parse-%> ; parsing
SYNTAX: %> lexer get parse-%> ;
: parse-template-lines ( lines -- quot )
<template-lexer> [


@ -38,6 +38,6 @@ MACRO: interpolate ( string -- )
: interpolate-locals ( string -- quot )
[ search [ ] ] (interpolate) ;
: I[
SYNTAX: I[
"]I" parse-multiline-string
interpolate-locals over push-all ; parsing
interpolate-locals over push-all ;


@ -1,22 +1,32 @@
USING: help.markup help.syntax ;
! Copyright (C) 2008, 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: help.markup help.syntax assocs kernel sequences ;
IN: interval-maps
HELP: interval-at*
{ $values { "key" "an object" } { "map" "an interval map" } { "value" "the value for the key, or f" } { "?" "whether the key is present" } }
{ $values { "key" object } { "map" interval-map } { "value" "the value for the key, or f" } { "?" "whether the key is present" } }
{ $description "Looks up a key in an interval map, returning the corresponding value if the item is in an interval in the map, and a boolean flag. The operation takes O(log n) time." } ;
HELP: interval-at
{ $values { "key" "an object" } { "map" "an interval map" } { "value" "the value for the key, or f" } }
{ $values { "key" object } { "map" interval-map } { "value" "the value for the key, or f" } }
{ $description "Looks up a key in an interval map, returning the value of the corresponding interval, or f if the interval is not present in the map." } ;
HELP: interval-key?
{ $values { "key" "an object" } { "map" "an interval map" } { "?" "a boolean" } }
{ $values { "key" object } { "map" interval-map } { "?" "a boolean" } }
{ $description "Tests whether an object is in an interval in the interval map, returning t if the object is present." } ;
HELP: <interval-map>
{ $values { "specification" "an assoc" } { "map" "an interval map" } }
{ $values { "specification" assoc } { "map" interval-map } }
{ $description "From a specification, produce an interval tree. The specification is an assoc where the keys are intervals, or pairs of numbers to represent intervals, or individual numbers to represent singleton intervals. The values are the values int he interval map. Construction time is O(n log n)." } ;
HELP: interval-values
{ $values { "map" interval-map } { "values" sequence } }
{ $description "Constructs a list of all of the values that interval map keys are associated with. This list may contain duplicates." } ;
HELP: coalesce
{ $values { "alist" "an association list with integer keys" } { "specification" { "array of the format used by " { $link <interval-map> } } } }
{ $description "Finds ranges used in the given alist, coalescing them into a single range." } ;
ARTICLE: "interval-maps" "Interval maps"
"The " { $vocab-link "interval-maps" } " vocabulary implements a data structure, similar to assocs, where a set of closed intervals of keys are associated with values. As such, interval maps do not conform to the assoc protocol, because intervals of floats, for example, can be used, and it is impossible to get a list of keys in between."
$nl
@ -24,7 +34,9 @@ $nl
{ $subsection interval-at* }
{ $subsection interval-at }
{ $subsection interval-key? }
{ $subsection interval-values }
"Use the following to construct interval maps"
{ $subsection <interval-map> } ;
{ $subsection <interval-map> }
{ $subsection coalesce } ;
ABOUT: "interval-maps"


@ -8,17 +8,21 @@ TUPLE: interval-map array ;
<PRIVATE
ALIAS: start first
ALIAS: end second
ALIAS: value third
: find-interval ( key interval-map -- interval-node )
[ first <=> ] with search nip ;
array>> [ start <=> ] with search nip ;
: interval-contains? ( key interval-node -- ? )
first2 between? ;
[ start ] [ end ] bi between? ;
: all-intervals ( sequence -- intervals )
[ [ dup number? [ dup 2array ] when ] dip ] { } assoc-map-as ;
: disjoint? ( node1 node2 -- ? )
[ second ] [ first ] bi* < ;
[ end ] [ start ] bi* < ;
: ensure-disjoint ( intervals -- intervals )
dup [ disjoint? ] monotonic?
@ -30,14 +34,17 @@ TUPLE: interval-map array ;
PRIVATE>
: interval-at* ( key map -- value ? )
[ drop ] [ array>> find-interval ] 2bi
[ drop ] [ find-interval ] 2bi
[ nip ] [ interval-contains? ] 2bi
[ third t ] [ drop f f ] if ;
[ value t ] [ drop f f ] if ;
: interval-at ( key map -- value ) interval-at* drop ;
: interval-key? ( key map -- ? ) interval-at* nip ;
: interval-values ( map -- values )
array>> [ value ] map ;
: <interval-map> ( specification -- map )
all-intervals [ [ first second ] compare ] sort
>intervals ensure-disjoint interval-map boa ;


@ -63,6 +63,6 @@ SYMBOL: euc-table
PRIVATE>
: EUC:
SYNTAX: EUC:
! EUC: euc-kr "vocab:io/encodings/korean/cp949.txt"
CREATE-CLASS scan-object define-euc ; parsing
CREATE-CLASS scan-object define-euc ;


@ -0,0 +1,208 @@
#
# Name: JIS X 0201 (1976) to Unicode 1.1 Table
# Unicode version: 1.1
# Table version: 0.9
# Table format: Format A
# Date: 8 March 1994
#
# Copyright (c) 1991-1994 Unicode, Inc. All Rights reserved.
#
# This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
# No claims are made as to fitness for any particular purpose. No
# warranties of any kind are expressed or implied. The recipient
# agrees to determine applicability of information provided. If this
# file has been provided on magnetic media by Unicode, Inc., the sole
# remedy for any claim will be exchange of defective media within 90
# days of receipt.
#
# Recipient is granted the right to make copies in any form for
# internal distribution and to freely use the information supplied
# in the creation of products supporting Unicode. Unicode, Inc.
# specifically excludes the right to re-distribute this file directly
# to third parties or other organizations whether for profit or not.
#
# General notes:
#
#
# This table contains one set of mappings from JIS X 0201 into Unicode.
# Note that these data are *possible* mappings only and may not be the
# same as those used by actual products, nor may they be the best suited
# for all uses. For more information on the mappings between various code
# pages incorporating the repertoire of JIS X 0201 and Unicode, consult the
# VENDORS mapping data. Normative information on the mapping between
# JIS X 0201 and Unicode may be found in the Unihan.txt file in the
# latest Unicode Character Database.
#
# If you have carefully considered the fact that the mappings in
# this table are only one possible set of mappings between JIS X 0201 and
# Unicode and have no normative status, but still feel that you
# have located an error in the table that requires fixing, you may
# report any such error to errata@unicode.org.
#
#
# Format: Three tab-separated columns
# Column #1 is the shift JIS code (in hex as 0xXX)
# Column #2 is the Unicode (in hex as 0xXXXX)
# Column #3 the Unicode (ISO 10646) name (follows a comment sign)
#
# The entries are in JIS order
#
#
0x20 0x0020 # SPACE
0x21 0x0021 # EXCLAMATION MARK
0x22 0x0022 # QUOTATION MARK
0x23 0x0023 # NUMBER SIGN
0x24 0x0024 # DOLLAR SIGN
0x25 0x0025 # PERCENT SIGN
0x26 0x0026 # AMPERSAND
0x27 0x0027 # APOSTROPHE
0x28 0x0028 # LEFT PARENTHESIS
0x29 0x0029 # RIGHT PARENTHESIS
0x2A 0x002A # ASTERISK
0x2B 0x002B # PLUS SIGN
0x2C 0x002C # COMMA
0x2D 0x002D # HYPHEN-MINUS
0x2E 0x002E # FULL STOP
0x2F 0x002F # SOLIDUS
0x30 0x0030 # DIGIT ZERO
0x31 0x0031 # DIGIT ONE
0x32 0x0032 # DIGIT TWO
0x33 0x0033 # DIGIT THREE
0x34 0x0034 # DIGIT FOUR
0x35 0x0035 # DIGIT FIVE
0x36 0x0036 # DIGIT SIX
0x37 0x0037 # DIGIT SEVEN
0x38 0x0038 # DIGIT EIGHT
0x39 0x0039 # DIGIT NINE
0x3A 0x003A # COLON
0x3B 0x003B # SEMICOLON
0x3C 0x003C # LESS-THAN SIGN
0x3D 0x003D # EQUALS SIGN
0x3E 0x003E # GREATER-THAN SIGN
0x3F 0x003F # QUESTION MARK
0x40 0x0040 # COMMERCIAL AT
0x41 0x0041 # LATIN CAPITAL LETTER A
0x42 0x0042 # LATIN CAPITAL LETTER B
0x43 0x0043 # LATIN CAPITAL LETTER C
0x44 0x0044 # LATIN CAPITAL LETTER D
0x45 0x0045 # LATIN CAPITAL LETTER E
0x46 0x0046 # LATIN CAPITAL LETTER F
0x47 0x0047 # LATIN CAPITAL LETTER G
0x48 0x0048 # LATIN CAPITAL LETTER H
0x49 0x0049 # LATIN CAPITAL LETTER I
0x4A 0x004A # LATIN CAPITAL LETTER J
0x4B 0x004B # LATIN CAPITAL LETTER K
0x4C 0x004C # LATIN CAPITAL LETTER L
0x4D 0x004D # LATIN CAPITAL LETTER M
0x4E 0x004E # LATIN CAPITAL LETTER N
0x4F 0x004F # LATIN CAPITAL LETTER O
0x50 0x0050 # LATIN CAPITAL LETTER P
0x51 0x0051 # LATIN CAPITAL LETTER Q
0x52 0x0052 # LATIN CAPITAL LETTER R
0x53 0x0053 # LATIN CAPITAL LETTER S
0x54 0x0054 # LATIN CAPITAL LETTER T
0x55 0x0055 # LATIN CAPITAL LETTER U
0x56 0x0056 # LATIN CAPITAL LETTER V
0x57 0x0057 # LATIN CAPITAL LETTER W
0x58 0x0058 # LATIN CAPITAL LETTER X
0x59 0x0059 # LATIN CAPITAL LETTER Y
0x5A 0x005A # LATIN CAPITAL LETTER Z
0x5B 0x005B # LEFT SQUARE BRACKET
0x5C 0x00A5 # YEN SIGN
0x5D 0x005D # RIGHT SQUARE BRACKET
0x5E 0x005E # CIRCUMFLEX ACCENT
0x5F 0x005F # LOW LINE
0x60 0x0060 # GRAVE ACCENT
0x61 0x0061 # LATIN SMALL LETTER A
0x62 0x0062 # LATIN SMALL LETTER B
0x63 0x0063 # LATIN SMALL LETTER C
0x64 0x0064 # LATIN SMALL LETTER D
0x65 0x0065 # LATIN SMALL LETTER E
0x66 0x0066 # LATIN SMALL LETTER F
0x67 0x0067 # LATIN SMALL LETTER G
0x68 0x0068 # LATIN SMALL LETTER H
0x69 0x0069 # LATIN SMALL LETTER I
0x6A 0x006A # LATIN SMALL LETTER J
0x6B 0x006B # LATIN SMALL LETTER K
0x6C 0x006C # LATIN SMALL LETTER L
0x6D 0x006D # LATIN SMALL LETTER M
0x6E 0x006E # LATIN SMALL LETTER N
0x6F 0x006F # LATIN SMALL LETTER O
0x70 0x0070 # LATIN SMALL LETTER P
0x71 0x0071 # LATIN SMALL LETTER Q
0x72 0x0072 # LATIN SMALL LETTER R
0x73 0x0073 # LATIN SMALL LETTER S
0x74 0x0074 # LATIN SMALL LETTER T
0x75 0x0075 # LATIN SMALL LETTER U
0x76 0x0076 # LATIN SMALL LETTER V
0x77 0x0077 # LATIN SMALL LETTER W
0x78 0x0078 # LATIN SMALL LETTER X
0x79 0x0079 # LATIN SMALL LETTER Y
0x7A 0x007A # LATIN SMALL LETTER Z
0x7B 0x007B # LEFT CURLY BRACKET
0x7C 0x007C # VERTICAL LINE
0x7D 0x007D # RIGHT CURLY BRACKET
0x7E 0x203E # OVERLINE
0xA1 0xFF61 # HALFWIDTH IDEOGRAPHIC FULL STOP
0xA2 0xFF62 # HALFWIDTH LEFT CORNER BRACKET
0xA3 0xFF63 # HALFWIDTH RIGHT CORNER BRACKET
0xA4 0xFF64 # HALFWIDTH IDEOGRAPHIC COMMA
0xA5 0xFF65 # HALFWIDTH KATAKANA MIDDLE DOT
0xA6 0xFF66 # HALFWIDTH KATAKANA LETTER WO
0xA7 0xFF67 # HALFWIDTH KATAKANA LETTER SMALL A
0xA8 0xFF68 # HALFWIDTH KATAKANA LETTER SMALL I
0xA9 0xFF69 # HALFWIDTH KATAKANA LETTER SMALL U
0xAA 0xFF6A # HALFWIDTH KATAKANA LETTER SMALL E
0xAB 0xFF6B # HALFWIDTH KATAKANA LETTER SMALL O
0xAC 0xFF6C # HALFWIDTH KATAKANA LETTER SMALL YA
0xAD 0xFF6D # HALFWIDTH KATAKANA LETTER SMALL YU
0xAE 0xFF6E # HALFWIDTH KATAKANA LETTER SMALL YO
0xAF 0xFF6F # HALFWIDTH KATAKANA LETTER SMALL TU
0xB0 0xFF70 # HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
0xB1 0xFF71 # HALFWIDTH KATAKANA LETTER A
0xB2 0xFF72 # HALFWIDTH KATAKANA LETTER I
0xB3 0xFF73 # HALFWIDTH KATAKANA LETTER U
0xB4 0xFF74 # HALFWIDTH KATAKANA LETTER E
0xB5 0xFF75 # HALFWIDTH KATAKANA LETTER O
0xB6 0xFF76 # HALFWIDTH KATAKANA LETTER KA
0xB7 0xFF77 # HALFWIDTH KATAKANA LETTER KI
0xB8 0xFF78 # HALFWIDTH KATAKANA LETTER KU
0xB9 0xFF79 # HALFWIDTH KATAKANA LETTER KE
0xBA 0xFF7A # HALFWIDTH KATAKANA LETTER KO
0xBB 0xFF7B # HALFWIDTH KATAKANA LETTER SA
0xBC 0xFF7C # HALFWIDTH KATAKANA LETTER SI
0xBD 0xFF7D # HALFWIDTH KATAKANA LETTER SU
0xBE 0xFF7E # HALFWIDTH KATAKANA LETTER SE
0xBF 0xFF7F # HALFWIDTH KATAKANA LETTER SO
0xC0 0xFF80 # HALFWIDTH KATAKANA LETTER TA
0xC1 0xFF81 # HALFWIDTH KATAKANA LETTER TI
0xC2 0xFF82 # HALFWIDTH KATAKANA LETTER TU
0xC3 0xFF83 # HALFWIDTH KATAKANA LETTER TE
0xC4 0xFF84 # HALFWIDTH KATAKANA LETTER TO
0xC5 0xFF85 # HALFWIDTH KATAKANA LETTER NA
0xC6 0xFF86 # HALFWIDTH KATAKANA LETTER NI
0xC7 0xFF87 # HALFWIDTH KATAKANA LETTER NU
0xC8 0xFF88 # HALFWIDTH KATAKANA LETTER NE
0xC9 0xFF89 # HALFWIDTH KATAKANA LETTER NO
0xCA 0xFF8A # HALFWIDTH KATAKANA LETTER HA
0xCB 0xFF8B # HALFWIDTH KATAKANA LETTER HI
0xCC 0xFF8C # HALFWIDTH KATAKANA LETTER HU
0xCD 0xFF8D # HALFWIDTH KATAKANA LETTER HE
0xCE 0xFF8E # HALFWIDTH KATAKANA LETTER HO
0xCF 0xFF8F # HALFWIDTH KATAKANA LETTER MA
0xD0 0xFF90 # HALFWIDTH KATAKANA LETTER MI
0xD1 0xFF91 # HALFWIDTH KATAKANA LETTER MU
0xD2 0xFF92 # HALFWIDTH KATAKANA LETTER ME
0xD3 0xFF93 # HALFWIDTH KATAKANA LETTER MO
0xD4 0xFF94 # HALFWIDTH KATAKANA LETTER YA
0xD5 0xFF95 # HALFWIDTH KATAKANA LETTER YU
0xD6 0xFF96 # HALFWIDTH KATAKANA LETTER YO
0xD7 0xFF97 # HALFWIDTH KATAKANA LETTER RA
0xD8 0xFF98 # HALFWIDTH KATAKANA LETTER RI
0xD9 0xFF99 # HALFWIDTH KATAKANA LETTER RU
0xDA 0xFF9A # HALFWIDTH KATAKANA LETTER RE
0xDB 0xFF9B # HALFWIDTH KATAKANA LETTER RO
0xDC 0xFF9C # HALFWIDTH KATAKANA LETTER WA
0xDD 0xFF9D # HALFWIDTH KATAKANA LETTER N
0xDE 0xFF9E # HALFWIDTH KATAKANA VOICED SOUND MARK
0xDF 0xFF9F # HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -0,0 +1,13 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: help.syntax help.markup ;
IN: io.encodings.iso2022
HELP: iso2022
{ $class-description "This encoding class implements ISO 2022-JP-1, a Japanese text encoding commonly used for email." }
{ $see-also "encodings-introduction" } ;
ARTICLE: "io.encodings.iso2022" "ISO 2022-JP-1 encoding"
{ $subsection iso2022 } ;
ABOUT: "io.encodings.iso2022"


@ -0,0 +1,36 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: io.encodings.string io.encodings.iso2022 tools.test
io.encodings.iso2022.private literals strings byte-arrays ;
IN: io.encodings.iso2022
[ "hello" ] [ "hello" >byte-array iso2022 decode ] unit-test
[ "hello" ] [ "hello" iso2022 encode >string ] unit-test
[ "hi" ] [ B{ CHAR: h ESC CHAR: ( CHAR: B CHAR: i } iso2022 decode ] unit-test
[ "hi" ] [ B{ CHAR: h CHAR: i ESC CHAR: ( CHAR: B } iso2022 decode ] unit-test
[ "hi\u00fffd" ] [ B{ CHAR: h CHAR: i ESC CHAR: ( } iso2022 decode ] unit-test
[ "hi\u00fffd" ] [ B{ CHAR: h CHAR: i ESC } iso2022 decode ] unit-test
[ B{ CHAR: h ESC CHAR: ( CHAR: J HEX: D8 } ] [ "h\u00ff98" iso2022 encode ] unit-test
[ "h\u00ff98" ] [ B{ CHAR: h ESC CHAR: ( CHAR: J HEX: D8 } iso2022 decode ] unit-test
[ "hi" ] [ B{ CHAR: h ESC CHAR: ( CHAR: J CHAR: i } iso2022 decode ] unit-test
[ "h" ] [ B{ CHAR: h ESC CHAR: ( CHAR: J } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h ESC CHAR: ( CHAR: J HEX: 80 } iso2022 decode ] unit-test
[ B{ CHAR: h ESC CHAR: $ CHAR: B HEX: 3E HEX: 47 } ] [ "h\u007126" iso2022 encode ] unit-test
[ "h\u007126" ] [ B{ CHAR: h ESC CHAR: $ CHAR: B HEX: 3E HEX: 47 } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h ESC CHAR: $ CHAR: B HEX: 3E } iso2022 decode ] unit-test
[ "h" ] [ B{ CHAR: h ESC CHAR: $ CHAR: B } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h ESC CHAR: $ } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h ESC } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h ESC CHAR: $ CHAR: B HEX: 80 HEX: 80 } iso2022 decode ] unit-test
[ B{ CHAR: h ESC CHAR: $ CHAR: ( CHAR: D HEX: 38 HEX: 54 } ] [ "h\u0058ce" iso2022 encode ] unit-test
[ "h\u0058ce" ] [ B{ CHAR: h ESC CHAR: $ CHAR: ( CHAR: D HEX: 38 HEX: 54 } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h ESC CHAR: $ CHAR: ( CHAR: D HEX: 38 } iso2022 decode ] unit-test
[ "h" ] [ B{ CHAR: h ESC CHAR: $ CHAR: ( CHAR: D } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h ESC CHAR: $ CHAR: ( } iso2022 decode ] unit-test
[ "h\u00fffd" ] [ B{ CHAR: h ESC CHAR: $ CHAR: ( CHAR: D HEX: 70 HEX: 70 } iso2022 decode ] unit-test
[ "\u{syriac-music}" iso2022 encode ] must-fail


@ -0,0 +1,107 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: io.encodings kernel sequences io simple-flat-file sets math
combinators.short-circuit io.binary values arrays assocs
locals accessors combinators biassocs byte-arrays parser ;
IN: io.encodings.iso2022
SINGLETON: iso2022
<PRIVATE
VALUE: jis201
VALUE: jis208
VALUE: jis212
"vocab:io/encodings/iso2022/201.txt" flat-file>biassoc to: jis201
"vocab:io/encodings/iso2022/208.txt" flat-file>biassoc to: jis208
"vocab:io/encodings/iso2022/212.txt" flat-file>biassoc to: jis212
VALUE: ascii
128 unique >biassoc to: ascii
TUPLE: iso2022-state type ;
: make-iso-coder ( encoding -- state )
drop ascii iso2022-state boa ;
M: iso2022 <encoder>
make-iso-coder <encoder> ;
M: iso2022 <decoder>
make-iso-coder <decoder> ;
<< SYNTAX: ESC HEX: 16 parsed ; >>
CONSTANT: switch-ascii B{ ESC CHAR: ( CHAR: B }
CONSTANT: switch-jis201 B{ ESC CHAR: ( CHAR: J }
CONSTANT: switch-jis208 B{ ESC CHAR: $ CHAR: B }
CONSTANT: switch-jis212 B{ ESC CHAR: $ CHAR: ( CHAR: D }
: find-type ( char -- code type )
{
{ [ dup ascii value? ] [ drop switch-ascii ascii ] }
{ [ dup jis201 value? ] [ drop switch-jis201 jis201 ] }
{ [ dup jis208 value? ] [ drop switch-jis208 jis208 ] }
{ [ dup jis212 value? ] [ drop switch-jis212 jis212 ] }
[ encode-error ]
} cond ;
: stream-write-num ( num stream -- )
over 256 >=
[ [ h>b/b swap 2byte-array ] dip stream-write ]
[ stream-write1 ] if ;
M:: iso2022-state encode-char ( char stream encoding -- )
char encoding type>> value? [
char find-type
[ stream stream-write ]
[ encoding (>>type) ] bi*
] unless
char encoding type>> value-at stream stream-write-num ;
: read-escape ( stream -- type/f )
dup stream-read1 {
{ CHAR: ( [
stream-read1 {
{ CHAR: B [ ascii ] }
{ CHAR: J [ jis201 ] }
[ drop f ]
} case
] }
{ CHAR: $ [
dup stream-read1 {
{ CHAR: @ [ drop jis208 ] } ! want: JIS X 0208-1978
{ CHAR: B [ drop jis208 ] }
{ CHAR: ( [
stream-read1 CHAR: D = jis212 f ?
] }
[ 2drop f ]
} case
] }
[ 2drop f ]
} case ;
: double-width? ( type -- ? )
{ [ jis208 eq? ] [ jis212 eq? ] } 1|| ;
: finish-decode ( num encoding -- char )
type>> at replacement-char or ;
M:: iso2022-state decode-char ( stream encoding -- char )
stream stream-read1 {
{ ESC [
stream read-escape [
encoding (>>type)
stream encoding decode-char
] [ replacement-char ] if*
] }
{ f [ f ] }
[
encoding type>> double-width? [
stream stream-read1
[ 2byte-array be> encoding finish-decode ]
[ drop replacement-char ] if*
] [ encoding finish-decode ] if
]
} case ;


@ -0,0 +1 @@
ISO-2022-JP-1 text encoding


@ -3,7 +3,7 @@ tools.test parser math namespaces continuations vocabs kernel
compiler.units eval vocabs.parser ;
IN: listener.tests
: hello "Hi" print ; parsing
SYNTAX: hello "Hi" print ;
: parse-interactive ( string -- quot )
<string-reader> stream-read-quot ;


@ -5,27 +5,25 @@ vocabs.loader words kernel namespaces locals.parser locals.types
locals.errors ;
IN: locals
: :>
SYNTAX: :>
scan locals get [ :>-outside-lambda-error ] unless*
[ make-local ] bind <def> parsed ; parsing
[ make-local ] bind <def> parsed ;
: [| parse-lambda over push-all ; parsing
SYNTAX: [| parse-lambda over push-all ;
: [let parse-let over push-all ; parsing
SYNTAX: [let parse-let over push-all ;
: [let* parse-let* over push-all ; parsing
SYNTAX: [let* parse-let* over push-all ;
: [wlet parse-wlet over push-all ; parsing
SYNTAX: [wlet parse-wlet over push-all ;
: :: (::) define ; parsing
SYNTAX: :: (::) define ;
: M:: (M::) define ; parsing
SYNTAX: M:: (M::) define ;
: MACRO:: (::) define-macro ; parsing
SYNTAX: MACRO:: (::) define-macro ;
: MEMO:: (::) define-memoized ; parsing
USE: syntax
SYNTAX: MEMO:: (::) define-memoized ;
{
"locals.macros"


@ -135,11 +135,11 @@ PRIVATE>
[ [ input-logging-quot ] 2keep drop error-logging-quot ]
(define-logging) ;
: LOG:
SYNTAX: LOG:
#! Syntax: name level
CREATE-WORD dup scan-word
'[ 1array stack>message _ _ log-message ]
(( message -- )) define-declared ; parsing
(( message -- )) define-declared ;
USE: vocabs.loader


@ -16,7 +16,7 @@ PRIVATE>
[ over real-macro-effect memoize-quot [ call ] append define ]
2bi ;
: MACRO: (:) define-macro ; parsing
SYNTAX: MACRO: (:) define-macro ;
PREDICATE: macro < word "macro" word-prop >boolean ;


@ -16,8 +16,8 @@ SYMBOL: _
: define-match-vars ( seq -- )
[ define-match-var ] each ;
: MATCH-VARS: ! vars ...
";" parse-tokens define-match-vars ; parsing
SYNTAX: MATCH-VARS: ! vars ...
";" parse-tokens define-match-vars ;
: match-var? ( symbol -- bool )
dup word? [ "match-var" word-prop ] [ drop f ] if ;


@ -289,7 +289,7 @@ M: MATRIX n*V(*)V+M!
M: MATRIX n*V(*)Vconj+M!
(prepare-ger) [ XGERC ] dip ;
: XMATRIX{ \ } [ >MATRIX ] parse-literal ; parsing
SYNTAX: XMATRIX{ \ } [ >MATRIX ] parse-literal ;
M: MATRIX pprint-delims
drop \ XMATRIX{ \ } ;


@ -179,7 +179,7 @@ M: VECTOR n*V+V!
M: VECTOR n*V!
(prepare-scal) [ XSCAL ] dip ;
: XVECTOR{ \ } [ >VECTOR ] parse-literal ; parsing
SYNTAX: XVECTOR{ \ } [ >VECTOR ] parse-literal ;
M: VECTOR pprint-delims
drop \ XVECTOR{ \ } ;


@ -31,7 +31,7 @@ M: complex sqrt >polar [ fsqrt ] [ 2.0 / ] bi* polar> ;
IN: syntax
: C{ \ } [ first2 rect> ] parse-literal ; parsing
SYNTAX: C{ \ } [ first2 rect> ] parse-literal ;
USE: prettyprint.custom


@ -41,7 +41,7 @@ PRIVATE>
[ drop "memoize" set-word-prop ]
3tri ;
: MEMO: (:) define-memoized ; parsing
SYNTAX: MEMO: (:) define-memoized ;
PREDICATE: memoized < word "memoize" word-prop ;


@ -20,10 +20,10 @@ PRIVATE>
[ (parse-here) ] "" make but-last
lexer get next-line ;
: STRING:
SYNTAX: STRING:
CREATE-WORD
parse-here 1quotation
(( -- string )) define-inline ; parsing
(( -- string )) define-inline ;
<PRIVATE
@ -48,16 +48,16 @@ PRIVATE>
change-column drop
] "" make ;
: <"
"\">" parse-multiline-string parsed ; parsing
SYNTAX: <"
"\">" parse-multiline-string parsed ;
: <'
"'>" parse-multiline-string parsed ; parsing
SYNTAX: <'
"'>" parse-multiline-string parsed ;
: {'
"'}" parse-multiline-string parsed ; parsing
SYNTAX: {'
"'}" parse-multiline-string parsed ;
: {"
"\"}" parse-multiline-string parsed ; parsing
SYNTAX: {"
"\"}" parse-multiline-string parsed ;
: /* "*/" parse-multiline-string drop ; parsing
SYNTAX: /* "*/" parse-multiline-string drop ;


@ -62,7 +62,7 @@ M: nibble-array resize
M: nibble-array byte-length length nibbles>bytes ;
: N{ \ } [ >nibble-array ] parse-literal ; parsing
SYNTAX: N{ \ } [ >nibble-array ] parse-literal ;
INSTANCE: nibble-array sequence


@ -47,7 +47,7 @@ reset-gl-function-number-counter
parameters return parse-arglist [ abi indirect-quot ] dip
define-declared ;
: GL-FUNCTION:
SYNTAX: GL-FUNCTION:
gl-function-calling-convention
scan
scan dup
@ -55,5 +55,4 @@ reset-gl-function-number-counter
gl-function-number
[ gl-function-pointer ] 2curry swap
";" parse-tokens [ "()" subseq? not ] filter
define-indirect
; parsing
define-indirect ;


@ -279,12 +279,12 @@ H{ } clone verify-messages set-global
: verify-message ( n -- word ) verify-messages get-global at ;
: X509_V_:
SYNTAX: X509_V_:
scan "X509_V_" prepend create-in
scan-word
[ 1quotation (( -- value )) define-inline ]
[ verify-messages get set-at ]
2bi ; parsing
2bi ;
>>


@ -34,9 +34,9 @@ TUPLE: tokenizer any one many ;
: reset-tokenizer ( -- )
default-tokenizer \ tokenizer set-global ;
: TOKENIZER:
SYNTAX: TOKENIZER:
scan search [ "Tokenizer not found" throw ] unless*
execute( -- tokenizer ) \ tokenizer set-global ; parsing
execute( -- tokenizer ) \ tokenizer set-global ;
TUPLE: ebnf-non-terminal symbol ;
TUPLE: ebnf-terminal symbol ;
@ -522,16 +522,14 @@ M: ebnf-non-terminal (transform) ( ast -- parser )
parse-ebnf dup dup parser [ main swap at compile ] with-variable
[ compiled-parse ] curry [ with-scope ast>> ] curry ;
: <EBNF "EBNF>" reset-tokenizer parse-multiline-string parse-ebnf main swap at
parsed reset-tokenizer ; parsing
SYNTAX: <EBNF "EBNF>" reset-tokenizer parse-multiline-string parse-ebnf main swap at
parsed reset-tokenizer ;
: [EBNF "EBNF]" reset-tokenizer parse-multiline-string ebnf>quot nip
parsed \ call parsed reset-tokenizer ; parsing
SYNTAX: [EBNF "EBNF]" reset-tokenizer parse-multiline-string ebnf>quot nip
parsed \ call parsed reset-tokenizer ;
: EBNF:
SYNTAX: EBNF:
reset-tokenizer CREATE-WORD dup ";EBNF" parse-multiline-string
ebnf>quot swapd (( input -- ast )) define-declared "ebnf-parser" set-word-prop
reset-tokenizer ; parsing
reset-tokenizer ;


@ -616,7 +616,7 @@ PRIVATE>
ERROR: parse-failed input word ;
: PEG:
SYNTAX: PEG:
(:)
[let | def [ ] word [ ] |
[
@ -630,7 +630,7 @@ ERROR: parse-failed input word ;
]
] with-compilation-unit
] over push-all
] ; parsing
] ;
USING: vocabs vocabs.loader ;


@ -48,7 +48,7 @@ M: persistent-hash hashcode* nip assoc-size ;
M: persistent-hash clone ;
: PH{ \ } [ >persistent-hash ] parse-literal ; parsing
SYNTAX: PH{ \ } [ >persistent-hash ] parse-literal ;
M: persistent-hash pprint-delims drop \ PH{ \ } ;
M: persistent-hash >pprint-sequence >alist ;


@ -179,7 +179,7 @@ M: persistent-vector equal?
: >persistent-vector ( seq -- pvec )
T{ persistent-vector } like ;
: PV{ \ } [ >persistent-vector ] parse-literal ; parsing
SYNTAX: PV{ \ } [ >persistent-vector ] parse-literal ;
M: persistent-vector pprint-delims drop \ PV{ \ } ;
M: persistent-vector >pprint-sequence ;


@ -96,12 +96,12 @@ ARTICLE: "prettyprint-literal" "Literal prettyprinting protocol"
{ $code
"TUPLE: rect w h ;"
""
": RECT["
"SYNTAX: RECT["
" scan-word"
" scan-word \\ * assert="
" scan-word"
" scan-word \\ ] assert="
" <rect> parsed ; parsing"
" <rect> parsed ;"
}
"An example literal might be:"
{ $code "RECT[ 100 * 200 ]" }


@ -1,32 +0,0 @@
! Copyright (C) 2009 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: help.markup help.syntax strings ;
IN: quoting
HELP: quote?
{ $values
{ "ch" "a character" }
{ "?" "a boolean" }
}
{ $description "Returns true if the character is a single or double quote." } ;
HELP: quoted?
{ $values
{ "str" string }
{ "?" "a boolean" }
}
{ $description "Returns true if a string is surrounded by matching single or double quotes as the first and last characters." } ;
HELP: unquote
{ $values
{ "str" string }
{ "newstr" string }
}
{ $description "Removes a pair of matching single or double quotes from a string." } ;
ARTICLE: "quoting" "Quotation marks"
"The " { $vocab-link "quoting" } " vocabulary is for removing quotes from a string." $nl
"Removing quotes:"
{ $subsection unquote } ;
ABOUT: "quoting"


@ -1,10 +0,0 @@
! Copyright (C) 2009 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: tools.test quoting ;
IN: quoting.tests
[ "abc" ] [ "'abc'" unquote ] unit-test
[ "abc" ] [ "\"abc\"" unquote ] unit-test
[ "'abc" ] [ "'abc" unquote ] unit-test
[ "abc'" ] [ "abc'" unquote ] unit-test


@ -1,6 +1,6 @@
! Copyright (C) 2009 Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: combinators.short-circuit kernel math sequences strings ;
USING: sequences math kernel strings combinators.short-circuit ;
IN: quoting
: quote? ( ch -- ? ) "'\"" member? ;
@ -13,4 +13,4 @@ IN: quoting
} 1&& ;
: unquote ( str -- newstr )
dup quoted? [ but-last-slice rest-slice >string ] when ;
dup quoted? [ but-last-slice rest-slice >string ] when ;


@ -1 +1,2 @@
Doug Coleman
Daniel Ehrenberg


@ -108,21 +108,24 @@ M: terminator-class class-member? ( obj class -- ? )
M: f class-member? 2drop f ;
: same? ( obj1 obj2 quot1: ( obj1 -- val1 ) quot2: ( obj2 -- val2 ) -- ? )
bi* = ; inline
M: script-class class-member?
[ script-of ] [ script>> ] bi* = ;
[ script-of ] [ script>> ] same? ;
M: category-class class-member?
[ category# ] [ category>> ] bi* = ;
[ category ] [ category>> ] same? ;
M: category-range-class class-member?
[ category first ] [ category>> ] bi* = ;
[ category first ] [ category>> ] same? ;
TUPLE: not-class class ;
PREDICATE: not-integer < not-class class>> integer? ;
UNION: simple-class
primitive-class range-class category-class category-range-class dot ;
primitive-class range-class dot ;
PREDICATE: not-simple < not-class class>> simple-class? ;
M: not-class class-member?
@ -227,7 +230,10 @@ TUPLE: class-partition integers not-integers simples not-simples and or other ;
dup or-class flatten partition-classes
dup not-integers>> length {
{ 0 [ nip make-or-class ] }
{ 1 [ not-integers>> first [ class>> '[ _ swap class-member? ] any? ] keep or ] }
{ 1 [
not-integers>> first
[ class>> '[ _ swap class-member? ] any? ] keep or
] }
[ 3drop t ]
} case ;
@ -248,6 +254,12 @@ M: or-class <not-class>
M: t <not-class> drop f ;
M: f <not-class> drop t ;
: <minus-class> ( a b -- a-b )
<not-class> 2array <and-class> ;
: <sym-diff-class> ( a b -- a~b )
2array [ <or-class> ] [ <and-class> ] bi <minus-class> ;
M: primitive-class class-member?
class>> class-member? ;


@ -11,7 +11,7 @@ IN: regexp.parser.tests
"a|b" "a.b" "a|b|c" "abc|b" "a|bcd" "a|(b)" "(?-i:a)" "||"
"(a)|b" "(a|b)" "((a)|(b))" "(?:a)" "(?i:a)" "|b" "b|"
"[abc]" "[a-c]" "[^a-c]" "[^]]" "[]a]" "[[]" "[]-a]" "[a-]" "[-]"
"[--a]" "foo*" "(foo)*" "(a|b)|c" "(foo){2,3}" "(foo){2,}"
"foo*" "(foo)*" "(a|b)|c" "(foo){2,3}" "(foo){2,}"
"(foo){2}" "{2,3}" "{," "{,}" "}" "foo}" "[^]-a]" "[^-]a]"
"[a-]" "[^a-]" "[^a-]" "a{,2}" "(?#foobar)"
"\\p{Space}" "\\t" "\\[" "[\\]]" "\\P{Space}"


@ -2,7 +2,8 @@
! See http://factorcode.org/license.txt for BSD license.
USING: peg.ebnf kernel math.parser sequences assocs arrays fry math
combinators regexp.classes strings splitting peg locals accessors
regexp.ast unicode.case ;
regexp.ast unicode.case unicode.script.private unicode.categories
memoize interval-maps sets unicode.data combinators.short-circuit ;
IN: regexp.parser
: allowed-char? ( ch -- ? )
@ -18,15 +19,41 @@ ERROR: bad-number ;
ERROR: bad-class name ;
: simple ( str -- simple )
! Alternatively, first collation key level?
>case-fold [ " \t_" member? not ] filter ;
: simple-table ( seq -- table )
[ [ simple ] keep ] H{ } map>assoc ;
MEMO: simple-script-table ( -- table )
script-table interval-values prune simple-table ;
MEMO: simple-category-table ( -- table )
categories simple-table ;
: parse-unicode-class ( name -- class )
! Implement this!
drop f ;
{
{ [ dup { [ length 1 = ] [ first "clmnpsz" member? ] } 1&& ] [
>upper first
<category-range-class>
] }
{ [ dup >title categories member? ] [
simple-category-table at <category-class>
] }
{ [ "script=" ?head ] [
dup simple-script-table at
[ <script-class> ]
[ "script=" prepend bad-class ] ?if
] }
[ bad-class ]
} cond ;
: unicode-class ( name -- class )
dup parse-unicode-class [ ] [ bad-class ] ?if ;
: name>class ( name -- class )
>string >case-fold {
>string simple {
{ "lower" letter-class }
{ "upper" LETTER-class }
{ "alpha" Letter-class }
@ -121,19 +148,29 @@ Character = EscapeSequence
| "^" => [[ ^ <tagged-epsilon> ]]
| . ?[ allowed-char? ]?
AnyRangeCharacter = EscapeSequence | .
AnyRangeCharacter = !("&&"|"||"|"--"|"~~") (EscapeSequence | .)
RangeCharacter = !("]") AnyRangeCharacter
Range = RangeCharacter:a "-" RangeCharacter:b => [[ a b <range-class> ]]
Range = RangeCharacter:a "-" !("-") RangeCharacter:b => [[ a b <range-class> ]]
| RangeCharacter
StartRange = AnyRangeCharacter:a "-" RangeCharacter:b => [[ a b <range-class> ]]
StartRange = AnyRangeCharacter:a "-" !("-") RangeCharacter:b => [[ a b <range-class> ]]
| AnyRangeCharacter
Ranges = StartRange:s Range*:r => [[ r s prefix ]]
CharClass = "^"?:n Ranges:e => [[ e n char-class ]]
BasicCharClass = "^"?:n Ranges:e => [[ e n char-class ]]
CharClass = BasicCharClass:b "&&" CharClass:c
=> [[ b c 2array <and-class> ]]
| BasicCharClass:b "||" CharClass:c
=> [[ b c 2array <or-class> ]]
| BasicCharClass:b "~~" CharClass:c
=> [[ b c <sym-diff-class> ]]
| BasicCharClass:b "--" CharClass:c
=> [[ b c <minus-class> ]]
| BasicCharClass
Options = [idmsux]*


@ -45,11 +45,11 @@ ARTICLE: { "regexp" "construction" } "Constructing regular expressions"
ARTICLE: { "regexp" "syntax" } "Regular expression syntax"
"Regexp syntax is largely compatible with Perl, Java and extended POSIX regexps, but not completely. Below, the syntax is documented."
{ $heading "Characters" }
"At its core, regular expressions consist of character literals. For example, " { $snippet "R/ f/" } " is a regular expression matching just the string 'f'. In addition, the normal escape codes are provided, like " { $snippet "\\t" } " for the tab character and " { $snippet "\\uxxxxxx" } "for an arbitrary Unicode code point, by its hex value. In addition, any character can be preceded by a backslash to escape it, unless this has special meaning. For example, to match a literal opening parenthesis, use " { $snippet "\\(" } "."
"At its core, regular expressions consist of character literals. For example, " { $snippet "R/ f/" } " is a regular expression matching just the string 'f'. In addition, the normal escape codes are provided, like " { $snippet "\\t" } " for the tab character and " { $snippet "\\uxxxxxx" } " for an arbitrary Unicode code point, by its hex value. In addition, any character can be preceded by a backslash to escape it, unless this has special meaning. For example, to match a literal opening parenthesis, use " { $snippet "\\(" } "."
{ $heading "Concatenation, alternation and grouping" }
"Regular expressions can be built out of multiple characters by concatenation. For example, " { $snippet "R/ ab/" } " matches a followed by b. The " { $snippet "|" } " (alternation) operator can construct a regexp which matches one of two alternatives. Parentheses can be used for gropuing. So " { $snippet "R/ f(oo|ar)/" } " would match either 'foo' or 'far'."
"Regular expressions can be built out of multiple characters by concatenation. For example, " { $snippet "R/ ab/" } " matches a followed by b. The " { $snippet "|" } " (alternation) operator can construct a regexp which matches one of two alternatives. Parentheses can be used for grouping. So " { $snippet "R/ f(oo|ar)/" } " would match either 'foo' or 'far'."
{ $heading "Character classes" }
"Square brackets define a convenient way to refer to a set of characters. For example, " { $snippet "[ab]" } " refers to either a or b. And " { $snippet "[a-z]" } " refers to all of the characters between a and z, in code point order. You can use these together, as in " { $snippet "[ac-fz]" } " which matches all of the characters between c and f, in addition to a and z. Character classes can be negated using a carat, as in " { $snippet "[^a]" } " which matches all characters which are not a."
"Square brackets define a convenient way to refer to a set of characters. For example, " { $snippet "[ab]" } " refers to either a or b. And " { $snippet "[a-z]" } " refers to all of the characters between a and z, in code point order. You can use these together, as in " { $snippet "[ac-fz]" } " which matches all of the characters between c and f, in addition to a and z. Character classes can be negated using a caret, as in " { $snippet "[^a]" } " which matches all characters which are not a."
{ $heading "Predefined character classes" }
"Several character classes are predefined, both for convenience and because they are too large to represent directly. In Factor regular expressions, all character classes are Unicode-aware."
{ $table
@ -72,8 +72,12 @@ ARTICLE: { "regexp" "syntax" } "Regular expression syntax"
{ { $snippet "\\p{blank}" } "Non-newline whitespace" }
{ { $snippet "\\p{cntrl}" } "Control character" }
{ { $snippet "\\p{space}" } "Whitespace" }
{ { $snippet "\\p{xdigit}" } "Hexidecimal digit" } } ! In the future: Unicode
"Full unicode properties are not yet supported."
{ { $snippet "\\p{xdigit}" } "Hexadecimal digit" }
{ { $snippet "\\p{Nd}" } "Character in Unicode category Nd" }
{ { $snippet "\\p{Z}" } "Character in Unicode category beginning with Z" }
{ { $snippet "\\p{script=Cham}" } "Character in the Cham writing system" } }
{ $heading "Character class operations" }
"Character classes can be composed using four binary operations: " { $snippet "|| && ~~ --" } ". These do the operations union, intersection, symmetric difference and difference, respectively. For example, characters which are lower-case but not Latin script could be matched as " { $snippet "[\\p{lower}--\\p{script=latin}]" } ". These operations are right-associative, and " { $snippet "^" } " binds tighter than them. There is no syntax for grouping."
{ $heading "Boundaries" }
"Special operators exist to match certain points in the string. These are called 'zero-width' because they do not consume any characters."
{ $table
@ -105,9 +109,18 @@ ARTICLE: { "regexp" "syntax" } "Regular expression syntax"
{ $heading "Quotation" }
"To make it convenient to have a long string which uses regexp operators, a special syntax is provided. If a substring begins with " { $snippet "\\Q" } " then everything until " { $snippet "\\E" } " is quoted (escaped). For example, " { $snippet "R/ \\Qfoo\\bar|baz()\\E/" } " matches exactly the string " { $snippet "\"foo\\bar|baz()\"" } "."
{ $heading "Unsupported features" }
"One missing feature is backreferences. This is because of a design decision to allow only regular expressions following the formal theory of regular languages. For more information, see " { $link { "regexp" "theory" } } ". You can create a new regular expression to match a particular string using " { $vocab-link "regexp.combinators" } " and group capture is available to extract parts of a regular expression match." $nl
"Another feature is Perl's " { $snippet "\\G" } " syntax, which references the previous match, is not included. This is because that sequence is inherently stateful, and Factor regexps don't hold state." $nl
"None of the operations which embed code into a regexp are supported, as this would require the inclusion of the Factor parser and compiler in any application which wants to expose regexps to the user. None of the casing operations are included of Perl like \\L, for simplicity." ; ! Also describe syntax, from the beginning
{ $subheading "Group capture" }
{ $subheading "Reluctant and posessive quantifiers" }
{ $subheading "Backreferences" }
"Backreferences were omitted because of a design decision to allow only regular expressions following the formal theory of regular languages. For more information, see " { $link { "regexp" "theory" } } "."
$nl
"To work around the lack of backreferences, consider using group capture and then creating a new regular expression to match the captured string using " { $vocab-link "regexp.combinators" } "."
{ $subheading "Previous match" }
"Another feature that is not included is Perl's " { $snippet "\\G" } " syntax, which references the previous match. This is because that sequence is inherently stateful, and Factor regexps don't hold state."
{ $subheading "Embedding code" }
"Operations which embed code into a regexp are not supported. This would require the inclusion of the Factor parser and compiler in any deployed application which wants to expose regexps to the user, leading to an undesirable increase in the code size."
{ $heading "Casing operations" }
"No special casing operations are included, for example Perl's " { $snippet "\\L" } "." ;
ARTICLE: { "regexp" "options" } "Regular expression options"
"When " { $link { "regexp" "construction" } } ", various options can be provided. Options have single-character names. A string of options has one of the following two forms:"
@ -150,7 +163,7 @@ ARTICLE: { "regexp" "theory" } "The theory of regular expressions"
"This implies, by DeMorgan's law, that, if you have two regular languages, their intersection is also regular. That is, for any two regular expressions, there exists a regular expression which matches strings that match both inputs." $nl
"Traditionally, regular expressions on computer support an additional operation: backreferences. For example, the Perl regexp " { $snippet "/(.*)$1/" } " matches a string repated twice. If a backreference refers to a string with a predetermined maximum length, then the resulting language is still regular." $nl
"But, if not, the language is not regular. There is strong evidence that there is no efficient way to parse with backreferences in the general case. Perl uses a naive backtracking algorithm which has pathological behavior in some cases, taking exponential time to match even if backreferences aren't used. Additionally, expressions with backreferences don't have the properties with negation and intersection described above." $nl
"The Factor regular expression engine was built with the design decision to support negation and intersection at the expense of backreferences. This lets us have a guaranteed linear-time matching algorithm. Systems like Ragel and Lex also use this algorithm, but in the Factor regular expression engine, all other features of regexps are still present." ;
"The Factor regular expression engine was built with the design decision to support negation and intersection at the expense of backreferences. This lets us have a guaranteed linear-time matching algorithm. Systems like Ragel and Lex use the same algorithm." ;
ARTICLE: { "regexp" "operations" } "Matching operations with regular expressions"
"Testing if a string matches a regular expression:"


@ -480,3 +480,57 @@ IN: regexp-tests
[ f ] [ "a\r" R/ a$./mds matches? ] unit-test
[ t ] [ "a\n" R/ a$./ms matches? ] unit-test
[ t ] [ "a\n" R/ a$./mds matches? ] unit-test
! Unicode categories
[ t ] [ "a" R/ \p{L}/ matches? ] unit-test
[ t ] [ "A" R/ \p{L}/ matches? ] unit-test
[ f ] [ " " R/ \p{L}/ matches? ] unit-test
[ f ] [ "a" R/ \P{L}/ matches? ] unit-test
[ f ] [ "A" R/ \P{L}/ matches? ] unit-test
[ t ] [ " " R/ \P{L}/ matches? ] unit-test
[ t ] [ "a" R/ \p{Ll}/ matches? ] unit-test
[ f ] [ "A" R/ \p{Ll}/ matches? ] unit-test
[ f ] [ " " R/ \p{Ll}/ matches? ] unit-test
[ f ] [ "a" R/ \P{Ll}/ matches? ] unit-test
[ t ] [ "A" R/ \P{Ll}/ matches? ] unit-test
[ t ] [ " " R/ \P{Ll}/ matches? ] unit-test
[ t ] [ "a" R/ \p{script=Latin}/ matches? ] unit-test
[ f ] [ " " R/ \p{script=Latin}/ matches? ] unit-test
[ f ] [ "a" R/ \P{script=Latin}/ matches? ] unit-test
[ t ] [ " " R/ \P{script=Latin}/ matches? ] unit-test
! These should be case-insensitive
[ f ] [ " " R/ \p{l}/ matches? ] unit-test
[ f ] [ "a" R/ \P{l}/ matches? ] unit-test
[ f ] [ "a" R/ \P{ll}/ matches? ] unit-test
[ t ] [ " " R/ \P{LL}/ matches? ] unit-test
[ f ] [ "a" R/ \P{sCriPt = latin}/ matches? ] unit-test
[ t ] [ " " R/ \P{SCRIPT = laTIn}/ matches? ] unit-test
! Logical operators
[ t ] [ "a" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
[ t ] [ "π" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
[ t ] [ "A" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
[ f ] [ "3" R/ [\p{script=latin}\p{lower}]/ matches? ] unit-test
[ t ] [ "a" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
[ t ] [ "π" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
[ t ] [ "A" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
[ f ] [ "3" R/ [\p{script=latin}||\p{lower}]/ matches? ] unit-test
[ t ] [ "a" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
[ f ] [ "π" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
[ f ] [ "A" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
[ f ] [ "3" R/ [\p{script=latin}&&\p{lower}]/ matches? ] unit-test
[ f ] [ "a" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
[ t ] [ "π" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
[ t ] [ "A" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
[ f ] [ "3" R/ [\p{script=latin}~~\p{lower}]/ matches? ] unit-test
[ f ] [ "a" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
[ f ] [ "π" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
[ t ] [ "A" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test
[ f ] [ "3" R/ [\p{script=latin}--\p{lower}]/ matches? ] unit-test

View File

@ -204,17 +204,17 @@ PRIVATE>
PRIVATE>
: R! CHAR: ! parsing-regexp ; parsing
: R" CHAR: " parsing-regexp ; parsing
: R# CHAR: # parsing-regexp ; parsing
: R' CHAR: ' parsing-regexp ; parsing
: R( CHAR: ) parsing-regexp ; parsing
: R/ CHAR: / parsing-regexp ; parsing
: R@ CHAR: @ parsing-regexp ; parsing
: R[ CHAR: ] parsing-regexp ; parsing
: R` CHAR: ` parsing-regexp ; parsing
: R{ CHAR: } parsing-regexp ; parsing
: R| CHAR: | parsing-regexp ; parsing
SYNTAX: R! CHAR: ! parsing-regexp ;
SYNTAX: R" CHAR: " parsing-regexp ;
SYNTAX: R# CHAR: # parsing-regexp ;
SYNTAX: R' CHAR: ' parsing-regexp ;
SYNTAX: R( CHAR: ) parsing-regexp ;
SYNTAX: R/ CHAR: / parsing-regexp ;
SYNTAX: R@ CHAR: @ parsing-regexp ;
SYNTAX: R[ CHAR: ] parsing-regexp ;
SYNTAX: R` CHAR: ` parsing-regexp ;
SYNTAX: R{ CHAR: } parsing-regexp ;
SYNTAX: R| CHAR: | parsing-regexp ;
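The point of offering so many delimiters is that a pattern containing the default delimiter needs no escaping. A hedged sketch, assuming the same parsing-regexp machinery as the words above:
USING: regexp prettyprint ;
! R! closes on ! , so / can appear freely inside the pattern:
"2009/03/23" R! \d+/\d+/\d+! matches? .    ! t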
USING: vocabs vocabs.loader ;

View File

@ -74,4 +74,4 @@ PRIVATE>
: roman/mod ( str1 str2 -- str3 str4 )
[ /mod ] binary-roman-op [ >roman ] dip ;
: ROMAN: scan roman> parsed ; parsing
SYNTAX: ROMAN: scan roman> parsed ;
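A small listener sketch of the parsing word above; the conversion happens at parse time via roman> :
USING: roman prettyprint ;
ROMAN: xiv .    ! 14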

View File

@ -93,7 +93,6 @@ M: object declarations. drop ;
M: word declarations.
{
POSTPONE: parsing
POSTPONE: delimiter
POSTPONE: inline
POSTPONE: recursive

View File

@ -19,8 +19,8 @@ MACRO: shuffle-effect ( effect -- )
[ [ at \ swap \ nth [ ] 3sequence ] curry map , \ cleave , ] 2bi
] [ ] make ;
: shuffle(
")" parse-effect parsed \ shuffle-effect parsed ; parsing
SYNTAX: shuffle(
")" parse-effect parsed \ shuffle-effect parsed ;
: 2swap ( x y z t -- z t x y ) 2 2 mnswap ; inline

View File

@ -1,8 +1,24 @@
USING: help.syntax help.markup strings ;
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: help.syntax help.markup strings biassocs arrays ;
IN: simple-flat-file
ABOUT: "simple-flat-file"
ARTICLE: "simple-flat-file" "Parsing simple flat files"
"The " { $vocab-link "simple-flat-file" } " vocabulary provides words for loading and parsing simple flat files in a particular format which is common for encoding tasks."
{ $subsection flat-file>biassoc } ;
"The " { $vocab-link "simple-flat-file" } " vocabulary provides words for loading and parsing simple flat files in a particular format which is common for encoding and Unicode tasks."
{ $subsection flat-file>biassoc }
{ $subsection load-interval-file }
{ $subsection data } ;
HELP: load-interval-file
{ $values { "filename" string } { "table" "an interval map" } }
{ $description "This loads a file that looks like Script.txt in the Unicode Character Database and converts it into an efficient interval map, where the keys are characters and the values are strings for the properties." } ;
HELP: data
{ $values { "filename" string } { "data" array } }
{ $description "This loads a file that's delineated by semicolons and lines, returning an array of lines, where each line is an array split by the semicolons, with whitespace trimmed off." } ;
HELP: flat-file>biassoc
{ $values { "filename" string } { "biassoc" biassoc } }
{ $description "This loads a flat file, in the form that many encoding resource files are in, with two columns of numeric data in hex, and returns a biassoc associating them." } ;

View File

@ -1,7 +1,7 @@
! Copyright (C) 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: sequences splitting kernel math.parser io.files io.encodings.utf8
biassocs ascii ;
biassocs ascii namespaces arrays make assocs interval-maps sets ;
IN: simple-flat-file
: drop-comments ( seq -- newseq )
@ -30,3 +30,25 @@ IN: simple-flat-file
: data ( filename -- data )
utf8 file-lines drop-comments [ split-; ] map ;
SYMBOL: interned
: range, ( value key -- )
swap interned get
[ = ] with find nip 2array , ;
: expand-ranges ( assoc -- interval-map )
[
[
swap CHAR: . over member? [
".." split1 [ hex> ] bi@ 2array
] [ hex> ] if range,
] assoc-each
] { } make <interval-map> ;
: process-interval-file ( ranges -- table )
dup values prune interned
[ expand-ranges ] with-variable ;
: load-interval-file ( filename -- table )
data process-interval-file ;

View File

@ -70,7 +70,7 @@ M: A >pprint-sequence ;
M: A pprint* pprint-object ;
: A{ \ } [ >A ] parse-literal ; parsing
SYNTAX: A{ \ } [ >A ] parse-literal ;
INSTANCE: A sequence

View File

@ -39,7 +39,7 @@ M: V >pprint-sequence ;
M: V pprint* pprint-object ;
: V{ \ } [ >V ] parse-literal ; parsing
SYNTAX: V{ \ } [ >V ] parse-literal ;
INSTANCE: V growable

View File

@ -65,4 +65,9 @@ DEFER: curry-folding-test ( quot -- )
{ 3 0 } [ [ 1 2 3 ] curry-folding-test ] must-infer-as
{ 3 0 } [ 1 [ 2 3 ] curry curry-folding-test ] must-infer-as
{ 3 0 } [ [ 1 2 ] 3 [ ] curry compose curry-folding-test ] must-infer-as
{ 3 0 } [ [ 1 2 ] 3 [ ] curry compose curry-folding-test ] must-infer-as
: member?-test ( a -- ? ) { 1 2 3 10 7 58 } member? ;
[ f ] [ 1.0 member?-test ] unit-test
[ t ] [ \ member?-test def>> first [ member?-test ] all? ] unit-test

View File

@ -1,12 +1,12 @@
! Copyright (C) 2007, 2009 Slava Pestov, Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: fry accessors arrays kernel kernel.private combinators.private
words sequences generic math namespaces make quotations assocs
combinators classes.tuple classes.tuple.private effects summary
hashtables classes generic sets definitions generic.standard
slots.private continuations locals generalizations
stack-checker.backend stack-checker.state stack-checker.visitor
stack-checker.errors stack-checker.values
words sequences generic math math.order namespaces make quotations assocs
combinators combinators.short-circuit classes.tuple
classes.tuple.private effects summary hashtables classes generic sets
definitions generic.standard slots.private continuations locals
generalizations stack-checker.backend stack-checker.state
stack-checker.visitor stack-checker.errors stack-checker.values
stack-checker.recursive-state ;
IN: stack-checker.transforms
@ -107,36 +107,28 @@ IN: stack-checker.transforms
] 1 define-transform
! Membership testing
CONSTANT: bit-member-n 256
CONSTANT: bit-member-max 256
: bit-member? ( seq -- ? )
#! Can we use a fast byte array test here?
{
{ [ dup length 8 < ] [ f ] }
{ [ dup [ integer? not ] any? ] [ f ] }
{ [ dup [ 0 < ] any? ] [ f ] }
{ [ dup [ bit-member-n >= ] any? ] [ f ] }
[ t ]
} cond nip ;
[ length 4 > ]
[ [ integer? ] all? ]
[ [ 0 bit-member-max between? ] all? ]
} 1&& ;
: bit-member-seq ( seq -- flags )
bit-member-n swap [ member? 1 0 ? ] curry B{ } map-as ;
: exact-float? ( f -- ? )
dup float? [ dup >integer >float = ] [ drop f ] if ; inline
[ supremum 1+ ] keep '[ _ member? 1 0 ? ] B{ } map-as ;
: bit-member-quot ( seq -- newquot )
[
bit-member-seq ,
[
{
{ [ over fixnum? ] [ ?nth 1 eq? ] }
{ [ over bignum? ] [ ?nth 1 eq? ] }
{ [ over exact-float? ] [ ?nth 1 eq? ] }
[ 2drop f ]
} cond
] %
] [ ] make ;
bit-member-seq
'[
_ {
{ [ over fixnum? ] [ ?nth 1 eq? ] }
{ [ over bignum? ] [ ?nth 1 eq? ] }
[ 2drop f ]
} cond
] ;
: member-quot ( seq -- newquot )
dup bit-member? [

View File

@ -32,7 +32,7 @@ PRIVATE>
: >suffix-array ( seq -- array )
[ suffixes ] map concat natural-sort ;
: SA{ \ } [ >suffix-array ] parse-literal ; parsing
SYNTAX: SA{ \ } [ >suffix-array ] parse-literal ;
: query ( begin suffix-array -- matches )
2dup find-index dup

basis/tools/disassembler/disassembler.factor Normal file → Executable file
View File

@ -18,7 +18,7 @@ M: word disassemble word-xt 2array disassemble ;
M: method-spec disassemble first2 method disassemble ;
cpu x86? os unix? and
cpu x86?
"tools.disassembler.udis"
"tools.disassembler.gdb" ?
require

basis/tools/disassembler/udis/udis.factor Normal file → Executable file
View File

@ -30,9 +30,9 @@ CONSTANT: UD_VENDOR_AMD 0
CONSTANT: UD_VENDOR_INTEL 1
FUNCTION: void ud_init ( ud* u ) ;
FUNCTION: void ud_set_mode ( ud* u, uint8_t mode ) ;
FUNCTION: void ud_set_mode ( ud* u, uchar mode ) ;
FUNCTION: void ud_set_pc ( ud* u, ulonglong pc ) ;
FUNCTION: void ud_set_input_buffer ( ud* u, uint8_t* offset, size_t size ) ;
FUNCTION: void ud_set_input_buffer ( ud* u, uchar* offset, size_t size ) ;
FUNCTION: void ud_set_vendor ( ud* u, uint vendor ) ;
FUNCTION: void ud_set_syntax ( ud* u, void* syntax ) ;
FUNCTION: void ud_input_skip ( ud* u, size_t size ) ;

View File

@ -40,10 +40,9 @@ M: bad-tr summary
PRIVATE>
: TR:
SYNTAX: TR:
scan parse-definition
unclip-last [ unclip-last ] dip compute-tr
[ check-tr ]
[ [ create-tr ] dip define-tr ]
[ [ "-fast" append create-tr ] dip define-fast-tr ] 2tri ;
parsing

View File

@ -4,8 +4,9 @@ USING: combinators.short-circuit unicode.categories kernel math
combinators splitting sequences math.parser io.files io assocs
arrays namespaces make math.ranges unicode.normalize
unicode.normalize.private values io.encodings.ascii
unicode.syntax unicode.data compiler.units fry
alien.syntax sets accessors interval-maps memoize locals words ;
unicode.data compiler.units fry unicode.categories.syntax
alien.syntax sets accessors interval-maps memoize locals words
simple-flat-file ;
IN: unicode.breaks
<PRIVATE
@ -31,9 +32,9 @@ CATEGORY: grapheme-control Zl Zp Cc Cf ;
[ drop Control ]
} case ;
CATEGORY: (extend) Me Mn ;
: extend? ( ch -- ? )
{ [ (extend)? ] [ "Other_Grapheme_Extend" property? ] } 1|| ;
CATEGORY: extend
Me Mn |
"Other_Grapheme_Extend" property? ;
: loe? ( ch -- ? )
"Logical_Order_Exception" property? ;
@ -127,7 +128,7 @@ to: grapheme-table
VALUE: word-break-table
"vocab:unicode/data/WordBreakProperty.txt" load-key-value
"vocab:unicode/data/WordBreakProperty.txt" load-interval-file
to: word-break-table
C-ENUM: wOther wCR wLF wNewline wExtend wFormat wKatakana wALetter wMidLetter

View File

@ -1,8 +1,8 @@
! Copyright (C) 2008, 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: unicode.data sequences namespaces
sbufs make unicode.syntax unicode.normalize math hints
unicode.categories combinators unicode.syntax assocs combinators.short-circuit
sbufs make unicode.normalize math hints
unicode.categories combinators assocs combinators.short-circuit
strings splitting kernel accessors unicode.breaks fry locals ;
QUALIFIED: ascii
IN: unicode.case

View File

@ -12,6 +12,9 @@ HELP: Letter
HELP: alpha
{ $class-description "The class of alphanumeric characters." } ;
HELP: math
{ $class-description "The class of Unicode math characters." } ;
HELP: blank
{ $class-description "The class of whitespace characters." } ;
@ -54,6 +57,8 @@ ARTICLE: "unicode.categories" "Character classes"
{ $subsection uncased }
{ $subsection uncased? }
{ $subsection character }
{ $subsection character? } ;
{ $subsection character? }
{ $subsection math }
{ $subsection math? } ;
ABOUT: "unicode.categories"

View File

@ -1,15 +1,16 @@
! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: unicode.syntax ;
USING: unicode.categories.syntax sequences unicode.data ;
IN: unicode.categories
CATEGORY: blank Zs Zl Zp \r\n ;
CATEGORY: letter Ll ;
CATEGORY: LETTER Lu ;
CATEGORY: Letter Lu Ll Lt Lm Lo ;
CATEGORY: blank Zs Zl Zp | "\r\n" member? ;
CATEGORY: letter Ll | "Other_Lowercase" property? ;
CATEGORY: LETTER Lu | "Other_Uppercase" property? ;
CATEGORY: Letter Lu Ll Lt Lm Lo Nl ;
CATEGORY: digit Nd Nl No ;
CATEGORY-NOT: printable Cc Cf Cs Co Cn ;
CATEGORY: alpha Lu Ll Lt Lm Lo Nd Nl No ;
CATEGORY: alpha Lu Ll Lt Lm Lo Nd Nl No | "Other_Alphabetic" property? ;
CATEGORY: control Cc ;
CATEGORY-NOT: uncased Lu Ll Lt Lm Mn Me ;
CATEGORY-NOT: character Cn ;
CATEGORY: math Sm | "Other_Math" property? ;

View File

@ -0,0 +1,19 @@
! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: help.syntax help.markup ;
IN: unicode.categories.syntax
ABOUT: "unicode.categories.syntax"
ARTICLE: "unicode.categories.syntax" "Unicode category syntax"
"There is special syntax sugar for making predicate classes which are unions of Unicode general categories, plus some other code."
{ $subsection POSTPONE: CATEGORY: }
{ $subsection POSTPONE: CATEGORY-NOT: } ;
HELP: CATEGORY:
{ $syntax "CATEGORY: foo Nl Pd Lu | \"Diacritic\" property? ;" }
{ $description "This defines a predicate class which is a subset of code points. In this example, " { $snippet "foo" } " is the class of characters which are in the general category Nl or Pd or Lu, or which have the Diacritic property." } ;
HELP: CATEGORY-NOT:
{ $syntax "CATEGORY-NOT: foo Nl Pd Lu | \"Diacritic\" property? ;" }
{ $description "This defines a predicate class which is a subset of code points, the complement of what " { $link POSTPONE: CATEGORY: } " would define. In this example, " { $snippet "foo" } " is the class of characters which are neither in the general category Nl or Pd or Lu, nor have the Diacritic property." } ;

View File

@ -0,0 +1,3 @@
! Copyright (C) 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.

View File

@ -0,0 +1,34 @@
! Copyright (C) 2008, 2009 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: unicode.data kernel math sequences parser unicode.data.private
bit-arrays namespaces sequences.private arrays classes.parser
assocs classes.predicate sets fry splitting accessors ;
IN: unicode.categories.syntax
! For use in CATEGORY:
SYMBOLS: Cn Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So Zs Zl Zp Cc Cf Cs Co | ;
<PRIVATE
: [category] ( categories code -- quot )
'[ dup category# _ member? [ drop t ] _ if ] ;
: integer-predicate-class ( word predicate -- )
integer swap define-predicate-class ;
: define-category ( word categories code -- )
[category] integer-predicate-class ;
: define-not-category ( word categories code -- )
[category] [ not ] compose integer-predicate-class ;
: parse-category ( -- word tokens quot )
CREATE-CLASS \ ; parse-until { | } split1
[ [ name>> categories-map at ] map ]
[ [ [ ] like ] [ [ drop f ] ] if* ] bi* ;
PRIVATE>
SYNTAX: CATEGORY: parse-category define-category ;
SYNTAX: CATEGORY-NOT: parse-category define-not-category ;

View File

@ -4,7 +4,7 @@ USING: combinators.short-circuit sequences io.files
io.encodings.ascii kernel values splitting accessors math.parser
ascii io assocs strings math namespaces make sorting combinators
math.order arrays unicode.normalize unicode.data locals
unicode.syntax macros sequences.deep words unicode.breaks
macros sequences.deep words unicode.breaks
quotations combinators.short-circuit simple-flat-file ;
IN: unicode.collation

View File

@ -6,7 +6,7 @@ IN: unicode.data
ABOUT: "unicode.data"
ARTICLE: "unicode.data" "Unicode data tables"
"The " { $vocab-link "unicode.data" "unicode.data" } " vocabulary contains core Unicode data tables and code for parsing this from files."
"The " { $vocab-link "unicode.data" "unicode.data" } " vocabulary contains core Unicode data tables and code for parsing this from files. The following words access these data tables."
{ $subsection canonical-entry }
{ $subsection combine-chars }
{ $subsection combining-class }
@ -14,7 +14,11 @@ ARTICLE: "unicode.data" "Unicode data tables"
{ $subsection name>char }
{ $subsection char>name }
{ $subsection property? }
{ $subsection load-key-value } ;
{ $subsection category }
{ $subsection ch>upper }
{ $subsection ch>lower }
{ $subsection ch>title }
{ $subsection special-case } ;
HELP: canonical-entry
{ $values { "char" "a code point" } { "seq" string } }
@ -48,6 +52,22 @@ HELP: property?
{ $values { "char" "a code point" } { "property" string } { "?" "a boolean" } }
{ $description "Tests whether the code point is listed under the given property in PropList.txt in the Unicode Character Database." } ;
HELP: load-key-value
{ $values { "filename" string } { "table" "an interval map" } }
{ $description "This loads a file that looks like Script.txt in the Unicode Character Database and converts it into an efficient interval map, where the keys are characters and the values are strings for the properties." } ;
HELP: category
{ $values { "char" "a code point" } { "category" string } }
{ $description "Returns the general category of a code point, in the form of a string. This will always be a string within the ASCII range of length two. If the code point is unassigned, then it returns " { $snippet "Cn" } "." } ;
HELP: ch>upper
{ $values { "ch" "a code point" } { "upper" "a code point" } }
{ $description "Returns the simple upper-cased version of the code point, if it exists. This does not handle context-sensitive or locale-dependent properties of linguistically accurate case conversion, and does not correctly handle characters which become multiple characters on conversion to this case." } ;
HELP: ch>lower
{ $values { "ch" "a code point" } { "lower" "a code point" } }
{ $description "Returns the simple lower-cased version of the code point, if it exists. This does not handle context-sensitive or locale-dependent properties of linguistically accurate case conversion, and does not correctly handle characters which become multiple characters on conversion to this case." } ;
HELP: ch>title
{ $values { "ch" "a code point" } { "title" "a code point" } }
{ $description "Returns the simple title-cased version of the code point, if it exists. This does not handle context-sensitive or locale-dependent properties of linguistically accurate case conversion, and does not correctly handle characters which become multiple characters on conversion to this case." } ;
HELP: special-case
{ $values { "ch" "a code point" } { "casing-tuple" { "a tuple, or " { $link f } } } }
{ $description "If a code point has special casing behavior, returns a tuple which represents that information." } ;

View File

@ -58,7 +58,7 @@ CONSTANT: num-chars HEX: 2FA1E
PRIVATE>
: category# ( char -- category )
: category# ( char -- n )
! There are a few characters that should be Cn
! that this gives Cf or Mn
! Cf = 26; Mn = 5; Cn = 29
@ -219,27 +219,3 @@ load-properties to: properties
[ name>char [ "Invalid character" throw ] unless* ]
name>char-hook set-global
SYMBOL: interned
: range, ( value key -- )
swap interned get
[ = ] with find nip 2array , ;
: expand-ranges ( assoc -- interval-map )
[
[
swap CHAR: . over member? [
".." split1 [ hex> ] bi@ 2array
] [ hex> ] if range,
] assoc-each
] { } make <interval-map> ;
: process-key-value ( ranges -- table )
dup values prune interned
[ expand-ranges ] with-variable ;
PRIVATE>
: load-key-value ( filename -- table )
data process-key-value ;

View File

@ -2,7 +2,7 @@
! See http://factorcode.org/license.txt for BSD license.
USING: ascii sequences namespaces make unicode.data kernel math arrays
locals sorting.insertion accessors assocs math.order combinators
unicode.syntax strings sbufs hints combinators.short-circuit vectors ;
strings sbufs hints combinators.short-circuit vectors ;
IN: unicode.normalize
<PRIVATE

View File

@ -1,17 +1,13 @@
! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors values kernel sequences assocs io.files
io.encodings ascii math.ranges io splitting math.parser
namespaces make byte-arrays locals math sets io.encodings.ascii
words words.symbol compiler.units arrays interval-maps
unicode.data ;
USING: values interval-maps simple-flat-file ;
IN: unicode.script
<PRIVATE
VALUE: script-table
"vocab:unicode/script/Scripts.txt" load-key-value
"vocab:unicode/script/Scripts.txt" load-interval-file
to: script-table
PRIVATE>

View File

@ -1,38 +0,0 @@
! Copyright (C) 2008 Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: unicode.data kernel math sequences parser lexer
bit-arrays namespaces make sequences.private arrays quotations
assocs classes.predicate math.order strings.parser ;
IN: unicode.syntax
<PRIVATE
: >category-array ( categories -- bitarray )
categories [ swap member? ] with map >bit-array ;
: as-string ( strings -- bit-array )
concat unescape-string ;
: [category] ( categories -- quot )
[
[ [ categories member? not ] filter as-string ] keep
[ categories member? ] filter >category-array
[ dup category# ] % , [ nth-unsafe [ drop t ] ] %
\ member? 2array >quotation ,
\ if ,
] [ ] make ;
: define-category ( word categories -- )
[category] integer swap define-predicate-class ;
PRIVATE>
: CATEGORY:
CREATE ";" parse-tokens define-category ; parsing
: seq-minus ( seq1 seq2 -- diff )
[ member? not ] curry filter ;
: CATEGORY-NOT:
CREATE ";" parse-tokens
categories swap seq-minus define-category ; parsing

View File

@ -15,7 +15,7 @@ $nl
{ $vocab-subsection "Word and grapheme breaks" "unicode.breaks" }
{ $vocab-subsection "Unicode normalization" "unicode.normalize" }
"The following are mostly for internal use:"
{ $vocab-subsection "Unicode syntax" "unicode.syntax" }
{ $vocab-subsection "Unicode category syntax" "unicode.categories.syntax" }
{ $vocab-subsection "Unicode data tables" "unicode.data" }
{ $see-also "ascii" "io.encodings" } ;

View File

@ -179,7 +179,7 @@ PRIVATE>
dup protocol>> '[ _ protocol-port or ] change-port ;
! Literal syntax
: URL" lexer get skip-blank parse-string >url parsed ; parsing
SYNTAX: URL" lexer get skip-blank parse-string >url parsed ;
USING: vocabs vocabs.loader ;

View File

@ -30,11 +30,11 @@ PREDICATE: value-word < word
[ second \ obj>> = ]
} 1&& ;
: VALUE:
SYNTAX: VALUE:
CREATE-WORD
dup t "no-def-strip" set-word-prop
T{ value-holder } clone [ obj>> ] curry
(( -- value )) define-declared ; parsing
(( -- value )) define-declared ;
M: value-word definer drop \ VALUE: f ;
@ -43,9 +43,9 @@ M: value-word definition drop f ;
: set-value ( value word -- )
def>> first (>>obj) ;
: to:
SYNTAX: to:
scan-word literalize parsed
\ set-value parsed ; parsing
\ set-value parsed ;
: get-value ( word -- value )
def>> first obj>> ;
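A hedged sketch of how VALUE: and to: fit together, mirroring their use elsewhere in this patch (for example script-table and word-break-table):
USING: values prettyprint ;
VALUE: answer      ! defines a word with stack effect ( -- value )
42 to: answer      ! stores 42 via set-value
answer .           ! 42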

View File

@ -50,7 +50,7 @@ M: vlist like
INSTANCE: vlist immutable-sequence
: VL{ \ } [ >vlist ] parse-literal ; parsing
SYNTAX: VL{ \ } [ >vlist ] parse-literal ;
M: vlist pprint-delims drop \ VL{ \ } ;
M: vlist >pprint-sequence ;
@ -87,7 +87,7 @@ M: valist assoc-like
INSTANCE: valist assoc
: VA{ \ } [ >valist ] parse-literal ; parsing
SYNTAX: VA{ \ } [ >valist ] parse-literal ;
M: valist pprint-delims drop \ VA{ \ } ;
M: valist >pprint-sequence >alist ;

View File

@ -90,14 +90,13 @@ unless
PRIVATE>
: COM-INTERFACE:
SYNTAX: COM-INTERFACE:
scan
scan find-com-interface-definition
scan string>guid
parse-com-functions
<com-interface-definition>
dup save-com-interface-definition
define-words-for-com-interface
; parsing
define-words-for-com-interface ;
: GUID: scan string>guid parsed ; parsing
SYNTAX: GUID: scan string>guid parsed ;

View File

@ -1,19 +1,26 @@
! Copyright (C) 2005, 2009 Daniel Ehrenberg
! See http://factorcode.org/license.txt for BSD license.
USING: kernel sequences unicode.syntax math math.order combinators
hints ;
USING: kernel sequences unicode.categories.syntax math math.order
combinators hints combinators.short-circuit ;
IN: xml.char-classes
CATEGORY: 1.0name-start* Ll Lu Lo Lt Nl \u000559\u0006E5\u0006E6_: ;
: 1.0name-start? ( char -- ? )
dup 1.0name-start*? [ drop t ]
[ HEX: 2BB HEX: 2C1 between? ] if ;
CATEGORY: 1.0name-start
Ll Lu Lo Lt Nl | {
[ HEX: 2BB HEX: 2C1 between? ]
[ "\u000559\u0006E5\u0006E6_:" member? ]
} 1|| ;
CATEGORY: 1.0name-char Ll Lu Lo Lt Nl Mc Me Mn Lm Nd _-.\u000387: ;
CATEGORY: 1.0name-char
Ll Lu Lo Lt Nl Mc Me Mn Lm Nd |
"_-.\u000387:" member? ;
CATEGORY: 1.1name-start Ll Lu Lo Lm Ln Nl _: ;
CATEGORY: 1.1name-start
Ll Lu Lo Lm Nl |
"_:" member? ;
CATEGORY: 1.1name-char Ll Lu Lo Lm Ln Nl Mc Mn Nd Pc Cf _-.\u0000b7: ;
CATEGORY: 1.1name-char
Ll Lu Lo Lm Nl Mc Mn Nd Pc Cf |
"_-.\u0000b7:" member? ;
: name-start? ( 1.0? char -- ? )
swap [ 1.0name-start? ] [ 1.1name-start? ] if ;

View File

@ -26,17 +26,17 @@ M: no-tag summary
PRIVATE>
: TAGS:
SYNTAX: TAGS:
CREATE
[ H{ } clone "xtable" set-word-prop ]
[ define-tags ] bi ; parsing
[ define-tags ] bi ;
: TAG:
scan scan-word parse-definition define-tag ; parsing
SYNTAX: TAG:
scan scan-word parse-definition define-tag ;
: XML-NS:
SYNTAX: XML-NS:
CREATE-WORD (( string -- name )) over set-stack-effect
scan '[ f swap _ <name> ] define-memoized ; parsing
scan '[ f swap _ <name> ] define-memoized ;
<PRIVATE
@ -168,11 +168,11 @@ MACRO: interpolate-xml ( xml -- quot )
PRIVATE>
: <XML
"XML>" [ string>doc ] parse-def ; parsing
SYNTAX: <XML
"XML>" [ string>doc ] parse-def ;
: [XML
"XML]" [ string>chunk ] parse-def ; parsing
SYNTAX: [XML
"XML]" [ string>chunk ] parse-def ;
<PRIVATE

Some files were not shown because too many files have changed in this diff.