Updating extra/xmode
parent
ca0df2cb46
commit
9a0d318b91
|
@ -1,5 +1,41 @@
|
||||||
This is a Factor port of jEdit's syntax highlighting engine.
|
This is a Factor port of the jEdit 4.3 syntax highlighting engine
|
||||||
|
(http://www.jedit.org).
|
||||||
|
|
||||||
It implements a relatively basic, rule-driven recursive parser.
|
jEdit 1.2, released in late 1998, was the first release to support
|
||||||
The parser is incremental, with one line granularity. This is
|
syntax highlighting. It featured a small number of hand-coded
|
||||||
still a work in progress.
|
"token markers" -- simple incremental parsers -- all based on the
|
||||||
|
original JavaTokenMarker contributed by Tal Davidson.
|
||||||
|
|
||||||
|
Around the time of jEdit 1.5 in 1999, Mike Dillon began developing a
|
||||||
|
jEdit plugin named "XMode". This plugin implemented a generic,
|
||||||
|
rule-driven token marker which read mode descriptions from XML files.
|
||||||
|
XMode eventually matured to the point where it could replace the
|
||||||
|
formerly hand-coded token markers.
|
||||||
|
|
||||||
|
With the release of jEdit 2.4, I merged XMode into the core and
|
||||||
|
eliminated the old hand-coded token markers.
|
||||||
|
|
||||||
|
XMode suffers from a somewhat archaic design, and was written at a time
|
||||||
|
when Java VMs with JIT compilers were relatively uncommon, object
|
||||||
|
allocation was expensive, and heap space tight. As a result the parser
|
||||||
|
design is less general than it could be.
|
||||||
|
|
||||||
|
Furthermore, the parser has a few bugs which some mode files have come
|
||||||
|
to depend on:
|
||||||
|
|
||||||
|
- If a RULES tag does not define any keywords or rules, then its
|
||||||
|
NO_WORD_SEP attribute is ignored.
|
||||||
|
|
||||||
|
The Factor implementation duplicates this behavior.
|
||||||
|
|
||||||
|
- If a RULES tag does not have a NO_WORD_SEP attribute, then
|
||||||
|
it inherits the value of the NO_WORD_SEP attribute from the previous
|
||||||
|
RULES tag.
|
||||||
|
|
||||||
|
The Factor implementation does not duplicate this behavior.
|
||||||
|
|
||||||
|
This is still a work in progress. If you find any behavioral differences
|
||||||
|
between the Factor implementation and the original jEdit code, please
|
||||||
|
report them as bugs. Also, if you wish to contribute a new or improved
|
||||||
|
mode file, please contact the jEdit project. Updated mode files in jEdit
|
||||||
|
will be periodically imported into the Factor source tree.
|
||||||
|
|
|
@ -35,8 +35,7 @@ IN: xmode.loader
|
||||||
dup children>string swap position-attrs <matcher> ;
|
dup children>string swap position-attrs <matcher> ;
|
||||||
|
|
||||||
: parse-regexp-matcher ( tag -- matcher )
|
: parse-regexp-matcher ( tag -- matcher )
|
||||||
! XXX
|
dup children>string <regexp> swap position-attrs <matcher> ;
|
||||||
dup children>string swap position-attrs <matcher> ;
|
|
||||||
|
|
||||||
! SPAN's children
|
! SPAN's children
|
||||||
<TAGS: parse-begin/end-tag
|
<TAGS: parse-begin/end-tag
|
||||||
|
@ -146,7 +145,7 @@ TAGS>
|
||||||
{ "SET" string>rule-set-name set-rule-set-name }
|
{ "SET" string>rule-set-name set-rule-set-name }
|
||||||
{ "IGNORE_CASE" string>boolean set-rule-set-ignore-case? }
|
{ "IGNORE_CASE" string>boolean set-rule-set-ignore-case? }
|
||||||
{ "HIGHLIGHT_DIGITS" string>boolean set-rule-set-highlight-digits? }
|
{ "HIGHLIGHT_DIGITS" string>boolean set-rule-set-highlight-digits? }
|
||||||
{ "DIGIT_RE" f set-rule-set-digit-re } ! XXX
|
{ "DIGIT_RE" <regexp> set-rule-set-digit-re } ! XXX
|
||||||
{ "ESCAPE" f add-escape-rule }
|
{ "ESCAPE" f add-escape-rule }
|
||||||
{ "DEFAULT" string>token set-rule-set-default }
|
{ "DEFAULT" string>token set-rule-set-default }
|
||||||
{ "NO_WORD_SEP" f set-rule-set-no-word-sep }
|
{ "NO_WORD_SEP" f set-rule-set-no-word-sep }
|
||||||
|
|
|
@ -2,6 +2,40 @@ USING: xmode.tokens xmode.catalog
|
||||||
xmode.marker tools.test kernel ;
|
xmode.marker tools.test kernel ;
|
||||||
IN: temporary
|
IN: temporary
|
||||||
|
|
||||||
|
[
|
||||||
|
{
|
||||||
|
T{ token f "int" KEYWORD3 }
|
||||||
|
T{ token f " " f }
|
||||||
|
T{ token f "x" f }
|
||||||
|
}
|
||||||
|
] [ f "int x" "c" load-mode tokenize-line nip ] unit-test
|
||||||
|
|
||||||
|
[
|
||||||
|
{
|
||||||
|
T{ token f "\"" LITERAL1 }
|
||||||
|
T{ token f "hello\\\"" LITERAL1 }
|
||||||
|
T{ token f " " LITERAL1 }
|
||||||
|
T{ token f "world" LITERAL1 }
|
||||||
|
T{ token f "\"" LITERAL1 }
|
||||||
|
}
|
||||||
|
] [ f "\"hello\\\" world\"" "c" load-mode tokenize-line nip ] unit-test
|
||||||
|
|
||||||
|
[
|
||||||
|
{
|
||||||
|
T{ token f "\"" LITERAL1 }
|
||||||
|
T{ token f "hello\\\ world" LITERAL1 }
|
||||||
|
T{ token f "\"" LITERAL1 }
|
||||||
|
}
|
||||||
|
] [ f "\"hello\\\ world\"" "c" load-mode tokenize-line nip ] unit-test
|
||||||
|
|
||||||
|
[
|
||||||
|
{
|
||||||
|
T{ token f "int" KEYWORD3 }
|
||||||
|
T{ token f " " f }
|
||||||
|
T{ token f "x" f }
|
||||||
|
}
|
||||||
|
] [ f "int x" "java" load-mode tokenize-line nip ] unit-test
|
||||||
|
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
T{ token f "//" COMMENT2 }
|
T{ token f "//" COMMENT2 }
|
||||||
|
@ -66,3 +100,12 @@ IN: temporary
|
||||||
] [
|
] [
|
||||||
f "<!ELEMENT %hello-world; >" "xml" load-mode tokenize-line nip
|
f "<!ELEMENT %hello-world; >" "xml" load-mode tokenize-line nip
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
|
[
|
||||||
|
{
|
||||||
|
T{ token f "$" KEYWORD2 }
|
||||||
|
T{ token f "FOO" KEYWORD2 }
|
||||||
|
}
|
||||||
|
] [
|
||||||
|
f "$FOO" "shellscript" load-mode tokenize-line nip
|
||||||
|
] unit-test
|
||||||
|
|
|
@ -24,8 +24,18 @@ assocs combinators combinators.lib strings regexp splitting ;
|
||||||
: mark-number ( keyword -- id )
|
: mark-number ( keyword -- id )
|
||||||
keyword-number? DIGIT and ;
|
keyword-number? DIGIT and ;
|
||||||
|
|
||||||
|
: resolve-delegate ( name -- rules )
|
||||||
|
dup string? [
|
||||||
|
"::" split1 [ swap load-mode at ] [ rule-sets get at ] if*
|
||||||
|
] when ;
|
||||||
|
|
||||||
|
: rule-set-keyword-maps ( ruleset -- seq )
|
||||||
|
dup rule-set-imports
|
||||||
|
[ resolve-delegate rule-set-keyword-maps ] map concat
|
||||||
|
swap rule-set-keywords add ;
|
||||||
|
|
||||||
: mark-keyword ( keyword -- id )
|
: mark-keyword ( keyword -- id )
|
||||||
current-keywords at ;
|
current-rule-set rule-set-keyword-maps assoc-stack ;
|
||||||
|
|
||||||
: add-remaining-token ( -- )
|
: add-remaining-token ( -- )
|
||||||
current-rule-set rule-set-default prev-token, ;
|
current-rule-set rule-set-default prev-token, ;
|
||||||
|
@ -45,30 +55,6 @@ assocs combinators combinators.lib strings regexp splitting ;
|
||||||
: current-char ( -- char )
|
: current-char ( -- char )
|
||||||
position get line get nth ;
|
position get line get nth ;
|
||||||
|
|
||||||
GENERIC: perform-rule ( rule -- )
|
|
||||||
|
|
||||||
: ... ;
|
|
||||||
|
|
||||||
M: escape-rule perform-rule ( rule -- ) ... ;
|
|
||||||
|
|
||||||
: find-escape-rule ( -- rule )
|
|
||||||
context get dup
|
|
||||||
line-context-in-rule-set rule-set-escape-rule
|
|
||||||
[ ] [ line-context-parent find-escape-rule ] ?if ;
|
|
||||||
|
|
||||||
: check-escape-rule ( rule -- )
|
|
||||||
#! Unlike jEdit, we keep checking parents until we find
|
|
||||||
#! an escape rule.
|
|
||||||
dup rule-no-escape? [ drop ] [
|
|
||||||
drop
|
|
||||||
! find-escape-rule
|
|
||||||
! current-rule-set rule-set-escape-rule [
|
|
||||||
! find-escape-rule
|
|
||||||
! ] [
|
|
||||||
!
|
|
||||||
! ] if*
|
|
||||||
] if ;
|
|
||||||
|
|
||||||
GENERIC: match-position ( rule -- n )
|
GENERIC: match-position ( rule -- n )
|
||||||
|
|
||||||
M: mark-previous-rule match-position drop last-offset get ;
|
M: mark-previous-rule match-position drop last-offset get ;
|
||||||
|
@ -83,10 +69,10 @@ M: rule match-position drop position get ;
|
||||||
[ over matcher-at-word-start? over last-offset get = implies ]
|
[ over matcher-at-word-start? over last-offset get = implies ]
|
||||||
} && 2nip ;
|
} && 2nip ;
|
||||||
|
|
||||||
: matches-not-mark-following? ... ;
|
|
||||||
|
|
||||||
GENERIC: text-matches? ( position text -- match-count/f )
|
GENERIC: text-matches? ( position text -- match-count/f )
|
||||||
|
|
||||||
|
M: f text-matches? 2drop f ;
|
||||||
|
|
||||||
M: string text-matches?
|
M: string text-matches?
|
||||||
! XXX ignore case
|
! XXX ignore case
|
||||||
>r line get swap tail-slice r>
|
>r line get swap tail-slice r>
|
||||||
|
@ -103,7 +89,7 @@ M: string text-matches?
|
||||||
|
|
||||||
: rule-end-matches? ( rule -- match-count/f )
|
: rule-end-matches? ( rule -- match-count/f )
|
||||||
dup mark-following-rule? [
|
dup mark-following-rule? [
|
||||||
dup rule-end swap can-match-here? 0 and
|
dup rule-start swap can-match-here? 0 and
|
||||||
] [
|
] [
|
||||||
dup rule-end tuck swap can-match-here? [
|
dup rule-end tuck swap can-match-here? [
|
||||||
position get swap matcher-text
|
position get swap matcher-text
|
||||||
|
@ -114,10 +100,48 @@ M: string text-matches?
|
||||||
] if
|
] if
|
||||||
] if ;
|
] if ;
|
||||||
|
|
||||||
|
DEFER: get-rules
|
||||||
|
|
||||||
|
: get-imported-rules ( vector/f char ruleset -- vector/f )
|
||||||
|
rule-set-imports
|
||||||
|
[ resolve-delegate get-rules ?push-all ] curry* each ;
|
||||||
|
|
||||||
|
: get-always-rules ( vector/f ruleset -- vector/f )
|
||||||
|
f swap rule-set-rules at ?push-all ;
|
||||||
|
|
||||||
|
: get-char-rules ( vector/f char ruleset -- vector/f )
|
||||||
|
>r ch>upper r> rule-set-rules at ?push-all ;
|
||||||
|
|
||||||
|
: get-rules ( char ruleset -- seq )
|
||||||
|
f -rot
|
||||||
|
[ get-char-rules ] 2keep
|
||||||
|
[ get-always-rules ] keep
|
||||||
|
get-imported-rules ;
|
||||||
|
|
||||||
GENERIC: handle-rule-start ( match-count rule -- )
|
GENERIC: handle-rule-start ( match-count rule -- )
|
||||||
|
|
||||||
GENERIC: handle-rule-end ( match-count rule -- )
|
GENERIC: handle-rule-end ( match-count rule -- )
|
||||||
|
|
||||||
|
: find-escape-rule ( -- rule )
|
||||||
|
context get dup
|
||||||
|
line-context-in-rule-set rule-set-escape-rule [ ] [
|
||||||
|
line-context-parent line-context-in-rule-set
|
||||||
|
dup [ rule-set-escape-rule ] when
|
||||||
|
] ?if ;
|
||||||
|
|
||||||
|
: check-escape-rule ( rule -- ? )
|
||||||
|
rule-no-escape? [ f ] [
|
||||||
|
find-escape-rule dup [
|
||||||
|
dup rule-start-matches? dup [
|
||||||
|
swap handle-rule-start
|
||||||
|
delegate-end-escaped? [ not ] change
|
||||||
|
t
|
||||||
|
] [
|
||||||
|
2drop f
|
||||||
|
] if
|
||||||
|
] when
|
||||||
|
] if ;
|
||||||
|
|
||||||
: check-every-rule ( -- ? )
|
: check-every-rule ( -- ? )
|
||||||
current-char current-rule-set get-rules
|
current-char current-rule-set get-rules
|
||||||
[ rule-start-matches? ] map-find
|
[ rule-start-matches? ] map-find
|
||||||
|
@ -129,11 +153,6 @@ GENERIC: handle-rule-end ( match-count rule -- )
|
||||||
dup [ swap handle-rule-end ] [ 2drop ] if
|
dup [ swap handle-rule-end ] [ 2drop ] if
|
||||||
] when* ;
|
] when* ;
|
||||||
|
|
||||||
: handle-escape-rule ( rule -- )
|
|
||||||
?end-rule
|
|
||||||
;
|
|
||||||
! ... process escape ... ;
|
|
||||||
|
|
||||||
: rule-match-token* ( rule -- id )
|
: rule-match-token* ( rule -- id )
|
||||||
dup rule-match-token {
|
dup rule-match-token {
|
||||||
{ f [ dup rule-body-token ] }
|
{ f [ dup rule-body-token ] }
|
||||||
|
@ -141,10 +160,13 @@ GENERIC: handle-rule-end ( match-count rule -- )
|
||||||
[ ]
|
[ ]
|
||||||
} case nip ;
|
} case nip ;
|
||||||
|
|
||||||
: resolve-delegate ( name -- rules )
|
M: escape-rule handle-rule-start
|
||||||
dup string? [
|
drop
|
||||||
"::" split1 [ swap load-mode at ] [ rule-sets get at ] if*
|
?end-rule
|
||||||
] when ;
|
process-escape? get [
|
||||||
|
escaped? [ not ] change
|
||||||
|
position [ + ] change
|
||||||
|
] [ 2drop ] if ;
|
||||||
|
|
||||||
M: seq-rule handle-rule-start
|
M: seq-rule handle-rule-start
|
||||||
?end-rule
|
?end-rule
|
||||||
|
@ -174,6 +196,10 @@ M: mark-following-rule handle-rule-start
|
||||||
f context get set-line-context-end
|
f context get set-line-context-end
|
||||||
context get set-line-context-in-rule ;
|
context get set-line-context-in-rule ;
|
||||||
|
|
||||||
|
M: mark-following-rule handle-rule-end
|
||||||
|
nip rule-match-token* prev-token,
|
||||||
|
f context get set-line-context-in-rule ;
|
||||||
|
|
||||||
M: mark-previous-rule handle-rule-start
|
M: mark-previous-rule handle-rule-start
|
||||||
?end-rule
|
?end-rule
|
||||||
mark-token
|
mark-token
|
||||||
|
@ -183,7 +209,7 @@ M: mark-previous-rule handle-rule-start
|
||||||
: do-escaped
|
: do-escaped
|
||||||
escaped? get [
|
escaped? get [
|
||||||
escaped? off
|
escaped? off
|
||||||
...
|
! ...
|
||||||
] when ;
|
] when ;
|
||||||
|
|
||||||
: check-end-delegate ( -- ? )
|
: check-end-delegate ( -- ? )
|
||||||
|
@ -198,14 +224,14 @@ M: mark-previous-rule handle-rule-start
|
||||||
] keep context get line-context-parent line-context-in-rule rule-match-token* next-token,
|
] keep context get line-context-parent line-context-in-rule rule-match-token* next-token,
|
||||||
pop-context
|
pop-context
|
||||||
seen-whitespace-end? on t
|
seen-whitespace-end? on t
|
||||||
] [ 2drop f ] if
|
] [ drop check-escape-rule ] if
|
||||||
] [ f ] if*
|
] [ f ] if*
|
||||||
] [ f ] if* ;
|
] [ f ] if* ;
|
||||||
|
|
||||||
: handle-no-word-break ( -- )
|
: handle-no-word-break ( -- )
|
||||||
context get line-context-parent [
|
context get line-context-parent [
|
||||||
line-context-in-rule dup rule-no-word-break? [
|
line-context-in-rule dup rule-no-word-break? [
|
||||||
rule-match-token prev-token,
|
rule-match-token* prev-token,
|
||||||
pop-context
|
pop-context
|
||||||
] [ drop ] if
|
] [ drop ] if
|
||||||
] when* ;
|
] when* ;
|
||||||
|
@ -221,6 +247,10 @@ M: mark-previous-rule handle-rule-start
|
||||||
|
|
||||||
1 current-rule-set rule-set-default next-token, ;
|
1 current-rule-set rule-set-default next-token, ;
|
||||||
|
|
||||||
|
: rule-set-empty? ( ruleset -- ? )
|
||||||
|
dup rule-set-rules assoc-empty?
|
||||||
|
swap rule-set-keywords assoc-empty? and ;
|
||||||
|
|
||||||
: check-word-break ( -- ? )
|
: check-word-break ( -- ? )
|
||||||
current-char dup blank? [
|
current-char dup blank? [
|
||||||
drop
|
drop
|
||||||
|
@ -232,14 +262,17 @@ M: mark-previous-rule handle-rule-start
|
||||||
(check-word-break)
|
(check-word-break)
|
||||||
|
|
||||||
] [
|
] [
|
||||||
dup alpha? [
|
! Micro-optimization with incorrect semantics; we keep
|
||||||
|
! it here because jEdit mode files depend on it now...
|
||||||
|
current-rule-set rule-set-empty? [
|
||||||
drop
|
drop
|
||||||
] [
|
] [
|
||||||
dup current-rule-set dup short. rule-set-no-word-sep* dup . member? [
|
dup alpha? [
|
||||||
"A: " write write1 nl
|
drop
|
||||||
] [
|
] [
|
||||||
"B: " write write1 nl
|
current-rule-set rule-set-no-word-sep* member? [
|
||||||
(check-word-break)
|
(check-word-break)
|
||||||
|
] unless
|
||||||
] if
|
] if
|
||||||
] if
|
] if
|
||||||
|
|
||||||
|
|
|
@ -14,6 +14,7 @@ SYMBOL: whitespace-end
|
||||||
SYMBOL: seen-whitespace-end?
|
SYMBOL: seen-whitespace-end?
|
||||||
|
|
||||||
SYMBOL: escaped?
|
SYMBOL: escaped?
|
||||||
|
SYMBOL: process-escape?
|
||||||
SYMBOL: delegate-end-escaped?
|
SYMBOL: delegate-end-escaped?
|
||||||
SYMBOL: terminated?
|
SYMBOL: terminated?
|
||||||
|
|
||||||
|
@ -61,5 +62,6 @@ SYMBOL: terminated?
|
||||||
0 position set
|
0 position set
|
||||||
0 last-offset set
|
0 last-offset set
|
||||||
0 whitespace-end set
|
0 whitespace-end set
|
||||||
|
process-escape? on
|
||||||
[ clone ] [ main-rule-set f <line-context> ] if*
|
[ clone ] [ main-rule-set f <line-context> ] if*
|
||||||
context set ;
|
context set ;
|
||||||
|
|
|
@ -45,23 +45,6 @@ MEMO: standard-rule-set ( id -- ruleset )
|
||||||
over [ >r V{ } like r> over push-all ] [ nip ] if
|
over [ >r V{ } like r> over push-all ] [ nip ] if
|
||||||
] when* ;
|
] when* ;
|
||||||
|
|
||||||
DEFER: get-rules
|
|
||||||
|
|
||||||
: get-imported-rules ( vector/f char ruleset -- vector/f )
|
|
||||||
rule-set-imports [ get-rules ?push-all ] curry* each ;
|
|
||||||
|
|
||||||
: get-always-rules ( vector/f ruleset -- vector/f )
|
|
||||||
f swap rule-set-rules at ?push-all ;
|
|
||||||
|
|
||||||
: get-char-rules ( vector/f char ruleset -- vector/f )
|
|
||||||
>r ch>upper r> rule-set-rules at ?push-all ;
|
|
||||||
|
|
||||||
: get-rules ( char ruleset -- seq )
|
|
||||||
f -rot
|
|
||||||
[ get-char-rules ] 2keep
|
|
||||||
[ get-always-rules ] keep
|
|
||||||
get-imported-rules ;
|
|
||||||
|
|
||||||
: rule-set-no-word-sep* ( ruleset -- str )
|
: rule-set-no-word-sep* ( ruleset -- str )
|
||||||
dup rule-set-keywords keyword-map-no-word-sep*
|
dup rule-set-keywords keyword-map-no-word-sep*
|
||||||
swap rule-set-no-word-sep "_" 3append ;
|
swap rule-set-no-word-sep "_" 3append ;
|
||||||
|
|
Loading…
Reference in New Issue