Updating extra/xmode

2007-12-02 05:25:18 -05:00 · 2007-12-02 05:25:18 -05:00 · 9a0d318b91
parent ca0df2cb46
commit 9a0d318b91
6 changed files with 165 additions and 69 deletions
--- a/extra/xmode/README.txt
+++ b/extra/xmode/README.txt
@ -1,5 +1,41 @@
-This is a Factor port of jEdit's syntax highlighting engine.
+This is a Factor port of the jEdit 4.3 syntax highlighting engine
 (http://www.jedit.org).
-It implements a relatively basic, rule-driven recursive parser.
+jEdit 1.2, released in late 1998, was the first release to support
-The parser is incremental, with one line granularity. This is
+syntax highlighting. It featured a small number of hand-coded
-still a work in progress.
+"token markers" -- simple incremental parsers -- all based on the
 original JavaTokenMarker contributed by Tal Davidson.
 Around the time of jEdit 1.5 in 1999, Mike Dillon began developing a
 jEdit plugin named "XMode". This plugin implemented a generic,
 rule-driven token marker which read mode descriptions from XML files.
 XMode eventually matured to the point where it could replace the
 formerly hand-coded token markers.
 With the release of jEdit 2.4, I merged XMode into the core and
 eliminated the old hand-coded token markers.
 XMode suffers from a somewhat archaic design, and was written at a time
 when Java VMs with JIT compilers were relatively uncommon, object
 allocation was expensive, and heap space tight. As a result the parser
 design is less general than it could be.
 Furthermore, the parser has a few bugs which some mode files have come
 to depend on:
 - If a RULES tag does not define any keywords or rules, then its
  NO_WORD_SEP attribute is ignored.
  The Factor implementation duplicates this behavior.
 - If a RULES tag does not have a NO_WORD_SEP attribute, then
  it inherits the value of the NO_WORD_SEP attribute from the previous
  RULES tag.
  The Factor implementation does not duplicate this behavior.
 This is still a work in progress. If you find any behavioral differences
 between the Factor implementation and the original jEdit code, please
 report them as bugs. Also, if you wish to contribute a new or improved
 mode file, please contact the jEdit project. Updated mode files in jEdit
 will be periodically imported into the Factor source tree.
--- a/extra/xmode/loader/loader.factor
+++ b/extra/xmode/loader/loader.factor
@ -35,8 +35,7 @@ IN: xmode.loader
    dup children>string swap position-attrs <matcher> ;
 : parse-regexp-matcher ( tag -- matcher )
-    ! XXX
+    dup children>string <regexp> swap position-attrs <matcher> ;
    dup children>string swap position-attrs <matcher> ;
 ! SPAN's children
 <TAGS: parse-begin/end-tag
@ -146,7 +145,7 @@ TAGS>
        { "SET" string>rule-set-name set-rule-set-name }
        { "IGNORE_CASE" string>boolean set-rule-set-ignore-case? }
        { "HIGHLIGHT_DIGITS" string>boolean set-rule-set-highlight-digits? }
-        { "DIGIT_RE" f set-rule-set-digit-re } ! XXX
+        { "DIGIT_RE" <regexp> set-rule-set-digit-re } ! XXX
        { "ESCAPE" f add-escape-rule }
        { "DEFAULT" string>token set-rule-set-default }
        { "NO_WORD_SEP" f set-rule-set-no-word-sep }
--- a/extra/xmode/marker/marker-tests.factor
+++ b/extra/xmode/marker/marker-tests.factor
@ -2,6 +2,40 @@ USING: xmode.tokens xmode.catalog
 xmode.marker tools.test kernel ;
 IN: temporary
 ! C mode: "int x" is expected to tokenize as a KEYWORD3 token for "int",
 ! followed by untyped (f) tokens for the space and the identifier.
 ! tokenize-line leaves two values; nip keeps only the top one (the tokens).
 [
    {
        T{ token f "int" KEYWORD3 }
        T{ token f " " f }
        T{ token f "x" f }
    }
 ] [ f "int x" "c" load-mode tokenize-line nip ] unit-test
 ! C mode: a backslash-escaped quote inside a string literal must not end
 ! the literal; every expected token, including both quotes, is LITERAL1.
 [
    {
        T{ token f "\"" LITERAL1 }
        T{ token f "hello\\\"" LITERAL1 }
        T{ token f " " LITERAL1 }
        T{ token f "world" LITERAL1 }
        T{ token f "\"" LITERAL1 }
    }
 ] [ f "\"hello\\\" world\"" "c" load-mode tokenize-line nip ] unit-test
 ! C mode: a string literal containing a backslash sequence; the text
 ! between the two quote tokens is expected as a single LITERAL1 token.
 [
    {
        T{ token f "\"" LITERAL1 }
        T{ token f "hello\\\ world" LITERAL1 }
        T{ token f "\"" LITERAL1 }
    }
 ] [ f "\"hello\\\ world\"" "c" load-mode tokenize-line nip ] unit-test
 ! Java mode: "int x" is expected to produce the same three tokens as the
 ! C-mode test (KEYWORD3 for "int", untyped space, untyped identifier).
 [
    {
        T{ token f "int" KEYWORD3 }
        T{ token f " " f }
        T{ token f "x" f }
    }
 ] [ f "int x" "java" load-mode tokenize-line nip ] unit-test
 [
    {
        T{ token f "//" COMMENT2 }
@ -66,3 +100,12 @@ IN: temporary
 ] [
     f "<!ELEMENT %hello-world; >" "xml" load-mode tokenize-line nip
 ] unit-test
 ! Shellscript mode: "$FOO" is expected to split into a "$" token and a
 ! "FOO" token, both typed KEYWORD2.
 [
    {
        T{ token f "$" KEYWORD2 }
        T{ token f "FOO" KEYWORD2 }
    }
 ] [
    f "$FOO" "shellscript" load-mode tokenize-line nip
 ] unit-test
--- a/extra/xmode/marker/marker.factor
+++ b/extra/xmode/marker/marker.factor
@ -24,8 +24,18 @@ assocs combinators combinators.lib strings regexp splitting ;
 ! Yields DIGIT when keyword-number? holds for the input, otherwise f
 ! (`and` returns its second operand only when the first is true).
 : mark-number ( keyword -- id )
    keyword-number? DIGIT and ;
 ! Turns a rule-set name into a rule set. Non-string inputs are returned
 ! unchanged. A string is split at the first "::": when a second part
 ! exists, it is looked up in the result of load-mode applied to the first
 ! part; otherwise the whole name is looked up in the rule-sets variable.
 : resolve-delegate ( name -- rules )
    dup string? [
        "::" split1 [ swap load-mode at ] [ rule-sets get at ] if*
    ] when ;
 ! Builds the sequence of keyword maps reachable from a rule set: the maps
 ! of every import (each import resolved with resolve-delegate and visited
 ! recursively), concatenated, with the rule set's own rule-set-keywords
 ! added as the final element.
 : rule-set-keyword-maps ( ruleset -- seq )
    dup rule-set-imports
    [ resolve-delegate rule-set-keyword-maps ] map concat
    swap rule-set-keywords add ;
 : mark-keyword ( keyword -- id )
-    current-keywords at ;
+    current-rule-set rule-set-keyword-maps assoc-stack ;
 ! Hands the current rule set's default token id (rule-set-default) to
 ! prev-token, .
 ! NOTE(review): presumably this emits the not-yet-tokenized text before
 ! the current position -- confirm against prev-token, .
 : add-remaining-token ( -- )
    current-rule-set rule-set-default prev-token, ;
@ -45,30 +55,6 @@ assocs combinators combinators.lib strings regexp splitting ;
 ! Returns the element of the `line` variable at the index stored in the
 ! `position` variable.
 : current-char ( -- char )
    position get line get nth ;
 GENERIC: perform-rule ( rule -- )
 : ... ;
 M: escape-rule perform-rule ( rule -- ) ... ;
 ! Returns the escape rule of the current context's rule set when it has
 ! one; otherwise pushes the parent context and recurses.
 ! NOTE(review): the word is declared ( -- rule ) and re-reads `context`
 ! on every call, so the parent pushed before the recursive call does not
 ! appear to be consumed -- confirm whether this recursion can terminate.
 ! NOTE(review): the enclosing hunk header (-45,30 +55,6) suggests this
 ! definition is among the lines the commit removes.
 : find-escape-rule ( -- rule )
    context get dup
    line-context-in-rule-set rule-set-escape-rule
    [ ] [ line-context-parent find-escape-rule ] ?if ;
 ! Placeholder: both branches of the rule-no-escape? test drop the rule,
 ! and the escape lookup below is commented out, so the word consumes its
 ! input and does nothing else.
 : check-escape-rule ( rule -- )
    #! Unlike jEdit, we keep checking parents until we find
    #! an escape rule.
    dup rule-no-escape? [ drop ] [
        drop
        ! find-escape-rule
        ! current-rule-set rule-set-escape-rule [
        !     find-escape-rule
        ! ] [
        !     
        ! ] if*
    ] if ;
 ! Generic word mapping a rule to an index n; methods in this file read it
 ! from a parser state variable.
 GENERIC: match-position ( rule -- n )
 ! For mark-previous rules the index is the value of last-offset.
 M: mark-previous-rule match-position drop last-offset get ;
@ -83,10 +69,10 @@ M: rule match-position drop position get ;
        [ over matcher-at-word-start?     over last-offset get =    implies ]
    } && 2nip ;
 : matches-not-mark-following? ... ;
 ! Generic word testing `text` at `position`; yields a match count or f.
 GENERIC: text-matches? ( position text -- match-count/f )
 ! With f as the text there is nothing to match: drop both inputs, yield f.
 M: f text-matches? 2drop f ;
 M: string text-matches?
    ! XXX ignore case
    >r line get swap tail-slice r>
@ -103,7 +89,7 @@ M: string text-matches?
 : rule-end-matches? ( rule -- match-count/f )
    dup mark-following-rule? [
-        dup rule-end swap can-match-here? 0 and
+        dup rule-start swap can-match-here? 0 and
    ] [
        dup rule-end tuck swap can-match-here? [
            position get swap matcher-text
@ -114,10 +100,48 @@ M: string text-matches?
        ] if
    ] if ;
 ! get-rules and get-imported-rules call each other, hence the DEFER:.
 DEFER: get-rules
 ! For every entry in the rule set's imports: resolve it with
 ! resolve-delegate, fetch its rules for char with get-rules, and append
 ! them to the accumulator with ?push-all. curry* supplies char to each
 ! iteration.
 : get-imported-rules ( vector/f char ruleset -- vector/f )
    rule-set-imports
    [ resolve-delegate get-rules ?push-all ] curry* each ;
 ! Appends the rules stored under key f in rule-set-rules to the
 ! accumulator.
 ! NOTE(review): presumably key f holds rules not tied to a start
 ! character -- confirm where rule-set-rules is populated.
 : get-always-rules ( vector/f ruleset -- vector/f )
    f swap rule-set-rules at ?push-all ;
 ! Appends the rules stored under the upper-cased char in rule-set-rules
 ! to the accumulator.
 : get-char-rules ( vector/f char ruleset -- vector/f )
    >r ch>upper r> rule-set-rules at ?push-all ;
 ! Collects every rule of ruleset that may apply at char: the rules keyed
 ! by the (upper-cased) char, then the rules keyed by f, then the rules
 ! contributed by each import. The accumulator starts out as f.
 : get-rules ( char ruleset -- seq )
    f -rot
    [ get-char-rules ] 2keep
    ! get-always-rules takes only ( vector/f ruleset ). Drop char inside the
    ! quotation so the accumulator, not char, reaches ?push-all; 2keep then
    ! restores both char and ruleset for the import pass.
    [ nip get-always-rules ] 2keep
    get-imported-rules ;
 ! Generic hooks that receive a match count and the rule it belongs to;
 ! methods are defined per rule class elsewhere in this file.
 GENERIC: handle-rule-start ( match-count rule -- )
 GENERIC: handle-rule-end ( match-count rule -- )
 ! Returns the escape rule of the current context's rule set if it has
 ! one. Otherwise looks exactly one level up: takes the parent context's
 ! rule set and returns its escape rule, or f when that rule set is f.
 ! NOTE(review): the parent context itself is not checked for f before
 ! line-context-in-rule-set is applied -- confirm callers only reach this
 ! with a parent present.
 : find-escape-rule ( -- rule )
    context get dup
    line-context-in-rule-set rule-set-escape-rule [ ] [
        line-context-parent line-context-in-rule-set
        dup [ rule-set-escape-rule ] when
    ] ?if ;
 ! Tries to apply an escape rule on behalf of `rule`; yields t when one
 ! was applied and f otherwise.
 !  - rule-no-escape? set on the rule: f.
 !  - find-escape-rule yields f: f.
 !  - the escape rule's start does not match (rule-start-matches? is f): f.
 !  - otherwise: handle-rule-start is called with the match count and the
 !    escape rule, delegate-end-escaped? is toggled, and the result is t.
 : check-escape-rule ( rule -- ? )
    rule-no-escape? [ f ] [
        find-escape-rule dup [
            dup rule-start-matches? dup [
                swap handle-rule-start
                delegate-end-escaped? [ not ] change
                t
            ] [
                2drop f
            ] if
        ] when
    ] if ;
 : check-every-rule ( -- ? )
    current-char current-rule-set get-rules
    [ rule-start-matches? ] map-find
@ -129,11 +153,6 @@ GENERIC: handle-rule-end ( match-count rule -- )
        dup [ swap handle-rule-end ] [ 2drop ] if
    ] when* ;
 ! Stub: the body only calls ?end-rule; the trailing comment marks where
 ! escape processing was still to be written.
 : handle-escape-rule ( rule -- )
    ?end-rule
    ;
 !        ... process escape ... ;
 : rule-match-token* ( rule -- id )
    dup rule-match-token {
        { f [ dup rule-body-token ] }
@ -141,10 +160,13 @@ GENERIC: handle-rule-end ( match-count rule -- )
        [ ]
    } case nip ;
-: resolve-delegate ( name -- rules )
+M: escape-rule handle-rule-start
-    dup string? [
+    drop
-        "::" split1 [ swap load-mode at ] [ rule-sets get at ] if*
+    ?end-rule
-    ] when ;
+    process-escape? get [
        escaped? [ not ] change
        position [ + ] change
    ] [ 2drop ] if ;
 M: seq-rule handle-rule-start
    ?end-rule
@ -174,6 +196,10 @@ M: mark-following-rule handle-rule-start
    f context get set-line-context-end
    context get set-line-context-in-rule ;
 ! End of a mark-following rule: the match count is discarded (nip), the
 ! rule's token id from rule-match-token* is passed to prev-token, and the
 ! current context's in-rule slot is reset to f.
 M: mark-following-rule handle-rule-end
    nip rule-match-token* prev-token,
    f context get set-line-context-in-rule ;
 M: mark-previous-rule handle-rule-start
    ?end-rule
    mark-token
@ -183,7 +209,7 @@ M: mark-previous-rule handle-rule-start
 : do-escaped
    escaped? get [
        escaped? off
-        ...
+        ! ...
    ] when ;
 : check-end-delegate ( -- ? )
@ -198,14 +224,14 @@ M: mark-previous-rule handle-rule-start
                ] keep context get line-context-parent line-context-in-rule rule-match-token* next-token,
                pop-context
                seen-whitespace-end? on t
-            ] [ 2drop f ] if
+            ] [ drop check-escape-rule ] if
        ] [ f ] if*
    ] [ f ] if* ;
 : handle-no-word-break ( -- )
    context get line-context-parent [
        line-context-in-rule dup rule-no-word-break? [
-            rule-match-token prev-token,
+            rule-match-token* prev-token,
            pop-context
        ] [ drop ] if
    ] when* ;
@ -221,6 +247,10 @@ M: mark-previous-rule handle-rule-start
    1 current-rule-set rule-set-default next-token, ;
 ! True only when both rule-set-rules and rule-set-keywords of the rule
 ! set are empty assocs.
 : rule-set-empty? ( ruleset -- ? )
    dup rule-set-rules assoc-empty?
    swap rule-set-keywords assoc-empty? and ;
 : check-word-break ( -- ? )
    current-char dup blank? [
        drop
@ -232,14 +262,17 @@ M: mark-previous-rule handle-rule-start
        (check-word-break)
    ] [
-        dup alpha? [
+        ! Micro-optimization with incorrect semantics; we keep
        ! it here because jEdit mode files depend on it now...
        current-rule-set rule-set-empty? [
            drop
        ] [
-            dup current-rule-set dup short. rule-set-no-word-sep* dup . member? [
+            dup alpha? [
-                "A: " write write1 nl
+                drop
            ] [
-                "B: " write write1 nl
+                current-rule-set rule-set-no-word-sep* member? [
-                (check-word-break)
+                    (check-word-break)
                ] unless
            ] if
        ] if
--- a/extra/xmode/marker/state/state.factor
+++ b/extra/xmode/marker/state/state.factor
@ -14,6 +14,7 @@ SYMBOL: whitespace-end
 ! Parser state variables. Notes reflect uses visible in marker.factor.
 ! Switched on by check-end-delegate after it pops a context.
 SYMBOL: seen-whitespace-end?
 ! Toggled by the escape-rule handler; switched off again by do-escaped.
 SYMBOL: escaped?
 ! Switched on during state initialisation; read by the escape-rule handler.
 SYMBOL: process-escape?
 ! Toggled by check-escape-rule after it applies an escape rule.
 SYMBOL: delegate-end-escaped?
 SYMBOL: terminated?
@ -61,5 +62,6 @@ SYMBOL: terminated?
    0 position set
    0 last-offset set
    0 whitespace-end set
    process-escape? on
    [ clone ] [ main-rule-set f <line-context> ] if*
    context set ;
--- a/extra/xmode/rules/rules.factor
+++ b/extra/xmode/rules/rules.factor
@ -45,23 +45,6 @@ MEMO: standard-rule-set ( id -- ruleset )
        over [ >r V{ } like r> over push-all ] [ nip ] if
    ] when* ;
 ! get-rules and get-imported-rules call each other, hence the DEFER:.
 DEFER: get-rules
 ! For every entry in the rule set's imports, fetch its rules for char
 ! with get-rules and append them to the accumulator with ?push-all.
 ! Imports are passed to get-rules as-is; no name resolution happens here.
 : get-imported-rules ( vector/f char ruleset -- vector/f )
    rule-set-imports [ get-rules ?push-all ] curry* each ;
 ! Appends the rules stored under key f in rule-set-rules to the
 ! accumulator.
 ! NOTE(review): presumably key f holds rules not tied to a start
 ! character -- confirm where rule-set-rules is populated.
 : get-always-rules ( vector/f ruleset -- vector/f )
    f swap rule-set-rules at ?push-all ;
 ! Appends the rules stored under the upper-cased char in rule-set-rules
 ! to the accumulator.
 : get-char-rules ( vector/f char ruleset -- vector/f )
    >r ch>upper r> rule-set-rules at ?push-all ;
 ! Collects every rule of ruleset that may apply at char: the rules keyed
 ! by the (upper-cased) char, then the rules keyed by f, then the rules
 ! contributed by each import. The accumulator starts out as f.
 : get-rules ( char ruleset -- seq )
    f -rot
    [ get-char-rules ] 2keep
    ! get-always-rules takes only ( vector/f ruleset ). Drop char inside the
    ! quotation so the accumulator, not char, reaches ?push-all; 2keep then
    ! restores both char and ruleset for the import pass.
    [ nip get-always-rules ] 2keep
    get-imported-rules ;
 ! Concatenates three pieces with 3append, in this order: the result of
 ! keyword-map-no-word-sep* on the rule set's keywords, the rule set's own
 ! rule-set-no-word-sep value, and the literal "_".
 : rule-set-no-word-sep* ( ruleset -- str )
    dup rule-set-keywords keyword-map-no-word-sep*
    swap rule-set-no-word-sep "_" 3append ;