From 9a0d318b916a113581760a73a163ad87e5d8801b Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Sun, 2 Dec 2007 05:25:18 -0500 Subject: [PATCH] Updating extra/xmode --- extra/xmode/README.txt | 44 ++++++++- extra/xmode/loader/loader.factor | 5 +- extra/xmode/marker/marker-tests.factor | 43 +++++++++ extra/xmode/marker/marker.factor | 123 ++++++++++++++++--------- extra/xmode/marker/state/state.factor | 2 + extra/xmode/rules/rules.factor | 17 ---- 6 files changed, 165 insertions(+), 69 deletions(-) mode change 100644 => 100755 extra/xmode/README.txt mode change 100644 => 100755 extra/xmode/loader/loader.factor mode change 100644 => 100755 extra/xmode/marker/marker-tests.factor mode change 100644 => 100755 extra/xmode/marker/marker.factor mode change 100644 => 100755 extra/xmode/marker/state/state.factor mode change 100644 => 100755 extra/xmode/rules/rules.factor diff --git a/extra/xmode/README.txt b/extra/xmode/README.txt old mode 100644 new mode 100755 index 7a9d1580d3..bf73042030 --- a/extra/xmode/README.txt +++ b/extra/xmode/README.txt @@ -1,5 +1,41 @@ -This is a Factor port of jEdit's syntax highlighting engine. +This is a Factor port of the jEdit 4.3 syntax highlighting engine +(http://www.jedit.org). -It implements a relatively basic, rule-driven recursive parser. -The parser is incremental, with one line granularity. This is -still a work in progress. +jEdit 1.2, released in late 1998, was the first release to support +syntax highlighting. It featured a small number of hand-coded +"token markers" -- simple incremental parsers -- all based on the +original JavaTokenMarker contributed by Tal Davidson. + +Around the time of jEdit 1.5 in 1999, Mike Dillon began developing a +jEdit plugin named "XMode". This plugin implemented a generic, +rule-driven token marker which read mode descriptions from XML files. +XMode eventually matured to the point where it could replace the +formerly hand-coded token markers. 
+ +With the release of jEdit 2.4, I merged XMode into the core and +eliminated the old hand-coded token markers. + +XMode suffers from a somewhat archaic design, and was written at a time +when Java VMs with JIT compilers were relatively uncommon, object +allocation was expensive, and heap space tight. As a result the parser +design is less general than it could be. + +Furthermore, the parser has a few bugs which some mode files have come +to depend on: + +- If a RULES tag does not define any keywords or rules, then its + NO_WORD_SEP attribute is ignored. + + The Factor implementation duplicates this behavior. + +- if a RULES tag does not have a NO_WORD_SEP attribute, then + it inherits the value of the NO_WORD_SEP attribute from the previous + RULES tag. + + The Factor implementation does not duplicate this behavior. + +This is still a work in progress. If you find any behavioral differences +between the Factor implementation and the original jEdit code, please +report them as bugs. Also, if you wish to contribute a new or improved +mode file, please contact the jEdit project. Updated mode files in jEdit +will be periodically imported into the Factor source tree. diff --git a/extra/xmode/loader/loader.factor b/extra/xmode/loader/loader.factor old mode 100644 new mode 100755 index a287efdd4b..c6b5cad9d1 --- a/extra/xmode/loader/loader.factor +++ b/extra/xmode/loader/loader.factor @@ -35,8 +35,7 @@ IN: xmode.loader dup children>string swap position-attrs ; : parse-regexp-matcher ( tag -- matcher ) - ! XXX - dup children>string swap position-attrs ; + dup children>string swap position-attrs ; ! SPAN's children { "SET" string>rule-set-name set-rule-set-name } { "IGNORE_CASE" string>boolean set-rule-set-ignore-case? } { "HIGHLIGHT_DIGITS" string>boolean set-rule-set-highlight-digits? } - { "DIGIT_RE" f set-rule-set-digit-re } ! XXX + { "DIGIT_RE" set-rule-set-digit-re } ! 
XXX { "ESCAPE" f add-escape-rule } { "DEFAULT" string>token set-rule-set-default } { "NO_WORD_SEP" f set-rule-set-no-word-sep } diff --git a/extra/xmode/marker/marker-tests.factor b/extra/xmode/marker/marker-tests.factor old mode 100644 new mode 100755 index 6c66f958a6..cb7f2960a4 --- a/extra/xmode/marker/marker-tests.factor +++ b/extra/xmode/marker/marker-tests.factor @@ -2,6 +2,40 @@ USING: xmode.tokens xmode.catalog xmode.marker tools.test kernel ; IN: temporary +[ + { + T{ token f "int" KEYWORD3 } + T{ token f " " f } + T{ token f "x" f } + } +] [ f "int x" "c" load-mode tokenize-line nip ] unit-test + +[ + { + T{ token f "\"" LITERAL1 } + T{ token f "hello\\\"" LITERAL1 } + T{ token f " " LITERAL1 } + T{ token f "world" LITERAL1 } + T{ token f "\"" LITERAL1 } + } +] [ f "\"hello\\\" world\"" "c" load-mode tokenize-line nip ] unit-test + +[ + { + T{ token f "\"" LITERAL1 } + T{ token f "hello\\\ world" LITERAL1 } + T{ token f "\"" LITERAL1 } + } +] [ f "\"hello\\\ world\"" "c" load-mode tokenize-line nip ] unit-test + +[ + { + T{ token f "int" KEYWORD3 } + T{ token f " " f } + T{ token f "x" f } + } +] [ f "int x" "java" load-mode tokenize-line nip ] unit-test + [ { T{ token f "//" COMMENT2 } @@ -66,3 +100,12 @@ IN: temporary ] [ f "" "xml" load-mode tokenize-line nip ] unit-test + +[ + { + T{ token f "$" KEYWORD2 } + T{ token f "FOO" KEYWORD2 } + } +] [ + f "$FOO" "shellscript" load-mode tokenize-line nip +] unit-test diff --git a/extra/xmode/marker/marker.factor b/extra/xmode/marker/marker.factor old mode 100644 new mode 100755 index c155f8e11c..cd9eacbb88 --- a/extra/xmode/marker/marker.factor +++ b/extra/xmode/marker/marker.factor @@ -24,8 +24,18 @@ assocs combinators combinators.lib strings regexp splitting ; : mark-number ( keyword -- id ) keyword-number? DIGIT and ; +: resolve-delegate ( name -- rules ) + dup string? 
[ + "::" split1 [ swap load-mode at ] [ rule-sets get at ] if* + ] when ; + +: rule-set-keyword-maps ( ruleset -- seq ) + dup rule-set-imports + [ resolve-delegate rule-set-keyword-maps ] map concat + swap rule-set-keywords add ; + : mark-keyword ( keyword -- id ) - current-keywords at ; + current-rule-set rule-set-keyword-maps assoc-stack ; : add-remaining-token ( -- ) current-rule-set rule-set-default prev-token, ; @@ -45,30 +55,6 @@ assocs combinators combinators.lib strings regexp splitting ; : current-char ( -- char ) position get line get nth ; -GENERIC: perform-rule ( rule -- ) - -: ... ; - -M: escape-rule perform-rule ( rule -- ) ... ; - -: find-escape-rule ( -- rule ) - context get dup - line-context-in-rule-set rule-set-escape-rule - [ ] [ line-context-parent find-escape-rule ] ?if ; - -: check-escape-rule ( rule -- ) - #! Unlike jEdit, we keep checking parents until we find - #! an escape rule. - dup rule-no-escape? [ drop ] [ - drop - ! find-escape-rule - ! current-rule-set rule-set-escape-rule [ - ! find-escape-rule - ! ] [ - ! - ! ] if* - ] if ; - GENERIC: match-position ( rule -- n ) M: mark-previous-rule match-position drop last-offset get ; @@ -83,10 +69,10 @@ M: rule match-position drop position get ; [ over matcher-at-word-start? over last-offset get = implies ] } && 2nip ; -: matches-not-mark-following? ... ; - GENERIC: text-matches? ( position text -- match-count/f ) +M: f text-matches? 2drop f ; + M: string text-matches? ! XXX ignore case >r line get swap tail-slice r> @@ -103,7 +89,7 @@ M: string text-matches? : rule-end-matches? ( rule -- match-count/f ) dup mark-following-rule? [ - dup rule-end swap can-match-here? 0 and + dup rule-start swap can-match-here? 0 and ] [ dup rule-end tuck swap can-match-here? [ position get swap matcher-text @@ -114,10 +100,48 @@ M: string text-matches? 
] if ] if ; +DEFER: get-rules + +: get-imported-rules ( vector/f char ruleset -- vector/f ) + rule-set-imports + [ resolve-delegate get-rules ?push-all ] curry* each ; + +: get-always-rules ( vector/f ruleset -- vector/f ) + f swap rule-set-rules at ?push-all ; + +: get-char-rules ( vector/f char ruleset -- vector/f ) + >r ch>upper r> rule-set-rules at ?push-all ; + +: get-rules ( char ruleset -- seq ) + f -rot + [ get-char-rules ] 2keep + [ get-always-rules ] keep + get-imported-rules ; + GENERIC: handle-rule-start ( match-count rule -- ) GENERIC: handle-rule-end ( match-count rule -- ) +: find-escape-rule ( -- rule ) + context get dup + line-context-in-rule-set rule-set-escape-rule [ ] [ + line-context-parent line-context-in-rule-set + dup [ rule-set-escape-rule ] when + ] ?if ; + +: check-escape-rule ( rule -- ? ) + rule-no-escape? [ f ] [ + find-escape-rule dup [ + dup rule-start-matches? dup [ + swap handle-rule-start + delegate-end-escaped? [ not ] change + t + ] [ + 2drop f + ] if + ] when + ] if ; + : check-every-rule ( -- ? ) current-char current-rule-set get-rules [ rule-start-matches? ] map-find @@ -129,11 +153,6 @@ GENERIC: handle-rule-end ( match-count rule -- ) dup [ swap handle-rule-end ] [ 2drop ] if ] when* ; -: handle-escape-rule ( rule -- ) - ?end-rule - ; -! ... process escape ... ; - : rule-match-token* ( rule -- id ) dup rule-match-token { { f [ dup rule-body-token ] } @@ -141,10 +160,13 @@ GENERIC: handle-rule-end ( match-count rule -- ) [ ] } case nip ; -: resolve-delegate ( name -- rules ) - dup string? [ - "::" split1 [ swap load-mode at ] [ rule-sets get at ] if* - ] when ; +M: escape-rule handle-rule-start + drop + ?end-rule + process-escape? get [ + escaped? 
[ not ] change + position [ + ] change + ] [ 2drop ] if ; M: seq-rule handle-rule-start ?end-rule @@ -174,6 +196,10 @@ M: mark-following-rule handle-rule-start f context get set-line-context-end context get set-line-context-in-rule ; +M: mark-following-rule handle-rule-end + nip rule-match-token* prev-token, + f context get set-line-context-in-rule ; + M: mark-previous-rule handle-rule-start ?end-rule mark-token @@ -183,7 +209,7 @@ M: mark-previous-rule handle-rule-start : do-escaped escaped? get [ escaped? off - ... + ! ... ] when ; : check-end-delegate ( -- ? ) @@ -198,14 +224,14 @@ M: mark-previous-rule handle-rule-start ] keep context get line-context-parent line-context-in-rule rule-match-token* next-token, pop-context seen-whitespace-end? on t - ] [ 2drop f ] if + ] [ drop check-escape-rule ] if ] [ f ] if* ] [ f ] if* ; : handle-no-word-break ( -- ) context get line-context-parent [ line-context-in-rule dup rule-no-word-break? [ - rule-match-token prev-token, + rule-match-token* prev-token, pop-context ] [ drop ] if ] when* ; @@ -221,6 +247,10 @@ M: mark-previous-rule handle-rule-start 1 current-rule-set rule-set-default next-token, ; +: rule-set-empty? ( ruleset -- ? ) + dup rule-set-rules assoc-empty? + swap rule-set-keywords assoc-empty? and ; + : check-word-break ( -- ? ) current-char dup blank? [ drop @@ -232,14 +262,17 @@ M: mark-previous-rule handle-rule-start (check-word-break) ] [ - dup alpha? [ + ! Micro-optimization with incorrect semantics; we keep + ! it here because jEdit mode files depend on it now... + current-rule-set rule-set-empty? [ drop ] [ - dup current-rule-set dup short. rule-set-no-word-sep* dup . member? [ - "A: " write write1 nl + dup alpha? [ + drop ] [ - "B: " write write1 nl - (check-word-break) + current-rule-set rule-set-no-word-sep* member? 
[ + (check-word-break) + ] unless ] if ] if diff --git a/extra/xmode/marker/state/state.factor b/extra/xmode/marker/state/state.factor old mode 100644 new mode 100755 index 26379501bd..cce7c7567a --- a/extra/xmode/marker/state/state.factor +++ b/extra/xmode/marker/state/state.factor @@ -14,6 +14,7 @@ SYMBOL: whitespace-end SYMBOL: seen-whitespace-end? SYMBOL: escaped? +SYMBOL: process-escape? SYMBOL: delegate-end-escaped? SYMBOL: terminated? @@ -61,5 +62,6 @@ SYMBOL: terminated? 0 position set 0 last-offset set 0 whitespace-end set + process-escape? on [ clone ] [ main-rule-set f ] if* context set ; diff --git a/extra/xmode/rules/rules.factor b/extra/xmode/rules/rules.factor old mode 100644 new mode 100755 index 9b530aae14..7206668edb --- a/extra/xmode/rules/rules.factor +++ b/extra/xmode/rules/rules.factor @@ -45,23 +45,6 @@ MEMO: standard-rule-set ( id -- ruleset ) over [ >r V{ } like r> over push-all ] [ nip ] if ] when* ; -DEFER: get-rules - -: get-imported-rules ( vector/f char ruleset -- vector/f ) - rule-set-imports [ get-rules ?push-all ] curry* each ; - -: get-always-rules ( vector/f ruleset -- vector/f ) - f swap rule-set-rules at ?push-all ; - -: get-char-rules ( vector/f char ruleset -- vector/f ) - >r ch>upper r> rule-set-rules at ?push-all ; - -: get-rules ( char ruleset -- seq ) - f -rot - [ get-char-rules ] 2keep - [ get-always-rules ] keep - get-imported-rules ; - : rule-set-no-word-sep* ( ruleset -- str ) dup rule-set-keywords keyword-map-no-word-sep* swap rule-set-no-word-sep "_" 3append ;