regexp: Add re:: and re[[ ]] and use new syntax.

R/ / is a crazy syntax to support:
- it makes / a special character
- you have to escape forward slashes, which is awkward in any language with R/ / syntax
- re"\d" is not a valid regexp string; the escapes have to be handled later -- the only universal escape in a string is either \" or even "none" as a default option

modern-harvey3

parent a654c7b879
commit 422078e01e
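To make the change concrete, here is a small illustrative sketch (not part of the commit) of how the old literals map onto the new forms. The pattern/option pairs are lifted from the hunks below; the test inputs are invented and assume the new words parse like the existing R/ variants.

USING: regexp tools.test ;

! Old slash-delimited literal           New form added by this commit
! R/ \d+/                               re"\d+"  or  re[[\d+]]
! R/ agggtaaa|tttaccct/i                re:: "agggtaaa|tttaccct" "i"
! R/ (?:ftp|http|https):\/\/\S+/        re[[(?:ftp|http|https):\/\/\S+]]

! Illustrative checks with invented inputs:
{ t } [ "12345" re[[\d+]] matches? ] unit-test
{ t } [ "AGGGTAAA" re:: "agggtaaa|tttaccct" "i" matches? ] unit-test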
@@ -7,10 +7,10 @@ unicode multiline ;
 IN: globs

 : not-path-separator ( -- sep )
-os windows? R/ [^\/\\]/ R/ [^\/]/ ? ; foldable
+os windows? re"[^\/\\]" re"[^\/]" ? ; foldable

 : wild-path-separator ( -- sep )
-os windows? R/ [^\/\\][\/\\]|[^\/\\]/ R/ [^\/][\/]|[^\/]/ ? ; foldable
+os windows? re"[^\/\\][\/\\]|[^\/\\]" re"[^\/][\/]|[^\/]" ? ; foldable

 EBNF: <glob> [=[

@@ -10,7 +10,7 @@ IN: regexp.combinators.tests
 { f f f } [ "food" "ibar" "ba" [ strings matches? ] tri@ ] unit-test

 : conj ( -- regexp )
-{ R/ .*a/ R/ b.*/ } <and> ;
+{ re".*a" re"b.*" } <and> ;

 { t } [ "bljhasflsda" conj matches? ] unit-test
 { f } [ "bsdfdfs" conj matches? ] unit-test
@@ -13,7 +13,7 @@ IN: regexp.combinators

 PRIVATE>

-CONSTANT: <nothing> R/ (?~.*)/s
+CONSTANT: <nothing> re:: "(?~.*)" "s"

 : <literal> ( string -- regexp )
 [ "\\Q" "\\E" surround ] [ <concatenation> ] bi make-regexp ; foldable
@@ -229,6 +229,16 @@ SYNTAX: \R[=[ "]=]" parse-optioned-regexp ;
 SYNTAX: \R[==[ "]==]" parse-optioned-regexp ;
 SYNTAX: \R[===[ "]===]" parse-optioned-regexp ;
 SYNTAX: \R[====[ "]====]" parse-optioned-regexp ;
+SYNTAX: \re" "\"" parse-optioned-regexp ;
+SYNTAX: \re[[ "]]" parse-optioned-regexp ;
+SYNTAX: \re[=[ "]=]" parse-optioned-regexp ;
+SYNTAX: \re[==[ "]==]" parse-optioned-regexp ;
+SYNTAX: \re[===[ "]===]" parse-optioned-regexp ;
+SYNTAX: \re[====[ "]====]" parse-optioned-regexp ;
+
+SYNTAX: \re: scan-object "" <optioned-regexp> suffix! ;
+SYNTAX: \re:: scan-object scan-object <optioned-regexp> suffix! ;
+
 SYNTAX: \R(( "))" parse-optioned-regexp ;
 SYNTAX: \R(=( ")=)" parse-optioned-regexp ;
 SYNTAX: \R(==( ")==)" parse-optioned-regexp ;
@@ -240,6 +250,7 @@ SYNTAX: \R{=={ "}==}" parse-optioned-regexp ;
 SYNTAX: \R{==={ "}===}" parse-optioned-regexp ;
 SYNTAX: \R{===={ "}====}" parse-optioned-regexp ;

+
 USE: vocabs.loader

 { "prettyprint" "regexp" } "regexp.prettyprint" require-when
@@ -61,11 +61,11 @@ IN: validators
 ! From http://www.regular-expressions.info/email.html
 320 v-max-length
 "e-mail"
-R/ [A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}/i
+re:: [[[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}]] "i"
 v-regexp ;

 : v-url ( str -- str )
-"url" R/ (?:ftp|http|https):\/\/\S+/ v-regexp ;
+"url" re[[(?:ftp|http|https):\/\/\S+]] v-regexp ;

 : v-captcha ( str -- str )
 dup empty? [ "must remain blank" throw ] unless ;
@@ -7,19 +7,19 @@ IN: benchmark.regex-dna
 ! Based on http://shootout.alioth.debian.org/gp4/benchmark.php?test=regexdna&lang=ruby&id=1

 : strip-line-breaks ( string -- string' )
-R/ >.*\n|\n/ "" re-replace ;
+re">.*\n|\n" "" re-replace ;

 : count-patterns ( string -- )
 {
-R/ agggtaaa|tttaccct/i
+re:: "agggtaaa|tttaccct" "i"
-R/ [cgt]gggtaaa|tttaccc[acg]/i
+re:: "[cgt]gggtaaa|tttaccc[acg]" "i"
-R/ a[act]ggtaaa|tttacc[agt]t/i
+re:: "a[act]ggtaaa|tttacc[agt]t" "i"
-R/ ag[act]gtaaa|tttac[agt]ct/i
+re:: "ag[act]gtaaa|tttac[agt]ct" "i"
-R/ agg[act]taaa|ttta[agt]cct/i
+re:: "agg[act]taaa|ttta[agt]cct" "i"
-R/ aggg[acg]aaa|ttt[cgt]ccct/i
+re:: "aggg[acg]aaa|ttt[cgt]ccct" "i"
-R/ agggt[cgt]aa|tt[acg]accct/i
+re:: "agggt[cgt]aa|tt[acg]accct" "i"
-R/ agggta[cgt]a|t[acg]taccct/i
+re:: "agggta[cgt]a|t[acg]taccct" "i"
-R/ agggtaa[cgt]|[acg]ttaccct/i
+re:: "agggtaa[cgt]|[acg]ttaccct" "i"
 } [
 [ raw>> write bl ]
 [ count-matches number>string print ]
@@ -28,17 +28,17 @@ IN: benchmark.regex-dna

 : do-replacements ( string -- string' )
 {
-{ R/ B/ "(c|g|t)" }
+{ re"B" "(c|g|t)" }
-{ R/ D/ "(a|g|t)" }
+{ re"D" "(a|g|t)" }
-{ R/ H/ "(a|c|t)" }
+{ re"H" "(a|c|t)" }
-{ R/ K/ "(g|t)" }
+{ re"K" "(g|t)" }
-{ R/ M/ "(a|c)" }
+{ re"M" "(a|c)" }
-{ R/ N/ "(a|c|g|t)" }
+{ re"N" "(a|c|g|t)" }
-{ R/ R/ "(a|g)" }
+{ re"R" "(a|g)" }
-{ R/ S/ "(c|t)" }
+{ re"S" "(c|t)" }
-{ R/ V/ "(a|c|g)" }
+{ re"V" "(a|c|g)" }
-{ R/ W/ "(a|t)" }
+{ re"W" "(a|t)" }
-{ R/ Y/ "(c|t)" }
+{ re"Y" "(c|t)" }
 } [ re-replace ] assoc-each ;

 SYMBOL: ilen
@@ -9,8 +9,8 @@ IN: benchmark.regexp
 20,000 <iota> [ number>string ] map
 200 <iota> [ 1 + char: a <string> ] map
 '[
-_ R/ \d+/ [ matches? ] curry all? t assert=
+_ re[[\d+]] [ matches? ] curry all? t assert=
-_ R/ [a]+/ [ matches? ] curry all? t assert=
+_ re[[[a]+]] [ matches? ] curry all? t assert=
 ] times ;

 MAIN: regexp-benchmark
@@ -869,7 +869,7 @@ CONSTANT: emoji H{
 }

 : emojify ( str -- str' )
-R/ :([^:])+:/ [ >string emoji at ] re-replace-with ;
+R[[:([^:])+:]] [ >string emoji at ] re-replace-with ;

 : emojify-main ( -- )
 command-line get [
@@ -2352,7 +2352,7 @@ CONSTANT: html5 H{
 "#" ?head [ numeric-charref ] [ named-charref ] if ;

 CONSTANT: re-charref
-R/ &(#[0-9]+|#[xX][0-9a-fA-F]+|[^\t\n\f <&#;]{1,32});?/
+re"&(#[0-9]+|#[xX][0-9a-fA-F]+|[^\t\n\f <&#;]{1,32});?"

 PRIVATE>

@@ -87,7 +87,7 @@ M: text-printer print-comment-tag drop ;
 M: text-printer print-dtd-tag drop ;

 : collapse-spaces ( text -- text' )
-preformatted? get [ R/ \s+/ " " re-replace ] unless ;
+preformatted? get [ re"\s+" " " re-replace ] unless ;

 M: text-printer print-text-tag
 script? get style? get or
@@ -29,25 +29,25 @@ M: lexed length tokens>> length ;


 TUPLE: comment < lexed payload ;
-CONSTRUCTOR: <comment> comment ( tokens payload -- obj ) ;
+CONSTRUCTOR: <comment> comment ( tokens -- obj ) ;

 TUPLE: escaped-identifier < lexed name ;
-CONSTRUCTOR: <escaped-identifier> escaped-identifier ( tokens name -- obj ) ;
+CONSTRUCTOR: <escaped-identifier> escaped-identifier ( tokens -- obj ) ;

 TUPLE: escaped-object < lexed name payload ;
-CONSTRUCTOR: <escaped-object> escaped-object ( tokens name payload -- obj ) ;
+CONSTRUCTOR: <escaped-object> escaped-object ( tokens -- obj ) ;

 TUPLE: section < lexed tag payload ;
-CONSTRUCTOR: <section> section ( tokens tag payload -- obj ) ;
+CONSTRUCTOR: <section> section ( tokens -- obj ) ;

 TUPLE: named-section < lexed tag name payload ;
-CONSTRUCTOR: <named-section> named-section ( tokens tag name payload -- obj ) ;
+CONSTRUCTOR: <named-section> named-section ( tokens -- obj ) ;

 TUPLE: upper-colon < lexed tag payload ;
 CONSTRUCTOR: <upper-colon> upper-colon ( tokens -- obj ) ;

 TUPLE: lower-colon < lexed tag payload ;
-CONSTRUCTOR: <lower-colon> lower-colon ( tokens tag payload -- obj ) ;
+CONSTRUCTOR: <lower-colon> lower-colon ( tokens -- obj ) ;

 TUPLE: matched < lexed tag payload ;

@@ -98,11 +98,11 @@ CONSTRUCTOR: <double-paren> double-paren ( tokens -- obj )


 TUPLE: double-quote < matched ;
-CONSTRUCTOR: <double-quote> double-quote ( tokens tag payload -- obj ) ;
+CONSTRUCTOR: <double-quote> double-quote ( tokens -- obj ) ;


 TUPLE: identifier < lexed name ;
-CONSTRUCTOR: <identifier> identifier ( tokens name -- obj ) ;
+CONSTRUCTOR: <identifier> identifier ( tokens -- obj ) ;



@@ -144,7 +144,7 @@ MACRO:: read-matched ( ch -- quot: ( string n tag -- string n' slice' ) )
 2over ?nth-of char: \[ = [
 [ 1 + ] dip 1 modify-to 2over ?nth-of read-double-matched-bracket
 ] [
-[ slice-til-eol drop ] dip swap 2array
+[ slice-til-eol drop ] dip swap 2array <comment>
 ] if ;

 : terminator? ( slice -- ? )
@@ -166,7 +166,7 @@ MACRO:: read-matched ( ch -- quot: ( string n tag -- string n' slice' ) )
 dup [ f unexpected-eof ] unless
 lex-factor
 ] replicate ensure-tokens ! concat
-] dip swap 2array ;
+] dip swap 2array <lower-colon> ;

 : (strict-upper?) ( string -- ? )
 {
@@ -280,17 +280,16 @@ MACRO:: read-matched ( ch -- quot: ( string n tag -- string n' slice' ) )
 swap
 ! What ended the FOO: .. ; form?
 ! Remove the ; from the payload if present
-! XXX: probably can remove this, T: is dumb
 ! Also in stack effects ( T: int -- ) can be ended by -- and )
 dup ?last {
-{ [ dup ";" sequence= ] [ drop unclip-last 3array ] }
+{ [ dup ";" sequence= ] [ drop unclip-last 3array <upper-colon> ] }
-{ [ dup ";" tail? ] [ drop unclip-last 3array ] }
+{ [ dup ";" tail? ] [ drop unclip-last 3array <upper-colon> ] }
-{ [ dup "--" sequence= ] [ drop unclip-last -rot 2array [ rewind-slice ] dip ] }
+{ [ dup "--" sequence= ] [ drop unclip-last -rot 2array <upper-colon> [ rewind-slice ] dip ] }
-{ [ dup "]" sequence= ] [ drop unclip-last -rot 2array [ rewind-slice ] dip ] }
+{ [ dup "]" sequence= ] [ "omg1" throw drop unclip-last -rot 2array <upper-colon> [ rewind-slice ] dip ] }
-{ [ dup "}" sequence= ] [ drop unclip-last -rot 2array [ rewind-slice ] dip ] }
+{ [ dup "}" sequence= ] [ "omg2" throw drop unclip-last -rot 2array <upper-colon> [ rewind-slice ] dip ] }
-{ [ dup ")" sequence= ] [ drop unclip-last -rot 2array [ rewind-slice ] dip ] } ! (n*quot) breaks
+{ [ dup ")" sequence= ] [ B "opg3" throw drop unclip-last -rot 2array <upper-colon> [ rewind-slice ] dip ] } ! (n*quot) breaks
-{ [ dup section-close? ] [ drop unclip-last -rot 2array [ rewind-slice ] dip ] }
+{ [ dup section-close? ] [ drop unclip-last -rot 2array <upper-colon> [ rewind-slice ] dip ] }
-{ [ dup upper-colon? ] [ drop unclip-last -rot 2array [ rewind-slice ] dip ] }
+{ [ dup upper-colon? ] [ drop unclip-last -rot 2array <upper-colon> [ rewind-slice ] dip ] }
 [ drop 2array <upper-colon> ]
 } cond ;

@@ -342,7 +341,7 @@ MACRO:: read-matched ( ch -- quot: ( string n tag -- string n' slice' ) )
 } cond ;

 : read-acute ( string n slice -- string n' acute )
-[ matching-section-delimiter 1array lex-until ] keep swap unclip-last 3array ;
+[ matching-section-delimiter 1array lex-until ] keep swap unclip-last 3array <section> ;

 ! #{ } turned off, foo# not turned off
 : read-turnoff ( string n slice -- string n' obj )
@@ -158,4 +158,4 @@ version 8.36 <= [
 ] unit-test

 ! Test that the regexp syntax works.
-{ t } [ "1234abcd" regexp:R/ ^\d+\w+$/ matches? ] unit-test
+{ t } [ "1234abcd" regexp:R[[^\d+\w+$]] matches? ] unit-test
@@ -14,7 +14,7 @@ IN: text-analysis
 [ [ blank? ] trim ] map harvest ;

 : split-paragraphs ( str -- seq )
-R/ \r?\n\r?\n/ re-split trimmed ;
+re"\r?\n\r?\n" re-split trimmed ;

 <<
 CONSTANT: ABBREVIATIONS {
@@ -39,7 +39,7 @@ CONSTANT: ABBREVIATIONS {
 : split-sentences ( str -- seq )

 ! Mark end of sentences with EOS marker
-R/ ((?:[\.?!]|[\r\n]+)(?:\"|\'|\)|\]|\})?)(\s+)/
+re[[((?:[\.?!]|[\r\n]+)(?:\"|\'|\)|\]|\})?)(\s+)]]
 [ [ ".?!\r\n\"')]}" member? not ] cut-when "\x01" glue ]
 re-replace-with

@@ -63,46 +63,46 @@ CONSTANT: ABBREVIATIONS {
 "\x01" split trimmed ;

 CONSTANT: sub-syllable {
-R/ [^aeiou]e$/ ! give, love, bone, done, ride ...
+re"[^aeiou]e$" ! give, love, bone, done, ride ...
-R/ [aeiou](?:([cfghklmnprsvwz])\1?|ck|sh|[rt]ch)e[ds]$/
+re"[aeiou](?:([cfghklmnprsvwz])\1?|ck|sh|[rt]ch)e[ds]$"
 ! (passive) past participles and 3rd person sing present verbs:
 ! bared, liked, called, tricked, bashed, matched

-R/ .e(?:ly|less(?:ly)?|ness?|ful(?:ly)?|ments?)$/
+re".e(?:ly|less(?:ly)?|ness?|ful(?:ly)?|ments?)$"
 ! nominal, adjectival and adverbial derivatives from -e$ roots:
 ! absolutely, nicely, likeness, basement, hopeless
 ! hopeful, tastefully, wasteful

-R/ ion/ ! action, diction, fiction
+re"ion" ! action, diction, fiction
-R/ [ct]ia[nl]/ ! special(ly), initial, physician, christian
+re"[ct]ia[nl]" ! special(ly), initial, physician, christian
-R/ [^cx]iou/ ! illustrious, NOT spacious, gracious, anxious, noxious
+re"[^cx]iou" ! illustrious, NOT spacious, gracious, anxious, noxious
-R/ sia$/ ! amnesia, polynesia
+re"sia$" ! amnesia, polynesia
-R/ .gue$/ ! dialogue, intrigue, colleague
+re".gue$" ! dialogue, intrigue, colleague
 }

 CONSTANT: add-syllable {
-R/ i[aiou]/ ! alias, science, phobia
+re"i[aiou]" ! alias, science, phobia
-R/ [dls]ien/ ! salient, gradient, transient
+re"[dls]ien" ! salient, gradient, transient
-R/ [aeiouym]ble$/ ! -Vble, plus -mble
+re"[aeiouym]ble$" ! -Vble, plus -mble
-R/ [aeiou]{3}/ ! agreeable
+re"[aeiou]{3}" ! agreeable
-R/ ^mc/ ! mcwhatever
+re"^mc" ! mcwhatever
-R/ ism$/ ! sexism, racism
+re"ism$" ! sexism, racism
-R/ (?:([^aeiouy])\1|ck|mp|ng)le$/ ! bubble, cattle, cackle, sample, angle
+re"(?:([^aeiouy])\1|ck|mp|ng)le$" ! bubble, cattle, cackle, sample, angle
-R/ dnt$/ ! couldn/t
+re"dnt$" ! couldn/t
-R/ [aeiou]y[aeiou]/ ! annoying, layer
+re"[aeiou]y[aeiou]" ! annoying, layer
 }

 : syllables ( str -- n )
 dup length 1 = [ drop 1 ] [
 >lower char: . swap remove
-[ R/ [aeiouy]+/ count-matches ]
+[ re"[aeiouy]+" count-matches ]
 [ sub-syllable [ matches? ] with count - ]
 [ add-syllable [ matches? ] with count + ] tri
 1 max
 ] if ;

 : split-words ( str -- words )
-R/ \b([a-z][a-z\-']*)\b/i all-matching-subseqs ;
+re:: [[\b([a-z][a-z\-']*)\b]] "i" all-matching-subseqs ;

 TUPLE: text-analysis n-paragraphs n-sentences n-chars n-words
 n-syllables n-complex-words n-unique-words n-difficult-words ;
@@ -116,7 +116,7 @@ TUPLE: entry key value ;
 : multi-string ( -- parser )
 multi-basic-string multi-literal-string 2choice [
 "" like "\n" ?head drop
-R/ \\[ \t\r\n]*\n[ \t\r\n]*/m "" re-replace
+re:: [[\\[ \t\r\n]*\n[ \t\r\n]*]] "m" "" re-replace
 ] action ;

 : string-parser ( -- parser )
@@ -13,7 +13,7 @@ IN: xkcd

 : comic-image ( url -- image )
 http-get nip
-R/ \/\/imgs\.xkcd\.com\/comics\/[^\.]+\.(png|jpg)/
+re"//imgs\.xkcd\.com/comics/[^\.]+\.(png|jpg)"
 first-match >string "http:" prepend load-http-image ;

 : comic-image. ( url -- )
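For completeness, a short usage sketch of the new string-form literals with two of the combinators touched above (re-replace and re-split); the inputs and expected values here are invented for illustration, not taken from the commit:

USING: regexp sequences tools.test ;

! Collapse runs of whitespace, as collapse-spaces does above.
{ "a b c" } [ "a  b \t c" re"\s+" " " re-replace ] unit-test

! Split on blank lines, as split-paragraphs does above.
{ 2 } [ "foo\r\n\r\nbar" re"\r?\n\r?\n" re-split length ] unit-test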