html.entities: minor refactor for clarity.

db4
John Benediktsson 2014-04-23 21:21:30 -07:00
parent 50ca01a4d8
commit d27fa562e8
1 changed file with 19 additions and 19 deletions

View File

@@ -2316,25 +2316,25 @@ CONSTANT: html5 H{
{ "zwnj;" "\u00200c" } { "zwnj;" "\u00200c" }
} }
: replace-charref ( str -- str' ) : numeric-charref ( str -- newstr )
"#" ?head [ ";" ?tail drop dup first "xX" member?
! numeric charref [ rest hex> ] [ dec> ] if invalid-charrefs ?at [
";" ?tail drop dup first "xX" member? dup { [ 0xD800 0xDFFF between? ] [ 0x10FFFF > ] } 1||
[ rest hex> ] [ dec> ] if invalid-charrefs ?at [ [ drop "\u0FFFFD" ] [
dup { [ 0xD800 0xDFFF between? ] [ 0x10FFFF > ] } 1|| dup invalid-codepoints member?
[ drop "\u0FFFFD" ] [ [ drop "" ] [ 1string ] if
dup invalid-codepoints member? ] if
[ drop "" ] [ 1string ] if ] unless ;
] if
] unless : named-charref ( str -- newstr )
] [ html5 ?at [
! named charref ! find the longest matching name
html5 ?at [ dup dup length 1 (a,b) [ head html5 at ] with map-find
! find the longest matching name [ swapd tail append ] [ drop "&" prepend ] if*
dup dup length 1 (a,b) [ head html5 at ] with map-find ] unless ;
[ swapd tail append ] [ drop "&" prepend ] if*
] unless : replace-charref ( str -- newstr )
] if ; "#" ?head [ numeric-charref ] [ named-charref ] if ;
CONSTANT: re-charref CONSTANT: re-charref
R/ &(#[0-9]+|#[xX][0-9a-fA-F]+|[^\t\n\f <&#;]{1,32});?/ R/ &(#[0-9]+|#[xX][0-9a-fA-F]+|[^\t\n\f <&#;]{1,32});?/