factor/extra/punycode/punycode.factor

165 lines
3.9 KiB
Factor

! Copyright (C) 2020 John Benediktsson
! See http://factorcode.org/license.txt for BSD license
USING: ascii byte-arrays combinators kernel literals locals math
math.order sbufs sequences sequences.extras sets sorting
splitting ;
IN: punycode
<PRIVATE
<<
CONSTANT: BASE 36
CONSTANT: TMIN 1
CONSTANT: TMAX 26
CONSTANT: SKEW 38
CONSTANT: DAMP 700
CONSTANT: BIAS 72
CONSTANT: N 128
CONSTANT: DIGITS $[ "abcdefghijklmnopqrstuvwxyz0123456789" >byte-array ]
>>
: threshold ( j bias -- T )
[ BASE * ] [ - ] bi* TMIN TMAX clamp ;
:: adapt ( delta! #chars first? -- bias )
delta first? DAMP 2 ? /i delta!
delta dup #chars /i + delta!
0 [ delta $[ BASE TMIN - TMAX * 2 /i ] > ] [
delta $[ BASE TMIN - ] /i delta!
BASE +
] while BASE delta * delta SKEW + /i + ;
: segregate ( str -- base extended )
[ N < ] partition members natural-sort ;
:: find-pos ( str ch i pos -- i' pos' )
i pos 1 + str [
ch <=> {
{ +eq+ [ 1 + t ] }
{ +lt+ [ 1 + f ] }
[ drop f ]
} case
] find-from drop [ drop -1 -1 ] unless* ;
:: insertion-unsort ( str extended -- deltas )
V{ } clone :> accum
N :> oldch!
-1 :> oldi!
extended [| ch |
-1 :> i!
-1 :> pos!
str [ ch < ] count :> curlen
curlen 1 + ch oldch - * :> delta!
[
str ch i pos find-pos pos! i!
i -1 = [
f
] [
i oldi - delta + delta!
delta 1 - accum push
i oldi!
0 delta!
t
] if
] loop
ch oldch!
] each accum ;
:: encode-delta ( delta! bias -- seq )
SBUF" " clone :> accum
0 :> j!
[
j 1 + j!
j bias threshold :> T
delta T < [
f
delta
] [
t
delta T - BASE T - /mod T + swap delta!
] if DIGITS nth accum push
] loop accum ;
:: encode-deltas ( baselen deltas -- seq )
SBUF" " clone :> accum
BIAS :> bias!
deltas [| delta i |
delta bias encode-delta accum push-all
delta baselen i + 1 + i 0 = adapt bias!
] each-index accum ;
PRIVATE>
:: >punycode ( str -- punicode )
str segregate :> ( base extended )
str extended insertion-unsort :> deltas
base length deltas encode-deltas
base [ "-" rot 3append ] unless-empty "" like ;
<PRIVATE
ERROR: invalid-digit char ;
:: decode-digit ( ch -- digit )
{
{ [ ch CHAR: A CHAR: Z between? ] [ ch CHAR: A - ] }
{ [ ch CHAR: 0 CHAR: 9 between? ] [ ch CHAR: 0 26 - - ] }
[ ch invalid-digit ]
} cond ;
:: decode-delta ( extended extpos! bias -- extpos' delta )
0 :> delta!
1 :> w!
0 :> j!
[
j 1 + j!
j bias threshold :> T
extpos extended nth decode-digit :> digit
extpos 1 + extpos!
digit w * delta + delta!
BASE T - w * w!
digit T >=
] loop extpos delta ;
ERROR: invalid-character char ;
:: insertion-sort ( base extended -- base )
N :> ch!
-1 :> pos!
BIAS :> bias!
0 :> extpos!
extended length :> extlen
[ extpos extlen < ] [
extended extpos bias decode-delta :> ( newpos delta )
delta 1 + pos + pos!
pos base length 1 + /mod pos! ch + ch!
ch 0x10FFFF > [ ch invalid-character ] when
ch pos base insert-nth!
delta base length extpos 0 = adapt bias!
newpos extpos!
] while base ;
PRIVATE>
: punycode> ( punycode -- str )
CHAR: - over last-index [
! FIXME: assert all non-basic code-points
[ head >sbuf ] [ 1 + tail ] 2bi >upper
] [
SBUF" " clone swap >upper
] if* insertion-sort "" like ;
: idna> ( punycode -- str )
"." split [
"xn--" ?head [ punycode> ] when
] map "." join ;
: >idna ( str -- punycode )
"." split [
dup [ N < ] all? [
>punycode "xn--" prepend
] unless
] map "." join ;