io.pathnames: Add canonicalize-path.

The idea is to make a canonical representation of any path, taking into
account . and .. and unicode-prefix on Windows.

The use case is in a shell you have a current-directory and you can do
crazy commands like ``cd ../foo/bar/baz/../.././././`` and get the
canonical/shortened directory name. You can also use this word to
compare if two paths are the same.
vm-warnings
Doug Coleman 2018-07-07 11:59:59 -05:00
parent f4ac9fcfca
commit 3ac520a8ec
3 changed files with 142 additions and 8 deletions

View File

@ -8,7 +8,7 @@ io.files.types io.pathnames io.ports io.streams.c io.streams.null
io.timeouts kernel libc literals locals math math.bitwise namespaces
sequences specialized-arrays system threads tr vectors windows
windows.errors windows.handles windows.kernel32 windows.shell32
windows.time windows.types windows.winsock ;
windows.time windows.types windows.winsock splitting ;
SPECIALIZED-ARRAY: ushort
IN: io.files.windows
@ -326,11 +326,14 @@ M: windows root-directory? ( path -- ? )
[ drop f ]
} cond ;
: prepend-prefix ( string -- string' )
: prepend-unicode-prefix ( string -- string' )
dup unicode-prefix head? [
unicode-prefix prepend
] unless ;
: remove-unicode-prefix ( string -- string' )
unicode-prefix ?head drop ;
TR: normalize-separators "/" "\\" ;
<PRIVATE
@ -340,13 +343,20 @@ TR: normalize-separators "/" "\\" ;
PRIVATE>
M: windows canonicalize-path
remove-unicode-prefix canonicalize-path* ;
M: object root-path remove-unicode-prefix root-path* ;
M: object relative-path remove-unicode-prefix relative-path* ;
M: windows normalize-path ( string -- string' )
dup unc-path? [
normalize-separators
] [
absolute-path
normalize-separators
prepend-prefix
prepend-unicode-prefix
] if ;
<PRIVATE

View File

@ -1,6 +1,6 @@
USING: io.backend io.directories io.files.private io.files.temp
io.files.unique io.pathnames kernel locals math namespaces
system tools.test ;
io.files.unique io.pathnames kernel locals math multiline
namespaces sequences system tools.test ;
{ "passwd" } [ "/etc/passwd" file-name ] unit-test
{ "awk" } [ "/usr/libexec/awk/" file-name ] unit-test
@ -81,3 +81,80 @@ H{
{ t } [ "~" home [ "foo" append-path ] bi@ [ normalize-path ] same? ] unit-test
{ t } [ os windows? "~\\~/" "~/~/" ? "~" "~" append-path [ path-components ] same? ] unit-test
! Absolute paths
os windows? [
{ "c:/" } [ "c:/" canonicalize-path ] unit-test
{ "c:/" } [ "c:/." canonicalize-path ] unit-test
{ "c:/" } [ "c:/.." canonicalize-path ] unit-test
{ "c:/" } [ "c:/Users/.." canonicalize-path ] unit-test
{ "c:/" } [ "c:/Users/../" canonicalize-path ] unit-test
{ "c:/" } [ "c:/Users/../." canonicalize-path ] unit-test
{ "c:/" } [ "c:/Users/.././" canonicalize-path ] unit-test
{ "c:/" } [ "c:/Users/.././././././" canonicalize-path ] unit-test
{ "c:/" } [ "c:/Users/../././/////./././/././././//././././././." canonicalize-path ] unit-test
{ "c:/" } [ "c:/Users/../../../..////.././././././/../" canonicalize-path ] unit-test
{ "c:/Users" } [ "c:/Users/../../../Users" canonicalize-path ] unit-test
{ "c:/Users" } [ "c:/Users" canonicalize-path ] unit-test
{ "c:/Users" } [ "c:/Users/." canonicalize-path ] unit-test
{ "c:/Users\\foo\\bar" } [ "c:/Users/foo/bar" canonicalize-path ] unit-test
] [
{ "/" } [ "/" canonicalize-path ] unit-test
{ "/" } [ "/." canonicalize-path ] unit-test
{ "/" } [ "/.." canonicalize-path ] unit-test
{ "/" } [ "/Users/.." canonicalize-path ] unit-test
{ "/" } [ "/Users/../" canonicalize-path ] unit-test
{ "/" } [ "/Users/../." canonicalize-path ] unit-test
{ "/" } [ "/Users/.././" canonicalize-path ] unit-test
{ "/" } [ "/Users/.././././././" canonicalize-path ] unit-test
{ "/" } [ "/Users/../././/////./././/././././//././././././." canonicalize-path ] unit-test
{ "/" } [ "/Users/../../../..////.././././././/../" canonicalize-path ] unit-test
{ "/Users" } [ "/Users/../../../Users" canonicalize-path ] unit-test
{ "/Users" } [ "/Users" canonicalize-path ] unit-test
{ "/Users" } [ "/Users/." canonicalize-path ] unit-test
{ "/Users/foo/bar" } [ "/Users/foo/bar" canonicalize-path ] unit-test
] if
! Relative paths
{ "." } [ f canonicalize-path ] unit-test
{ "." } [ "" canonicalize-path ] unit-test
{ "." } [ "." canonicalize-path ] unit-test
{ "." } [ "./" canonicalize-path ] unit-test
{ "." } [ "./." canonicalize-path ] unit-test
{ ".." } [ ".." canonicalize-path ] unit-test
{ ".." } [ "../" canonicalize-path ] unit-test
{ ".." } [ "../." canonicalize-path ] unit-test
{ ".." } [ ".././././././//." canonicalize-path ] unit-test
{ t } [ "../.." canonicalize-path { "../.." "..\\.." } member? ] unit-test
{ t } [ "../../" canonicalize-path { "../.." "..\\.." } member? ] unit-test
{ t } [ "../.././././/./././" canonicalize-path { "../.." "..\\.." } member? ] unit-test
! Root paths
os windows? [
{ "d:\\" } [ "d:\\" root-path ] unit-test
{ "d:\\" } [ "d:\\\\\\\\//////" root-path ] unit-test
{ "c:\\" } [ "c:\\Users\\merlen" root-path ] unit-test
{ "c:\\" } [ "c:\\\\\\//Users//\\//merlen//" root-path ] unit-test
{ "d:\\" } [ "d:\\././././././/../../../" root-path ] unit-test
{ "d:\\" } [ "d:\\merlen\\dog" root-path ] unit-test
{ "d:\\" } [ "\\\\?\\d:\\" root-path ] unit-test
{ "d:\\" } [ "\\\\?\\d:\\\\\\\\//////" root-path ] unit-test
{ "c:\\" } [ "\\\\?\\c:\\Users\\merlen" root-path ] unit-test
{ "c:\\" } [ "\\\\?\\c:\\\\\\//Users//\\//merlen//" root-path ] unit-test
{ "d:\\" } [ "\\\\?\\d:\\././././././/../../../" root-path ] unit-test
{ "d:\\" } [ "\\\\?\\d:\\merlen\\dog" root-path ] unit-test
] [
{ "/" } [ "/" root-path ] unit-test
{ "/" } [ "//" root-path ] unit-test
{ "/" } [ "/Users" root-path ] unit-test
{ "/" } [ "//Users" root-path ] unit-test
{ "/" } [ "/Users/foo/bar////././." root-path ] unit-test
{ "/" } [ "/Users/foo/bar////.//../../../../../../////./." root-path ] unit-test
{ "/" } [ "/Users/////" root-path ] unit-test
] if

View File

@ -1,7 +1,7 @@
! Copyright (C) 2004, 2009 Slava Pestov, Doug Coleman.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors combinators io.backend kernel math math.order
namespaces sequences splitting strings system ;
USING: accessors combinators io.backend io.files.windows kernel
math math.order namespaces sequences splitting strings system ;
IN: io.pathnames
SYMBOL: current-directory
@ -166,6 +166,53 @@ M: string absolute-path
M: object normalize-path ( path -- path' )
absolute-path ;
: root-path* ( path -- path' )
dup absolute-path? [
dup [ path-separator? ] find
drop 1 + head
] when ;
HOOK: root-path os ( path -- path' )
M: object root-path root-path* ;
: relative-path* ( path -- relative-path )
dup absolute-path? [
dup [ path-separator? ] find
drop 1 + tail
] when ;
HOOK: relative-path os ( path -- path' )
M: object relative-path relative-path* ;
: canonicalize-path* ( path -- path' )
[
relative-path
[ path-separator? ] split-when
[ { "." "" } member? ] reject
V{ } clone [
dup ".." = [
over empty?
[ over push ]
[ over ?last ".." = [ over push ] [ drop dup pop* ] if ] if
] [
over push
] if
] reduce
] keep dup absolute-path? [
[
[ ".." = ] trim-head
path-separator join
] dip root-path prepend-path
] [
drop path-separator join [ "." ] when-empty
] if ;
HOOK: canonicalize-path io-backend ( path -- path' )
M: object canonicalize-path canonicalize-path* ;
TUPLE: pathname string ;
C: <pathname> pathname