From 05bed8308262e767d2daee12717092d61507e88f Mon Sep 17 00:00:00 2001 From: Bodigrim Date: Fri, 17 Feb 2023 19:14:49 +0000 Subject: [PATCH 1/7] Use dropWhileEnd from Data.List --- System/FilePath/Internal.hs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/System/FilePath/Internal.hs b/System/FilePath/Internal.hs index f0d79d83..5d5edcd8 100644 --- a/System/FilePath/Internal.hs +++ b/System/FilePath/Internal.hs @@ -122,7 +122,7 @@ import Data.String (fromString) import System.Environment(getEnv) import Prelude (String, map, FilePath, Eq, IO, id, last, init, reverse, dropWhile, null, break, takeWhile, take, all, elem, any, span) import Data.Char(toLower, toUpper, isAsciiLower, isAsciiUpper) -import Data.List(stripPrefix, isSuffixOf, uncons) +import Data.List(stripPrefix, isSuffixOf, uncons, dropWhileEnd) #define CHAR Char #define STRING String #define FILEPATH FilePath @@ -1129,11 +1129,6 @@ isAbsolute = not . isRelative #ifndef OS_PATH ----------------------------------------------------------------------------- --- dropWhileEnd (>2) [1,2,3,4,1,2,3,4] == [1,2,3,4,1,2]) --- Note that Data.List.dropWhileEnd is only available in base >= 4.5. -dropWhileEnd :: (a -> Bool) -> [a] -> [a] -dropWhileEnd p = reverse . dropWhile p . reverse - -- takeWhileEnd (>2) [1,2,3,4,1,2,3,4] == [3,4]) takeWhileEnd :: (a -> Bool) -> [a] -> [a] takeWhileEnd p = reverse . takeWhile p . reverse From ae4ad60afa88adcd75ba9364d4fdb165f1939d82 Mon Sep 17 00:00:00 2001 From: Bodigrim Date: Fri, 17 Feb 2023 19:22:59 +0000 Subject: [PATCH 2/7] Implement spanEnd efficiently: in a single pass and without reverses --- System/FilePath/Internal.hs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/System/FilePath/Internal.hs b/System/FilePath/Internal.hs index 5d5edcd8..245f7fc0 100644 --- a/System/FilePath/Internal.hs +++ b/System/FilePath/Internal.hs @@ -120,7 +120,7 @@ import qualified Data.List as L #ifndef OS_PATH import Data.String (fromString) import System.Environment(getEnv) -import Prelude (String, map, FilePath, Eq, IO, id, last, init, reverse, dropWhile, null, break, takeWhile, take, all, elem, any, span) +import Prelude (String, map, FilePath, Eq, IO, id, last, init, reverse, dropWhile, null, break, take, all, elem, any, span) import Data.Char(toLower, toUpper, isAsciiLower, isAsciiUpper) import Data.List(stripPrefix, isSuffixOf, uncons, dropWhileEnd) #define CHAR Char @@ -1129,13 +1129,9 @@ isAbsolute = not . isRelative #ifndef OS_PATH ----------------------------------------------------------------------------- --- takeWhileEnd (>2) [1,2,3,4,1,2,3,4] == [3,4]) -takeWhileEnd :: (a -> Bool) -> [a] -> [a] -takeWhileEnd p = reverse . takeWhile p . reverse - -- spanEnd (>2) [1,2,3,4,1,2,3,4] = ([1,2,3,4,1,2], [3,4]) spanEnd :: (a -> Bool) -> [a] -> ([a], [a]) -spanEnd p xs = (dropWhileEnd p xs, takeWhileEnd p xs) +spanEnd p = L.foldr (\x (pref, suff) -> if null pref && p x then (pref, x : suff) else (x : pref, suff)) ([], []) -- breakEnd (< 2) [1,2,3,4,1,2,3,4] == ([1,2,3,4,1],[2,3,4]) breakEnd :: (a -> Bool) -> [a] -> ([a], [a]) From 6d371f1ced8855dc4f45f1ba413af4f1a6a3134c Mon Sep 17 00:00:00 2001 From: Bodigrim Date: Fri, 17 Feb 2023 19:25:24 +0000 Subject: [PATCH 3/7] Implement unsnoc efficiently, in a single pass --- System/FilePath/Internal.hs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/System/FilePath/Internal.hs b/System/FilePath/Internal.hs index 245f7fc0..da3f7a43 100644 --- a/System/FilePath/Internal.hs +++ b/System/FilePath/Internal.hs @@ -112,6 +112,7 @@ module System.OsPath.MODULE_NAME.Internal {- HLINT ignore "Use fewer imports" -} import Prelude (Char, Bool(..), Maybe(..), (.), (&&), (<=), not, fst, maybe, (||), (==), ($), otherwise, fmap, mempty, (>=), (/=), (++), snd) +import Data.Bifunctor (first) import Data.Semigroup ((<>)) import qualified Prelude as P import Data.Maybe(isJust) @@ -129,7 +130,6 @@ import Data.List(stripPrefix, isSuffixOf, uncons, dropWhileEnd) #else import Prelude (fromIntegral) import Control.Exception ( SomeException, evaluate, try, displayException ) -import Data.Bifunctor (first) import Control.DeepSeq (force) import GHC.IO (unsafePerformIO) import qualified Data.Char as C @@ -1145,8 +1145,7 @@ stripSuffix xs ys = reverse P.<$> stripPrefix (reverse xs) (reverse ys) unsnoc :: [a] -> Maybe ([a], a) -unsnoc [] = Nothing -unsnoc xs = Just (init xs, last xs) +unsnoc = L.foldr (\x -> Just . maybe ([], x) (first (x :))) Nothing _period, _quotedbl, _backslash, _slash, _question, _U, _N, _C, _colon, _semicolon, _US, _less, _greater, _bar, _asterisk, _nul, _space, _underscore :: Char From a80bd07edb7bff6cbb0db4b52125668e88ea68d0 Mon Sep 17 00:00:00 2001 From: Bodigrim Date: Fri, 17 Feb 2023 20:43:21 +0000 Subject: [PATCH 4/7] Speed up readDriveLetter 10x by introducing uncons2 --- System/FilePath/Internal.hs | 18 ++++++++++++------ System/OsPath/Data/ByteString/Short.hs | 16 ++++++++++++++++ .../OsPath/Data/ByteString/Short/Internal.hs | 13 ++++++++++--- System/OsPath/Data/ByteString/Short/Word16.hs | 13 +++++++++++++ 4 files changed, 51 insertions(+), 9 deletions(-) diff --git a/System/FilePath/Internal.hs b/System/FilePath/Internal.hs index da3f7a43..b9c85e99 100644 --- a/System/FilePath/Internal.hs +++ b/System/FilePath/Internal.hs @@ -506,12 +506,14 @@ readDriveUNC bs = case unpack bs of {- c:\ -} readDriveLetter :: STRING -> Maybe (FILEPATH, FILEPATH) -readDriveLetter bs = case unpack bs of - (x:c:y:xs) - | c == _colon && isLetter x && isPathSeparator y -> Just $ addSlash (pack [x,_colon]) (pack (y:xs)) - (x:c:xs) - | c == _colon && isLetter x -> Just (pack [x,_colon], pack xs) - _ -> Nothing +readDriveLetter bs = case uncons2 bs of + Nothing -> Nothing + Just (x, c, ys) + | isLetter x, c == _colon -> Just $ case uncons ys of + Just (y, _) + | isPathSeparator y -> addSlash (pack [x,_colon]) ys + _ -> (pack [x,_colon], ys) + | otherwise -> Nothing {- \\sharename\ -} readDriveShare :: STRING -> Maybe (FILEPATH, FILEPATH) @@ -1147,6 +1149,10 @@ stripSuffix xs ys = reverse P.<$> stripPrefix (reverse xs) (reverse ys) unsnoc :: [a] -> Maybe ([a], a) unsnoc = L.foldr (\x -> Just . maybe ([], x) (first (x :))) Nothing +uncons2 :: [a] -> Maybe (a, a, [a]) +uncons2 [] = Nothing +uncons2 [_] = Nothing +uncons2 (x : y : zs) = Just (x, y, zs) _period, _quotedbl, _backslash, _slash, _question, _U, _N, _C, _colon, _semicolon, _US, _less, _greater, _bar, _asterisk, _nul, _space, _underscore :: Char _period = '.' diff --git a/System/OsPath/Data/ByteString/Short.hs b/System/OsPath/Data/ByteString/Short.hs index 176ae012..f3a666ff 100644 --- a/System/OsPath/Data/ByteString/Short.hs +++ b/System/OsPath/Data/ByteString/Short.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE MultiWayIf #-} {-# LANGUAGE NoImplicitPrelude #-} -- | -- Module : System.OsPath.Data.ByteString.Short @@ -81,6 +82,7 @@ module System.OsPath.Data.ByteString.Short ( last, tail, uncons, + uncons2, head, init, unsnoc, @@ -173,3 +175,17 @@ module System.OsPath.Data.ByteString.Short ( ) where import Data.ByteString.Short.Internal +import System.OsPath.Data.ByteString.Short.Internal + +import Prelude (Maybe(..), Ord(..), Num(..), ($), otherwise) +import Data.Word (Word8) + +uncons2 :: ShortByteString -> Maybe (Word8, Word8, ShortByteString) +uncons2 = \sbs -> + let l = length sbs + nl = l - 2 + in if | l <= 1 -> Nothing + | otherwise -> let h = indexWord8Array (asBA sbs) 0 + h' = indexWord8Array (asBA sbs) 1 + t = create nl $ \mba -> copyByteArray (asBA sbs) 1 mba 0 nl + in Just (h, h', t) diff --git a/System/OsPath/Data/ByteString/Short/Internal.hs b/System/OsPath/Data/ByteString/Short/Internal.hs index b2428503..493b447b 100644 --- a/System/OsPath/Data/ByteString/Short/Internal.hs +++ b/System/OsPath/Data/ByteString/Short/Internal.hs @@ -20,6 +20,7 @@ module System.OsPath.Data.ByteString.Short.Internal where import Control.Monad.ST import Control.Exception (assert, throwIO) +import Data.Bits (Bits(..)) import Data.ByteString.Short.Internal (ShortByteString(..), length) #if !MIN_VERSION_base(4,11,0) import Data.Semigroup @@ -284,15 +285,21 @@ writeWord16Array (MBA# mba#) (I# i#) (W16# w#) = ST (\s -> case writeWord8Array# mba# (i# +# 1#) msb# s of s' -> (# s', () #)) +indexWord8Array :: BA + -> Int -- ^ Word8 index + -> Word8 +indexWord8Array (BA# ba#) (I# i#) = W8# (indexWord8Array# ba# i#) + -- | This isn't strictly Word16 array read. Instead it's two Word8 array reads -- to avoid endianness issues due to primops doing automatic alignment based -- on host platform. We expect the byte array to be LE always. indexWord16Array :: BA -> Int -- ^ Word8 index (not Word16) -> Word16 -indexWord16Array (BA# ba#) (I# i#) = - case (# indexWord8Array# ba# i#, indexWord8Array# ba# (i# +# 1#) #) of - (# lsb#, msb# #) -> W16# (decodeWord16LE# (# lsb#, msb# #)) +indexWord16Array ba i = fromIntegral lsb .|. (fromIntegral msb `shiftL` 8) + where + lsb = indexWord8Array ba i + msb = indexWord8Array ba (i + 1) #if !MIN_VERSION_base(4,16,0) diff --git a/System/OsPath/Data/ByteString/Short/Word16.hs b/System/OsPath/Data/ByteString/Short/Word16.hs index f6e591bc..9fdec459 100644 --- a/System/OsPath/Data/ByteString/Short/Word16.hs +++ b/System/OsPath/Data/ByteString/Short/Word16.hs @@ -46,6 +46,7 @@ module System.OsPath.Data.ByteString.Short.Word16 ( last, tail, uncons, + uncons2, head, init, unsnoc, @@ -260,6 +261,18 @@ uncons = \(assertEven -> sbs) -> t = create nl $ \mba -> copyByteArray (asBA sbs) 2 mba 0 nl in Just (h, t) +-- | /O(n)/ Extract first two elements and the rest of a ByteString, +-- returning Nothing if it is shorter than two elements. +uncons2 :: ShortByteString -> Maybe (Word16, Word16, ShortByteString) +uncons2 = \(assertEven -> sbs) -> + let l = BS.length sbs + nl = l - 4 + in if | l <= 2 -> Nothing + | otherwise -> let h = indexWord16Array (asBA sbs) 0 + h' = indexWord16Array (asBA sbs) 2 + t = create nl $ \mba -> copyByteArray (asBA sbs) 4 mba 0 nl + in Just (h, h', t) + -- | /O(1)/ Extract the first element of a ShortByteString, which must be at least one Word16. -- An exception will be thrown in the case of an empty ShortByteString. head :: HasCallStack => ShortByteString -> Word16 From 8a4a7f07c96b4dd952d42302aa4a20f0cc044462 Mon Sep 17 00:00:00 2001 From: Bodigrim Date: Fri, 17 Feb 2023 21:04:57 +0000 Subject: [PATCH 5/7] Use cons x y instead of singleton x <> y --- System/FilePath/Internal.hs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/System/FilePath/Internal.hs b/System/FilePath/Internal.hs index b9c85e99..c69e5c32 100644 --- a/System/FilePath/Internal.hs +++ b/System/FilePath/Internal.hs @@ -293,7 +293,7 @@ getSearchPath = fmap splitSearchPath (getEnv "PATH") splitExtension :: FILEPATH -> (STRING, STRING) splitExtension x = if null nameDot then (x, mempty) - else (dir <> init nameDot, singleton extSeparator <> ext) + else (dir <> init nameDot, extSeparator `cons` ext) where (dir,file) = splitFileName_ x (nameDot,ext) = breakEnd isExtSeparator file @@ -358,7 +358,7 @@ addExtension file xs = case uncons xs of Just (x, _) -> joinDrive a res where res = if isExtSeparator x then b <> xs - else b <> singleton extSeparator <> xs + else b <> (extSeparator `cons` xs) (a,b) = splitDrive file @@ -383,7 +383,7 @@ isExtensionOf :: STRING -> FILEPATH -> Bool isExtensionOf ext = \fp -> case uncons ext of Just (x, _) | x == _period -> isSuffixOf ext . takeExtensions $ fp - _ -> isSuffixOf (singleton _period <> ext) . takeExtensions $ fp + _ -> isSuffixOf (_period `cons` ext) . takeExtensions $ fp -- | Drop the given extension from a FILEPATH, and the @\".\"@ preceding it. -- Returns 'Nothing' if the FILEPATH does not have the given extension, or @@ -403,7 +403,7 @@ isExtensionOf ext = \fp -> case uncons ext of -- > stripExtension "" x == Just x stripExtension :: STRING -> FILEPATH -> Maybe FILEPATH stripExtension ext path = case uncons ext of - Just (x, _) -> let dotExt = if isExtSeparator x then ext else singleton _period <> ext + Just (x, _) -> let dotExt = if isExtSeparator x then ext else _period `cons` ext in stripSuffix dotExt path Nothing -> Just path @@ -520,7 +520,7 @@ readDriveShare :: STRING -> Maybe (FILEPATH, FILEPATH) readDriveShare bs = case unpack bs of (s1:s2:xs) | isPathSeparator s1 && isPathSeparator s2 -> let (a, b) = readDriveShareName (pack xs) - in Just (singleton s1 <> singleton s2 <> a,b) + in Just (s1 `cons` (s2 `cons` a), b) _ -> Nothing {- assume you have already seen \\ -} @@ -596,7 +596,7 @@ splitFileName x = if null path else (path, file) where (path, file) = splitFileName_ x - dotSlash = singleton _period <> singleton _slash + dotSlash = _period `cons` singleton _slash -- version of splitFileName where, if the FILEPATH has no directory -- component, the returned directory is "" rather than "./". This @@ -738,7 +738,7 @@ combineAlways a b | null a = b [a1, a2] | isWindows , isLetter a1 , a2 == _colon -> a <> b - _ -> a <> singleton pathSeparator <> b + _ -> a <> (pathSeparator `cons` b) -- | Combine two paths with a path separator. @@ -1070,7 +1070,7 @@ makeValid path | isPosix = map (\x -> if x == _nul then _underscore else x) path | isJust (readDriveShare drv) && all isPathSeparator drv = take 2 drv <> fromString "drive" | isJust (readDriveUNC drv) && not (hasTrailingPathSeparator drv) = - makeValid (drv <> singleton pathSeparator <> pth) + makeValid (drv <> (pathSeparator `cons` pth)) | otherwise = joinDrive drv $ validElements $ validCHARs pth where @@ -1145,6 +1145,8 @@ breakEnd p = spanEnd (not . p) stripSuffix :: Eq a => [a] -> [a] -> Maybe [a] stripSuffix xs ys = reverse P.<$> stripPrefix (reverse xs) (reverse ys) +cons :: a -> [a] -> [a] +cons = (:) unsnoc :: [a] -> Maybe ([a], a) unsnoc = L.foldr (\x -> Just . maybe ([], x) (first (x :))) Nothing From faac262b711f73837e074951539caa926ea0e70e Mon Sep 17 00:00:00 2001 From: Bodigrim Date: Fri, 17 Feb 2023 19:17:45 +0000 Subject: [PATCH 6/7] Speed up splitFileName --- System/FilePath/Internal.hs | 42 +++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/System/FilePath/Internal.hs b/System/FilePath/Internal.hs index c69e5c32..19d150f3 100644 --- a/System/FilePath/Internal.hs +++ b/System/FilePath/Internal.hs @@ -115,7 +115,7 @@ import Prelude (Char, Bool(..), Maybe(..), (.), (&&), (<=), not, fst, maybe, (|| import Data.Bifunctor (first) import Data.Semigroup ((<>)) import qualified Prelude as P -import Data.Maybe(isJust) +import Data.Maybe(fromMaybe, isJust) import qualified Data.List as L #ifndef OS_PATH @@ -604,11 +604,45 @@ splitFileName x = if null path -- directory to make a valid FILEPATH, and having a "./" appear would -- look strange and upset simple equality properties. See -- e.g. replaceFileName. +-- +-- A naive implementation is +-- +-- splitFileName_ fp = (drv <> dir, file) +-- where +-- (drv, pth) = splitDrive fp +-- (dir, file) = breakEnd isPathSeparator pth +-- +-- but it is undesirable for two reasons: +-- * splitDrive is very slow on Windows, +-- * we unconditionally allocate 5 FilePath objects where only 2 would normally suffice. +-- +-- In the implementation below we first speculatively split the input by the last path +-- separator. In the vast majority of cases this is already the answer, except +-- two exceptional cases explained below. +-- splitFileName_ :: FILEPATH -> (STRING, STRING) -splitFileName_ fp = (drv <> dir, file) +splitFileName_ fp + -- If dirSlash is empty, @fp@ is either a genuine filename without any dir, + -- or just a Windows drive name without slash like "c:". + -- Run readDriveLetter to figure out. + | isWindows + , null dirSlash + = fromMaybe (mempty, fp) (readDriveLetter fp) + -- Another Windows quirk is that @fp@ could have been a shared drive "\\share" + -- or UNC location "\\?\UNC\foo", where path separator is a part of the drive name. + -- We can test this by trying dropDrive and falling back to splitDrive. + | isWindows + , Just (s1, _s2, bs') <- uncons2 dirSlash + , isPathSeparator s1 + -- If bs' is empty, then s2 as the last character of dirSlash must be a path separator, + -- so we are in the middle of shared drive. + -- Otherwise, since s1 is a path separator, we might be in the middle of UNC path. + , null bs' || maybe False (null . snd) (readDriveUNC dirSlash) + = (fp, mempty) + | otherwise + = (dirSlash, file) where - (drv, pth) = splitDrive fp - (dir, file) = breakEnd isPathSeparator pth + (dirSlash, file) = breakEnd isPathSeparator fp -- | Set the filename. -- From 5370bb44a0ae6ba6c9f01c16a08b077d013cc654 Mon Sep 17 00:00:00 2001 From: Bodigrim Date: Fri, 17 Feb 2023 21:06:15 +0000 Subject: [PATCH 7/7] Speed up splitExtension --- System/FilePath/Internal.hs | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/System/FilePath/Internal.hs b/System/FilePath/Internal.hs index 19d150f3..aff418d5 100644 --- a/System/FilePath/Internal.hs +++ b/System/FilePath/Internal.hs @@ -290,13 +290,24 @@ getSearchPath = fmap splitSearchPath (getEnv "PATH") -- > splitExtension "file.txt/boris.ext" == ("file.txt/boris",".ext") -- > splitExtension "file/path.txt.bob.fred" == ("file/path.txt.bob",".fred") -- > splitExtension "file/path.txt/" == ("file/path.txt/","") + +-- A naive implementation would be to use @splitFileName_@ first, +-- then break filename into basename and extension, then recombine dir and basename. +-- This is way too expensive, see @splitFileName_@ comment for discussion. +-- +-- Instead we speculatively split on the extension separator first, then check +-- whether results are well-formed. splitExtension :: FILEPATH -> (STRING, STRING) -splitExtension x = if null nameDot - then (x, mempty) - else (dir <> init nameDot, extSeparator `cons` ext) - where - (dir,file) = splitFileName_ x - (nameDot,ext) = breakEnd isExtSeparator file +splitExtension x + -- Imagine x = "no-dots", then nameDot = "" + | null nameDot = (x, mempty) + -- Imagine x = "\\shared.with.dots\no-dots" + | isWindows && null (dropDrive nameDot) = (x, mempty) + -- Imagine x = "dir.with.dots/no-dots" + | any isPathSeparator ext = (x, mempty) + | otherwise = (init nameDot, extSeparator `cons` ext) + where + (nameDot, ext) = breakEnd isExtSeparator x -- | Get the extension of a file, returns @\"\"@ for no extension, @.ext@ otherwise. --