-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
94 additions
and
63 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
:set -isrc -itest |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,19 +1,31 @@ | ||
{-# LANGUAGE MultiParamTypeClasses, FlexibleInstances #-} | ||
-- Consider splitting off as a separate package | ||
-- Copyright (c) 2008 Gustav Munkby | ||
-- Copyright (c) 2008 Jean-Philippe Bernardy | ||
|
||
-- | This module defines a Rope representation. | ||
|
||
-- While the representation are ByteStrings stored in a finger tree, the indices | ||
-- are actually in number of characters. | ||
|
||
-- This is currently based on utf8-string, but a couple of other packages might be | ||
-- better: text, compact-string. | ||
|
||
-- At the moment none of them has a lazy | ||
-- implementation, which forces us to always export plain Strings. | ||
-- (Utf8-string does not have a proper newtype) | ||
{-# LANGUAGE FlexibleInstances #-} | ||
{-# LANGUAGE MultiParamTypeClasses #-} | ||
{-# OPTIONS_HADDOCK show-extensions #-} | ||
|
||
-- | | ||
-- Module : Yi.OldRope | ||
-- License : GPL-2 | ||
-- Maintainer : [email protected] | ||
-- Stability : experimental | ||
-- Portability : portable | ||
-- | ||
-- This module defines a Rope representation. | ||
-- | ||
-- While the representation are ByteStrings stored in a finger tree, | ||
-- the indices are actually in number of characters. | ||
-- | ||
-- This is currently based on utf8-string, but a couple of other | ||
-- packages might be better: text, compact-string. | ||
-- | ||
-- At the moment none of them has a lazy implementation, which forces | ||
-- us to always export plain Strings. (Utf8-string does not have a | ||
-- proper newtype) | ||
-- | ||
-- __Important__: The reason this module exists is to allow | ||
-- benchmarking and behaviour checks against a new implementation. As | ||
-- of today (10th September 2014), Yi imports this module. Notably, | ||
-- this module will be going away and Yi will start using "Yi.Rope" | ||
-- instead in the near future. | ||
|
||
module Yi.OldRope ( | ||
Rope, | ||
|
@@ -38,25 +50,24 @@ module Yi.OldRope ( | |
splitAtChunkBefore | ||
) where | ||
|
||
import Prelude hiding (null, head, tail, length, take, drop, splitAt, head, tail, foldl, reverse, readFile, writeFile, concat) | ||
import qualified Data.List as L | ||
|
||
import qualified Data.ByteString.UTF8 as B | ||
import Data.Binary | ||
import Data.ByteString (ByteString) | ||
import qualified Data.ByteString as B (append, concat) | ||
import qualified Data.ByteString as Byte | ||
import Data.ByteString (ByteString) | ||
import qualified Data.ByteString.Lazy as LB (toChunks, fromChunks, null, readFile, split) | ||
import qualified Data.ByteString.Lazy as LB (toChunks, fromChunks, null, | ||
readFile, split) | ||
import qualified Data.ByteString.Lazy.UTF8 as LB | ||
|
||
import qualified Data.ByteString.UTF8 as B | ||
import Data.Char (ord) | ||
import qualified Data.FingerTree as T | ||
import Data.FingerTree hiding (null, empty, reverse, split) | ||
|
||
import Data.Binary | ||
import Data.Char (ord) | ||
import Data.Monoid | ||
import Data.String (IsString(..)) | ||
|
||
import System.IO.Cautious (writeFileL) | ||
import Data.FingerTree hiding (null, empty, reverse, split) | ||
import qualified Data.List as L | ||
import Data.Monoid | ||
import Data.String (IsString(..)) | ||
import Prelude hiding (null, head, tail, length, take, drop, splitAt, | ||
head, tail, foldl, reverse, readFile, | ||
writeFile, concat) | ||
import System.IO.Cautious (writeFileL) | ||
|
||
defaultChunkSize :: Int | ||
defaultChunkSize = 128 -- in chars! (chunkSize requires this to be <= 256) | ||
|
@@ -65,11 +76,16 @@ defaultChunkSize = 128 -- in chars! (chunkSize requires this to be <= 256) | |
-- means that the length of chunks often have to be recomputed. | ||
mkChunk :: ByteString -> Chunk | ||
mkChunk s = Chunk (fromIntegral $ B.length s) s | ||
data Chunk = Chunk { chunkSize :: {-# UNPACK #-} !Word8, fromChunk :: {-# UNPACK #-} !ByteString } | ||
deriving (Eq, Show) | ||
data Chunk = Chunk { chunkSize :: {-# UNPACK #-} !Word8 | ||
, fromChunk :: {-# UNPACK #-} !ByteString | ||
} deriving (Eq, Show) | ||
|
||
data Size = Indices {charIndex :: {-# UNPACK #-} !Int, lineIndex :: {-# UNPACK #-} !Int} -- lineIndex is lazy because we do not often want the line count. However, we need this to avoid stack overflows on large files! | ||
deriving Show | ||
data Size = Indices { charIndex :: {-# UNPACK #-} !Int | ||
, lineIndex :: {-# UNPACK #-} !Int | ||
-- ^ lineIndex is lazy because we do not often | ||
-- want the line count. However, we need this to | ||
-- avoid stack overflows on large files! | ||
} deriving Show | ||
|
||
instance Monoid Size where | ||
mempty = Indices 0 0 | ||
|
@@ -91,11 +107,15 @@ newline :: Word8 | |
newline = fromIntegral (ord '\n') | ||
|
||
instance Measured Size Chunk where | ||
measure (Chunk l s) = Indices (fromIntegral l) -- note that this is the length in characters, not bytes. | ||
(Byte.count newline s) | ||
|
||
-- | The 'Foldable' instance of 'FingerTree' only defines 'foldMap', so the 'foldr' needed for 'toList' is inefficient, | ||
-- and can cause stack overflows. So, we roll our own (somewhat inefficient) version of 'toList' to avoid this. | ||
measure (Chunk l s) = | ||
Indices (fromIntegral l) -- note that this is the length in | ||
-- characters, not bytes. | ||
(Byte.count newline s) | ||
|
||
-- | The 'Foldable' instance of 'FingerTree' only defines 'foldMap', | ||
-- so the 'foldr' needed for 'toList' is inefficient, and can cause | ||
-- stack overflows. So, we roll our own (somewhat inefficient) version | ||
-- of 'toList' to avoid this. | ||
toList :: Measured v a => FingerTree v a -> [a] | ||
toList t = case viewl t of | ||
c :< cs -> c : toList cs | ||
|
@@ -135,24 +155,27 @@ null (Rope a) = T.null a | |
empty :: Rope | ||
empty = Rope T.empty | ||
|
||
-- | Get the length of the string. (This information cached, so O(1) amortized runtime.) | ||
-- | Get the length of the string. (This information cached, so O(1) | ||
-- amortized runtime.) | ||
length :: Rope -> Int | ||
length = charIndex . measure . fromRope | ||
|
||
-- | Count the number of newlines in the strings. (This information cached, so O(1) amortized runtime.) | ||
-- | Count the number of newlines in the strings. (This information | ||
-- cached, so O(1) amortized runtime.) | ||
countNewLines :: Rope -> Int | ||
countNewLines = lineIndex . measure . fromRope | ||
|
||
-- | Append two strings by merging the two finger trees. | ||
append :: Rope -> Rope -> Rope | ||
append (Rope a) (Rope b) = Rope $ | ||
case T.viewr a of | ||
EmptyR -> b | ||
l :> Chunk len x -> case T.viewl b of | ||
EmptyL -> a | ||
Chunk len' x' :< r -> if fromIntegral len + fromIntegral len' < defaultChunkSize | ||
then l >< singleton (Chunk (len + len') (x `B.append` x')) >< r | ||
else a >< b | ||
case T.viewr a of | ||
EmptyR -> b | ||
l :> Chunk len x -> case T.viewl b of | ||
EmptyL -> a | ||
Chunk len' x' :< r -> | ||
if fromIntegral len + fromIntegral len' < defaultChunkSize | ||
then l >< singleton (Chunk (len + len') (x `B.append` x')) >< r | ||
else a >< b | ||
|
||
concat :: [Rope] -> Rope | ||
concat = L.foldl' append empty | ||
|
@@ -166,7 +189,9 @@ splitAt :: Int -> Rope -> (Rope, Rope) | |
splitAt n (Rope t) = | ||
case T.viewl c of | ||
Chunk len x :< r | n' /= 0 -> | ||
let (lx, rx) = B.splitAt n' x in (Rope $ l |> Chunk (fromIntegral n') lx, Rope $ Chunk (len - fromIntegral n') rx -| r) | ||
let (lx, rx) = B.splitAt n' x | ||
in (Rope $ l |> Chunk (fromIntegral n') lx, | ||
Rope $ Chunk (len - fromIntegral n') rx -| r) | ||
_ -> (Rope l, Rope c) | ||
where | ||
(l, c) = T.split ((> n) . charIndex) t | ||
|
@@ -199,8 +224,10 @@ split :: Word8 -> Rope -> [Rope] | |
split c = map fromLazyByteString . LB.split c . toLazyByteString | ||
|
||
cutExcess :: Int -> ByteString -> (ByteString, ByteString) | ||
cutExcess i s = let idx = gt i $ L.reverse $ Byte.elemIndices newline s | ||
in Byte.splitAt (idx+1) s -- take one extra byte to that the newline is found on the left. | ||
cutExcess i s = | ||
let idx = gt i $ L.reverse $ Byte.elemIndices newline s | ||
in Byte.splitAt (idx+1) s -- take one extra byte to that the newline | ||
-- is found on the left. | ||
where gt _ [] = Byte.length s | ||
gt 0 (x:_ ) = x | ||
gt n (_:xs) = gt (n-1) xs | ||
|
@@ -210,7 +237,6 @@ instance Binary Rope where | |
put = put . toString | ||
get = fromString `fmap` get | ||
|
||
|
||
writeFile :: FilePath -> Rope -> IO () | ||
writeFile f = writeFileL f . toLazyByteString | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,14 @@ | ||
{-# OPTIONS_HADDOCK show-extensions #-} | ||
|
||
-- | | ||
-- Module : Yi.Rope | ||
-- License : GPL-2 | ||
-- Maintainer : [email protected] | ||
-- Stability : experimental | ||
-- Portability : portable | ||
-- | ||
-- A work in progress module that aims to be a more efficient | ||
-- replacement for "Yi.OldRope". For now, please import "Yi.OldRope"! | ||
module Yi.Rope (Rope, fromString, toString, toReverseString, null, empty, | ||
Yi.Rope.take, Yi.Rope.drop, Yi.Rope.length, reverse, | ||
countNewLines, Yi.Rope.split, Yi.Rope.splitAt, | ||
|
@@ -7,7 +18,7 @@ module Yi.Rope (Rope, fromString, toString, toReverseString, null, empty, | |
|
||
import qualified Codec.Binary.UTF8.Generic as G | ||
import Data.Binary | ||
import qualified Data.ByteString.Lazy as LB (readFile, split, count, reverse) | ||
import qualified Data.ByteString.Lazy as LB (readFile, split, count) | ||
import Data.Monoid | ||
import Data.Rope | ||
import qualified Prelude as P | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters