diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..e148888 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,5 @@ +# Revision history for gigaparsec + +## 0.1.0.0 -- 2023-04-14 + +* First version. Released on an unsuspecting world. diff --git a/src/Gigaparsec/Core.hs b/src/Gigaparsec/Core.hs index c495123..fe68590 100644 --- a/src/Gigaparsec/Core.hs +++ b/src/Gigaparsec/Core.hs @@ -20,19 +20,12 @@ import Data.Kind import Control.Applicative import Data.Functor.Compose -data ActionF a f = AscendF a | MatchF Char (ActionF (() -> a) f) | forall b. DescendF f (ActionF (b -> a) f) -instance Show f => Show (ActionF a f) where - show AscendF{} = "AscendF" - show (MatchF c xs) = "(MatchF " ++ show c ++ " " ++ show xs ++ ")" - show (DescendF x xs) = "(DescendF " ++ show x ++ " " ++ show xs ++ ")" -data ListF a f = Nil | Cons (a f) (ListF a f) deriving Show - data Action m a where Match :: Char -> Action m () Descend :: m a -> Action m a -- Free alternative, see https://hackage.haskell.org/package/free-5.2/docs/Control-Alternative-Free.html --- But this one is higher-order. +-- But this one is higher order. type Alt :: (Type -> Type) -> ((Type -> Type) -> Type -> Type) -> Type -> Type newtype Alt m f a = Alt [AltF m f a] @@ -53,33 +46,45 @@ newtype G n = G (Map n [AltF (Name n) Action Any]) lookupG :: forall n a. Ord n => G n -> Name n a -> [AltF (Name n) Action a] lookupG (G m) (Name n) = unsafeCoerce (m Map.! n) -compareName :: Ord n => Name n a -> Name n b -> Ordering -compareName (Name x) (Name y) = compare x y - -compareSlot :: Ord n => Slot n a -> Slot n b -> Ordering -compareSlot (Slot x1 x2 x3 _ _) (Slot y1 y2 y3 _ _) = compareName x1 y1 <> compare x2 y2 <> compare x3 y3 - -compareDescriptor :: Ord n => Descriptor n a -> Descriptor n b -> Ordering -compareDescriptor (Descriptor x1 x2 x3 _) (Descriptor y1 y2 y3 _) = compareSlot x1 y1 <> compare x2 y2 <> compare x3 y3 - data Deps n a b where Self :: Deps n a a - Dep :: Name n b -> Int -> Int -> Deps n a c -> Deps n a (b -> c) + Dep :: !(Name n b) -> !Int -> !Int -> !(Deps n a c) -> Deps n a (b -> c) deriving instance Show n => Show (Deps n a b) -data Slot n a = forall b. Slot !(Name n a) !Int !Int (Deps n a b) (AltF (Name n) Action b) -data Descriptor n a = Descriptor (Slot n a) !Int !Int String +data Slot n a = forall b. + Slot + !(Name n a) -- ^ The name of the current nonterminal + !Int -- ^ The number of the current alternative + !Int -- ^ The number of symbols that have already been processed + !(Deps n a b) -- ^ The dependencies, i.e. the (external) nonterminals that have been processed + (AltF (Name n) Action b) -- ^ The actions that still need to be done + +data Descriptor n a = + Descriptor + !(Slot n a) + !Int -- ^ The left extent + !Int -- ^ The pivot + String -- ^ The remainder of the input (to the right of the pivot) + data SomeDescriptor n = forall a. SomeDescriptor (Descriptor n a) instance Ord n => Eq (SomeDescriptor n) where SomeDescriptor x == SomeDescriptor y = compareDescriptor x y == EQ instance Ord n => Ord (SomeDescriptor n) where compare (SomeDescriptor x) (SomeDescriptor y) = compareDescriptor x y +compareName :: Ord n => Name n a -> Name n b -> Ordering +compareName (Name x) (Name y) = compare x y + +compareSlot :: Ord n => Slot n a -> Slot n b -> Ordering +compareSlot (Slot x1 x2 x3 _ _) (Slot y1 y2 y3 _ _) = compareName x1 y1 <> compare x2 y2 <> compare x3 y3 + +compareDescriptor :: Ord n => Descriptor n a -> Descriptor n b -> Ordering +compareDescriptor (Descriptor x1 x2 x3 _) (Descriptor y1 y2 y3 _) = compareSlot x1 y1 <> compare x2 y2 <> compare x3 y3 + initialDescriptor :: Name n a -> String -> Int -> AltF (Name n) Action a -> SomeDescriptor n initialDescriptor n xs i act = SomeDescriptor (Descriptor (Slot n i 0 Self act) 0 0 xs) newtype WaitForAscend n = WA (Map (n, Int) [Int -> String -> SomeDescriptor n]) -newtype WaitForDescend n = WD (Map (n, Int) [(Int, String)]) emptyWA :: WaitForAscend n emptyWA = WA Map.empty @@ -90,6 +95,8 @@ lookupWA (WA m) (Name n) k = fromMaybe [] (m Map.!? (n, k)) insertWA :: Ord n => Name n a -> Int -> (Int -> String -> SomeDescriptor n) -> WaitForAscend n -> WaitForAscend n insertWA (Name n) k f (WA m) = WA (Map.insertWith (++) (n, k) [f] m) +newtype WaitForDescend n = WD (Map (n, Int) [(Int, String)]) + emptyWD :: WaitForDescend n emptyWD = WD Map.empty @@ -102,19 +109,31 @@ insertWD (Name n) k x (WD m) = WD (Map.insertWith (++) (n, k) [x] m) parse :: forall n a. Ord n => (G n, Name n a) -> String -> Set (SomeDescriptor n) parse (g, z) xs0 = go Set.empty emptyWA emptyWD (zipWith (initialDescriptor z xs0) [0..] (lookupG g z)) where go :: Set (SomeDescriptor n) -> WaitForAscend n -> WaitForDescend n -> [SomeDescriptor n] -> Set (SomeDescriptor n) + + -- If we've already processed this descriptor then we can skip it go u wa wd (d : rs) | d `Set.member` u = go u wa wd rs + + -- If we're currently 'Match'ing a character and that character appears in the input text then we can continue go u wa wd (d@(SomeDescriptor (Descriptor (Slot x a i ds (Ap (Match c) r)) l k xs)) : rs) | c' : xs' <- xs, c == c' = go (Set.insert d u) wa wd (SomeDescriptor (Descriptor (Slot x a (i + 1) ds (($ ()) <$> r)) l (k + 1) xs') : rs) + -- otherwise we skip this descriptor | otherwise = go u wa wd rs + + -- If we're descending into a nonterminal then we check if something was already waiting for us to descend. go u wa wd (d@(SomeDescriptor (Descriptor (Slot x a i ds (Ap (Descend n) next)) l k xs)) : rs) - = go (Set.insert d u) (insertWA n k (\r xs' -> SomeDescriptor (Descriptor (Slot x a (i + 1) (Dep n k r ds) next) l r xs')) wa) wd $ concat - [ [ SomeDescriptor (Descriptor (Slot n a' 0 Self acts) k k xs) | (a', acts) <- zip [0..] (lookupG g n) ] - , [ SomeDescriptor (Descriptor (Slot x a (i + 1) (Dep n k r ds) next) l r xs') | (r, xs') <- lookupWD wd n k ] - , rs - ] + = go (Set.insert d u) (insertWA n k (\r xs' -> SomeDescriptor (Descriptor (Slot x a (i + 1) (Dep n k r ds) next) l r xs')) wa) wd $ + case lookupWD wd n k of + -- If nothing was waiting for this then we start descending by adding initial descriptors the nonterminal we are descending into. + [] -> [ SomeDescriptor (Descriptor (Slot n a' 0 Self acts) k k xs) | (a', acts) <- zip [0..] (lookupG g n) ] ++ rs + -- If something was waiting for us then we can take over where they left off. + _ -> [ SomeDescriptor (Descriptor (Slot x a (i + 1) (Dep n k r ds) next) l r xs') | (r, xs') <- lookupWD wd n k ] ++ rs + + -- If we have reached the end of a descriptor then we ascend. go u wa wd (d@(SomeDescriptor (Descriptor (Slot x _ _ _ (Pure _)) k r xs)) : rs) = go (Set.insert d u) wa (insertWD x k (r, xs) wd) ([ f r xs | f <- lookupWA wa x k ] ++ rs) + + -- If we have no more work then parsing is done! go u _ _ [] = u decode :: forall n a. (Show n, Ord n) => Set (SomeDescriptor n) -> Name n a -> Int -> Int -> [a] diff --git a/src/Gigaparsec/Syntax.hs b/src/Gigaparsec/Syntax.hs index 8e6307a..3138f05 100644 --- a/src/Gigaparsec/Syntax.hs +++ b/src/Gigaparsec/Syntax.hs @@ -2,42 +2,46 @@ {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE UndecidableInstances #-} -module Gigaparsec.Syntax where +module Gigaparsec.Syntax (Parser, reify, char, nt) where import Gigaparsec.Core import GHC.Exts (Any) -import Control.Applicative +import Control.Applicative ( Const(..), Alternative ) import Data.Reify import qualified Data.Map as Map import Unsafe.Coerce (unsafeCoerce) import System.IO.Unsafe (unsafePerformIO) +import Data.Kind ( Type ) +type Parser :: Type -> Type newtype Parser a = Parser (Alt Parser Action a) deriving (Functor, Applicative, Alternative) via Alt Parser Action +data AltListF a f = Nil | Cons (AltF (Const f) Action a) (AltListF a f) + instance MuRef (Parser a) where - type DeRef (Parser a) = ListF (ActionF Any) + type DeRef (Parser a) = AltListF Any mapDeRef :: forall f u. Applicative f => (forall b. (MuRef b, DeRef (Parser a) ~ DeRef b) => b -> f u) -> Parser a -> f (DeRef (Parser a) u) mapDeRef _ (Parser (Alt [])) = pure Nil mapDeRef f (Parser (Alt (x:xs))) = Cons <$> helper x <*> mapDeRef f (Parser (Alt xs)) where - helper :: forall b. AltF Parser Action b -> f (ActionF Any u) - helper (Pure a) = pure (AscendF (unsafeCoerce a)) - helper (Ap (Match c) r) = MatchF c <$> unsafeCoerce (helper r) - helper (Ap (Descend y) r) = DescendF <$> f y <*> unsafeCoerce (helper r) + helper :: forall b. AltF Parser Action b -> f (AltF (Const u) Action Any) + helper (Pure a) = pure (Pure (unsafeCoerce a)) + helper (Ap (Match c) r) = Ap (Match c) <$> unsafeCoerce (helper r) + helper (Ap (Descend y) r) = Ap . Descend . Const <$> f y <*> unsafeCoerce (helper r) reify :: Parser a -> (G Unique, Name Unique a) reify acts = (G (Map.fromList [ (u, f x') | (u, x') <- xs ]), Name x) where (Graph xs x) = unsafePerformIO $ reifyGraph acts - f :: ListF (ActionF Any) Unique -> [AltF (Name Unique) Action Any] + f :: AltListF Any Unique -> [AltF (Name Unique) Action Any] f Nil = [] f (Cons h t) = g h ++ f t - g :: forall a. ActionF a Unique -> [AltF (Name Unique) Action a] - g (AscendF r) = [Pure r] - g (MatchF c r) = Ap (Match c) <$> g r - g (DescendF u r) = Ap (Descend (Name u)) <$> g r + g :: forall a. AltF (Const Unique) Action a -> [AltF (Name Unique) Action a] + g (Pure r) = [Pure r] + g (Ap (Match c) r) = Ap (Match c) <$> g r + g (Ap (Descend u) r) = Ap (Descend (Name (getConst u))) <$> g r nt :: Parser a -> Parser a nt xs = Parser $ Alt [Ap (Descend xs) (pure id)]