From ebef5a86b097a6cfbfedfab3c1f9a1f2371dec1a Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Fri, 22 Apr 2022 18:23:37 +1000 Subject: [PATCH 1/5] Bump version --- core-data/core-data.cabal | 2 +- core-data/package.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core-data/core-data.cabal b/core-data/core-data.cabal index 9ba7515a..490fe626 100644 --- a/core-data/core-data.cabal +++ b/core-data/core-data.cabal @@ -5,7 +5,7 @@ cabal-version: 1.12 -- see: https://github.com/sol/hpack name: core-data -version: 0.3.2.1 +version: 0.3.2.2 synopsis: Convenience wrappers around common data structures and encodings description: Wrappers around common data structures and encodings. . diff --git a/core-data/package.yaml b/core-data/package.yaml index 4dee5ea8..f94931c9 100644 --- a/core-data/package.yaml +++ b/core-data/package.yaml @@ -1,5 +1,5 @@ name: core-data -version: 0.3.2.1 +version: 0.3.2.2 synopsis: Convenience wrappers around common data structures and encodings description: | Wrappers around common data structures and encodings. From f2608826e9bd37d367baf7f9c70a3c7bf76b2fd9 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Sun, 24 Apr 2022 23:47:53 +1000 Subject: [PATCH 2/5] Implement uncons for Ropes --- core-text/core-text.cabal | 2 +- core-text/lib/Core/Text/Rope.hs | 21 ++++++++++++++++++++- core-text/package.yaml | 2 +- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/core-text/core-text.cabal b/core-text/core-text.cabal index 4d18939f..19d4efb7 100644 --- a/core-text/core-text.cabal +++ b/core-text/core-text.cabal @@ -5,7 +5,7 @@ cabal-version: 1.18 -- see: https://github.com/sol/hpack name: core-text -version: 0.3.6.0 +version: 0.3.7.0 synopsis: A rope type based on a finger tree over UTF-8 fragments description: A rope data type for text, built as a finger tree over UTF-8 text fragments. The package also includes utiltiy functions for breaking and diff --git a/core-text/lib/Core/Text/Rope.hs b/core-text/lib/Core/Text/Rope.hs index 761ab3a1..267e231a 100644 --- a/core-text/lib/Core/Text/Rope.hs +++ b/core-text/lib/Core/Text/Rope.hs @@ -79,6 +79,7 @@ module Core.Text.Rope ( replicateRope, replicateChar, widthRope, + unconsRope, splitRope, takeRope, insertRope, @@ -154,7 +155,7 @@ import qualified Data.Text.Short as S ( splitAt, toBuilder, toText, - unpack, + unpack, uncons ) import qualified Data.Text.Short.Unsafe as S (fromByteStringUnsafe) import GHC.Generics (Generic) @@ -332,6 +333,24 @@ nullRope (Rope x) = case F.viewl x of F.EmptyL -> True (F.:<) piece _ -> S.null piece +{- | +Read the first character from a 'Rope', assuming it's length 1 or greater, +returning 'Just' that character and the remainder of the text. Returns +'Nothing' if the input is 0 length. + +@since 0.3.7 +-} +unconsRope :: Rope -> Maybe (Char, Rope) +unconsRope text = + let x = unRope text + in case F.viewl x of + F.EmptyL -> Nothing + (F.:<) piece x' -> + case S.uncons piece of + Nothing -> Nothing + Just (c, piece') -> Just (c, Rope ((F.<|) piece' x')) + + {- | Break the text into two pieces at the specified offset. diff --git a/core-text/package.yaml b/core-text/package.yaml index 96be874a..12fa20f2 100644 --- a/core-text/package.yaml +++ b/core-text/package.yaml @@ -1,5 +1,5 @@ name: core-text -version: 0.3.6.0 +version: 0.3.7.0 synopsis: A rope type based on a finger tree over UTF-8 fragments description: | A rope data type for text, built as a finger tree over UTF-8 text From abd61c5640b7cdc1832377823a3bef8a3ce8138f Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Sun, 24 Apr 2022 23:50:00 +1000 Subject: [PATCH 3/5] Add breakRope convenience helper This may well admit a more efficient implementation, but doing this in two passes will do quite well for now. --- core-text/lib/Core/Text/Breaking.hs | 22 ++++++++++++++++++++++ core-text/lib/Core/Text/Utilities.hs | 1 + 2 files changed, 23 insertions(+) diff --git a/core-text/lib/Core/Text/Breaking.hs b/core-text/lib/Core/Text/Breaking.hs index 014094df..6f6f5070 100644 --- a/core-text/lib/Core/Text/Breaking.hs +++ b/core-text/lib/Core/Text/Breaking.hs @@ -3,6 +3,7 @@ -- This is an Internal module, hidden from Haddock module Core.Text.Breaking ( + breakRope, breakWords, breakLines, breakPieces, @@ -142,3 +143,24 @@ intoChunks predicate piece = in if trailing then intoRope chunk : emptyRope : [] else intoRope chunk : intoChunks predicate remainder' + +{- +The utilities breakPieces and its helpers above were written long before this +code. The special purpose functions above might have been written more easily +if this below had been written first, but they _are_ special cases and they're +done, so {shrug} if someone wants to unify these go right head, otherwise this +can stand as almost but not-quite repetition. +-} + +{- | +Given a piece of 'Rope' and a predicate, break the text into two pieces at the first +site where that predicate returns 'True'. + +@since 0.3.7 +-} +breakRope :: (Char -> Bool) -> Rope -> (Rope, Rope) +breakRope predicate text = + let possibleIndex = findIndexRope predicate text + in case possibleIndex of + Nothing -> (text, emptyRope) + Just i -> splitRope i text diff --git a/core-text/lib/Core/Text/Utilities.hs b/core-text/lib/Core/Text/Utilities.hs index 2aa2e3e2..febe312e 100644 --- a/core-text/lib/Core/Text/Utilities.hs +++ b/core-text/lib/Core/Text/Utilities.hs @@ -21,6 +21,7 @@ module Core.Text.Utilities ( -- * Helpers indefinite, oxford, + breakRope, breakWords, breakLines, breakPieces, From 46718497d8d46fac1df067e95c1c8d4580547701 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Sun, 24 Apr 2022 23:50:33 +1000 Subject: [PATCH 4/5] Fix escaping of JSON strings Fix (again) the escaping of JSON strings, accounting this time for TABs other control characters. This is now fully spec compliant. Closes #111. --- core-data/lib/Core/Encoding/Json.hs | 59 +++++++++++++++++------------ 1 file changed, 34 insertions(+), 25 deletions(-) diff --git a/core-data/lib/Core/Encoding/Json.hs b/core-data/lib/Core/Encoding/Json.hs index 5c9de6a1..7c4cf07c 100644 --- a/core-data/lib/Core/Encoding/Json.hs +++ b/core-data/lib/Core/Encoding/Json.hs @@ -67,6 +67,13 @@ module Core.Encoding.Json ( prettyValue, ) where +#if MIN_VERSION_aeson(2,0,1) +import qualified Data.Aeson.Key as Aeson +import qualified Data.Aeson.KeyMap as Aeson +#else +import qualified Data.HashMap.Strict as HashMap +#endif + import Core.Data.Structures (Key, Map, fromMap, intoMap) import Core.Text.Bytes (Bytes, fromBytes, intoBytes) import Core.Text.Colour ( @@ -86,21 +93,15 @@ import Core.Text.Rope ( fromRope, intoRope, singletonRope, + unconsRope, ) import Core.Text.Utilities ( Render (Token, colourize, highlight), - breakPieces, + breakRope, ) -import qualified Data.Aeson as Aeson - -#if MIN_VERSION_aeson(2,0,1) -import qualified Data.Aeson.Key as Aeson -import qualified Data.Aeson.KeyMap as Aeson -#else -import qualified Data.HashMap.Strict as HashMap -#endif - import Data.Aeson (FromJSON, Value (String)) +import qualified Data.Aeson as Aeson +import Data.Char (intToDigit) import Data.Coerce import Data.Hashable (Hashable) import qualified Data.List as List @@ -178,26 +179,34 @@ encodeToRope value = case value of closebracket = singletonRope ']' {- | -Escape any quotes or backslashes in a JsonString. +Escape any quotes, backslashes, or other possible rubbish in a 'JsonString'. -} escapeString :: Rope -> Rope escapeString text = - let text1 = escapeBackslashes text - text2 = escapeQuotes text1 - in text2 + let (before, after) = breakRope needsEscaping text + in case unconsRope after of + Nothing -> + text + Just (c, after') -> + before <> escapeCharacter c <> escapeString after' + where + needsEscaping c = + c == '\"' || c == '\\' || c < '\x20' {-# INLINEABLE escapeString #-} -escapeBackslashes :: Rope -> Rope -escapeBackslashes text = - let pieces = breakPieces (== '\\') text - in mconcat (List.intersperse "\\\\" pieces) -{-# INLINEABLE escapeBackslashes #-} - -escapeQuotes :: Rope -> Rope -escapeQuotes text = - let pieces = breakPieces (== '"') text - in mconcat (List.intersperse "\\\"" pieces) -{-# INLINEABLE escapeQuotes #-} +escapeCharacter :: Char -> Rope +escapeCharacter c = + case c of + '\"' -> "\\\"" + '\\' -> "\\\\" + '\n' -> "\\n" + '\r' -> "\\r" + '\t' -> "\\t" + _ -> + if c < '\x10' + then "\\u000" <> singletonRope (intToDigit (fromEnum c)) + else "\\u001" <> singletonRope (intToDigit ((fromEnum c) - 16)) +{-# INLINEABLE escapeCharacter #-} {- | Given an array of bytes, attempt to decode it as a JSON value. From 1dceb018d6fd0415e04c6f3330b38d2a3ee4e1b0 Mon Sep 17 00:00:00 2001 From: Andrew Cowie Date: Sun, 24 Apr 2022 23:54:36 +1000 Subject: [PATCH 5/5] Set dependency version --- core-data/core-data.cabal | 2 +- core-data/package.yaml | 2 +- core-program/lib/Core/Program/Metadata.hs | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/core-data/core-data.cabal b/core-data/core-data.cabal index 490fe626..5afdc5f8 100644 --- a/core-data/core-data.cabal +++ b/core-data/core-data.cabal @@ -46,7 +46,7 @@ library , base >=4.11 && <5 , bytestring , containers - , core-text >=0.3.4 + , core-text >=0.3.7 , hashable >=1.2 , prettyprinter >=1.6.2 , scientific diff --git a/core-data/package.yaml b/core-data/package.yaml index f94931c9..5236f871 100644 --- a/core-data/package.yaml +++ b/core-data/package.yaml @@ -33,7 +33,7 @@ dependencies: - text - unordered-containers - vector - - core-text >= 0.3.4 + - core-text >= 0.3.7 library: source-dirs: lib diff --git a/core-program/lib/Core/Program/Metadata.hs b/core-program/lib/Core/Program/Metadata.hs index d2b61320..c0a926e2 100644 --- a/core-program/lib/Core/Program/Metadata.hs +++ b/core-program/lib/Core/Program/Metadata.hs @@ -150,12 +150,12 @@ readCabalFile = runIO $ do parseCabalFile :: Bytes -> Map Rope Rope parseCabalFile contents = - let breakup = intoMap . fmap (breakRope (== ':')) . breakLines . fromBytes + let breakup = intoMap . fmap (breakRope' (== ':')) . breakLines . fromBytes in breakup contents -- this should probably be a function in Core.Text.Rope -breakRope :: (Char -> Bool) -> Rope -> (Rope, Rope) -breakRope predicate text = +breakRope' :: (Char -> Bool) -> Rope -> (Rope, Rope) +breakRope' predicate text = let pieces = take 2 (breakPieces predicate text) in case pieces of [] -> ("", "")