From 53fe0806c7a1075e2d686d0a3b6fc0f974a6f35a Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Sun, 21 Jun 2020 02:49:06 +0200 Subject: [PATCH 01/13] Correct version info regarding lookupDefault deprecation (#274) --- Data/HashMap/Base.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Data/HashMap/Base.hs b/Data/HashMap/Base.hs index 310b9229..1c20cdee 100644 --- a/Data/HashMap/Base.hs +++ b/Data/HashMap/Base.hs @@ -713,7 +713,7 @@ findWithDefault def k t = case lookup k t of -- | /O(log n)/ Return the value to which the specified key is mapped, -- or the default value if this map contains no mapping for the key. -- --- DEPRECATED: lookupDefault is deprecated as of version 0.2.10, replaced +-- DEPRECATED: lookupDefault is deprecated as of version 0.2.11, replaced -- by 'findWithDefault'. lookupDefault :: (Eq k, Hashable k) => v -- ^ Default value to return. From e6598b5b6b114602b4cedcd3961efffe739aff98 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Sun, 21 Jun 2020 02:17:44 +0200 Subject: [PATCH 02/13] Documentation tweaks for alter and alterF --- Data/HashMap/Base.hs | 20 +++++++++++++------- Data/HashMap/Strict/Base.hs | 18 ++++++++++++------ 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/Data/HashMap/Base.hs b/Data/HashMap/Base.hs index 1c20cdee..007a71b1 100644 --- a/Data/HashMap/Base.hs +++ b/Data/HashMap/Base.hs @@ -1233,9 +1233,14 @@ update f = alter (>>= f) {-# INLINABLE update #-} --- | /O(log n)/ The expression (@'alter' f k map@) alters the value @x@ at @k@, or --- absence thereof. @alter@ can be used to insert, delete, or update a value in a --- map. In short : @'lookup' k ('alter' f k m) = f ('lookup' k m)@. +-- | /O(log n)/ The expression @('alter' f k map)@ alters the value @x@ at @k@, or +-- absence thereof. +-- +-- 'alter' can be used to insert, delete, or update a value in a map. In short: +-- +-- @ +-- 'lookup' k ('alter' f k m) = f ('lookup' k m) +-- @ alter :: (Eq k, Hashable k) => (Maybe v -> Maybe v) -> k -> HashMap k v -> HashMap k v -- TODO(m-renaud): Consider using specialized insert and delete for alter. alter f k m = @@ -1244,12 +1249,13 @@ alter f k m = Just v -> insert k v m {-# INLINABLE alter #-} --- | /O(log n)/ The expression (@'alterF' f k map@) alters the value @x@ at --- @k@, or absence thereof. @alterF@ can be used to insert, delete, or update --- a value in a map. +-- | /O(log n)/ The expression @('alterF' f k map)@ alters the value @x@ at +-- @k@, or absence thereof. +-- +-- 'alterF' can be used to insert, delete, or update a value in a map. -- -- Note: 'alterF' is a flipped version of the 'at' combinator from --- . +-- . -- -- @since 0.2.10 alterF :: (Functor f, Eq k, Hashable k) diff --git a/Data/HashMap/Strict/Base.hs b/Data/HashMap/Strict/Base.hs index 6cdb221d..48dced5e 100644 --- a/Data/HashMap/Strict/Base.hs +++ b/Data/HashMap/Strict/Base.hs @@ -265,9 +265,14 @@ update :: (Eq k, Hashable k) => (a -> Maybe a) -> k -> HashMap k a -> HashMap k update f = alter (>>= f) {-# INLINABLE update #-} --- | /O(log n)/ The expression (@'alter' f k map@) alters the value @x@ at @k@, or --- absence thereof. @alter@ can be used to insert, delete, or update a value in a --- map. In short : @'lookup' k ('alter' f k m) = f ('lookup' k m)@. +-- | /O(log n)/ The expression @('alter' f k map)@ alters the value @x@ at @k@, or +-- absence thereof. +-- +-- 'alter' can be used to insert, delete, or update a value in a map. In short: +-- +-- @ +-- 'lookup' k ('alter' f k m) = f ('lookup' k m) +-- @ alter :: (Eq k, Hashable k) => (Maybe v -> Maybe v) -> k -> HashMap k v -> HashMap k v alter f k m = case f (HM.lookup k m) of @@ -276,11 +281,12 @@ alter f k m = {-# INLINABLE alter #-} -- | /O(log n)/ The expression (@'alterF' f k map@) alters the value @x@ at --- @k@, or absence thereof. @alterF@ can be used to insert, delete, or update --- a value in a map. +-- @k@, or absence thereof. +-- +-- 'alterF' can be used to insert, delete, or update a value in a map. -- -- Note: 'alterF' is a flipped version of the 'at' combinator from --- . +-- . -- -- @since 0.2.10 alterF :: (Functor f, Eq k, Hashable k) From 6b80aa819a5faeeab6246f77d56049ec0bfb5c9e Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Sun, 21 Jun 2020 03:28:49 +0200 Subject: [PATCH 03/13] Remove dead code from Data.HashMap.Array (#271) * Remove unused Array.traverse{ST,IO} functions * Remove unused Array.copyM function * Remove unused Array.run2 function * Remove unused Int argument from array function * Remove copyMutableArray#, clean up imports * Remove Int parameter from marray function * Remove Array.lengthM and sizeOfSmallMutableArray# * Revert "Remove unused Array.traverse{ST,IO} functions" This reverts commit 492b3944b25d3e5e7457a8d54d5e9f5cb78d3b42. * Revert "Remove Array.lengthM and sizeOfSmallMutableArray#" This reverts commit 3b04d963414e5117e8af3c657f59cb961ed6ddfe. * Revert "Remove copyMutableArray#, clean up imports" This reverts commit 126a7d517809f3c05d851939608c1baaa19546ea. * Silence warnings regarding copyMutableArray# * Remove [m]array smart constructors * Revert "Remove unused Array.copyM function" This reverts commit 089e9e4e87df3d7f4405dbbab2b7f0c2244a7d91. --- Data/HashMap/Array.hs | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/Data/HashMap/Array.hs b/Data/HashMap/Array.hs index e8ba5a0d..8dc23e9e 100644 --- a/Data/HashMap/Array.hs +++ b/Data/HashMap/Array.hs @@ -36,7 +36,6 @@ module Data.HashMap.Array , unsafeThaw , unsafeSameArray , run - , run2 , copy , copyM @@ -210,11 +209,6 @@ length :: Array a -> Int length ary = I# (sizeofArray# (unArray ary)) {-# INLINE length #-} --- | Smart constructor -array :: Array# a -> Int -> Array a -array ary _n = Array ary -{-# INLINE array #-} - data MArray s a = MArray { unMArray :: !(MutableArray# s a) } @@ -223,11 +217,6 @@ lengthM :: MArray s a -> Int lengthM mary = I# (sizeofMutableArray# (unMArray mary)) {-# INLINE lengthM #-} --- | Smart constructor -marray :: MutableArray# s a -> Int -> MArray s a -marray mary _n = MArray mary -{-# INLINE marray #-} - ------------------------------------------------------------------------ instance NFData a => NFData (Array a) where @@ -249,11 +238,11 @@ rnfArray ary0 = go ary0 n0 0 -- state thread, with each element containing the specified initial -- value. new :: Int -> a -> ST s (MArray s a) -new n@(I# n#) b = +new (I# n#) b = CHECK_GT("new",n,(0 :: Int)) ST $ \s -> case newArray# n# b s of - (# s', ary #) -> (# s', marray ary n #) + (# s', ary #) -> (# s', MArray ary #) {-# INLINE new #-} new_ :: Int -> ST s (MArray s a) @@ -308,25 +297,19 @@ indexM ary _i@(I# i#) = unsafeFreeze :: MArray s a -> ST s (Array a) unsafeFreeze mary = ST $ \s -> case unsafeFreezeArray# (unMArray mary) s of - (# s', ary #) -> (# s', array ary (lengthM mary) #) + (# s', ary #) -> (# s', Array ary #) {-# INLINE unsafeFreeze #-} unsafeThaw :: Array a -> ST s (MArray s a) unsafeThaw ary = ST $ \s -> case unsafeThawArray# (unArray ary) s of - (# s', mary #) -> (# s', marray mary (length ary) #) + (# s', mary #) -> (# s', MArray mary #) {-# INLINE unsafeThaw #-} run :: (forall s . ST s (MArray s e)) -> Array e run act = runST $ act >>= unsafeFreeze {-# INLINE run #-} -run2 :: (forall s. ST s (MArray s e, a)) -> (Array e, a) -run2 k = runST (do - (marr,b) <- k - arr <- unsafeFreeze marr - return (arr,b)) - -- | Unsafely copy the elements of an array. Array bounds are not checked. copy :: Array e -> Int -> MArray s e -> Int -> Int -> ST s () copy !src !_sidx@(I# sidx#) !dst !_didx@(I# didx#) _n@(I# n#) = @@ -469,10 +452,10 @@ undefinedElem = error "Data.HashMap.Array: Undefined element" {-# NOINLINE undefinedElem #-} thaw :: Array e -> Int -> Int -> ST s (MArray s e) -thaw !ary !_o@(I# o#) !n@(I# n#) = +thaw !ary !_o@(I# o#) (I# n#) = CHECK_LE("thaw", _o + n, length ary) ST $ \ s -> case thawArray# (unArray ary) o# n# s of - (# s2, mary# #) -> (# s2, marray mary# n #) + (# s2, mary# #) -> (# s2, MArray mary# #) {-# INLINE thaw #-} -- | /O(n)/ Delete an element at the given position in this array, From 0aa1c6a4e79ccb41ee71668c1970cf13a84b0253 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Thu, 25 Jun 2020 21:01:42 +0200 Subject: [PATCH 04/13] Travis: Remove job using ghc-head (#279) The latest ghc-head version is 8.7.20181217, so this job is quite useless now. See https://launchpad.net/~hvr/+archive/ubuntu/ghc --- .travis.yml | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/.travis.yml b/.travis.yml index d372d5a2..cd76dfc0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,6 @@ # This Travis job script has been generated by a script via # -# haskell-ci '--benchmarks-jobs= >=7.10' '--ghc-head' '--installed=-containers' '--installed=-binary' 'unordered-containers.cabal' +# haskell-ci '--benchmarks-jobs= >=7.10' '--installed=-containers' '--installed=-binary' 'unordered-containers.cabal' # # To regenerate the script (for example after adjusting tested-with) run # @@ -8,7 +8,7 @@ # # For more information, see https://github.com/haskell-CI/haskell-ci # -# version: 0.10.1 +# version: 0.10.2 # version: ~> 1.0 language: c @@ -57,11 +57,6 @@ jobs: - compiler: ghc-7.8.4 addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-7.8.4","cabal-install-3.2"]}} os: linux - - compiler: ghc-head - addons: {"apt":{"sources":[{"sourceline":"deb http://ppa.launchpad.net/hvr/ghc/ubuntu xenial main","key_url":"https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x063dab2bdc0b3f9fcebc378bff3aeacef6f88286"}],"packages":["ghc-head","cabal-install-head"]}} - os: linux - allow_failures: - - compiler: ghc-head before_install: - HC=$(echo "/opt/$CC/bin/ghc" | sed 's/-/\//') - WITHCOMPILER="-w $HC" @@ -80,7 +75,6 @@ before_install: - BENCH=--enable-benchmarks - if [ $HCNUMVER -lt 71000 ] ; then BENCH=--disable-benchmarks ; fi - HEADHACKAGE=false - - if [ $HCNUMVER -gt 81001 ] ; then HEADHACKAGE=true ; fi - rm -f $CABALHOME/config - | echo "verbose: normal +nowrap +markoutput" >> $CABALHOME/config @@ -98,17 +92,6 @@ before_install: echo " prefix: $CABALHOME" >> $CABALHOME/config echo "repository hackage.haskell.org" >> $CABALHOME/config echo " url: http://hackage.haskell.org/" >> $CABALHOME/config - - | - if $HEADHACKAGE; then - echo "allow-newer: $($HCPKG list --simple-output | sed -E 's/([a-zA-Z-]+)-[0-9.]+/*:\1/g')" >> $CABALHOME/config - echo "repository head.hackage.ghc.haskell.org" >> $CABALHOME/config - echo " url: https://ghc.gitlab.haskell.org/head.hackage/" >> $CABALHOME/config - echo " secure: True" >> $CABALHOME/config - echo " root-keys: 7541f32a4ccca4f97aea3b22f5e593ba2c0267546016b992dfadcd2fe944e55d" >> $CABALHOME/config - echo " 26021a13b401500c8eb2761ca95c61f2d625bfef951b939a8124ed12ecf07329" >> $CABALHOME/config - echo " f76d08be13e9a61a377a85e2fb63f4c5435d40f8feb3e12eb05905edb8cdea89" >> $CABALHOME/config - echo " key-threshold: 3" >> $CABALHOME/config - fi install: - ${CABAL} --version - echo "$(${HC} --version) [$(${HC} --print-project-git-commit-id 2> /dev/null || echo '?')]" @@ -172,5 +155,5 @@ script: - rm -f cabal.project.local - ${CABAL} v2-build $WITHCOMPILER --disable-tests --disable-benchmarks all -# REGENDATA ("0.10.1",["--benchmarks-jobs= >=7.10","--ghc-head","--installed=-containers","--installed=-binary","unordered-containers.cabal"]) +# REGENDATA ("0.10.2",["--benchmarks-jobs= >=7.10","--installed=-containers","--installed=-binary","unordered-containers.cabal"]) # EOF From 4bf043038405f66a306ab4dac15249744766b445 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Thu, 25 Jun 2020 21:02:16 +0200 Subject: [PATCH 05/13] Remove dependency on deepseq-generics (#278) --- benchmarks/Benchmarks.hs | 7 ++----- unordered-containers.cabal | 3 +-- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/benchmarks/Benchmarks.hs b/benchmarks/Benchmarks.hs index 65e7e53c..da07ec99 100644 --- a/benchmarks/Benchmarks.hs +++ b/benchmarks/Benchmarks.hs @@ -1,9 +1,8 @@ -{-# LANGUAGE CPP, DeriveGeneric, GADTs, PackageImports, RecordWildCards #-} +{-# LANGUAGE CPP, DeriveAnyClass, DeriveGeneric, GADTs, PackageImports, RecordWildCards #-} module Main where import Control.DeepSeq -import Control.DeepSeq.Generics (genericRnf) import Gauge (bench, bgroup, defaultMain, env, nf, whnf) import Data.Bits ((.&.)) import Data.Functor.Identity @@ -65,9 +64,7 @@ data Env = Env { im :: !(IM.IntMap Int), ihm :: !(IHM.Map String Int), ihmbs :: !(IHM.Map BS.ByteString Int) - } deriving Generic - -instance NFData Env where rnf = genericRnf + } deriving (Generic, NFData) setupEnv :: IO Env setupEnv = do diff --git a/unordered-containers.cabal b/unordered-containers.cabal index cdaa0825..c4646eb4 100644 --- a/unordered-containers.cabal +++ b/unordered-containers.cabal @@ -199,8 +199,7 @@ benchmark benchmarks bytestring, containers, gauge >= 0.2.5 && < 0.3, - deepseq >= 1.1, - deepseq-generics, + deepseq >= 1.4, hashable >= 1.0.1.1, hashmap, mtl, From 7485f5cd8a7af9a85292475d2ac0a584a0fc3a86 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Mon, 13 Jul 2020 01:18:56 +0200 Subject: [PATCH 06/13] Remove custom unsafeShift{L,R} definitions (#281) With GHC < 8.2, the standard `unsafeShift{L,R}` definitions aren't inlined properly in larger unfoldings. This results in benchmark slowdowns on the order of 1 to 6%. --- Data/HashMap/Base.hs | 5 ++--- Data/HashMap/UnsafeShift.hs | 16 ---------------- unordered-containers.cabal | 1 - 3 files changed, 2 insertions(+), 20 deletions(-) delete mode 100644 Data/HashMap/UnsafeShift.hs diff --git a/Data/HashMap/Base.hs b/Data/HashMap/Base.hs index 007a71b1..395c5fd9 100644 --- a/Data/HashMap/Base.hs +++ b/Data/HashMap/Base.hs @@ -129,7 +129,7 @@ import Data.Semigroup (Semigroup((<>))) #endif import Control.DeepSeq (NFData(rnf)) import Control.Monad.ST (ST) -import Data.Bits ((.&.), (.|.), complement, popCount) +import Data.Bits ((.&.), (.|.), complement, popCount, unsafeShiftL, unsafeShiftR) import Data.Data hiding (Typeable) import qualified Data.Foldable as Foldable #if MIN_VERSION_base(4,10,0) @@ -144,7 +144,6 @@ import qualified Data.HashMap.Array as A import qualified Data.Hashable as H import Data.Hashable (Hashable) import Data.HashMap.Unsafe (runST) -import Data.HashMap.UnsafeShift (unsafeShiftL, unsafeShiftR) import Data.HashMap.List (isPermutationBy, unorderedCompare) import Data.Typeable (Typeable) @@ -2101,7 +2100,7 @@ bitsPerSubkey :: Int bitsPerSubkey = 4 maxChildren :: Int -maxChildren = fromIntegral $ 1 `unsafeShiftL` bitsPerSubkey +maxChildren = 1 `unsafeShiftL` bitsPerSubkey subkeyMask :: Bitmap subkeyMask = 1 `unsafeShiftL` bitsPerSubkey - 1 diff --git a/Data/HashMap/UnsafeShift.hs b/Data/HashMap/UnsafeShift.hs deleted file mode 100644 index 529ba504..00000000 --- a/Data/HashMap/UnsafeShift.hs +++ /dev/null @@ -1,16 +0,0 @@ -{-# LANGUAGE MagicHash #-} - -module Data.HashMap.UnsafeShift - ( unsafeShiftL - , unsafeShiftR - ) where - -import GHC.Exts (Word(W#), Int(I#), uncheckedShiftL#, uncheckedShiftRL#) - -unsafeShiftL :: Word -> Int -> Word -unsafeShiftL (W# x#) (I# i#) = W# (x# `uncheckedShiftL#` i#) -{-# INLINE unsafeShiftL #-} - -unsafeShiftR :: Word -> Int -> Word -unsafeShiftR (W# x#) (I# i#) = W# (x# `uncheckedShiftRL#` i#) -{-# INLINE unsafeShiftR #-} diff --git a/unordered-containers.cabal b/unordered-containers.cabal index c4646eb4..aec2a713 100644 --- a/unordered-containers.cabal +++ b/unordered-containers.cabal @@ -46,7 +46,6 @@ library Data.HashMap.Strict.Base Data.HashMap.List Data.HashMap.Unsafe - Data.HashMap.UnsafeShift Data.HashSet.Base build-depends: From afcbc7793fa0c5eeeffeb0262a9880149c5267eb Mon Sep 17 00:00:00 2001 From: Matt Renaud Date: Mon, 20 Jul 2020 03:34:10 -0700 Subject: [PATCH 07/13] Improve HashSet API docs. (#267) - More information in introduction (basic operations, and using HashSet with custom data types) - Examples alongside function docs --- Data/HashSet.hs | 107 +++++++++++++++++++++++++++++++++++-------- Data/HashSet/Base.hs | 66 +++++++++++++++++++++----- 2 files changed, 143 insertions(+), 30 deletions(-) diff --git a/Data/HashSet.hs b/Data/HashSet.hs index a7dda9f9..43d8bba8 100644 --- a/Data/HashSet.hs +++ b/Data/HashSet.hs @@ -4,25 +4,94 @@ #endif ------------------------------------------------------------------------ --- | --- Module : Data.HashSet --- Copyright : 2011 Bryan O'Sullivan --- License : BSD-style --- Maintainer : johan.tibell@gmail.com --- Stability : provisional --- Portability : portable --- --- A set of /hashable/ values. A set cannot contain duplicate items. --- A 'HashSet' makes no guarantees as to the order of its elements. --- --- The implementation is based on /hash array mapped trie/. A --- 'HashSet' is often faster than other tree-based set types, --- especially when value comparison is expensive, as in the case of --- strings. --- --- Many operations have a average-case complexity of /O(log n)/. The --- implementation uses a large base (i.e. 16) so in practice these --- operations are constant time. +{-| +Module : Data.HashSet +Copyright : 2011 Bryan O'Sullivan +License : BSD-style +Maintainer : johan.tibell@gmail.com +Stability : provisional +Portability : portable + += Introduction + +'HashSet' allows you to store /unique/ elements, providing efficient insertion, +lookups, and deletion. A 'HashSet' makes no guarantees as to the order of its +elements. + +If you are storing sets of "Data.Int"s consider using "Data.IntSet" from the + package. + + +== Examples + +All the examples below assume @HashSet@ is imported qualified, and uses the following @dataStructures@ set. + +>>> import qualified Data.HashSet as HashSet +>>> let dataStructures = HashSet.fromList ["Set", "Map", "Graph", "Sequence"] + +=== Basic Operations + +Check membership in a set: + +>>> -- Check if "Map" and "Trie" are in the set of data structures. +>>> HashSet.member "Map" dataStructures +True +>>> HashSet.member "Trie" dataStructures +False + +Add a new entry to the set: + +>>> let moreDataStructures = HashSet.insert "Trie" dataStructures +>>> HashSet.member "Trie" moreDataStructures +> True + +Remove the @\"Graph\"@ entry from the set of data structures. + +>>> let fewerDataStructures = HashSet.delete "Graph" dataStructures +>>> HashSet.toList fewerDataStructures +["Map","Set","Sequence"] + + +Create a new set and combine it with our original set. + +>>> let unorderedDataStructures = HashSet.fromList ["HashSet", "HashMap"] +>>> HashSet.union dataStructures unorderedDataStructures +fromList ["Map","HashSet","Graph","HashMap","Set","Sequence"] + +=== Using custom data with HashSet + +To create a @HashSet@ of your custom type, the type must have instances for +'Data.Eq.Eq' and 'Data.Hashable.Hashable'. The @Hashable@ typeclass is defined in the + package, see the +documentation for information on how to make your type an instance of +@Hashable@. + +We'll start by setting up our custom data type: + +>>> :set -XDeriveGeneric +>>> import GHC.Generics (Generic) +>>> import Data.Hashable +>>> data Person = Person { name :: String, likesDogs :: Bool } deriving (Show, Eq, Generic) +>>> instance Hashable Person + +And now we'll use it! + +>>> let people = HashSet.fromList [Person "Lana" True, Person "Joe" False, Person "Simon" True] +>>> HashSet.filter likesDogs people +fromList [Person {name = "Simon", likesDogs = True},Person {name = "Lana", likesDogs = True}] + + +== Performance + +The implementation is based on /hash array mapped tries/. A +'HashSet' is often faster than other 'Data.Ord.Ord'-based set types, +especially when value comparisons are expensive, as in the case of +strings. + +Many operations have a average-case complexity of /O(log n)/. The +implementation uses a large base (i.e. 16) so in practice these +operations are constant time. +-} module Data.HashSet ( diff --git a/Data/HashSet/Base.hs b/Data/HashSet/Base.hs index ccc77729..fb2bf721 100644 --- a/Data/HashSet/Base.hs +++ b/Data/HashSet/Base.hs @@ -19,7 +19,7 @@ -- A set of /hashable/ values. A set cannot contain duplicate items. -- A 'HashSet' makes no guarantees as to the order of its elements. -- --- The implementation is based on /hash array mapped trie/. A +-- The implementation is based on /hash array mapped tries/. A -- 'HashSet' is often faster than other tree-based set types, -- especially when value comparison is expensive, as in the case of -- strings. @@ -36,10 +36,6 @@ module Data.HashSet.Base , empty , singleton - -- * Combine - , union - , unions - -- * Basic interface , null , size @@ -50,6 +46,10 @@ module Data.HashSet.Base -- * Transformations , map + -- * Combine + , union + , unions + -- * Difference and intersection , difference , intersection @@ -260,24 +260,39 @@ hashSetDataType :: DataType hashSetDataType = mkDataType "Data.HashSet.Base.HashSet" [fromListConstr] -- | /O(1)/ Construct an empty set. +-- +-- >>> HashSet.empty +-- fromList [] empty :: HashSet a empty = HashSet H.empty -- | /O(1)/ Construct a set with a single element. +-- +-- >>> HashSet.singleton 1 +-- fromList [1] singleton :: Hashable a => a -> HashSet a singleton a = HashSet (H.singleton a ()) {-# INLINABLE singleton #-} --- | /O(1)/ Convert to the equivalent 'HashMap'. +-- | /O(1)/ Convert to set to the equivalent 'HashMap' with @()@ values. +-- +-- >>> HashSet.toMap (HashSet.singleton 1) +-- fromList [(1,())] toMap :: HashSet a -> HashMap a () toMap = asMap --- | /O(1)/ Convert from the equivalent 'HashMap'. +-- | /O(1)/ Convert from the equivalent 'HashMap' with @()@ values. +-- +-- >>> HashSet.fromMap (HashMap.singleton 1 ()) +-- fromList [1] fromMap :: HashMap a () -> HashSet a fromMap = HashSet -- | /O(n)/ Produce a 'HashSet' of all the keys in the given 'HashMap'. -- +-- >>> HashSet.keysSet (HashMap.fromList [(1, "a"), (2, "b")] +-- fromList [1,2] +-- -- @since 0.2.10.0 keysSet :: HashMap k a -> HashSet k keysSet m = fromMap (() <$ m) @@ -287,8 +302,6 @@ keysSet m = fromMap (() <$ m) -- To obtain good performance, the smaller set must be presented as -- the first argument. -- --- ==== __Examples__ --- -- >>> union (fromList [1,2]) (fromList [2,3]) -- fromList [1,2,3] union :: (Eq a, Hashable a) => HashSet a -> HashSet a -> HashSet a @@ -303,17 +316,32 @@ unions = List.foldl' union empty {-# INLINE unions #-} -- | /O(1)/ Return 'True' if this set is empty, 'False' otherwise. +-- +-- >>> HashSet.null HashSet.empty +-- True +-- >>> HashSet.null (HashSet.singleton 1) +-- False null :: HashSet a -> Bool null = H.null . asMap {-# INLINE null #-} -- | /O(n)/ Return the number of elements in this set. +-- +-- >>> HashSet.size HashSet.empty +-- 0 +-- >>> HashSet.size (HashSet.fromList [1,2,3]) +-- 3 size :: HashSet a -> Int size = H.size . asMap {-# INLINE size #-} -- | /O(log n)/ Return 'True' if the given value is present in this -- set, 'False' otherwise. +-- +-- >>> HashSet.member 1 (Hashset.fromList [1,2,3]) +-- True +-- >>> HashSet.member 1 (Hashset.fromList [4,5,6]) +-- False member :: (Eq a, Hashable a) => a -> HashSet a -> Bool member a s = case H.lookup a (asMap s) of Just _ -> True @@ -321,30 +349,46 @@ member a s = case H.lookup a (asMap s) of {-# INLINABLE member #-} -- | /O(log n)/ Add the specified value to this set. +-- +-- >>> HashSet.insert 1 HashSet.empty +-- fromList [1] insert :: (Eq a, Hashable a) => a -> HashSet a -> HashSet a insert a = HashSet . H.insert a () . asMap {-# INLINABLE insert #-} --- | /O(log n)/ Remove the specified value from this set if --- present. +-- | /O(log n)/ Remove the specified value from this set if present. +-- +-- >>> HashSet.delete 1 (HashSet.fromList [1,2,3]) +-- fromList [2,3] +-- >>> HashSet.delete 1 (HashSet.fromList [4,5,6]) +-- fromList [4,5,6] delete :: (Eq a, Hashable a) => a -> HashSet a -> HashSet a delete a = HashSet . H.delete a . asMap {-# INLINABLE delete #-} -- | /O(n)/ Transform this set by applying a function to every value. -- The resulting set may be smaller than the source. +-- +-- >>> HashSet.map show (HashSet.fromList [1,2,3]) +-- HashSet.fromList ["1","2","3"] map :: (Hashable b, Eq b) => (a -> b) -> HashSet a -> HashSet b map f = fromList . List.map f . toList {-# INLINE map #-} -- | /O(n)/ Difference of two sets. Return elements of the first set -- not existing in the second. +-- +-- >>> HashSet.difference (HashSet.fromList [1,2,3]) (HashSet.fromList [2,3,4]) +-- fromList [1] difference :: (Eq a, Hashable a) => HashSet a -> HashSet a -> HashSet a difference (HashSet a) (HashSet b) = HashSet (H.difference a b) {-# INLINABLE difference #-} -- | /O(n)/ Intersection of two sets. Return elements present in both -- the first set and the second. +-- +-- >>> HashSet.intersection (HashSet.fromList [1,2,3]) (HashSet.fromList [2,3,4]) +-- fromList [2,3] intersection :: (Eq a, Hashable a) => HashSet a -> HashSet a -> HashSet a intersection (HashSet a) (HashSet b) = HashSet (H.intersection a b) {-# INLINABLE intersection #-} From 6f1a92f6cc9fe817661ba5dc56bf609a5145d683 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Mon, 20 Jul 2020 12:53:54 +0200 Subject: [PATCH 08/13] Rename and expose internal modules (#283) This also removes some "Stability" annotations from internal modules. Context: #211. --- Data/HashMap/{Base.hs => Internal.hs} | 24 +++++++++++---- Data/HashMap/{ => Internal}/Array.hs | 26 +++++++++++++---- Data/HashMap/{ => Internal}/List.hs | 19 ++++++++++-- .../{Strict/Base.hs => Internal/Strict.hs} | 29 ++++++++++++++----- Data/HashMap/{ => Internal}/Unsafe.hs | 18 ++++++++++-- Data/HashMap/Lazy.hs | 4 +-- Data/HashMap/Strict.hs | 4 +-- Data/HashSet.hs | 2 +- Data/HashSet/{Base.hs => Internal.hs} | 25 ++++++++++++---- tests/List.hs | 4 +-- unordered-containers.cabal | 15 +++++----- utils/Stats.hs | 6 ++-- 12 files changed, 128 insertions(+), 48 deletions(-) rename Data/HashMap/{Base.hs => Internal.hs} (99%) rename Data/HashMap/{ => Internal}/Array.hs (95%) rename Data/HashMap/{ => Internal}/List.hs (82%) rename Data/HashMap/{Strict/Base.hs => Internal/Strict.hs} (97%) rename Data/HashMap/{ => Internal}/Unsafe.hs (69%) rename Data/HashSet/{Base.hs => Internal.hs} (95%) diff --git a/Data/HashMap/Base.hs b/Data/HashMap/Internal.hs similarity index 99% rename from Data/HashMap/Base.hs rename to Data/HashMap/Internal.hs index 395c5fd9..656dc14c 100644 --- a/Data/HashMap/Base.hs +++ b/Data/HashMap/Internal.hs @@ -11,7 +11,19 @@ #endif {-# OPTIONS_GHC -fno-full-laziness -funbox-strict-fields #-} -module Data.HashMap.Base +-- | = WARNING +-- +-- This module is considered __internal__. +-- +-- The Package Versioning Policy __does not apply__. +-- +-- The contents of this module may change __in any way whatsoever__ +-- and __without any warning__ between minor versions of this package. +-- +-- Authors importing this module are expected to track development +-- closely. + +module Data.HashMap.Internal ( HashMap(..) , Leaf(..) @@ -140,11 +152,11 @@ import GHC.Exts ((==#), build, reallyUnsafePtrEquality#) import Prelude hiding (filter, foldl, foldr, lookup, map, null, pred) import Text.Read hiding (step) -import qualified Data.HashMap.Array as A +import qualified Data.HashMap.Internal.Array as A import qualified Data.Hashable as H import Data.Hashable (Hashable) -import Data.HashMap.Unsafe (runST) -import Data.HashMap.List (isPermutationBy, unorderedCompare) +import Data.HashMap.Internal.Unsafe (runST) +import Data.HashMap.Internal.List (isPermutationBy, unorderedCompare) import Data.Typeable (Typeable) import GHC.Exts (isTrue#) @@ -283,7 +295,7 @@ fromListConstr :: Constr fromListConstr = mkConstr hashMapDataType "fromList" [] Prefix hashMapDataType :: DataType -hashMapDataType = mkDataType "Data.HashMap.Base.HashMap" [fromListConstr] +hashMapDataType = mkDataType "Data.HashMap.Internal.HashMap" [fromListConstr] type Hash = Word type Bitmap = Word @@ -729,7 +741,7 @@ lookupDefault def k t = findWithDefault def k t #endif (!) m k = case lookup k m of Just v -> v - Nothing -> error "Data.HashMap.Base.(!): key not found" + Nothing -> error "Data.HashMap.Internal.(!): key not found" {-# INLINABLE (!) #-} infixl 9 ! diff --git a/Data/HashMap/Array.hs b/Data/HashMap/Internal/Array.hs similarity index 95% rename from Data/HashMap/Array.hs rename to Data/HashMap/Internal/Array.hs index 8dc23e9e..d1a5307f 100644 --- a/Data/HashMap/Array.hs +++ b/Data/HashMap/Internal/Array.hs @@ -1,10 +1,24 @@ {-# LANGUAGE BangPatterns, CPP, MagicHash, Rank2Types, UnboxedTuples, ScopedTypeVariables #-} {-# OPTIONS_GHC -fno-full-laziness -funbox-strict-fields #-} --- | Zero based arrays. +-- | = WARNING +-- +-- This module is considered __internal__. +-- +-- The Package Versioning Policy __does not apply__. +-- +-- The contents of this module may change __in any way whatsoever__ +-- and __without any warning__ between minor versions of this package. +-- +-- Authors importing this module are expected to track development +-- closely. +-- +-- = Description +-- +-- Zero based arrays. -- -- Note that no bounds checking are performed. -module Data.HashMap.Array +module Data.HashMap.Internal.Array ( Array , MArray @@ -88,7 +102,7 @@ import Data.Monoid (Monoid (..)) import qualified Prelude #endif -import Data.HashMap.Unsafe (runST) +import Data.HashMap.Internal.Unsafe (runST) import Control.Monad ((>=>)) @@ -163,9 +177,9 @@ copyMutableArray# = copySmallMutableArray# -- This fugly hack is brought by GHC's apparent reluctance to deal -- with MagicHash and UnboxedTuples when inferring types. Eek! # define CHECK_BOUNDS(_func_,_len_,_k_) \ -if (_k_) < 0 || (_k_) >= (_len_) then error ("Data.HashMap.Array." ++ (_func_) ++ ": bounds error, offset " ++ show (_k_) ++ ", length " ++ show (_len_)) else +if (_k_) < 0 || (_k_) >= (_len_) then error ("Data.HashMap.Internal.Array." ++ (_func_) ++ ": bounds error, offset " ++ show (_k_) ++ ", length " ++ show (_len_)) else # define CHECK_OP(_func_,_op_,_lhs_,_rhs_) \ -if not ((_lhs_) _op_ (_rhs_)) then error ("Data.HashMap.Array." ++ (_func_) ++ ": Check failed: _lhs_ _op_ _rhs_ (" ++ show (_lhs_) ++ " vs. " ++ show (_rhs_) ++ ")") else +if not ((_lhs_) _op_ (_rhs_)) then error ("Data.HashMap.Internal.Array." ++ (_func_) ++ ": Check failed: _lhs_ _op_ _rhs_ (" ++ show (_lhs_) ++ " vs. " ++ show (_rhs_) ++ ")") else # define CHECK_GT(_func_,_lhs_,_rhs_) CHECK_OP(_func_,>,_lhs_,_rhs_) # define CHECK_LE(_func_,_lhs_,_rhs_) CHECK_OP(_func_,<=,_lhs_,_rhs_) # define CHECK_EQ(_func_,_lhs_,_rhs_) CHECK_OP(_func_,==,_lhs_,_rhs_) @@ -448,7 +462,7 @@ foldMap f = \ary0 -> case length ary0 of {-# INLINE foldMap #-} undefinedElem :: a -undefinedElem = error "Data.HashMap.Array: Undefined element" +undefinedElem = error "Data.HashMap.Internal.Array: Undefined element" {-# NOINLINE undefinedElem #-} thaw :: Array e -> Int -> Int -> ST s (MArray s e) diff --git a/Data/HashMap/List.hs b/Data/HashMap/Internal/List.hs similarity index 82% rename from Data/HashMap/List.hs rename to Data/HashMap/Internal/List.hs index c0f55aa0..cb4cfcbd 100644 --- a/Data/HashMap/List.hs +++ b/Data/HashMap/Internal/List.hs @@ -1,9 +1,24 @@ {-# LANGUAGE ScopedTypeVariables #-} {-# OPTIONS_GHC -fno-full-laziness -funbox-strict-fields #-} --- | Extra list functions + +-- | = WARNING +-- +-- This module is considered __internal__. +-- +-- The Package Versioning Policy __does not apply__. +-- +-- The contents of this module may change __in any way whatsoever__ +-- and __without any warning__ between minor versions of this package. +-- +-- Authors importing this module are expected to track development +-- closely. +-- +-- = Description +-- +-- Extra list functions -- -- In separate module to aid testing. -module Data.HashMap.List +module Data.HashMap.Internal.List ( isPermutationBy , deleteBy , unorderedCompare diff --git a/Data/HashMap/Strict/Base.hs b/Data/HashMap/Internal/Strict.hs similarity index 97% rename from Data/HashMap/Strict/Base.hs rename to Data/HashMap/Internal/Strict.hs index 48dced5e..971ebd7e 100644 --- a/Data/HashMap/Strict/Base.hs +++ b/Data/HashMap/Internal/Strict.hs @@ -8,9 +8,22 @@ -- Copyright : 2010-2012 Johan Tibell -- License : BSD-style -- Maintainer : johan.tibell@gmail.com --- Stability : provisional -- Portability : portable -- +-- = WARNING +-- +-- This module is considered __internal__. +-- +-- The Package Versioning Policy __does not apply__. +-- +-- The contents of this module may change __in any way whatsoever__ +-- and __without any warning__ between minor versions of this package. +-- +-- Authors importing this module are expected to track development +-- closely. +-- +-- = Description +-- -- A map from /hashable/ keys to values. A map cannot contain -- duplicate keys; each key can map to at most one value. A 'HashMap' -- makes no guarantees as to the order of its elements. @@ -23,7 +36,7 @@ -- Many operations have a average-case complexity of /O(log n)/. The -- implementation uses a large base (i.e. 16) so in practice these -- operations are constant time. -module Data.HashMap.Strict.Base +module Data.HashMap.Internal.Strict ( -- * Strictness properties -- $strictness @@ -107,15 +120,15 @@ import qualified Data.List as L import Data.Hashable (Hashable) import Prelude hiding (map, lookup) -import qualified Data.HashMap.Array as A -import qualified Data.HashMap.Base as HM -import Data.HashMap.Base hiding ( +import qualified Data.HashMap.Internal.Array as A +import qualified Data.HashMap.Internal as HM +import Data.HashMap.Internal hiding ( alter, alterF, adjust, fromList, fromListWith, fromListWithKey, insert, insertWith, differenceWith, intersectionWith, intersectionWithKey, map, mapWithKey, mapMaybe, mapMaybeWithKey, singleton, update, unionWith, unionWithKey, traverseWithKey) -import Data.HashMap.Unsafe (runST) +import Data.HashMap.Internal.Unsafe (runST) #if MIN_VERSION_base(4,8,0) import Data.Functor.Identity #endif @@ -310,7 +323,7 @@ alterF f = \ !k !m -> {-# INLINABLE [0] alterF #-} #if MIN_VERSION_base(4,8,0) --- See notes in Data.HashMap.Base +-- See notes in Data.HashMap.Internal test_bottom :: a test_bottom = error "Data.HashMap.alterF internal error: hit test_bottom" @@ -322,7 +335,7 @@ impossibleAdjust = error "Data.HashMap.alterF internal error: impossible adjust" {-# RULES --- See detailed notes on alterF rules in Data.HashMap.Base. +-- See detailed notes on alterF rules in Data.HashMap.Internal. "alterFWeird" forall f. alterF f = alterFWeird (f Nothing) (f (Just test_bottom)) f diff --git a/Data/HashMap/Unsafe.hs b/Data/HashMap/Internal/Unsafe.hs similarity index 69% rename from Data/HashMap/Unsafe.hs rename to Data/HashMap/Internal/Unsafe.hs index 382d5dbf..ae56c9a6 100644 --- a/Data/HashMap/Unsafe.hs +++ b/Data/HashMap/Internal/Unsafe.hs @@ -4,7 +4,21 @@ {-# LANGUAGE MagicHash, Rank2Types, UnboxedTuples #-} #endif --- | This module exports a workaround for this bug: +-- | = WARNING +-- +-- This module is considered __internal__. +-- +-- The Package Versioning Policy __does not apply__. +-- +-- The contents of this module may change __in any way whatsoever__ +-- and __without any warning__ between minor versions of this package. +-- +-- Authors importing this module are expected to track development +-- closely. +-- +-- = Description +-- +-- This module exports a workaround for this bug: -- -- http://hackage.haskell.org/trac/ghc/ticket/5916 -- @@ -12,7 +26,7 @@ -- understand what's going on here. -- -- Code that uses this module should be compiled with -fno-full-laziness -module Data.HashMap.Unsafe +module Data.HashMap.Internal.Unsafe ( runST ) where diff --git a/Data/HashMap/Lazy.hs b/Data/HashMap/Lazy.hs index 8252c6d3..64ccde3d 100644 --- a/Data/HashMap/Lazy.hs +++ b/Data/HashMap/Lazy.hs @@ -100,8 +100,8 @@ module Data.HashMap.Lazy , HS.keysSet ) where -import Data.HashMap.Base as HM -import qualified Data.HashSet.Base as HS +import Data.HashMap.Internal as HM +import qualified Data.HashSet.Internal as HS import Prelude () -- $strictness diff --git a/Data/HashMap/Strict.hs b/Data/HashMap/Strict.hs index e94f8464..72e5d11f 100644 --- a/Data/HashMap/Strict.hs +++ b/Data/HashMap/Strict.hs @@ -99,8 +99,8 @@ module Data.HashMap.Strict , HS.keysSet ) where -import Data.HashMap.Strict.Base as HM -import qualified Data.HashSet.Base as HS +import Data.HashMap.Internal.Strict as HM +import qualified Data.HashSet.Internal as HS import Prelude () -- $strictness diff --git a/Data/HashSet.hs b/Data/HashSet.hs index 43d8bba8..ea38c700 100644 --- a/Data/HashSet.hs +++ b/Data/HashSet.hs @@ -137,5 +137,5 @@ module Data.HashSet , fromMap ) where -import Data.HashSet.Base +import Data.HashSet.Internal import Prelude () diff --git a/Data/HashSet/Base.hs b/Data/HashSet/Internal.hs similarity index 95% rename from Data/HashSet/Base.hs rename to Data/HashSet/Internal.hs index fb2bf721..2cf5fd2d 100644 --- a/Data/HashSet/Base.hs +++ b/Data/HashSet/Internal.hs @@ -9,13 +9,26 @@ ------------------------------------------------------------------------ -- | --- Module : Data.HashSet.Base +-- Module : Data.HashSet.Internal -- Copyright : 2011 Bryan O'Sullivan -- License : BSD-style -- Maintainer : johan.tibell@gmail.com --- Stability : provisional -- Portability : portable -- +-- = WARNING +-- +-- This module is considered __internal__. +-- +-- The Package Versioning Policy __does not apply__. +-- +-- The contents of this module may change __in any way whatsoever__ +-- and __without any warning__ between minor versions of this package. +-- +-- Authors importing this module are expected to track development +-- closely. +-- +-- = Description +-- -- A set of /hashable/ values. A set cannot contain duplicate items. -- A 'HashSet' makes no guarantees as to the order of its elements. -- @@ -28,7 +41,7 @@ -- implementation uses a large base (i.e. 16) so in practice these -- operations are constant time. -module Data.HashSet.Base +module Data.HashSet.Internal ( HashSet @@ -79,7 +92,7 @@ module Data.HashSet.Base import Control.DeepSeq (NFData(..)) import Data.Data hiding (Typeable) -import Data.HashMap.Base +import Data.HashMap.Internal ( HashMap, foldMapWithKey, foldlWithKey, foldrWithKey , equalKeys, equalKeys1) import Data.Hashable (Hashable(hashWithSalt)) @@ -91,7 +104,7 @@ import Data.Monoid (Monoid(..)) import GHC.Exts (build) import Prelude hiding (filter, foldr, foldl, map, null) import qualified Data.Foldable as Foldable -import qualified Data.HashMap.Base as H +import qualified Data.HashMap.Internal as H import qualified Data.List as List import Data.Typeable (Typeable) import Text.Read @@ -257,7 +270,7 @@ fromListConstr :: Constr fromListConstr = mkConstr hashSetDataType "fromList" [] Prefix hashSetDataType :: DataType -hashSetDataType = mkDataType "Data.HashSet.Base.HashSet" [fromListConstr] +hashSetDataType = mkDataType "Data.HashSet.Internal.HashSet" [fromListConstr] -- | /O(1)/ Construct an empty set. -- diff --git a/tests/List.hs b/tests/List.hs index 2bf8e0b2..f95889df 100644 --- a/tests/List.hs +++ b/tests/List.hs @@ -1,6 +1,6 @@ module Main (main) where -import Data.HashMap.List +import Data.HashMap.Internal.List import Data.List (nub, sort, sortBy) import Data.Ord (comparing) @@ -9,7 +9,7 @@ import Test.Framework.Providers.QuickCheck2 (testProperty) import Test.QuickCheck ((==>), (===), property, Property) tests :: Test -tests = testGroup "Data.HashMap.List" +tests = testGroup "Data.HashMap.Internal.List" [ testProperty "isPermutationBy" pIsPermutation , testProperty "isPermutationBy of different length" pIsPermutationDiffLength , testProperty "pUnorderedCompare" pUnorderedCompare diff --git a/unordered-containers.cabal b/unordered-containers.cabal index aec2a713..e81d8b41 100644 --- a/unordered-containers.cabal +++ b/unordered-containers.cabal @@ -37,16 +37,15 @@ flag debug library exposed-modules: + Data.HashMap.Internal + Data.HashMap.Internal.Array + Data.HashMap.Internal.List + Data.HashMap.Internal.Strict + Data.HashMap.Internal.Unsafe Data.HashMap.Lazy Data.HashMap.Strict Data.HashSet - other-modules: - Data.HashMap.Array - Data.HashMap.Base - Data.HashMap.Strict.Base - Data.HashMap.List - Data.HashMap.Unsafe - Data.HashSet.Base + Data.HashSet.Internal build-depends: base >= 4.7 && < 5, @@ -130,7 +129,7 @@ test-suite list-tests hs-source-dirs: tests . main-is: List.hs other-modules: - Data.HashMap.List + Data.HashMap.Internal.List type: exitcode-stdio-1.0 build-depends: diff --git a/utils/Stats.hs b/utils/Stats.hs index e0f27020..8b01ecdc 100644 --- a/utils/Stats.hs +++ b/utils/Stats.hs @@ -3,9 +3,9 @@ {-# OPTIONS_GHC -funbox-strict-fields #-} module Stats where -import qualified Data.HashMap.Array as A -import Data.HashMap.Base (HashMap(..)) -import qualified Data.HashMap.Base as HM +import qualified Data.HashMap.Internal.Array as A +import Data.HashMap.Internal (HashMap(..)) +import qualified Data.HashMap.Internal as HM import Data.Semigroup data Histogram = H { From f508e185900637da22d6fbf4f139cabd428070f4 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Tue, 21 Jul 2020 03:07:46 +0200 Subject: [PATCH 09/13] alterF: Skip deleting the key when it's already absent (#288) Fixes #287. --- Data/HashMap/Internal.hs | 2 +- Data/HashMap/Internal/Strict.hs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index 656dc14c..a572ab5f 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -1282,7 +1282,7 @@ alterF f = \ !k !m -> mv = lookup' h k m in (<$> f mv) $ \fres -> case fres of - Nothing -> delete' h k m + Nothing -> maybe m (const (delete' h k m)) mv Just v' -> insert' h k v' m -- We unconditionally rewrite alterF in RULES, but we expose an diff --git a/Data/HashMap/Internal/Strict.hs b/Data/HashMap/Internal/Strict.hs index 971ebd7e..b774cf41 100644 --- a/Data/HashMap/Internal/Strict.hs +++ b/Data/HashMap/Internal/Strict.hs @@ -314,7 +314,7 @@ alterF f = \ !k !m -> mv = lookup' h k m in (<$> f mv) $ \fres -> case fres of - Nothing -> delete' h k m + Nothing -> maybe m (const (delete' h k m)) mv Just !v' -> insert' h k v' m -- We rewrite this function unconditionally in RULES, but we expose From 352591a4cc171d71c828f659288fb6e3440c28a7 Mon Sep 17 00:00:00 2001 From: Sven Keidel Date: Thu, 30 Jul 2020 14:26:05 +0200 Subject: [PATCH 10/13] Fast inclusion operation on hashmaps and hashsets (#282) * add fast subset operation * add explanation for subkey offset in lookupCont * rename subset operation for compatibility with containers API * update docs: union is not a least upper bound operator for `isSubmapOf`. * explain runtime complexity of isSubmapOf. * isSubmapOfBy: move `Empty` case to top * isSubmapOfBy: fix comments * isSubsetOf: add example * isSubmapOf: quickcheck test for compatibility with containers * isSubmapOf: use arbitrary instance of HashMap * isSubmapOf: fix comments again * isSubmapOf: update doc for runtime complexity * remove mathematical symbols from user doc * add difference subset quickcheck property * add `all` function for arrays * fix comments in `isSubmapOf` * fix wrong runtime complexity of set inclusion * delete unused property * fix error in `isSubmapOf` based on wrong assumption * add benchmarks * change a few recursive `isSubmap` cases to `False` * add strictness annotations * make isSubmapOf and isSubmapOfBy INLINABLE --- Data/HashMap/Internal.hs | 141 ++++++++++++++++++++++++++++++-- Data/HashMap/Internal/Array.hs | 10 ++- Data/HashMap/Internal/Strict.hs | 2 + Data/HashMap/Lazy.hs | 2 + Data/HashMap/Strict.hs | 2 + Data/HashSet.hs | 1 + Data/HashSet/Internal.hs | 13 +++ benchmarks/Benchmarks.hs | 82 +++++++++++++++---- tests/HashMapProperties.hs | 58 ++++++++++++- 9 files changed, 282 insertions(+), 29 deletions(-) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index a572ab5f..0634e54d 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -49,6 +49,8 @@ module Data.HashMap.Internal , update , alter , alterF + , isSubmapOf + , isSubmapOfBy -- * Combine -- ** Union @@ -148,7 +150,7 @@ import qualified Data.Foldable as Foldable import Data.Bifoldable #endif import qualified Data.List as L -import GHC.Exts ((==#), build, reallyUnsafePtrEquality#) +import GHC.Exts ((==#), build, reallyUnsafePtrEquality#, inline) import Prelude hiding (filter, foldl, foldr, lookup, map, null, pred) import Text.Read hiding (step) @@ -590,12 +592,12 @@ lookup k m = case lookup# k m of {-# INLINE lookup #-} lookup# :: (Eq k, Hashable k) => k -> HashMap k v -> (# (# #) | v #) -lookup# k m = lookupCont (\_ -> (# (# #) | #)) (\v _i -> (# | v #)) (hash k) k m +lookup# k m = lookupCont (\_ -> (# (# #) | #)) (\v _i -> (# | v #)) (hash k) k 0 m {-# INLINABLE lookup# #-} #else -lookup k m = lookupCont (\_ -> Nothing) (\v _i -> Just v) (hash k) k m +lookup k m = lookupCont (\_ -> Nothing) (\v _i -> Just v) (hash k) k 0 m {-# INLINABLE lookup #-} #endif @@ -614,7 +616,7 @@ lookup' h k m = case lookupRecordCollision# h k m of (# | (# a, _i #) #) -> Just a {-# INLINE lookup' #-} #else -lookup' h k m = lookupCont (\_ -> Nothing) (\v _i -> Just v) h k m +lookup' h k m = lookupCont (\_ -> Nothing) (\v _i -> Just v) h k 0 m {-# INLINABLE lookup' #-} #endif @@ -649,13 +651,13 @@ lookupRecordCollision h k m = case lookupRecordCollision# h k m of -- into lookupCont because inlining takes care of that. lookupRecordCollision# :: Eq k => Hash -> k -> HashMap k v -> (# (# #) | (# v, Int# #) #) lookupRecordCollision# h k m = - lookupCont (\_ -> (# (# #) | #)) (\v (I# i) -> (# | (# v, i #) #)) h k m + lookupCont (\_ -> (# (# #) | #)) (\v (I# i) -> (# | (# v, i #) #)) h k 0 m -- INLINABLE to specialize to the Eq instance. {-# INLINABLE lookupRecordCollision# #-} #else /* GHC < 8.2 so there are no unboxed sums */ -lookupRecordCollision h k m = lookupCont (\_ -> Absent) Present h k m +lookupRecordCollision h k m = lookupCont (\_ -> Absent) Present h k 0 m {-# INLINABLE lookupRecordCollision #-} #endif @@ -667,6 +669,10 @@ lookupRecordCollision h k m = lookupCont (\_ -> Absent) Present h k m -- so we can be representation-polymorphic in the result type. Since -- this whole thing is always inlined, we don't have to worry about -- any extra CPS overhead. +-- +-- The @Int@ argument is the offset of the subkey in the hash. When looking up +-- keys at the top-level of a hashmap, the offset should be 0. When looking up +-- keys at level @n@ of a hashmap, the offset should be @n * bitsPerSubkey@. lookupCont :: #if __GLASGOW_HASKELL__ >= 802 forall rep (r :: TYPE rep) k v. @@ -677,8 +683,10 @@ lookupCont :: => ((# #) -> r) -- Absent continuation -> (v -> Int -> r) -- Present continuation -> Hash -- The hash of the key - -> k -> HashMap k v -> r -lookupCont absent present !h0 !k0 !m0 = go h0 k0 0 m0 + -> k + -> Int -- The offset of the subkey in the hash. + -> HashMap k v -> r +lookupCont absent present !h0 !k0 !s0 !m0 = go h0 k0 s0 m0 where go :: Eq k => Hash -> k -> Int -> HashMap k v -> r go !_ !_ !_ Empty = absent (# #) @@ -1409,6 +1417,116 @@ alterFEager f !k m = (<$> f mv) $ \fres -> {-# INLINABLE alterFEager #-} #endif +-- | /O(n*log m)/ Inclusion of maps. A map is included in another map if the keys +-- are subsets and the corresponding values are equal: +-- +-- > isSubmapOf m1 m2 = keys m1 `isSubsetOf` keys m2 && +-- > and [ v1 == v2 | (k1,v1) <- toList m1; let v2 = m2 ! k1 ] +-- +-- ==== __Examples__ +-- +-- >>> fromList [(1,'a')] `isSubmapOf` fromList [(1,'a'),(2,'b')] +-- True +-- +-- >>> fromList [(1,'a'),(2,'b')] `isSubmapOf` fromList [(1,'a')] +-- False +isSubmapOf :: (Eq k, Hashable k, Eq v) => HashMap k v -> HashMap k v -> Bool +isSubmapOf = (inline isSubmapOfBy) (==) +{-# INLINABLE isSubmapOf #-} + +-- | /O(n*log m)/ Inclusion of maps with value comparison. A map is included in +-- another map if the keys are subsets and if the comparison function is true +-- for the corresponding values: +-- +-- > isSubmapOfBy cmpV m1 m2 = keys m1 `isSubsetOf` keys m2 && +-- > and [ v1 `cmpV` v2 | (k1,v1) <- toList m1; let v2 = m2 ! k1 ] +-- +-- ==== __Examples__ +-- +-- >>> isSubmapOfBy (<=) (fromList [(1,'a')]) (fromList [(1,'b'),(2,'c')]) +-- True +-- +-- >>> isSubmapOfBy (<=) (fromList [(1,'b')]) (fromList [(1,'a'),(2,'c')]) +-- False +isSubmapOfBy :: (Eq k, Hashable k) => (v1 -> v2 -> Bool) -> HashMap k v1 -> HashMap k v2 -> Bool +-- For maps without collisions the complexity is O(n*log m), where n is the size +-- of m1 and m the size of m2: the inclusion operation visits every leaf in m1 at least once. +-- For each leaf in m1, it looks up the key in m2. +-- +-- The worst case complexity is O(n*m). The worst case is when both hashmaps m1 +-- and m2 are collision nodes for the same hash. Since collision nodes are +-- unsorted arrays, it requires for every key in m1 a linear search to to find a +-- matching key in m2, hence O(n*m). +isSubmapOfBy comp !m1 !m2 = go 0 m1 m2 + where + -- An empty map is always a submap of any other map. + go _ Empty _ = True + + -- If the second map is empty and the first is not, it cannot be a submap. + go _ _ Empty = False + + -- If the first map contains only one entry, lookup the key in the second map. + go s (Leaf h1 (L k1 v1)) t2 = lookupCont (\_ -> False) (\v2 _ -> comp v1 v2) h1 k1 s t2 + + -- In this case, we need to check that for each x in ls1, there is a y in + -- ls2 such that x `comp` y. This is the worst case complexity-wise since it + -- requires a O(m*n) check. + go _ (Collision h1 ls1) (Collision h2 ls2) = + h1 == h2 && subsetArray comp ls1 ls2 + + -- In this case, we only need to check the entries in ls2 with the hash h1. + go s t1@(Collision h1 _) (BitmapIndexed b ls2) + | b .&. m == 0 = False + | otherwise = + go (s+bitsPerSubkey) t1 (A.index ls2 (sparseIndex b m)) + where m = mask h1 s + + -- Similar to the previous case we need to traverse l2 at the index for the hash h1. + go s t1@(Collision h1 _) (Full ls2) = + go (s+bitsPerSubkey) t1 (A.index ls2 (index h1 s)) + + -- In cases where the first and second map are BitmapIndexed or Full, + -- traverse down the tree at the appropriate indices. + go s (BitmapIndexed b1 ls1) (BitmapIndexed b2 ls2) = + submapBitmapIndexed (go (s+bitsPerSubkey)) b1 ls1 b2 ls2 + go s (BitmapIndexed b1 ls1) (Full ls2) = + submapBitmapIndexed (go (s+bitsPerSubkey)) b1 ls1 fullNodeMask ls2 + go s (Full ls1) (Full ls2) = + submapBitmapIndexed (go (s+bitsPerSubkey)) fullNodeMask ls1 fullNodeMask ls2 + + -- Collision and Full nodes always contain at least two entries. Hence it + -- cannot be a map of a leaf. + go _ (Collision {}) (Leaf {}) = False + go _ (BitmapIndexed {}) (Leaf {}) = False + go _ (Full {}) (Leaf {}) = False + go _ (BitmapIndexed {}) (Collision {}) = False + go _ (Full {}) (Collision {}) = False + go _ (Full {}) (BitmapIndexed {}) = False +{-# INLINABLE isSubmapOfBy #-} + +-- | /O(min n m))/ Checks if a bitmap indexed node is a submap of another. +submapBitmapIndexed :: (HashMap k v1 -> HashMap k v2 -> Bool) -> Bitmap -> A.Array (HashMap k v1) -> Bitmap -> A.Array (HashMap k v2) -> Bool +submapBitmapIndexed comp !b1 !ary1 !b2 !ary2 = subsetBitmaps && go 0 0 (b1Orb2 .&. negate b1Orb2) + where + go :: Int -> Int -> Bitmap -> Bool + go !i !j !m + | m > b1Orb2 = True + + -- In case a key is both in ary1 and ary2, check ary1[i] <= ary2[j] and + -- increment the indices i and j. + | b1Andb2 .&. m /= 0 = comp (A.index ary1 i) (A.index ary2 j) && + go (i+1) (j+1) (m `unsafeShiftL` 1) + + -- In case a key occurs in ary1, but not ary2, only increment index j. + | b2 .&. m /= 0 = go i (j+1) (m `unsafeShiftL` 1) + + -- In case a key neither occurs in ary1 nor ary2, continue. + | otherwise = go i j (m `unsafeShiftL` 1) + + b1Andb2 = b1 .&. b2 + b1Orb2 = b1 .|. b2 + subsetBitmaps = b1Orb2 == b2 +{-# INLINABLE submapBitmapIndexed #-} ------------------------------------------------------------------------ -- * Combine @@ -2076,6 +2194,13 @@ updateOrConcatWithKey f ary1 ary2 = A.run $ do return mary {-# INLINABLE updateOrConcatWithKey #-} +-- | /O(n*m)/ Check if the first array is a subset of the second array. +subsetArray :: Eq k => (v1 -> v2 -> Bool) -> A.Array (Leaf k v1) -> A.Array (Leaf k v2) -> Bool +subsetArray cmpV ary1 ary2 = A.length ary1 <= A.length ary2 && A.all inAry2 ary1 + where + inAry2 (L k1 v1) = lookupInArrayCont (\_ -> False) (\v2 _ -> cmpV v1 v2) k1 ary2 + {-# INLINE inAry2 #-} + ------------------------------------------------------------------------ -- Manually unrolled loops diff --git a/Data/HashMap/Internal/Array.hs b/Data/HashMap/Internal/Array.hs index d1a5307f..8dc33607 100644 --- a/Data/HashMap/Internal/Array.hs +++ b/Data/HashMap/Internal/Array.hs @@ -59,6 +59,7 @@ module Data.HashMap.Internal.Array , foldr , foldr' , foldMap + , all , thaw , map @@ -79,9 +80,9 @@ import GHC.ST (ST(..)) import Control.Monad.ST (stToIO) #if __GLASGOW_HASKELL__ >= 709 -import Prelude hiding (filter, foldMap, foldr, foldl, length, map, read, traverse) +import Prelude hiding (filter, foldMap, foldr, foldl, length, map, read, traverse, all) #else -import Prelude hiding (filter, foldr, foldl, length, map, read) +import Prelude hiding (filter, foldr, foldl, length, map, read, all) #endif #if __GLASGOW_HASKELL__ >= 710 @@ -461,6 +462,11 @@ foldMap f = \ary0 -> case length ary0 of in go 0 {-# INLINE foldMap #-} +-- | Verifies that a predicate holds for all elements of an array. +all :: (a -> Bool) -> Array a -> Bool +all p = foldr (\a acc -> p a && acc) True +{-# INLINE all #-} + undefinedElem :: a undefinedElem = error "Data.HashMap.Internal.Array: Undefined element" {-# NOINLINE undefinedElem #-} diff --git a/Data/HashMap/Internal/Strict.hs b/Data/HashMap/Internal/Strict.hs index b774cf41..cd606a71 100644 --- a/Data/HashMap/Internal/Strict.hs +++ b/Data/HashMap/Internal/Strict.hs @@ -63,6 +63,8 @@ module Data.HashMap.Internal.Strict , update , alter , alterF + , isSubmapOf + , isSubmapOfBy -- * Combine -- ** Union diff --git a/Data/HashMap/Lazy.hs b/Data/HashMap/Lazy.hs index 64ccde3d..1fe36b61 100644 --- a/Data/HashMap/Lazy.hs +++ b/Data/HashMap/Lazy.hs @@ -49,6 +49,8 @@ module Data.HashMap.Lazy , update , alter , alterF + , isSubmapOf + , isSubmapOfBy -- * Combine -- ** Union diff --git a/Data/HashMap/Strict.hs b/Data/HashMap/Strict.hs index 72e5d11f..1b29423f 100644 --- a/Data/HashMap/Strict.hs +++ b/Data/HashMap/Strict.hs @@ -48,6 +48,8 @@ module Data.HashMap.Strict , update , alter , alterF + , isSubmapOf + , isSubmapOfBy -- * Combine -- ** Union diff --git a/Data/HashSet.hs b/Data/HashSet.hs index ea38c700..9a316dcb 100644 --- a/Data/HashSet.hs +++ b/Data/HashSet.hs @@ -111,6 +111,7 @@ module Data.HashSet , member , insert , delete + , isSubsetOf -- * Transformations , map diff --git a/Data/HashSet/Internal.hs b/Data/HashSet/Internal.hs index 2cf5fd2d..c429e839 100644 --- a/Data/HashSet/Internal.hs +++ b/Data/HashSet/Internal.hs @@ -55,6 +55,7 @@ module Data.HashSet.Internal , member , insert , delete + , isSubsetOf -- * Transformations , map @@ -310,6 +311,18 @@ fromMap = HashSet keysSet :: HashMap k a -> HashSet k keysSet m = fromMap (() <$ m) +-- | /O(n*log m)/ Inclusion of sets. +-- +-- ==== __Examples__ +-- +-- >>> fromList [1,3] `isSubsetOf` fromList [1,2,3] +-- True +-- +-- >>> fromList [1,2] `isSubsetOf` fromList [1,3] +-- False +isSubsetOf :: (Eq a, Hashable a) => HashSet a -> HashSet a -> Bool +isSubsetOf s1 s2 = H.isSubmapOfBy (\_ _ -> True) (asMap s1) (asMap s2) + -- | /O(n+m)/ Construct a set containing all elements from both sets. -- -- To obtain good performance, the smaller set must be presented as diff --git a/benchmarks/Benchmarks.hs b/benchmarks/Benchmarks.hs index da07ec99..0d3cbedc 100644 --- a/benchmarks/Benchmarks.hs +++ b/benchmarks/Benchmarks.hs @@ -55,15 +55,23 @@ data Env = Env { elemsDupBS :: ![(BS.ByteString, Int)], elemsDupI :: ![(Int, Int)], - hm :: !(HM.HashMap String Int), - hmbs :: !(HM.HashMap BS.ByteString Int), - hmi :: !(HM.HashMap Int Int), - hmi2 :: !(HM.HashMap Int Int), - m :: !(M.Map String Int), - mbs :: !(M.Map BS.ByteString Int), - im :: !(IM.IntMap Int), - ihm :: !(IHM.Map String Int), - ihmbs :: !(IHM.Map BS.ByteString Int) + hm :: !(HM.HashMap String Int), + hmSubset :: !(HM.HashMap String Int), + hmbs :: !(HM.HashMap BS.ByteString Int), + hmbsSubset :: !(HM.HashMap BS.ByteString Int), + hmi :: !(HM.HashMap Int Int), + hmiSubset :: !(HM.HashMap Int Int), + hmi2 :: !(HM.HashMap Int Int), + m :: !(M.Map String Int), + mSubset :: !(M.Map String Int), + mbs :: !(M.Map BS.ByteString Int), + mbsSubset :: !(M.Map BS.ByteString Int), + im :: !(IM.IntMap Int), + imSubset :: !(IM.IntMap Int), + ihm :: !(IHM.Map String Int), + ihmSubset :: !(IHM.Map String Int), + ihmbs :: !(IHM.Map BS.ByteString Int), + ihmbsSubset :: !(IHM.Map BS.ByteString Int) } deriving (Generic, NFData) setupEnv :: IO Env @@ -89,16 +97,29 @@ setupEnv = do elemsDupBS = zip keysDupBS [1..n] elemsDupI = zip keysDupI [1..n] - hm = HM.fromList elems - hmbs = HM.fromList elemsBS - hmi = HM.fromList elemsI - hmi2 = HM.fromList elemsI2 - m = M.fromList elems - mbs = M.fromList elemsBS - im = IM.fromList elemsI - ihm = IHM.fromList elems - ihmbs = IHM.fromList elemsBS + hm = HM.fromList elems + hmSubset = HM.fromList (takeSubset n elems) + hmbs = HM.fromList elemsBS + hmbsSubset = HM.fromList (takeSubset n elemsBS) + hmi = HM.fromList elemsI + hmiSubset = HM.fromList (takeSubset n elemsI) + hmi2 = HM.fromList elemsI2 + m = M.fromList elems + mSubset = M.fromList (takeSubset n elems) + mbs = M.fromList elemsBS + mbsSubset = M.fromList (takeSubset n elemsBS) + im = IM.fromList elemsI + imSubset = IM.fromList (takeSubset n elemsI) + ihm = IHM.fromList elems + ihmSubset = IHM.fromList (takeSubset n elems) + ihmbs = IHM.fromList elemsBS + ihmbsSubset = IHM.fromList (takeSubset n elemsBS) return Env{..} + where + takeSubset n elements = + -- use 50% of the elements for a subset check. + let subsetSize = round (fromIntegral n * 0.5 :: Double) :: Int + in take subsetSize elements main :: IO () main = do @@ -140,6 +161,10 @@ main = do [ bench "String" $ whnf M.fromList elems , bench "ByteString" $ whnf M.fromList elemsBS ] + , bgroup "isSubmapOf" + [ bench "String" $ whnf (M.isSubmapOf mSubset) m + , bench "ByteString" $ whnf (M.isSubmapOf mbsSubset) mbs + ] ] -- ** Map from the hashmap package @@ -177,6 +202,10 @@ main = do [ bench "String" $ whnf IHM.fromList elems , bench "ByteString" $ whnf IHM.fromList elemsBS ] + , bgroup "isSubmapOf" + [ bench "String" $ whnf (IHM.isSubmapOf ihmSubset) ihm + , bench "ByteString" $ whnf (IHM.isSubmapOf ihmbsSubset) ihmbs + ] , bgroup "hash" [ bench "String" $ whnf hash hm , bench "ByteString" $ whnf hash hmbs @@ -194,6 +223,7 @@ main = do , bench "delete-miss" $ whnf (deleteIM keysI') im , bench "size" $ whnf IM.size im , bench "fromList" $ whnf IM.fromList elemsI + , bench "isSubmapOf" $ whnf (IM.isSubmapOf imSubset) im ] , env setupEnv $ \ ~(Env{..}) -> @@ -269,6 +299,16 @@ main = do , bench "ByteString" $ whnf (alterFDelete keysBS') hmbs , bench "Int" $ whnf (alterFDelete keysI') hmi ] + , bgroup "isSubmapOf" + [ bench "String" $ whnf (HM.isSubmapOf hmSubset) hm + , bench "ByteString" $ whnf (HM.isSubmapOf hmbsSubset) hmbs + , bench "Int" $ whnf (HM.isSubmapOf hmiSubset) hmi + ] + , bgroup "isSubmapOfNaive" + [ bench "String" $ whnf (isSubmapOfNaive hmSubset) hm + , bench "ByteString" $ whnf (isSubmapOfNaive hmbsSubset) hmbs + , bench "Int" $ whnf (isSubmapOfNaive hmiSubset) hmi + ] -- Combine , bench "union" $ whnf (HM.union hmi) hmi2 @@ -396,6 +436,12 @@ alterFDelete xs m0 = {-# SPECIALIZE alterFDelete :: [BS.ByteString] -> HM.HashMap BS.ByteString Int -> HM.HashMap BS.ByteString Int #-} +isSubmapOfNaive :: (Eq k, Hashable k) => HM.HashMap k Int -> HM.HashMap k Int -> Bool +isSubmapOfNaive m1 m2 = and [ Just v1 == HM.lookup k1 m2 | (k1,v1) <- HM.toList m1 ] +{-# SPECIALIZE isSubmapOfNaive :: HM.HashMap Int Int -> HM.HashMap Int Int -> Bool #-} +{-# SPECIALIZE isSubmapOfNaive :: HM.HashMap String Int -> HM.HashMap String Int -> Bool #-} +{-# SPECIALIZE isSubmapOfNaive :: HM.HashMap BS.ByteString Int -> HM.HashMap BS.ByteString Int -> Bool #-} + ------------------------------------------------------------------------ -- * Map diff --git a/tests/HashMapProperties.hs b/tests/HashMapProperties.hs index 38fca50f..e9bcf19c 100644 --- a/tests/HashMapProperties.hs +++ b/tests/HashMapProperties.hs @@ -1,4 +1,5 @@ {-# LANGUAGE CPP, GeneralizedNewtypeDeriving #-} +{-# OPTIONS_GHC -fno-warn-orphans #-} -- because of Arbitrary (HashMap k v) -- | Tests for the 'Data.HashMap.Lazy' module. We test functions by -- comparing them to a simpler model, an association list. @@ -15,13 +16,15 @@ import Data.Hashable (Hashable(hashWithSalt)) import qualified Data.List as L import Data.Ord (comparing) #if defined(STRICT) +import Data.HashMap.Strict (HashMap) import qualified Data.HashMap.Strict as HM import qualified Data.Map.Strict as M #else +import Data.HashMap.Lazy (HashMap) import qualified Data.HashMap.Lazy as HM import qualified Data.Map.Lazy as M #endif -import Test.QuickCheck (Arbitrary, Property, (==>), (===)) +import Test.QuickCheck (Arbitrary(..), Property, (==>), (===), forAll, elements) import Test.Framework (Test, defaultMain, testGroup) import Test.Framework.Providers.QuickCheck2 (testProperty) #if MIN_VERSION_base(4,8,0) @@ -38,6 +41,9 @@ newtype Key = K { unK :: Int } instance Hashable Key where hashWithSalt salt k = hashWithSalt salt (unK k) `mod` 20 +instance (Eq k, Hashable k, Arbitrary k, Arbitrary v) => Arbitrary (HashMap k v) where + arbitrary = fmap (HM.fromList) arbitrary + ------------------------------------------------------------------------ -- * Properties @@ -225,6 +231,44 @@ pAlterFLookup k f = `eq` getConst . HM.alterF (Const . apply f) k +pSubmap :: [(Key, Int)] -> [(Key, Int)] -> Bool +pSubmap xs ys = M.isSubmapOf (M.fromList xs) (M.fromList ys) == + HM.isSubmapOf (HM.fromList xs) (HM.fromList ys) + +pSubmapReflexive :: HashMap Key Int -> Bool +pSubmapReflexive m = HM.isSubmapOf m m + +pSubmapUnion :: HashMap Key Int -> HashMap Key Int -> Bool +pSubmapUnion m1 m2 = HM.isSubmapOf m1 (HM.union m1 m2) + +pNotSubmapUnion :: HashMap Key Int -> HashMap Key Int -> Property +pNotSubmapUnion m1 m2 = not (HM.isSubmapOf m1 m2) ==> HM.isSubmapOf m1 (HM.union m1 m2) + +pSubmapDifference :: HashMap Key Int -> HashMap Key Int -> Bool +pSubmapDifference m1 m2 = HM.isSubmapOf (HM.difference m1 m2) m1 + +pNotSubmapDifference :: HashMap Key Int -> HashMap Key Int -> Property +pNotSubmapDifference m1 m2 = + not (HM.null (HM.intersection m1 m2)) ==> + not (HM.isSubmapOf m1 (HM.difference m1 m2)) + +pSubmapDelete :: HashMap Key Int -> Property +pSubmapDelete m = not (HM.null m) ==> + forAll (elements (HM.keys m)) $ \k -> + HM.isSubmapOf (HM.delete k m) m + +pNotSubmapDelete :: HashMap Key Int -> Property +pNotSubmapDelete m = + not (HM.null m) ==> + forAll (elements (HM.keys m)) $ \k -> + not (HM.isSubmapOf m (HM.delete k m)) + +pSubmapInsert :: Key -> Int -> HashMap Key Int -> Property +pSubmapInsert k v m = not (HM.member k m) ==> HM.isSubmapOf m (HM.insert k v m) + +pNotSubmapInsert :: Key -> Int -> HashMap Key Int -> Property +pNotSubmapInsert k v m = not (HM.member k m) ==> not (HM.isSubmapOf (HM.insert k v m) m) + ------------------------------------------------------------------------ -- ** Combine @@ -439,6 +483,18 @@ tests = , testProperty "alterFInsertWith" pAlterFInsertWith , testProperty "alterFDelete" pAlterFDelete , testProperty "alterFLookup" pAlterFLookup + , testGroup "isSubmapOf" + [ testProperty "container compatibility" pSubmap + , testProperty "m ⊆ m" pSubmapReflexive + , testProperty "m1 ⊆ m1 ∪ m2" pSubmapUnion + , testProperty "m1 ⊈ m2 ⇒ m1 ∪ m2 ⊈ m1" pNotSubmapUnion + , testProperty "m1\\m2 ⊆ m1" pSubmapDifference + , testProperty "m1 ∩ m2 ≠ ∅ ⇒ m1 ⊈ m1\\m2 " pNotSubmapDifference + , testProperty "delete k m ⊆ m" pSubmapDelete + , testProperty "m ⊈ delete k m " pNotSubmapDelete + , testProperty "k ∉ m ⇒ m ⊆ insert k v m" pSubmapInsert + , testProperty "k ∉ m ⇒ insert k v m ⊈ m" pNotSubmapInsert + ] ] -- Combine , testProperty "union" pUnion From efa706bac9e0d0597315546f1cf7a7372a4113a9 Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Thu, 30 Jul 2020 17:17:32 +0200 Subject: [PATCH 11/13] Mark internal modules `not-home` for haddock (#294) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …to ensure that hyperlinks preferably target the public modules. --- Data/HashMap/Internal.hs | 1 + Data/HashMap/Internal/Array.hs | 1 + Data/HashMap/Internal/List.hs | 1 + Data/HashMap/Internal/Strict.hs | 1 + Data/HashMap/Internal/Unsafe.hs | 2 ++ Data/HashSet/Internal.hs | 1 + 6 files changed, 7 insertions(+) diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index 0634e54d..30ad0e01 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -10,6 +10,7 @@ {-# LANGUAGE UnboxedSums #-} #endif {-# OPTIONS_GHC -fno-full-laziness -funbox-strict-fields #-} +{-# OPTIONS_HADDOCK not-home #-} -- | = WARNING -- diff --git a/Data/HashMap/Internal/Array.hs b/Data/HashMap/Internal/Array.hs index 8dc33607..f87a1780 100644 --- a/Data/HashMap/Internal/Array.hs +++ b/Data/HashMap/Internal/Array.hs @@ -1,5 +1,6 @@ {-# LANGUAGE BangPatterns, CPP, MagicHash, Rank2Types, UnboxedTuples, ScopedTypeVariables #-} {-# OPTIONS_GHC -fno-full-laziness -funbox-strict-fields #-} +{-# OPTIONS_HADDOCK not-home #-} -- | = WARNING -- diff --git a/Data/HashMap/Internal/List.hs b/Data/HashMap/Internal/List.hs index cb4cfcbd..8c0b639b 100644 --- a/Data/HashMap/Internal/List.hs +++ b/Data/HashMap/Internal/List.hs @@ -1,5 +1,6 @@ {-# LANGUAGE ScopedTypeVariables #-} {-# OPTIONS_GHC -fno-full-laziness -funbox-strict-fields #-} +{-# OPTIONS_HADDOCK not-home #-} -- | = WARNING -- diff --git a/Data/HashMap/Internal/Strict.hs b/Data/HashMap/Internal/Strict.hs index cd606a71..0e692173 100644 --- a/Data/HashMap/Internal/Strict.hs +++ b/Data/HashMap/Internal/Strict.hs @@ -1,6 +1,7 @@ {-# LANGUAGE BangPatterns, CPP, PatternGuards, MagicHash, UnboxedTuples #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE Trustworthy #-} +{-# OPTIONS_HADDOCK not-home #-} ------------------------------------------------------------------------ -- | diff --git a/Data/HashMap/Internal/Unsafe.hs b/Data/HashMap/Internal/Unsafe.hs index ae56c9a6..9dc95b6e 100644 --- a/Data/HashMap/Internal/Unsafe.hs +++ b/Data/HashMap/Internal/Unsafe.hs @@ -4,6 +4,8 @@ {-# LANGUAGE MagicHash, Rank2Types, UnboxedTuples #-} #endif +{-# OPTIONS_HADDOCK not-home #-} + -- | = WARNING -- -- This module is considered __internal__. diff --git a/Data/HashSet/Internal.hs b/Data/HashSet/Internal.hs index c429e839..1fdc091c 100644 --- a/Data/HashSet/Internal.hs +++ b/Data/HashSet/Internal.hs @@ -6,6 +6,7 @@ #if __GLASGOW_HASKELL__ >= 702 {-# LANGUAGE Trustworthy #-} #endif +{-# OPTIONS_HADDOCK not-home #-} ------------------------------------------------------------------------ -- | From 3cc98a44dcf34299439a3fb0dca9a77bb7c5ea1c Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Thu, 30 Jul 2020 17:27:22 +0200 Subject: [PATCH 12/13] Travis: Limit branch builds to master (#295) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …not much use having them in PRs. --- .travis.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index cd76dfc0..7b0bf2ce 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,6 @@ # This Travis job script has been generated by a script via # -# haskell-ci '--benchmarks-jobs= >=7.10' '--installed=-containers' '--installed=-binary' 'unordered-containers.cabal' +# haskell-ci '--benchmarks-jobs= >=7.10' '--installed=-containers' '--installed=-binary' 'unordered-containers.cabal' '--branches=master' # # To regenerate the script (for example after adjusting tested-with) run # @@ -17,6 +17,9 @@ dist: xenial git: # whether to recursively clone submodules submodules: false +branches: + only: + - master cache: directories: - $HOME/.cabal/packages @@ -155,5 +158,5 @@ script: - rm -f cabal.project.local - ${CABAL} v2-build $WITHCOMPILER --disable-tests --disable-benchmarks all -# REGENDATA ("0.10.2",["--benchmarks-jobs= >=7.10","--installed=-containers","--installed=-binary","unordered-containers.cabal"]) +# REGENDATA ("0.10.2",["--benchmarks-jobs= >=7.10","--installed=-containers","--installed=-binary","unordered-containers.cabal","--branches=master"]) # EOF From fa562abae3a0c4a910c7bed6fbe68fdad43ce7ce Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Wed, 5 Aug 2020 01:45:42 +0200 Subject: [PATCH 13/13] Prepare release 0.2.12.0 (#296) --- CHANGES.md | 22 ++++++++++++++++++++++ Data/HashMap/Internal.hs | 4 ++++ Data/HashSet/Internal.hs | 2 ++ unordered-containers.cabal | 2 +- 4 files changed, 29 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 3f4f1764..61418c40 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,25 @@ +## [0.2.12.0] + +* Add `HashMap.isSubmapOf[By]` and `HashSet.isSubsetOf`. Thanks Sven Keidel. ([#282]) + +* Expose internal modules. ([#283]) + +* Documentation improvements in `Data.HashSet`, including a beginner-friendly + introduction. Thanks Matt Renaud. ([#267]) + +* `HashMap.alterF`: Skip key deletion for absent keys. ([#288]) + +* Remove custom `unsafeShift{L,R}` definitions. ([#281]) + +* Various other documentation improvements. + +[0.2.12.0]: https://github.com/haskell-unordered-containers/unordered-containers/compare/v0.2.11.0...v0.2.12.0 +[#267]: https://github.com/haskell-unordered-containers/unordered-containers/pull/267 +[#281]: https://github.com/haskell-unordered-containers/unordered-containers/pull/281 +[#282]: https://github.com/haskell-unordered-containers/unordered-containers/pull/282 +[#283]: https://github.com/haskell-unordered-containers/unordered-containers/pull/283 +[#288]: https://github.com/haskell-unordered-containers/unordered-containers/pull/288 + ## 0.2.11.0 * Add `HashMap.findWithDefault` (soft-deprecates `HashMap.lookupDefault`). diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index 30ad0e01..1f75d6a7 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -1431,6 +1431,8 @@ alterFEager f !k m = (<$> f mv) $ \fres -> -- -- >>> fromList [(1,'a'),(2,'b')] `isSubmapOf` fromList [(1,'a')] -- False +-- +-- @since 0.2.12 isSubmapOf :: (Eq k, Hashable k, Eq v) => HashMap k v -> HashMap k v -> Bool isSubmapOf = (inline isSubmapOfBy) (==) {-# INLINABLE isSubmapOf #-} @@ -1449,6 +1451,8 @@ isSubmapOf = (inline isSubmapOfBy) (==) -- -- >>> isSubmapOfBy (<=) (fromList [(1,'b')]) (fromList [(1,'a'),(2,'c')]) -- False +-- +-- @since 0.2.12 isSubmapOfBy :: (Eq k, Hashable k) => (v1 -> v2 -> Bool) -> HashMap k v1 -> HashMap k v2 -> Bool -- For maps without collisions the complexity is O(n*log m), where n is the size -- of m1 and m the size of m2: the inclusion operation visits every leaf in m1 at least once. diff --git a/Data/HashSet/Internal.hs b/Data/HashSet/Internal.hs index 1fdc091c..68ed078a 100644 --- a/Data/HashSet/Internal.hs +++ b/Data/HashSet/Internal.hs @@ -321,6 +321,8 @@ keysSet m = fromMap (() <$ m) -- -- >>> fromList [1,2] `isSubsetOf` fromList [1,3] -- False +-- +-- @since 0.2.12 isSubsetOf :: (Eq a, Hashable a) => HashSet a -> HashSet a -> Bool isSubsetOf s1 s2 = H.isSubmapOfBy (\_ _ -> True) (asMap s1) (asMap s2) diff --git a/unordered-containers.cabal b/unordered-containers.cabal index e81d8b41..e5df5eee 100644 --- a/unordered-containers.cabal +++ b/unordered-containers.cabal @@ -1,5 +1,5 @@ name: unordered-containers -version: 0.2.11.0 +version: 0.2.12.0 synopsis: Efficient hashing-based container types description: Efficient hashing-based container types. The containers have been