import qualified Data.Aeson as Aeson
import qualified Data.ByteString.Lazy as BL
import qualified Data.Map.Strict as Map
+import qualified Data.Text as T
+import Data.Text (Text)
import System.Directory (doesFileExist, listDirectory)
import System.FilePath ((</>), takeExtension)
-- | Combine many 'ModuleDump's, deduplicating classes / families /
-- instances by their 'QualName' identity. The first occurrence wins.
+--
+-- Before deduplication every 'QualName' in the schema is normalised so
+-- the same package referenced from different dumps maps to the same id.
+-- Cabal otherwise hands us @pkg-1.0-inplace@, @pkg-1.0-<hash>@, and
+-- @pkg-1.0-l-api-<hash>@ for the same package depending on whether the
+-- caller is building it locally, depending on the installed version, or
+-- using an internal-library — they're all the same package, but their
+-- raw package ids differ. We strip the version-and-suffix tail.
mergeDumps :: [ModuleDump] -> ProgramData
-mergeDumps dumps = ProgramData
- { pdClasses = dedupOn ciName (concatMap (pdClasses . mdData) dumps)
- , pdInstances = concatMap (pdInstances . mdData) dumps
- -- Instances aren't deduplicated: each is uniquely identified by class
- -- + arg types, but its serialised form already contains everything we
- -- need; collisions are unlikely except for re-extracted modules.
- , pdTypeFamilies = dedupOn tfName (concatMap (pdTypeFamilies . mdData) dumps)
- , pdFamInstances = concatMap (pdFamInstances . mdData) dumps
- }
+mergeDumps dumps =
+ let normalised = map (mapDumpQNs normalisePackageInQN) dumps
+ in ProgramData
+ { pdClasses = dedupOn ciName (concatMap (pdClasses . mdData) normalised)
+ , pdInstances = concatMap (pdInstances . mdData) normalised
+ -- Instances aren't deduplicated: each is uniquely identified by class
+ -- + arg types, but its serialised form already contains everything we
+ -- need; collisions are unlikely except for re-extracted modules.
+ , pdTypeFamilies = dedupOn tfName (concatMap (pdTypeFamilies . mdData) normalised)
+ , pdFamInstances = concatMap (pdFamInstances . mdData) normalised
+ }
dedupOn :: Ord k => (a -> k) -> [a] -> [a]
dedupOn key = Map.elems . Map.fromListWith (\_new old -> old) . map (\x -> (key x, x))
+
+------------------------------------------------------------------------------
+-- QualName normalisation
+
+-- | Strip the @-<version>-…@ tail off a Cabal package id so that
+-- @cardano-ledger-shelley-1.18.1.0-inplace@ and
+-- @cardano-ledger-shelley-1.18.1.0-734aab…@ both collapse to
+-- @cardano-ledger-shelley@. The heuristic: split on @-@, find the first
+-- segment that starts with a digit (the version), keep everything before
+-- it. Falls back to the original string if no such segment exists.
+normalisePackageId :: Text -> Text
+normalisePackageId pkg =
+ let parts = T.splitOn "-" pkg
+ isVersionPart t = case T.uncons t of
+ Just (c, _) -> c >= '0' && c <= '9'
+ Nothing -> False
+ name = T.intercalate "-" (takeWhile (not . isVersionPart) parts)
+ in if T.null name then pkg else name
+
+normalisePackageInQN :: QualName -> QualName
+normalisePackageInQN q = q { qnPackage = normalisePackageId (qnPackage q) }
+
+-- | Apply a 'QualName' transformation to every 'QualName' that appears
+-- anywhere inside a 'ModuleDump'. Used to canonicalise package ids before
+-- merging so cross-dump references collapse.
+mapDumpQNs :: (QualName -> QualName) -> ModuleDump -> ModuleDump
+mapDumpQNs f d = d { mdData = mapPdQNs f (mdData d) }
+
+mapPdQNs :: (QualName -> QualName) -> ProgramData -> ProgramData
+mapPdQNs f pd = pd
+ { pdClasses = map (mapClassQNs f) (pdClasses pd)
+ , pdInstances = map (mapInstanceQNs f) (pdInstances pd)
+ , pdTypeFamilies = map (mapTypeFamilyQNs f) (pdTypeFamilies pd)
+ , pdFamInstances = map (mapFamInstQNs f) (pdFamInstances pd)
+ }
+
+mapClassQNs :: (QualName -> QualName) -> ClassInfo -> ClassInfo
+mapClassQNs f c = c
+ { ciName = f (ciName c)
+ , ciSuperclasses = map (mapSuperEdgeQNs f) (ciSuperclasses c)
+ , ciAssocTypes = map f (ciAssocTypes c)
+ }
+
+mapSuperEdgeQNs :: (QualName -> QualName) -> SuperclassEdge -> SuperclassEdge
+mapSuperEdgeQNs f se = se
+ { seSuperclass = f (seSuperclass se)
+ , seArgs = map (mapTypeArgQNs f) (seArgs se)
+ }
+
+mapTypeArgQNs :: (QualName -> QualName) -> TypeArg -> TypeArg
+mapTypeArgQNs f t = case t of
+ TyVarRef i -> TyVarRef i
+ TyConApp q args -> TyConApp (f q) (map (mapTypeArgQNs f) args)
+ FamilyApp q args -> FamilyApp (f q) (map (mapTypeArgQNs f) args)
+ LitArg s -> LitArg s
+ OtherArg s -> OtherArg s
+
+mapInstanceQNs :: (QualName -> QualName) -> InstanceInfo -> InstanceInfo
+mapInstanceQNs f i = i
+ { iiClass = f (iiClass i)
+ , iiArgs = map (mapTypeArgQNs f) (iiArgs i)
+ , iiContext = map (mapPredQNs f) (iiContext i)
+ }
+
+mapPredQNs :: (QualName -> QualName) -> PredInfo -> PredInfo
+mapPredQNs f p = p
+ { piClass = f (piClass p)
+ , piArgs = map (mapTypeArgQNs f) (piArgs p)
+ }
+
+mapTypeFamilyQNs :: (QualName -> QualName) -> TypeFamilyInfo -> TypeFamilyInfo
+mapTypeFamilyQNs f tf = tf
+ { tfName = f (tfName tf)
+ , tfFlavor = case tfFlavor tf of
+ AssocFam q -> AssocFam (f q)
+ other -> other
+ , tfEquations = map (mapFamInstQNs f) (tfEquations tf)
+ }
+
+mapFamInstQNs :: (QualName -> QualName) -> FamInstInfo -> FamInstInfo
+mapFamInstQNs f fi = fi
+ { fiFamily = f (fiFamily fi)
+ , fiArgs = map (mapTypeArgQNs f) (fiArgs fi)
+ , fiRhs = mapTypeArgQNs f (fiRhs fi)
+ }