From c1239564b90d301c1adfb1bf8c56154e963630d7 Mon Sep 17 00:00:00 2001 From: Javier Sagredo Date: Mon, 4 May 2026 00:49:22 +0200 Subject: [PATCH] Normalise package ids when merging dumps --- src/Classgraph/Merge.hs | 115 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 106 insertions(+), 9 deletions(-) diff --git a/src/Classgraph/Merge.hs b/src/Classgraph/Merge.hs index 2fac1ad..9e05642 100644 --- a/src/Classgraph/Merge.hs +++ b/src/Classgraph/Merge.hs @@ -12,6 +12,8 @@ import Control.Monad (filterM) import qualified Data.Aeson as Aeson import qualified Data.ByteString.Lazy as BL import qualified Data.Map.Strict as Map +import qualified Data.Text as T +import Data.Text (Text) import System.Directory (doesFileExist, listDirectory) import System.FilePath ((), takeExtension) @@ -46,16 +48,111 @@ readDump fp = do -- | Combine many 'ModuleDump's, deduplicating classes / families / -- instances by their 'QualName' identity. The first occurrence wins. +-- +-- Before deduplication every 'QualName' in the schema is normalised so +-- the same package referenced from different dumps maps to the same id. +-- Cabal otherwise hands us @pkg-1.0-inplace@, @pkg-1.0-@, and +-- @pkg-1.0-l-api-@ for the same package depending on whether the +-- caller is building it locally, depending on the installed version, or +-- using an internal-library — they're all the same package, but their +-- raw package ids differ. We strip the version-and-suffix tail. mergeDumps :: [ModuleDump] -> ProgramData -mergeDumps dumps = ProgramData - { pdClasses = dedupOn ciName (concatMap (pdClasses . mdData) dumps) - , pdInstances = concatMap (pdInstances . mdData) dumps - -- Instances aren't deduplicated: each is uniquely identified by class - -- + arg types, but its serialised form already contains everything we - -- need; collisions are unlikely except for re-extracted modules. - , pdTypeFamilies = dedupOn tfName (concatMap (pdTypeFamilies . mdData) dumps) - , pdFamInstances = concatMap (pdFamInstances . mdData) dumps - } +mergeDumps dumps = + let normalised = map (mapDumpQNs normalisePackageInQN) dumps + in ProgramData + { pdClasses = dedupOn ciName (concatMap (pdClasses . mdData) normalised) + , pdInstances = concatMap (pdInstances . mdData) normalised + -- Instances aren't deduplicated: each is uniquely identified by class + -- + arg types, but its serialised form already contains everything we + -- need; collisions are unlikely except for re-extracted modules. + , pdTypeFamilies = dedupOn tfName (concatMap (pdTypeFamilies . mdData) normalised) + , pdFamInstances = concatMap (pdFamInstances . mdData) normalised + } dedupOn :: Ord k => (a -> k) -> [a] -> [a] dedupOn key = Map.elems . Map.fromListWith (\_new old -> old) . map (\x -> (key x, x)) + +------------------------------------------------------------------------------ +-- QualName normalisation + +-- | Strip the @--…@ tail off a Cabal package id so that +-- @cardano-ledger-shelley-1.18.1.0-inplace@ and +-- @cardano-ledger-shelley-1.18.1.0-734aab…@ both collapse to +-- @cardano-ledger-shelley@. The heuristic: split on @-@, find the first +-- segment that starts with a digit (the version), keep everything before +-- it. Falls back to the original string if no such segment exists. +normalisePackageId :: Text -> Text +normalisePackageId pkg = + let parts = T.splitOn "-" pkg + isVersionPart t = case T.uncons t of + Just (c, _) -> c >= '0' && c <= '9' + Nothing -> False + name = T.intercalate "-" (takeWhile (not . isVersionPart) parts) + in if T.null name then pkg else name + +normalisePackageInQN :: QualName -> QualName +normalisePackageInQN q = q { qnPackage = normalisePackageId (qnPackage q) } + +-- | Apply a 'QualName' transformation to every 'QualName' that appears +-- anywhere inside a 'ModuleDump'. Used to canonicalise package ids before +-- merging so cross-dump references collapse. +mapDumpQNs :: (QualName -> QualName) -> ModuleDump -> ModuleDump +mapDumpQNs f d = d { mdData = mapPdQNs f (mdData d) } + +mapPdQNs :: (QualName -> QualName) -> ProgramData -> ProgramData +mapPdQNs f pd = pd + { pdClasses = map (mapClassQNs f) (pdClasses pd) + , pdInstances = map (mapInstanceQNs f) (pdInstances pd) + , pdTypeFamilies = map (mapTypeFamilyQNs f) (pdTypeFamilies pd) + , pdFamInstances = map (mapFamInstQNs f) (pdFamInstances pd) + } + +mapClassQNs :: (QualName -> QualName) -> ClassInfo -> ClassInfo +mapClassQNs f c = c + { ciName = f (ciName c) + , ciSuperclasses = map (mapSuperEdgeQNs f) (ciSuperclasses c) + , ciAssocTypes = map f (ciAssocTypes c) + } + +mapSuperEdgeQNs :: (QualName -> QualName) -> SuperclassEdge -> SuperclassEdge +mapSuperEdgeQNs f se = se + { seSuperclass = f (seSuperclass se) + , seArgs = map (mapTypeArgQNs f) (seArgs se) + } + +mapTypeArgQNs :: (QualName -> QualName) -> TypeArg -> TypeArg +mapTypeArgQNs f t = case t of + TyVarRef i -> TyVarRef i + TyConApp q args -> TyConApp (f q) (map (mapTypeArgQNs f) args) + FamilyApp q args -> FamilyApp (f q) (map (mapTypeArgQNs f) args) + LitArg s -> LitArg s + OtherArg s -> OtherArg s + +mapInstanceQNs :: (QualName -> QualName) -> InstanceInfo -> InstanceInfo +mapInstanceQNs f i = i + { iiClass = f (iiClass i) + , iiArgs = map (mapTypeArgQNs f) (iiArgs i) + , iiContext = map (mapPredQNs f) (iiContext i) + } + +mapPredQNs :: (QualName -> QualName) -> PredInfo -> PredInfo +mapPredQNs f p = p + { piClass = f (piClass p) + , piArgs = map (mapTypeArgQNs f) (piArgs p) + } + +mapTypeFamilyQNs :: (QualName -> QualName) -> TypeFamilyInfo -> TypeFamilyInfo +mapTypeFamilyQNs f tf = tf + { tfName = f (tfName tf) + , tfFlavor = case tfFlavor tf of + AssocFam q -> AssocFam (f q) + other -> other + , tfEquations = map (mapFamInstQNs f) (tfEquations tf) + } + +mapFamInstQNs :: (QualName -> QualName) -> FamInstInfo -> FamInstInfo +mapFamInstQNs f fi = fi + { fiFamily = f (fiFamily fi) + , fiArgs = map (mapTypeArgQNs f) (fiArgs fi) + , fiRhs = mapTypeArgQNs f (fiRhs fi) + } -- 2.54.0