package org.apache.archiva.reports.consumers;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import org.apache.archiva.admin.model.beans.ManagedRepository;
import org.apache.archiva.checksum.ChecksumAlgorithm;
import org.apache.archiva.checksum.ChecksummedFile;
import org.apache.archiva.configuration.ArchivaConfiguration;
import org.apache.archiva.configuration.ConfigurationNames;
import org.apache.archiva.configuration.FileTypes;
import org.apache.archiva.consumers.AbstractMonitoredConsumer;
import org.apache.archiva.consumers.ConsumerException;
import org.apache.archiva.consumers.KnownRepositoryContentConsumer;
import org.apache.archiva.metadata.model.ArtifactMetadata;
import org.apache.archiva.metadata.model.facets.RepositoryProblemFacet;
import org.apache.archiva.metadata.repository.MetadataRepository;
import org.apache.archiva.metadata.repository.MetadataRepositoryException;
import org.apache.archiva.metadata.repository.RepositorySession;
import org.apache.archiva.metadata.repository.RepositorySessionFactory;
import org.apache.archiva.metadata.repository.storage.RepositoryPathTranslator;
import org.apache.archiva.redback.components.registry.Registry;
import org.apache.archiva.redback.components.registry.RegistryListener;
import org.apache.commons.collections.CollectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.context.annotation.Scope;
import org.springframework.stereotype.Service;

import javax.annotation.PostConstruct;
import javax.inject.Inject;
import javax.inject.Named;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.List;

/**
 * Search the artifact repository of known SHA1 Checksums for potential duplicate artifacts.
 * <p>
 * TODO: no need for this to be a scanner - we can just query the database / content repository to get a full list
 */
@Service ( "knownRepositoryContentConsumer#duplicate-artifacts" )
@Scope ( "prototype" )
public class DuplicateArtifactsConsumer
    extends AbstractMonitoredConsumer
    implements KnownRepositoryContentConsumer, RegistryListener
{
    private Logger log = LoggerFactory.getLogger( DuplicateArtifactsConsumer.class );

    private String id = "duplicate-artifacts";

    private String description = "Check for Duplicate Artifacts via SHA1 Checksums";

    @Inject
    private ArchivaConfiguration configuration;

    @Inject
    private FileTypes filetypes;

    /**
     * FIXME: this could be multiple implementations and needs to be configured.
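     * Only a single session factory is injected here, so every repository currently goes
     * through the same metadata store.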
     */
    @Inject
    private RepositorySessionFactory repositorySessionFactory;

    private List<String> includes = new ArrayList<>();

    private File repositoryDir;

    private String repoId;

    /**
     * FIXME: needs to be selected based on the repository in question
     */
    @Inject
    @Named ( value = "repositoryPathTranslator#maven2" )
    private RepositoryPathTranslator pathTranslator;

    private RepositorySession repositorySession;

    @Override
    public String getId()
    {
        return id;
    }

    @Override
    public String getDescription()
    {
        return description;
    }

    @Override
    public List<String> getIncludes()
    {
        return includes;
    }

    @Override
    public List<String> getExcludes()
    {
        return Collections.emptyList();
    }

    @Override
    public void beginScan( ManagedRepository repo, Date whenGathered )
        throws ConsumerException
    {
        repoId = repo.getId();
        this.repositoryDir = new File( repo.getLocation() );
        repositorySession = repositorySessionFactory.createSession();
    }

    @Override
    public void beginScan( ManagedRepository repo, Date whenGathered, boolean executeOnEntireRepo )
        throws ConsumerException
    {
        beginScan( repo, whenGathered );
    }

    @Override
    public void processFile( String path )
        throws ConsumerException
    {
        File artifactFile = new File( this.repositoryDir, path );

        // TODO: would be quicker to somehow make sure it ran after the update database consumer, or as a part of that
        // perhaps could use an artifact context that is retained for all consumers? First in can set the SHA-1
        // alternatively this could come straight from the storage resolver, which could populate the artifact metadata
        // in the later parse call with the desired checksum and use that
        String checksumSha1;
        ChecksummedFile checksummedFile = new ChecksummedFile( artifactFile );
        try
        {
            checksumSha1 = checksummedFile.calculateChecksum( ChecksumAlgorithm.SHA1 );
        }
        catch ( IOException e )
        {
            throw new ConsumerException( e.getMessage(), e );
        }

        MetadataRepository metadataRepository = repositorySession.getRepository();

        Collection<ArtifactMetadata> results;
        try
        {
            results = metadataRepository.getArtifactsByChecksum( repoId, checksumSha1 );
        }
        catch ( MetadataRepositoryException e )
        {
            repositorySession.close();
            throw new ConsumerException( e.getMessage(), e );
        }

        if ( CollectionUtils.isNotEmpty( results ) )
        {
            ArtifactMetadata originalArtifact;
            try
            {
                originalArtifact = pathTranslator.getArtifactForPath( repoId, path );
            }
            catch ( Exception e )
            {
                log.warn( "Not reporting problem for invalid artifact in checksum check: {}", e.getMessage() );
                return;
            }

            for ( ArtifactMetadata dupArtifact : results )
            {
                String id = path.substring( path.lastIndexOf( '/' ) + 1 );
                if ( dupArtifact.getId().equals( id )
                    && dupArtifact.getNamespace().equals( originalArtifact.getNamespace() )
                    && dupArtifact.getProject().equals( originalArtifact.getProject() )
                    && dupArtifact.getVersion().equals( originalArtifact.getVersion() ) )
                {
                    // Skip reference to itself.
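                    // (The checksum query returns the scanned artifact itself once its metadata
                    // has been stored, so an exact coordinate match is not a real duplicate.)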
                    log.debug( "Not counting duplicate for artifact {} for path {}", dupArtifact, path );
                    continue;
                }

                RepositoryProblemFacet problem = new RepositoryProblemFacet();
                problem.setRepositoryId( repoId );
                problem.setNamespace( originalArtifact.getNamespace() );
                problem.setProject( originalArtifact.getProject() );
                problem.setVersion( originalArtifact.getVersion() );
                problem.setId( id );
                // FIXME: need to get the right storage resolver for the repository the dupe artifact is in,
                //        it might be a different type
                // FIXME: we need the project version here, not the artifact version
                problem.setMessage( "Duplicate Artifact Detected: " + path + " <--> " + pathTranslator.toPath(
                    dupArtifact.getNamespace(), dupArtifact.getProject(), dupArtifact.getVersion(),
                    dupArtifact.getId() ) );
                problem.setProblem( "duplicate-artifact" );

                try
                {
                    metadataRepository.addMetadataFacet( repoId, problem );
                }
                catch ( MetadataRepositoryException e )
                {
                    throw new ConsumerException( e.getMessage(), e );
                }
            }
        }
    }

    @Override
    public void processFile( String path, boolean executeOnEntireRepo )
        throws ConsumerException
    {
        processFile( path );
    }

    @Override
    public void completeScan()
    {
        repositorySession.close();
    }

    @Override
    public void completeScan( boolean executeOnEntireRepo )
    {
        completeScan();
    }

    @Override
    public void afterConfigurationChange( Registry registry, String propertyName, Object propertyValue )
    {
        if ( ConfigurationNames.isRepositoryScanning( propertyName ) )
        {
            initIncludes();
        }
    }

    @Override
    public void beforeConfigurationChange( Registry registry, String propertyName, Object propertyValue )
    {
        /* do nothing */
    }

    private void initIncludes()
    {
        includes.clear();
        includes.addAll( filetypes.getFileTypePatterns( FileTypes.ARTIFACTS ) );
    }

    @PostConstruct
    public void initialize()
    {
        initIncludes();
        configuration.addChangeListener( this );
    }
}
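/*
 * A minimal lifecycle sketch, assuming Spring has wired the @Inject dependencies and that
 * "repo" is some ManagedRepository; the scanned path below is hypothetical. In Archiva the
 * repository scanner drives these calls, so this only illustrates the begin/process/complete
 * contract of the consumer:
 *
 *   KnownRepositoryContentConsumer consumer =
 *       applicationContext.getBean( "knownRepositoryContentConsumer#duplicate-artifacts",
 *                                   KnownRepositoryContentConsumer.class );
 *   consumer.beginScan( repo, new Date() );                     // opens a RepositorySession
 *   consumer.processFile( "org/example/lib/1.0/lib-1.0.jar" );  // SHA-1 lookup; records a
 *                                                               // RepositoryProblemFacet per duplicate
 *   consumer.completeScan();                                    // closes the session
 */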