package edu.unc.lib.deposit.validate; import java.io.File; import java.net.URI; import java.net.URISyntaxException; import java.util.HashMap; import java.util.Map; import java.util.Map.Entry; import org.jdom2.Element; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.Property; import com.hp.hpl.jena.rdf.model.RDFNode; import com.hp.hpl.jena.rdf.model.Resource; import com.hp.hpl.jena.rdf.model.SimpleSelector; import com.hp.hpl.jena.rdf.model.Statement; import com.hp.hpl.jena.rdf.model.StmtIterator; import com.philvarner.clamavj.ClamScan; import com.philvarner.clamavj.ScanResult; import edu.unc.lib.deposit.work.AbstractDepositJob; import edu.unc.lib.dl.fedora.PID; import edu.unc.lib.dl.util.ContentModelHelper; import edu.unc.lib.dl.util.PremisEventLogger; import edu.unc.lib.dl.util.PremisEventLogger.Type; import edu.unc.lib.staging.Stages; import edu.unc.lib.staging.StagingException; /** * Scans all files mentioned in N3 manifest for viruses. If a Staging configuration * is supplied then staged files will be resolved. Otherwise files are resolved * with the bag directory as the base URI. * @author count0 * */ public class VirusScanJob extends AbstractDepositJob { private static final Logger log = LoggerFactory .getLogger(VirusScanJob.class); private ClamScan clamScan; private Stages stages; public VirusScanJob() { super(); } public ClamScan getClamScan() { return clamScan; } public void setClamScan(ClamScan clamScan) { this.clamScan = clamScan; } public Stages getStages() { return stages; } public void setStages(Stages stages) { this.stages = stages; } public VirusScanJob(String uuid, String depositUUID) { super(uuid, depositUUID); } @Override public void runJob() { log.debug("Running virus checks on : {}", getDepositDirectory()); // get ClamScan software and database versions String version = this.clamScan.cmd("nVERSION\n".getBytes()).trim(); Map<PID, String> hrefs = new HashMap<PID, String>(); Map<String, String> failures = new HashMap<String, String>(); Model model = getReadOnlyModel(); Property fileLocation = model .createProperty(ContentModelHelper.DepositRelationship.stagingLocation.toString()); StmtIterator i = model.listStatements(new SimpleSelector((Resource)null, fileLocation, (RDFNode)null)); while (i.hasNext()) { Statement s = i.nextStatement(); PID p = new PID(s.getSubject().getURI()); String href = s.getObject().asLiteral().getString(); hrefs.put(p, href); } setTotalClicks(hrefs.size()); int scannedObjects = 0; for (Entry<PID, String> href : hrefs.entrySet()) { verifyRunning(); URI manifestURI; URI storageURI = null; try { manifestURI = new URI(href.getValue()); if (manifestURI.getScheme() == null) { storageURI = manifestURI; } else { storageURI = getStages().getStorageURI(manifestURI); } } catch (URISyntaxException e) { failJob(e, "Unable to parse manifest URI: {0}", href.getValue()); } catch (StagingException e) { failJob(e, "Unable to resolve staging location for file: {0}", href.getValue()); } if (storageURI.getScheme() == null || storageURI.getScheme().contains("file")) { if(!storageURI.isAbsolute()) { storageURI = getDepositDirectory().toURI().resolve(storageURI); } File file = new File(storageURI.getPath()); ScanResult result = this.clamScan.scan(file); switch (result.getStatus()) { case FAILED: Element ev = getEventLog().logEvent( PremisEventLogger.Type.VIRUS_CHECK, "File failed pre-ingest scan for viruses: "+storageURI.getPath(), href.getKey(), ContentModelHelper.Datastream.DATA_FILE.getName()); PremisEventLogger.addSoftwareAgent(ev, "ClamAV", version); PremisEventLogger.addDetailedOutcome(ev, "failure", "found virus signature " + result.getSignature(), null); appendDepositEvent(href.getKey(), ev); failures.put(storageURI.toString(), result.getSignature()); break; case ERROR: throw new Error( "Virus checks are producing errors: " + result.getException() .getLocalizedMessage()); case PASSED: Element ev2 = getEventLog().logEvent( PremisEventLogger.Type.VIRUS_CHECK, "File passed pre-ingest scan for viruses.", href.getKey(), ContentModelHelper.Datastream.DATA_FILE.getName()); PremisEventLogger.addSoftwareAgent(ev2, "ClamAV", version); PremisEventLogger.addDetailedOutcome(ev2, "success", null, null); appendDepositEvent(href.getKey(), ev2); scannedObjects++; break; } } addClicks(1); } if(failures.size() > 0) { StringBuilder sb = new StringBuilder("Virus checks failed for some files:\n"); for(String uri : failures.keySet()) { sb.append(uri).append(" - ").append(failures.get(uri)).append("\n"); } failJob(failures.size() + " virus check(s) failed.", sb.toString()); } else { if (scannedObjects != hrefs.size()) { failJob("Virus scan job did not attempt to scan all files.", (hrefs.size() - scannedObjects) + " objects were not scanned."); } recordDepositEvent(Type.VIRUS_CHECK, "{0} files scanned for viruses.", scannedObjects); } } }