package dk.statsbiblioteket.medieplatform.autonomous.iterator.filesystem.transforming; import dk.statsbiblioteket.medieplatform.autonomous.iterator.AbstractIterator; import dk.statsbiblioteket.medieplatform.autonomous.iterator.common.AttributeParsingEvent; import dk.statsbiblioteket.medieplatform.autonomous.iterator.filesystem.FileAttributeParsingEvent; import dk.statsbiblioteket.util.Pair; import java.io.File; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Map; import java.util.regex.Pattern; /** Common super class for the transforming iterators. */ public abstract class CommonTransformingIterator extends AbstractIterator<File> { private final String groupingChar; private final File batchFolder; private String checksumPostfix; protected CommonTransformingIterator(File id, File batchFolder, String dataFilePattern, String checksumPostfix, String groupingChar) { super(id, dataFilePattern); this.batchFolder = batchFolder; this.checksumPostfix = checksumPostfix; this.groupingChar = groupingChar; } /** * Get the files that are identified as attributes in a collection of files * * @param files the files to examine * * @return the data files */ protected Collection<File> getDataFiles(Collection<File> files) { Collection<File> datafiles = new ArrayList<>(); for (File attribute : files) { if (attribute.getName().matches(getDataFilePattern())) { datafiles.add(attribute); } } return datafiles; } /** * Utility method, does the collection contain data files? * * @param files the files to examine * * @return true if a data file is found */ protected boolean containsDatafiles(Collection<File> files) { return getDataFiles(files).size() > 0; } /** * Get the shortest group that contain no datafiles from a list grouping. If there is no unique group, return null * * @param groupedByPrefix the map of groups * * @return the shortest group without datafiles or null */ protected Pair<String, List<File>> getShortestNoDataFilesGroup(Map<String, List<File>> groupedByPrefix) { Pair<String, List<File>> shortestGroup = null; for (Map.Entry<String, List<File>> group : groupedByPrefix.entrySet()) { if (!containsDatafiles(group.getValue())) { if (shortestGroup == null || shortestGroup.getLeft().length() > group.getKey().length()) { shortestGroup = new Pair<>(group.getKey(), group.getValue()); } } } return shortestGroup; } @Override protected AttributeParsingEvent makeAttributeEvent(File nodeID, File attributeID) { return new FileAttributeParsingEvent(toPathID(attributeID), attributeID, checksumPostfix); } @Override protected String getIdOfNode() { return toPathID(id); } public String getChecksumPostfix() { return checksumPostfix; } /** * Get the batchFolder of a file * * @param file the file * * @return the batchFolder * @see #groupingChar */ protected String getPrefix(File file) { return file.getName().split(groupingChar)[0]; } public String getGroupingChar() { return groupingChar; } public File getBatchFolder() { return batchFolder; } public String toPathID(File id) { return id.getAbsolutePath().replaceFirst(Pattern.quote(getBatchFolder().getAbsolutePath() + "/"), ""); } }