package org.molgenis.data.csv; import com.google.common.collect.Lists; import org.apache.commons.io.IOUtils; import org.molgenis.data.Entity; import org.molgenis.data.MolgenisDataException; import org.molgenis.data.MolgenisInvalidFormatException; import org.molgenis.data.Repository; import org.molgenis.data.meta.model.AttributeFactory; import org.molgenis.data.meta.model.EntityType; import org.molgenis.data.meta.model.EntityTypeFactory; import org.molgenis.data.processor.CellProcessor; import org.molgenis.data.support.FileRepositoryCollection; import org.molgenis.data.support.GenericImporterExtensions; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.util.StringUtils; import java.io.File; import java.io.IOException; import java.util.Enumeration; import java.util.Iterator; import java.util.List; import java.util.zip.ZipEntry; import java.util.zip.ZipFile; /** * Reads csv and tsv files. Can be bundled together in a zipfile. * <p> * The exposes the files as {@link org.molgenis.data.Repository}. The names of the repositories are the names of the * files without the extension */ public class CsvRepositoryCollection extends FileRepositoryCollection { public static final String NAME = "CSV"; private static final String MAC_ZIP = "__MACOSX"; private final File file; private EntityTypeFactory entityTypeFactory; private AttributeFactory attrMetaFactory; private List<String> entityNames; private List<String> entityNamesLowerCase; public CsvRepositoryCollection(File file) throws MolgenisInvalidFormatException, IOException { this(file, (CellProcessor[]) null); } public CsvRepositoryCollection(File file, CellProcessor... cellProcessors) throws MolgenisInvalidFormatException, IOException { super(GenericImporterExtensions.getCSV(), cellProcessors); this.file = file; loadEntityNames(); } @Override public void init() throws IOException { // no operation } @Override public Iterable<String> getEntityNames() { return entityNames; } @Override public Repository<Entity> getRepository(String name) { if (!entityNamesLowerCase.contains(name.toLowerCase())) { return null; } return new CsvRepository(file, entityTypeFactory, attrMetaFactory, name, cellProcessors); } private void loadEntityNames() { String extension = StringUtils.getFilenameExtension(file.getName()); entityNames = Lists.newArrayList(); entityNamesLowerCase = Lists.newArrayList(); if (extension.equalsIgnoreCase(GenericImporterExtensions.ZIP.toString())) { ZipFile zipFile = null; try { zipFile = new ZipFile(file); for (Enumeration<? extends ZipEntry> e = zipFile.entries(); e.hasMoreElements(); ) { ZipEntry entry = e.nextElement(); if (!entry.getName().contains(MAC_ZIP)) { String name = getRepositoryName(entry.getName()); entityNames.add(name); entityNamesLowerCase.add(name.toLowerCase()); } } } catch (Exception e) { throw new MolgenisDataException(e); } finally { IOUtils.closeQuietly(zipFile); } } else { String name = getRepositoryName(file.getName()); entityNames.add(name); entityNamesLowerCase.add(name.toLowerCase()); } } private static String getRepositoryName(String fileName) { return StringUtils.stripFilenameExtension(StringUtils.getFilename(fileName)); } @Override public String getName() { return NAME; } @Override public Iterator<Repository<Entity>> iterator() { return new Iterator<Repository<Entity>>() { Iterator<String> it = getEntityNames().iterator(); @Override public boolean hasNext() { return it.hasNext(); } @Override public Repository<Entity> next() { return getRepository(it.next()); } }; } @Override public boolean hasRepository(String name) { return entityNames.contains(name); } @Override public boolean hasRepository(EntityType entityType) { return hasRepository(entityType.getName()); } @Autowired public void setEntityTypeFactory(EntityTypeFactory entityTypeFactory) { this.entityTypeFactory = entityTypeFactory; } @Autowired public void setAttributeFactory(AttributeFactory attrMetaFactory) { this.attrMetaFactory = attrMetaFactory; } }