package org.molgenis.data.vcf;
import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import com.google.common.collect.Iterables;
import com.google.common.collect.Iterators;
import org.molgenis.data.Entity;
import org.molgenis.data.RepositoryCapability;
import org.molgenis.data.meta.model.AttributeFactory;
import org.molgenis.data.meta.model.EntityType;
import org.molgenis.data.meta.model.EntityTypeFactory;
import org.molgenis.data.support.AbstractRepository;
import org.molgenis.data.vcf.format.VcfToEntity;
import org.molgenis.data.vcf.model.VcfAttributes;
import org.molgenis.vcf.VcfReader;
import org.molgenis.vcf.VcfRecord;
import org.molgenis.vcf.meta.VcfMeta;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.Iterator;
import java.util.Set;
import static java.util.Objects.requireNonNull;
/**
* Repository implementation for vcf files.
* <p>
* The filename without the extension is considered to be the entityname
*/
public class VcfRepository extends AbstractRepository
{
private static final Logger LOG = LoggerFactory.getLogger(VcfRepository.class);
public static final String DEFAULT_ATTRIBUTE_DESCRIPTION = "Description not provided";
public static final String NAME = "NAME";
public static final String ORIGINAL_NAME = "ORIGINAL_NAME";
public static final String PREFIX = "##";
private final VcfReaderFactory vcfReaderFactory;
private final String entityName;
private final VcfAttributes vcfAttributes;
private final EntityTypeFactory entityTypeFactory;
private final AttributeFactory attrMetaFactory;
protected final Supplier<VcfToEntity> vcfToEntitySupplier;
public VcfRepository(File file, String entityName, VcfAttributes vcfAttributes, EntityTypeFactory entityTypeFactory,
AttributeFactory attrMetaFactory) throws IOException
{
this(new VcfReaderFactoryImpl(file), entityName, vcfAttributes, entityTypeFactory, attrMetaFactory);
}
protected VcfRepository(VcfReaderFactory vcfReaderFactory, String entityName, VcfAttributes vcfAttributes,
EntityTypeFactory entityTypeFactory, AttributeFactory attrMetaFactory)
{
this.vcfReaderFactory = requireNonNull(vcfReaderFactory);
this.entityName = requireNonNull(entityName);
this.vcfAttributes = requireNonNull(vcfAttributes);
this.entityTypeFactory = requireNonNull(entityTypeFactory);
this.attrMetaFactory = requireNonNull(attrMetaFactory);
this.vcfToEntitySupplier = Suppliers.memoize(this::parseVcfMeta);
}
private VcfToEntity parseVcfMeta()
{
VcfReader reader = vcfReaderFactory.get();
try
{
VcfMeta vcfMeta = reader.getVcfMeta();
return new VcfToEntity(entityName, vcfMeta, vcfAttributes, entityTypeFactory, attrMetaFactory);
}
catch (Exception e)
{
LOG.error("Failed to read VCF Metadata from file", e);
return null;
}
finally
{
try
{
reader.close();
}
catch (IOException e)
{
LOG.info("Failed to close VcfReader", e);
}
}
}
/**
* Returns an iterator for this repository.
* <p>
* Use with caution! Multiple iterators will all point to the same line in the VCF file, leading to unpredictable
* behaviour. If you want to get the EntityType of this repository and you can't access getEntityType(),
* convert the iterator to a PeekingIterator and peek the first Entity.
*/
@Override
public Iterator<Entity> iterator()
{
Iterator<VcfRecord> vcfRecordIterator = Iterators.unmodifiableIterator(vcfReaderFactory.get().iterator());
VcfToEntity vcfToEntity = vcfToEntitySupplier.get();
return Iterators.transform(vcfRecordIterator, vcfToEntity::toEntity);
}
public EntityType getEntityType()
{
return vcfToEntitySupplier.get().getEntityType();
}
@Override
public void close() throws IOException
{
vcfReaderFactory.close();
}
@Override
public Set<RepositoryCapability> getCapabilities()
{
return Collections.emptySet();
}
@Override
public long count()
{
return Iterables.size(this);
}
}