package org.molgenis.data.annotation.core.entity.impl.hpo;
import au.com.bytecode.opencsv.CSVParser;
import au.com.bytecode.opencsv.CSVReader;
import com.google.common.collect.Iterables;
import org.molgenis.data.Entity;
import org.molgenis.data.MolgenisDataException;
import org.molgenis.data.Query;
import org.molgenis.data.QueryRule.Operator;
import org.molgenis.data.RepositoryCapability;
import org.molgenis.data.meta.model.AttributeFactory;
import org.molgenis.data.meta.model.EntityType;
import org.molgenis.data.meta.model.EntityTypeFactory;
import org.molgenis.data.support.AbstractRepository;
import org.molgenis.data.support.DynamicEntity;
import java.io.*;
import java.nio.charset.Charset;
import java.util.*;
import java.util.stream.Stream;
import static org.molgenis.data.meta.model.EntityType.AttributeRole.ROLE_ID;
/**
 * Read-only repository backed by a tab-separated HPO annotation file.
 * <p>
 * The file is parsed lazily on first access and the result is kept in memory, grouped by gene symbol
 * in file order. Each data row yields one entity with four attributes: disease id (column 0),
 * gene symbol (column 1), HPO id (column 3, the id attribute) and HPO term name (column 4).
 * The first line of the file is skipped.
 * <p>
 * The lazy initialisation is not synchronized.
 */
public class HPORepository extends AbstractRepository
{
    public static final String HPO_DISEASE_ID_COL_NAME = "diseaseId";
    public static final String HPO_GENE_SYMBOL_COL_NAME = "gene-symbol";
    public static final String HPO_ID_COL_NAME = "HPO-ID";
    public static final String HPO_TERM_COL_NAME = "HPO-term-name";

    // Zero-based positions of the columns that are read from each row.
    private static final int DISEASE_ID_COL_INDEX = 0;
    private static final int GENE_SYMBOL_COL_INDEX = 1;
    private static final int HPO_ID_COL_INDEX = 3;
    private static final int HPO_TERM_COL_INDEX = 4;
    // A row must at least reach the highest column index that is read.
    private static final int MIN_COLUMN_COUNT = HPO_TERM_COL_INDEX + 1;

    private final EntityTypeFactory entityTypeFactory;
    private final AttributeFactory attributeFactory;
    /** Lazily loaded index of entities by gene symbol; {@code null} until the file was parsed successfully. */
    private Map<String, List<Entity>> entitiesByGeneSymbol;
    private final File file;

    /**
     * @param file              tab-separated HPO file to read; it is not opened until data is first requested
     * @param entityTypeFactory factory used to create the entity type returned by {@link #getEntityType()}
     * @param attributeFactory  factory used to create the attributes of that entity type
     */
    public HPORepository(File file, EntityTypeFactory entityTypeFactory, AttributeFactory attributeFactory)
    {
        this.file = file;
        this.entityTypeFactory = entityTypeFactory;
        this.attributeFactory = attributeFactory;
    }

    /**
     * @return an empty set; this repository declares no capabilities
     */
    @Override
    public Set<RepositoryCapability> getCapabilities()
    {
        return Collections.emptySet();
    }

    /**
     * Builds the entity type describing the rows of this repository.
     *
     * @return a newly created entity type named "HPO" on every call, with {@link #HPO_ID_COL_NAME} as id attribute
     */
    @Override
    public EntityType getEntityType()
    {
        EntityType entityType = entityTypeFactory.create().setSimpleName("HPO");
        entityType.addAttribute(attributeFactory.create().setName(HPO_DISEASE_ID_COL_NAME));
        entityType.addAttribute(attributeFactory.create().setName(HPO_GENE_SYMBOL_COL_NAME));
        entityType.addAttribute(attributeFactory.create().setName(HPO_ID_COL_NAME), ROLE_ID);
        entityType.addAttribute(attributeFactory.create().setName(HPO_TERM_COL_NAME));
        return entityType;
    }

    /**
     * @return an iterator over all entities, grouped by gene symbol in order of first appearance in the file
     */
    @Override
    public Iterator<Entity> iterator()
    {
        return getEntities().iterator();
    }

    /**
     * Finds entities by gene symbol.
     * <p>
     * A query without rules returns all entities. Otherwise the query must consist of exactly one
     * {@code EQUALS} rule whose value is the gene symbol to look up. Note that the field name of the
     * rule is not inspected: the value is always matched against the gene symbol.
     *
     * @param q query with zero rules or a single {@code EQUALS} rule
     * @return the matching entities, or an empty stream if the gene symbol is unknown
     * @throws MolgenisDataException if the query has more than one rule or uses another operator
     * @throws UncheckedIOException  if the backing file cannot be read
     */
    @Override
    public Stream<Entity> findAll(Query<Entity> q)
    {
        if (q.getRules().isEmpty()) return getEntities().stream();
        if ((q.getRules().size() != 1) || (q.getRules().get(0).getOperator() != Operator.EQUALS))
        {
            throw new MolgenisDataException("The only query allowed on this Repository is gene EQUALS");
        }

        String geneSymbol = (String) q.getRules().get(0).getValue();
        List<Entity> entities = getEntitiesByGeneSymbol().get(geneSymbol);
        return entities != null ? entities.stream() : Stream.empty();
    }

    /**
     * @return the total number of entities in the file
     */
    @Override
    public long count()
    {
        return Iterables.size(this);
    }

    /**
     * Flattens the per-gene index into a single new list.
     */
    private List<Entity> getEntities()
    {
        List<Entity> entities = new ArrayList<>();
        getEntitiesByGeneSymbol().values().forEach(entities::addAll);
        return entities;
    }

    /**
     * Returns the per-gene index, parsing the file on first use.
     * <p>
     * The field is only assigned after the file was read completely, so a failed load is not cached
     * as an empty or partial result; the next call retries instead.
     */
    private Map<String, List<Entity>> getEntitiesByGeneSymbol()
    {
        if (entitiesByGeneSymbol == null)
        {
            entitiesByGeneSymbol = loadEntitiesByGeneSymbol();
        }
        return entitiesByGeneSymbol;
    }

    /**
     * Parses the backing file into a map from gene symbol to the entities of that gene.
     *
     * @throws UncheckedIOException  if the file cannot be opened or read
     * @throws MolgenisDataException if a non-blank row has fewer columns than required
     */
    private Map<String, List<Entity>> loadEntitiesByGeneSymbol()
    {
        // LinkedHashMap keeps genes in order of first appearance in the file.
        Map<String, List<Entity>> loaded = new LinkedHashMap<>();
        // All rows share the same structure, so the entity type is built once instead of once per row.
        EntityType entityType = getEntityType();

        // The trailing '1' makes the reader skip the first (header) line.
        try (CSVReader csvReader = new CSVReader(
                new InputStreamReader(new FileInputStream(file), Charset.forName("UTF-8")), '\t',
                CSVParser.DEFAULT_QUOTE_CHARACTER, 1))
        {
            for (String[] values = csvReader.readNext(); values != null; values = csvReader.readNext())
            {
                // A blank line is parsed as a single empty value; it carries no data.
                if (values.length == 1 && values[0].isEmpty()) continue;

                if (values.length < MIN_COLUMN_COUNT)
                {
                    throw new MolgenisDataException(
                            "Malformed row in HPO file '" + file + "': expected at least " + MIN_COLUMN_COUNT
                                    + " tab-separated columns but found " + values.length + " in row '"
                                    + String.join("\t", values) + "'");
                }

                String geneSymbol = values[GENE_SYMBOL_COL_INDEX];
                Entity entity = new DynamicEntity(entityType);
                entity.set(HPO_DISEASE_ID_COL_NAME, values[DISEASE_ID_COL_INDEX]);
                entity.set(HPO_GENE_SYMBOL_COL_NAME, geneSymbol);
                entity.set(HPO_ID_COL_NAME, values[HPO_ID_COL_INDEX]);
                entity.set(HPO_TERM_COL_NAME, values[HPO_TERM_COL_INDEX]);

                loaded.computeIfAbsent(geneSymbol, key -> new ArrayList<>()).add(entity);
            }
        }
        catch (IOException e)
        {
            throw new UncheckedIOException(e);
        }
        return loaded;
    }
}