/*
* #%L
* gitools-biomart
* %%
* Copyright (C) 2013 Universitat Pompeu Fabra - Biomedical Genomics group
* %%
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program. If not, see
* <http://www.gnu.org/licenses/gpl-3.0.html>.
* #L%
*/
package org.gitools.datasources.biomart.idmapper;
import org.gitools.api.analysis.IProgressMonitor;
import org.gitools.datasources.biomart.BiomartService;
import org.gitools.datasources.biomart.queryhandler.BiomartQueryHandler;
import org.gitools.datasources.biomart.restful.model.Attribute;
import org.gitools.datasources.biomart.restful.model.Dataset;
import org.gitools.datasources.biomart.restful.model.Query;
import org.gitools.datasources.idmapper.*;
import java.util.*;
/**
 * Identifier mapper that resolves mappings between two identifier types by
 * querying a BioMart service for a two-column table (source attribute,
 * destination attribute) of a given dataset.
 */
public class EnsemblMapper extends AbstractMapper implements AllIds {

    /**
     * Translation table from mapping node identifiers to the attribute names
     * used in the BioMart query. Read-only after class initialisation.
     */
    private static final Map<String, String> inameMap;

    static {
        Map<String, String> names = new HashMap<>();
        names.put(ENSEMBL_GENES, "ensembl_gene_id");
        names.put(ENSEMBL_TRANSCRIPTS, "ensembl_transcript_id");
        names.put(ENSEMBL_PROTEINS, "ensembl_peptide_id");
        names.put(PDB, "pdb");
        // NOTE(review): RefSeq is mapped to the "embl" attribute; this looks
        // suspicious but is kept as-is -- confirm against the BioMart attribute list.
        names.put(NCBI_REFSEQ, "embl");
        names.put(NCBI_GENES, "entrezgene");
        names.put(NCBI_UNIGENE, "unigene");
        names.put(UNIPROT, "uniprot_swissprot_accession");
        names.put(GO_BP, "go_biological_process_id");
        names.put(GO_MF, "go_molecular_function_id");
        names.put(GO_CL, "go_cellular_component_id");
        names.put(GO_ID, "go_id");
        inameMap = Collections.unmodifiableMap(names);
    }

    /** Prefix that lets callers pass a raw attribute name as "ensembl:&lt;attribute&gt;". */
    private static final String ENSEMBL_PREFIX = "ensembl:";

    private final BiomartService service;

    private final String dataset;

    /**
     * Creates a mapper bound to a BioMart service and dataset.
     *
     * @param service the BioMart service the mapping query is sent to
     * @param dataset the name of the dataset to query
     */
    public EnsemblMapper(BiomartService service, String dataset) {
        // NOTE(review): the meaning of the two boolean flags is defined by
        // AbstractMapper, which is not visible here.
        super("Ensembl", false, true);
        this.service = service;
        this.dataset = dataset;
    }

    /**
     * Queries the service for (source, destination) pairs, groups the
     * destination values by source value and applies the result to {@code data}.
     *
     * @param context the mapping context (not used by this implementation)
     * @param data    the mapping data to update; it is also the returned object
     * @param src     the source node; its id selects the source attribute
     * @param dst     the destination node; its id selects the destination attribute
     * @param monitor progress monitor, also handed to the service query
     * @return the same {@code data} instance after the mapping has been applied
     * @throws MappingException if either node id has no known attribute name,
     *                          or if the service query fails
     */
    @Override
    public MappingData map(MappingContext context, MappingData data, MappingNode src, MappingNode dst, IProgressMonitor monitor) throws MappingException {
        String srcInternalName = getInternalName(src.getId());
        String dstInternalName = getInternalName(dst.getId());
        if (srcInternalName == null || dstInternalName == null) {
            throw new MappingException("Unsupported mapping from " + src + " to " + dst);
        }

        // NOTE(review): begin() is called twice below without a visible matching
        // end/worked call -- confirm this is the intended IProgressMonitor usage.
        monitor.begin("Getting mappings from Ensembl ...", 1);

        final Map<String, Set<String>> map = new HashMap<>();
        Query q = createQuery(dataset, srcInternalName, dstInternalName);
        try {
            service.queryModule(q, new BiomartQueryHandler() {
                @Override
                public void begin() throws Exception {
                }

                @Override
                public void end() {
                }

                @Override
                public void line(String[] rowFields) throws Exception {
                    // A row without both columns carries no mapping (for example
                    // when the destination column is missing); skip it instead of
                    // failing the whole query with an index error.
                    if (rowFields == null || rowFields.length < 2) {
                        return;
                    }
                    String srcf = rowFields[0];
                    String dstf = rowFields[1];
                    Set<String> items = map.get(srcf);
                    if (items == null) {
                        items = new HashSet<>();
                        map.put(srcf, items);
                    }
                    items.add(dstf);
                }
            }, monitor);
        } catch (Exception ex) {
            // Keep the original exception as the cause.
            throw new MappingException(ex);
        }

        monitor.begin("Mapping Ensembl IDs...", 1);

        // When no data has been provided yet, start from the source identifiers
        // returned by the query.
        if (data.isEmpty()) {
            data.identity(map.keySet());
        }
        data.map(map);
        return data;
    }

    /**
     * Resolves a mapping node id to the attribute name used in the query.
     * Known ids are looked up in the translation table; any other id starting
     * with {@code "ensembl:"} is returned with that prefix removed.
     *
     * @param id the mapping node id, may be {@code null}
     * @return the attribute name, or {@code null} if {@code id} is {@code null}
     *         or not recognised
     */
    public static String getInternalName(String id) {
        if (id == null) {
            // Previously this dereferenced null; report "unknown" instead.
            return null;
        }
        String iname = inameMap.get(id);
        if (iname == null && id.startsWith(ENSEMBL_PREFIX)) {
            return id.substring(ENSEMBL_PREFIX.length());
        }
        return iname;
    }

    /**
     * Builds a query over one dataset requesting two attributes, source first
     * and destination second, with unique rows enabled.
     *
     * @param dataset         the dataset name
     * @param srcInternalName the attribute name of the first (source) column
     * @param dstInternalName the attribute name of the second (destination) column
     * @return the populated query
     */
    public static Query createQuery(String dataset, String srcInternalName, String dstInternalName) {
        Query q = new Query();
        q.setVirtualSchemaName("default");
        q.setUniqueRows(1);

        Dataset ds = new Dataset();
        ds.setName(dataset);

        // Attribute order defines column order in the result rows.
        List<Attribute> attrs = ds.getAttribute();
        Attribute srcAttr = new Attribute();
        srcAttr.setName(srcInternalName);
        attrs.add(srcAttr);
        Attribute dstAttr = new Attribute();
        dstAttr.setName(dstInternalName);
        attrs.add(dstAttr);

        q.getDatasets().add(ds);
        return q;
    }
}