package org.nextprot.api.tasks.solr.indexer.entry.impl;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import java.util.HashSet;
import java.util.ArrayList;
import org.nextprot.api.core.domain.ChromosomalLocation;
import org.nextprot.api.core.domain.Entry;
import org.nextprot.api.solr.index.EntryIndex.Fields;
import org.nextprot.api.tasks.solr.indexer.entry.EntryFieldBuilder;
import org.nextprot.api.tasks.solr.indexer.entry.FieldBuilder;
import com.google.common.base.Joiner;
/**
 * Builds the chromosome related fields of an entry: GENE_BAND, CHR_LOC and CHR_LOC_S.
 *
 * <ul>
 * <li>GENE_BAND: distinct bands and chromosome+band values, joined with spaces in reverse order</li>
 * <li>CHR_LOC: distinct chromosomal locations (chromosome + band), joined with spaces in
 * alphabetical order</li>
 * <li>CHR_LOC_S: numeric sort key computed by {@link #sortChr(String)} from the first location
 * of CHR_LOC</li>
 * </ul>
 *
 * Note that CHR_LOC is displayed in UI search result and CHR_LOC_S is used to sort UI search
 * result: this is why CHR_LOC_S is computed from the first location displayed (consistency).
 */
@EntryFieldBuilder
public class ChromosomeFieldBuilder extends FieldBuilder {

    /** Multiplier applied to the chromosome rank in the sort key. */
    private static final int CHROMOSOME_WEIGHT = 1000000;

    /** Offset separating the p arm (below) from the q arm (at or above) inside a chromosome. */
    private static final int Q_ARM_OFFSET = 50000;

    /** Multiplier applied to the band number found after p or q. */
    private static final int BAND_WEIGHT = 1000;

    /** Max band number expected after p or q. */
    private static final int MAX_BAND = 50;

    /** Rank given to unknown chromosomes so that they are pushed at the end. */
    private static final int UNKNOWN_CHROMOSOME_RANK = 30;

    /**
     * Computes GENE_BAND, CHR_LOC and CHR_LOC_S from the chromosomal locations of the entry
     * and registers them with {@code addField}.
     *
     * @param entry the entry whose chromosomal locations are indexed
     */
    @Override
    protected void init(Entry entry) {

        // alphabetical order: the first element drives CHR_LOC_S
        Set<String> locations = new TreeSet<String>();

        // The reverse is important otherwise solr may find wrong locations
        // with queries like 11q13 in "19q13.11 q13.11"
        Set<String> geneBands = new TreeSet<String>(Collections.reverseOrder());

        for (ChromosomalLocation location : entry.getChromosomalLocations()) {
            String chromosome = location.getChromosome() == null ? "" : location.getChromosome();
            String band = location.getBand() == null ? "" : location.getBand();

            geneBands.add(band);
            geneBands.add(chromosome + band);

            // an "unknown" band is not shown in the location, only the chromosome is kept
            locations.add(chromosome + ("unknown".equals(band) ? "" : band));
        }

        // trim() drops the separator left by an empty element at either end of the sorted set
        String geneBand = Joiner.on(" ").skipNulls().join(geneBands).trim();
        String chrLoc = Joiner.on(" ").skipNulls().join(locations).trim();

        addField(Fields.GENE_BAND, geneBand);
        addField(Fields.CHR_LOC, chrLoc);
        addField(Fields.CHR_LOC_S, sortChr(chrLoc));
    }

    /**
     * @return the fields built by this builder: CHR_LOC, CHR_LOC_S and GENE_BAND
     */
    @Override
    public Collection<Fields> getSupportedFields() {
        return Arrays.asList(Fields.CHR_LOC, Fields.CHR_LOC_S, Fields.GENE_BAND);
    }

    /**
     * Allows to sort results based on chromosomal location.
     *
     * Only the first space-separated location of {@code chrs} is used. With XX the chromosome
     * rank (1..22, x=23, y=24, mt=25) and YY the first band number after p or q:
     * <ul>
     * <li>chromosome only: 1000000*XX</li>
     * <li>p arm: 1000000*XX + 50000 - 1000*YY (higher band numbers come first)</li>
     * <li>centromere (location contains "cen"): 1000000*XX + 49999</li>
     * <li>q arm: 1000000*XX + 50000 + 1000*YY</li>
     * <li>empty, "unknown" or missing chromosome: 30000000 (pushed at the end)</li>
     * </ul>
     *
     * @param chrs space-separated chromosomal locations, for instance "19q13.11 1p36"
     * @return the sort key of the first location
     * @throws NumberFormatException if the chromosome is neither a number nor x, y or mt,
     *         or if the text following p or q does not start with a number
     */
    public static Integer sortChr(String chrs) {

        // base the computation of chr_loc_s on first chr_loc
        String location = chrs.split(" ")[0];

        // push unknown chromosome at the end
        if (location.isEmpty() || location.contains("unknown")) {
            return UNKNOWN_CHROMOSOME_RANK * CHROMOSOME_WEIGHT;
        }

        // split on p, q or cen: [chromosome, first band, ...]
        String[] parts = location.split("([pq]|cen)");

        // a band without chromosome (e.g. "q13") cannot be ranked: handle it like unknown
        if (parts.length == 0 || parts[0].isEmpty()) {
            return UNKNOWN_CHROMOSOME_RANK * CHROMOSOME_WEIGHT;
        }

        int chromosomeKey = chromosomeRank(parts[0]) * CHROMOSOME_WEIGHT;

        // centromere sits right below the first q arm value
        if (location.contains("cen")) {
            return chromosomeKey + Q_ARM_OFFSET - 1;
        }

        // chromosome without band
        if (parts.length == 1) {
            return chromosomeKey;
        }

        // number following the first p or q, stopping at a range ('-') or list (',') separator
        int bandKey = (int) (Double.parseDouble(parts[1].split("[-,]")[0]) * BAND_WEIGHT);

        // the arm is the one of the first band: "1p11-q12" starts on the p arm
        int firstP = location.indexOf('p');
        int firstQ = location.indexOf('q');
        boolean startsOnQArm = firstQ > -1 && (firstP == -1 || firstQ < firstP);

        if (startsOnQArm) {
            return chromosomeKey + Q_ARM_OFFSET + bandKey;
        }

        // p arm in descending order of band number
        return chromosomeKey + BAND_WEIGHT * MAX_BAND - bandKey;
    }

    /** Maps a chromosome name to its rank: x=23, y=24, mt=25, otherwise its integer value. */
    private static int chromosomeRank(String chromosome) {
        if (chromosome.equalsIgnoreCase("x")) {
            return 23;
        }
        if (chromosome.equalsIgnoreCase("y")) {
            return 24;
        }
        if (chromosome.equalsIgnoreCase("mt")) {
            return 25;
        }
        return Integer.parseInt(chromosome);
    }
}