package org.nextprot.api.tasks.solr.indexer;
//import java.util.List;
import org.apache.solr.common.SolrInputDocument;
import org.nextprot.api.core.domain.DbXref;
import org.nextprot.api.core.domain.Publication;
import org.nextprot.api.core.domain.PublicationAuthor;
import org.nextprot.api.core.domain.publication.JournalResourceLocator;
import org.nextprot.api.core.utils.TerminologyUtils;
import java.util.ArrayList;
import java.util.Set;
import java.util.SortedSet;
/**
 * Converts {@link Publication} objects into {@link SolrInputDocument}s for the
 * publication index.
 */
public class PublicationSolrindexer extends SolrIndexer<Publication> {

    /**
     * @param url passed unchanged to the {@link SolrIndexer} constructor
     */
    public PublicationSolrindexer(String url) {
        super(url);
    }

    /**
     * Builds the Solr document for one publication.
     *
     * <p>Fields are added in this order: {@code id}, {@code ac} (only when the
     * publication has a non-null xref set), {@code filters}, {@code title},
     * {@code title_s}, {@code date}/{@code year} (only when the publication date
     * is non-null), {@code first_page}, {@code last_page}, {@code volume},
     * {@code volume_s}, {@code abstract}, {@code type}, the journal fields and
     * the author fields.
     *
     * @param publi the publication to index
     * @return a new document holding the fields listed above
     */
    @Override
    public SolrInputDocument convertToSolrDocument(Publication publi) {
        SolrInputDocument doc = new SolrInputDocument();
        doc.addField("id", publi.getPublicationId());

        // TODO: this 'ac' field should be renamed 'xrefs'
        Set<DbXref> xrefs = publi.getDbXrefs();
        if (xrefs != null) {
            // The format is slightly different in current publication indexes vs terminology indexes,
            // check if justified; if yes create an ad hoc Publication.convertXrefsToSolrString method.
            doc.addField("ac", TerminologyUtils.convertXrefsToSolrString(new ArrayList<DbXref>(xrefs)));
        }

        doc.addField("filters", buildFilters(publi));

        doc.addField("title", publi.getTitle());
        doc.addField("title_s", publi.getTitle());

        // The year is only indexed together with a known publication date.
        if (publi.getPublicationDate() != null) {
            doc.addField("date", publi.getPublicationDate());
            doc.addField("year", publi.getPublicationYear());
        }

        doc.addField("first_page", publi.getFirstPage());
        doc.addField("last_page", publi.getLastPage());
        doc.addField("volume", publi.getVolume());
        doc.addField("volume_s", publi.getVolume());
        doc.addField("abstract", publi.getAbstractText());
        doc.addField("type", publi.getPublicationType());
        // No "source" field: it is either PubMed (99.99%) or UniProt for published articles with no PMID,
        // so it is useless for the indexes; those can be found by querying ac without pubmed: ac:(-pubmed)

        addJournalFields(doc, publi);
        addAuthorFields(doc, publi.getAuthors());

        return doc;
    }

    /**
     * Builds the value of the {@code filters} field: the concatenation of
     * {@code " computed"}, {@code " curated"} and {@code " largescale"} for each
     * flag that is set (each token keeps its leading space).
     */
    private static String buildFilters(Publication publi) {
        StringBuilder filters = new StringBuilder();
        if (publi.getIsComputed()) {
            filters.append(" computed");
        }
        // Change getIsCurated or set here to 'curated' if computed is false
        if (publi.getIsCurated()) {
            filters.append(" curated");
        }
        if (publi.getIsLargeScale()) {
            filters.append(" largescale");
        }
        return filters.toString();
    }

    /**
     * Adds {@code journal} (name, followed by the abbreviation when the journal
     * has an id) and {@code pretty_journal} (the abbreviation alone) when the
     * publication is located in a scientific journal; otherwise adds nothing.
     */
    private static void addJournalFields(SolrInputDocument doc, Publication publi) {
        if (!publi.isLocatedInScientificJournal()) {
            return;
        }
        JournalResourceLocator journal = publi.getJournalResourceLocator();
        String journalText = journal.getName();
        if (journal.hasJournalId()) {
            String abbrev = journal.getAbbrev();
            journalText += " " + abbrev;
            // TODO: rename "pretty_journal" to "abbrev_journal"
            doc.addField("pretty_journal", abbrev);
        }
        doc.addField("journal", journalText);
    }

    /**
     * Adds one {@code authors} value per author ("last fore I.N.") and a single
     * {@code pretty_authors} value ("last I.N. | last I.N.", for display only).
     * Does nothing when {@code authors} is null.
     *
     * <p>Null name parts are treated as empty strings so that neither a
     * {@link NullPointerException} nor the literal text "null" reaches the index.
     */
    private static void addAuthorFields(SolrInputDocument doc, SortedSet<PublicationAuthor> authors) {
        if (authors == null) {
            return;
        }
        StringBuilder pretty = new StringBuilder();
        for (PublicationAuthor author : authors) {
            String lastName = emptyIfNull(author.getLastName());
            String foreName = emptyIfNull(author.getForeName());
            // Append a dot after every character of the initials, e.g. "JK" -> "J.K."
            String dottedInitials = emptyIfNull(author.getInitials()).replaceAll("(.)", "$1\\.");

            // An empty fore name or empty initials leave consecutive/trailing spaces:
            // trim the ends and collapse every run of spaces into a single one.
            String searchable = lastName + " " + foreName + " " + dottedInitials;
            doc.addField("authors", searchable.trim().replaceAll(" {2,}", " "));

            // Every entry contains at least the separating space, so a non-empty
            // builder means a previous author has already been written.
            if (pretty.length() > 0) {
                pretty.append(" | ");
            }
            // Initials that already contained dots now carry three in a row ("J." -> "J...");
            // reduce each such triple back to a single dot.
            pretty.append(lastName).append(" ").append(dottedInitials.replaceAll("\\.\\.\\.", "."));
        }
        doc.addField("pretty_authors", pretty.toString()); // for display only
    }

    /** Returns {@code value}, or the empty string when {@code value} is null. */
    private static String emptyIfNull(String value) {
        return value == null ? "" : value;
    }
}