package uk.ac.shef.dcs.jate.util;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.*;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.schema.CopyField;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.tika.utils.ExceptionUtils;
import uk.ac.shef.dcs.jate.JATEException;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

import org.apache.log4j.Logger;

/**
 * Static helpers for reading term vectors from a Solr index, populating
 * copy-field destinations on a Lucene document and committing to Solr.
 */
public class SolrUtil {

    /**
     * Gets the indexed terms of a field across the whole index.
     *
     * @param fieldname         field whose terms will be retrieved
     * @param solrIndexSearcher solr index searcher
     * @return the terms of {@code fieldname}; never {@code null}
     * @throws JATEException if the field has no terms in the index, or if reading the index fails
     */
    public static Terms getTermVector(String fieldname, SolrIndexSearcher solrIndexSearcher) throws JATEException {
        try {
            Fields fields = MultiFields.getFields(solrIndexSearcher.getLeafReader());
            // getFields can return null when the reader has no postings (e.g. an empty index);
            // report that as a missing field rather than failing with a NullPointerException.
            Terms vector = (fields == null) ? null : fields.terms(fieldname);
            if (vector == null) {
                throw new JATEException(String.format("Cannot find expected field: %s", fieldname));
            }
            return vector;
        } catch (IOException ioe) {
            StringBuilder sb = new StringBuilder(
                    String.format("Cannot find expected field: %s. Error stacktrack: \n", fieldname));
            sb.append(org.apache.commons.lang.exception.ExceptionUtils.getFullStackTrace(ioe));
            throw new JATEException(sb.toString());
        }
    }

    /**
     * Re-populates every copy-field destination on {@code doc} from the value of its source field.
     * <p>
     * For each copy field the existing destination field is removed first. The destination is then
     * re-created from the source value returned by {@link Document#get(String)}. When the document
     * holds no value for the source field, or the destination field type produces no field, the
     * destination is left removed and nothing is added.
     *
     * @param copyFields copy-field definitions keyed by source field name
     * @param boost      boost passed to the created destination fields
     * @param doc        document to update in place
     */
    public static void copyFields(Map<String, List<CopyField>> copyFields, float boost, Document doc) {
        for (List<CopyField> copyFieldList : copyFields.values()) {
            for (CopyField copyField : copyFieldList) {
                // remove previous one if exist
                doc.removeField(copyField.getDestination().getName());

                String sourceValue = doc.get(copyField.getSource().getName());
                if (sourceValue == null) {
                    // The document has no value for the source field, so there is nothing to copy.
                    continue;
                }

                IndexableField jateField = copyField.getDestination().createField(sourceValue, boost);
                // NOTE(review): createField appears able to return null (e.g. for a destination that is
                // neither indexed nor stored) - confirm against the Solr version in use.
                if (jateField != null) {
                    doc.add(jateField);
                }
            }
        }
    }

    /**
     * Gets the indexed (normalised) term strings.
     *
     * @param indexedTermsVector term vectors indexed; may be {@code null}
     * @return the UTF-8 decoded term strings in iteration order; empty when the input is
     *         {@code null} or reports a size of zero
     * @throws IOException if iterating the terms fails
     */
    public static List<String> getNormalisedTerms(Terms indexedTermsVector) throws IOException {
        List<String> normTermStrs = new ArrayList<>();
        if (indexedTermsVector == null || indexedTermsVector.size() == 0) {
            return normTermStrs;
        }

        TermsEnum iterTerms = indexedTermsVector.iterator();
        BytesRef text;
        while ((text = iterTerms.next()) != null) {
            normTermStrs.add(text.utf8ToString());
        }
        return normTermStrs;
    }

    /**
     * Gets the term vector of a single document for the given field.
     *
     * @param docId             id of the document, passed to the leaf reader as is
     * @param fieldname         field whose term vector will be retrieved
     * @param solrIndexSearcher solr index searcher
     * @return whatever the leaf reader returns for this document and field; unlike the
     *         index-wide overload this value is not checked and may be {@code null}
     * @throws JATEException if reading the index fails
     */
    public static Terms getTermVector(int docId, String fieldname, SolrIndexSearcher solrIndexSearcher)
            throws JATEException {
        try {
            return solrIndexSearcher.getLeafReader().getTermVector(docId, fieldname);
        } catch (IOException ioe) {
            StringBuilder sb = new StringBuilder(
                    String.format("Cannot find expected field: %s. Error stacktrack:\n", fieldname));
            sb.append(org.apache.commons.lang.exception.ExceptionUtils.getFullStackTrace(ioe));
            throw new JATEException(sb.toString());
        }
    }

    /**
     * Commits to Solr. A failure is logged at error level and not rethrown.
     *
     * @param solr     client to commit on
     * @param logger   logger that receives the failure report
     * @param messages context included in the failure report
     */
    public static void commit(SolrClient solr, Logger logger, String... messages) {
        try {
            solr.commit();
        } catch (SolrServerException | IOException e) {
            // Both failure types are reported identically, so they share one handler.
            StringBuilder message = new StringBuilder("FAILED TO COMMIT TO SOLR: ");
            message.append(Arrays.toString(messages)).append("\n")
                    .append(ExceptionUtils.getStackTrace(e)).append("\n");
            logger.error(message.toString());
        }
    }
}