package uk.ac.shef.dcs.jate;

import org.apache.commons.lang.exception.ExceptionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

/**
 * JATE Properties maps to the "jate.properties" properties file.
 *
 * <p>Provides the configuration of the integration between JATE and the Solr index engine.
 * Values are loaded once at construction time (from the class path or from an explicit file)
 * and can afterwards be overridden in memory through the setters.
 */
public class JATEProperties {
    private static final Logger LOG = LoggerFactory.getLogger(JATEProperties.class);

    /** Name of the properties file looked up on the class path by the no-arg constructor. */
    public static final String PROPERTIES_FILE = "jate.properties";

    /** The Solr uniqueKey field. */
    public static final String PROPERTY_SOLR_FIELD_ID = "solr_field_id";

    /** n-grams field from a corpus. */
    public static final String PROPERTY_SOLR_FIELD_CONTENT_NGRAMS = "solr_field_content_ngrams";

    /** Solr content/text field. */
    public static final String PROPERTY_SOLR_FIELD_CONTENT_TERMS = "solr_field_content_terms";

    /**
     * Document metadata extracted from Tika where terms will be extracted from.
     * See also {@code uk.ac.shef.dcs.jate.io.TikaMultiFieldDocumentCreator}.
     */
    public static final String PROPERTY_SOLR_FIELD_MAP_DOC_PARTS = "solr_field_map_doc_parts";

    /** Solr (string) field name where final filtered candidate terms will be indexed and stored to. */
    public static final String PROPERTY_SOLR_FIELD_DOMAIN_TERMS = "solr_field_domain_terms";

    /** Maximum of data units each thread (worker) of a SolrParallelIndexingWorker should commit to Solr. */
    public static final String PROPERTY_INDEXER_MAX_UNITS_TO_COMMIT = "indexer_max_units_to_commit";

    /**
     * Maximum of parallel CPU cores used. {@link #getMaxCPUCores()} reads it as a positive integer.
     * NOTE(review): the original comment described this as a "%" - confirm the intended unit.
     */
    public static final String PROPERTY_MAX_CORES = "max_cores";

    /** Fallback used by {@link #getIndexerMaxUnitsToCommit()} when the property is missing or invalid. */
    public static final Integer VALUE_DEFAULT_INDEXER_MAX_UNITS_TO_COMMIT = 500;

    /** Fallback used by {@link #getMaxCPUCores()} when the property is missing or invalid. */
    private static final int DEFAULT_MAX_CORES = 1;

    private final Properties prop = new Properties();

    /**
     * Loads the JATE properties from {@value #PROPERTIES_FILE} on the class path.
     *
     * @throws JATEException if the file is not on the class path or cannot be read
     */
    public JATEProperties() throws JATEException {
        InputStream stream = JATEProperties.class.getClassLoader().getResourceAsStream(PROPERTIES_FILE);
        // getResourceAsStream signals "not found" by returning null, not by throwing, so it has
        // to be checked explicitly; otherwise Properties.load(null) fails with a bare NPE.
        if (stream == null) {
            throw new JATEException(
                    String.format("Properties file '%s' not found in your class path.", PROPERTIES_FILE));
        }
        loadAndClose(stream,
                String.format("Properties file '%s' could not be read from your class path.", PROPERTIES_FILE));
    }

    /**
     * Loads the JATE properties from a file on the file system.
     *
     * @param propFile path of the properties file to load
     * @throws JATEException if the file does not exist or cannot be read
     */
    public JATEProperties(String propFile) throws JATEException {
        String failureMessage = String.format("Specified properties file not found! [%s]", propFile);
        FileInputStream propertyFileStream;
        try {
            propertyFileStream = new FileInputStream(propFile);
        } catch (IOException e) {
            throw failure(failureMessage, e);
        }
        loadAndClose(propertyFileStream, failureMessage);
    }

    /**
     * @return the configured Solr uniqueKey field name
     * @throws JATEException if {@value #PROPERTY_SOLR_FIELD_ID} is not set
     */
    public String getSolrFieldNameID() throws JATEException {
        return getRequiredString(PROPERTY_SOLR_FIELD_ID, "'%s' not defined in jate.properties");
    }

    /** Overrides {@value #PROPERTY_SOLR_FIELD_ID} in memory. */
    public void setSolrFieldNameID(String solrFieldNameID) {
        prop.setProperty(PROPERTY_SOLR_FIELD_ID, solrFieldNameID);
    }

    /**
     * @return the configured n-gram field name
     * @throws JATEException if {@value #PROPERTY_SOLR_FIELD_CONTENT_NGRAMS} is not set
     */
    public String getSolrFieldNameJATENGramInfo() throws JATEException {
        return getRequiredString(PROPERTY_SOLR_FIELD_CONTENT_NGRAMS, "'%s' not specified in jate.properties");
    }

    /** Overrides {@value #PROPERTY_SOLR_FIELD_CONTENT_NGRAMS} in memory. */
    public void setSolrFieldNameJATEGramInfo(String solrFieldNameJATEGramInfo) {
        prop.setProperty(PROPERTY_SOLR_FIELD_CONTENT_NGRAMS, solrFieldNameJATEGramInfo);
    }

    /**
     * @return the configured field name for final domain terms
     * @throws JATEException if {@value #PROPERTY_SOLR_FIELD_DOMAIN_TERMS} is not set
     */
    public String getSolrFieldNameJATEDomainTerms() throws JATEException {
        return getRequiredString(PROPERTY_SOLR_FIELD_DOMAIN_TERMS, "'%s' not specified in jate.properties");
    }

    /** Overrides {@value #PROPERTY_SOLR_FIELD_DOMAIN_TERMS} in memory. */
    public void setSolrFieldNameJATEDomainTerms(String solrFieldNameJATEDomainTerms) {
        prop.setProperty(PROPERTY_SOLR_FIELD_DOMAIN_TERMS, solrFieldNameJATEDomainTerms);
    }

    /**
     * Gets the Solr field for candidate terms.
     *
     * @return candidate term field specified
     * @throws JATEException if {@value #PROPERTY_SOLR_FIELD_CONTENT_TERMS} is not set
     */
    public String getSolrFieldNameJATECTerms() throws JATEException {
        return getRequiredString(PROPERTY_SOLR_FIELD_CONTENT_TERMS,
                "term candidate field '%s' is not defined in jate.properties");
    }

    /** Overrides {@value #PROPERTY_SOLR_FIELD_CONTENT_TERMS} in memory. */
    public void setSolrFieldNameJATECTerms(String solrFieldNameJATECTerms) {
        prop.setProperty(PROPERTY_SOLR_FIELD_CONTENT_TERMS, solrFieldNameJATECTerms);
    }

    /**
     * Gets the Solr field specified for document metadata (usually extracted via the Tika plugin).
     * Unlike the other field getters this one is optional: a missing value is only logged.
     *
     * @return field name specified for document metadata, or {@code null} if it is not set
     */
    public String getSolrFieldNameJATECTermsF() {
        String docPartsField = getString(PROPERTY_SOLR_FIELD_MAP_DOC_PARTS);
        if (docPartsField == null) {
            LOG.warn("Dynamic field '{}' is not defined in jate.properties", PROPERTY_SOLR_FIELD_MAP_DOC_PARTS);
        }
        return docPartsField;
    }

    /** Overrides {@value #PROPERTY_SOLR_FIELD_MAP_DOC_PARTS} in memory. */
    public void setSolrFieldNameJATETermsF(String solrFieldNameJATETermsF) {
        prop.setProperty(PROPERTY_SOLR_FIELD_MAP_DOC_PARTS, solrFieldNameJATETermsF);
    }

    /**
     * @return the configured {@value #PROPERTY_INDEXER_MAX_UNITS_TO_COMMIT}, or
     *         {@link #VALUE_DEFAULT_INDEXER_MAX_UNITS_TO_COMMIT} if it is missing, not an integer
     *         or smaller than 1
     */
    public int getIndexerMaxUnitsToCommit() {
        return getPositiveInt(PROPERTY_INDEXER_MAX_UNITS_TO_COMMIT, VALUE_DEFAULT_INDEXER_MAX_UNITS_TO_COMMIT);
    }

    /**
     * @return the configured {@value #PROPERTY_MAX_CORES}, or 1 if it is missing, not an integer
     *         or smaller than 1
     */
    public int getMaxCPUCores() {
        return getPositiveInt(PROPERTY_MAX_CORES, DEFAULT_MAX_CORES);
    }

    /**
     * Overrides {@value #PROPERTY_MAX_CORES} in memory. The value is stored as text via
     * {@link String#valueOf(Object)}, so a {@code null} argument is stored as the text "null",
     * which {@link #getMaxCPUCores()} then treats as invalid and replaces by the default.
     */
    public void setMaxCPUCores(Integer maxCPUCores) {
        prop.setProperty(PROPERTY_MAX_CORES, String.valueOf(maxCPUCores));
    }

    /** Returns the raw property value, or {@code null} if the property is not set. */
    private String getString(String propertyName) {
        return prop.getProperty(propertyName);
    }

    /** Returns a mandatory property, failing with {@code messageFormat} (one %s: the key) if absent. */
    private String getRequiredString(String propertyName, String messageFormat) throws JATEException {
        String value = getString(propertyName);
        if (value == null) {
            throw new JATEException(String.format(messageFormat, propertyName));
        }
        return value;
    }

    /** Reads a property as an integer >= 1, logging a warning and returning the default otherwise. */
    private int getPositiveInt(String propertyName, int defaultValue) {
        String raw = getString(propertyName);
        if (raw == null) {
            LOG.warn("'{}' is not set. Default={} is used.", propertyName, defaultValue);
            return defaultValue;
        }
        try {
            // Properties.load keeps trailing whitespace of a value, so trim before parsing.
            int value = Integer.parseInt(raw.trim());
            if (value < 1) {
                LOG.warn("'{}' illegal value: {}. Default={} is used.", propertyName, value, defaultValue);
                return defaultValue;
            }
            return value;
        } catch (NumberFormatException nfe) {
            LOG.warn("'{}' illegal value. Default={} is used.\n{}",
                    propertyName, defaultValue, ExceptionUtils.getFullStackTrace(nfe));
            return defaultValue;
        }
    }

    /** Loads {@code stream} into the properties and always closes it afterwards. */
    private void loadAndClose(InputStream stream, String failureMessage) throws JATEException {
        try {
            prop.load(stream);
        } catch (IOException e) {
            throw failure(failureMessage, e);
        } finally {
            closeQuietly(stream);
        }
    }

    /** Builds a JATEException that keeps the underlying cause for diagnostics. */
    private static JATEException failure(String message, Throwable cause) {
        // Only the String constructor of JATEException is known from this file, so the cause
        // is attached afterwards instead of being passed to a constructor.
        JATEException failure = new JATEException(message);
        failure.initCause(cause);
        return failure;
    }

    /** Closes the stream on a best-effort basis; a failure to close is logged, never thrown. */
    private static void closeQuietly(InputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException e) {
            LOG.warn("Failed to close properties stream", e);
        }
    }
}