package org.opensextant.matching;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.WeakHashMap;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.SolrPingResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.DateUtil;
import org.apache.solr.core.CoreContainer;
import org.opensextant.placedata.Place;
import org.opensextant.vocab.Vocab;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class MatcherFactory {
/** The name of environment var and system property for solr. */
private static String envParam = "solr.home";
/** String specifying solr home, could be file path or URL. */
private static String homeLocation;
/**
* States of solr server and thus the MatcherFactory is solr accessed via
* URL (remote) or embedded?
*/
private static boolean isRemote;
/** Do we have a valid solr home? */
private static boolean isConfigured;
/** Have we started solr? */
private static boolean isStarted;
/** The solr servers which are the heart of the MatcherFactory. */
private static SolrClient solrServerGeo;
private static SolrClient solrServerVocab;
/**
* All of the Matchers,Searchers and VocabMatchers the Factory has created
* weak references so they can be GC'ed.
*/
static Map<PlacenameMatcher, Boolean> matchers = new WeakHashMap<PlacenameMatcher, Boolean>();
static Map<PlacenameSearcher, Boolean> searchers = new WeakHashMap<PlacenameSearcher, Boolean>();
static Map<VocabMatcher, Boolean> vocabers = new WeakHashMap<VocabMatcher, Boolean>();
/** The fields of the geo match and query response. */
private static String gazetteerFieldNames = "id,place_id,name,name_expanded,lat,lon,geo,feat_class,feat_code,"
+ "FIPS_cc,cc,ISO3_cc,adm1,adm2,adm3,adm4,adm5,source,src_place_id,src_name_id,script,"
+ "name_bias,id_bias,name_type,name_type_system,partition,search_only";
/**
* The field names to load the gazetteer (same as match/query except for
* "geo" field which is created on load.
*/
private static String gazetteerFieldNamesLoader = "id,place_id,name,name_expanded,lat,lon,feat_class,feat_code,"
+ "FIPS_cc,cc,ISO3_cc,adm1,adm2,adm3,adm4,adm5,source,src_place_id,src_name_id,script,"
+ "name_bias,id_bias,name_type,name_type_system,partition,search_only";
/** The fixed fields of the vocab match and response. */
private static String vocabFieldNames = "id,phrase,category,taxonomy";
/** The initial parameters for matchers and the searchers and vocabers. */
private static ModifiableSolrParams matchParams = new ModifiableSolrParams();
private static ModifiableSolrParams searchParams = new ModifiableSolrParams();
private static ModifiableSolrParams vocabParams = new ModifiableSolrParams();
/**
* Mapping from gazetteer codes to hierachical expression used on the Place
* object.
*/
private static Map<String, String> featureCodeMap = new HashMap<String, String>();
/** The matching request handler. */
private static final String MATCH_REQUESTHANDLER = "/tag";
/** Log object. */
private static final Logger LOGGER = LoggerFactory.getLogger(MatcherFactory.class);
/** Set the base config for the matching and searching params. */
static {
matchParams.set(CommonParams.QT, MATCH_REQUESTHANDLER);
matchParams.set(CommonParams.FL, gazetteerFieldNames);
matchParams.set("tagsLimit", 100000);
matchParams.set(CommonParams.ROWS, 100000);
matchParams.set("subTags", false);
matchParams.set("matchText", false);
matchParams.set("overlaps", "LONGEST_DOMINANT_RIGHT");
matchParams.set("field", "name4matching");
matchParams.set(CommonParams.FQ, "search_only:false");
searchParams.set(CommonParams.Q, "*:*");
searchParams.set(CommonParams.FL, gazetteerFieldNames + ",score");
searchParams.set(CommonParams.ROWS, 100000);
vocabParams.set(CommonParams.QT, MATCH_REQUESTHANDLER);
vocabParams.set(CommonParams.FL, vocabFieldNames);
vocabParams.set("tagsLimit", 100000);
vocabParams.set(CommonParams.ROWS, 100000);
vocabParams.set("subTags", false);
vocabParams.set("matchText", false);
vocabParams.set("overlaps", "LONGEST_DOMINANT_RIGHT");
vocabParams.set("field", "phrase4matching");
}
/** Set the values of the feature codes. */
static {
featureCodeMap.put("A", "Geo.featureType.AdminRegion");
featureCodeMap.put("P", "Geo.featureType.PopulatedPlace");
featureCodeMap.put("V", "Geo.featureType.Vegetation");
featureCodeMap.put("L", "Geo.featureType.Area");
featureCodeMap.put("U", "Geo.featureType.Undersea");
featureCodeMap.put("R", "Geo.featureType.Street");
featureCodeMap.put("T", "Geo.featureType.Hypso");
featureCodeMap.put("H", "Geo.featureType.Hydro");
featureCodeMap.put("S", "Geo.featureType.SpotFeature");
}
private MatcherFactory() {
}
/**
* Configure this MatcherFctory.
*
* @param home
* solr home as a file path or URL
*/
public static void config(String home) {
if (isStarted) {
// already running
LOGGER.info("Tried to configure MatcherFactory when already started. Doing nothing.");
return;
}
// not running but already configured, must be re-configuring
if (isConfigured) {
LOGGER.info("Trying to re-configure MatcherFactory.");
isRemote = false;
isConfigured = false;
isStarted = false;
solrServerGeo = null;
solrServerVocab = null;
}
// get value for home
boolean foundHome = setHome(home);
if (!foundHome) {
isConfigured = false;
LOGGER.error("Could not configure MatcherFactory: Could not find a value for solr home");
return;
}
// determine if home is local or remote
boolean foundRemoteLocal = setLocalorRemote();
if (!foundRemoteLocal) {
isConfigured = false;
LOGGER.error("Could not configure MatcherFactory: Could not interpret:" + homeLocation);
return;
}
// all OK
isConfigured = true;
LOGGER.info("Configured MatcherFactory: solr.home=" + homeLocation + " Remote=" + isRemote);
}
/**
* Start this MatcherFactory.
*/
public static void start() {
if (!isConfigured) {
// can't start not configured
LOGGER.error("Could not start MatcherFactory, it hasn't been configured yet");
return;
}
// already started
if (isStarted) {
LOGGER.info("Tried to start MatcherFactory when it was already started. Doing nothing.");
return;
}
// if remote, use HttpSolrServer
if (isRemote) {
HttpSolrClient server = new HttpSolrClient(homeLocation+"gazetteer");
server.setAllowCompression(true);
solrServerGeo = server;
server = new HttpSolrClient(homeLocation+"vocabulary");
server.setAllowCompression(true);
solrServerVocab = server;
} else { // must be local, use EmbeddedSolrServer
CoreContainer solrContainer = new CoreContainer(homeLocation);
solrContainer.load();
EmbeddedSolrServer serverGeo = new EmbeddedSolrServer(solrContainer, "gazetteer");
EmbeddedSolrServer serverVocab = new EmbeddedSolrServer(solrContainer, "vocabulary");
solrServerGeo = serverGeo;
solrServerVocab = serverVocab;
}
// see if solr servers are really there
SolrPingResponse pingGeo;
SolrPingResponse pingVocab;
try {
pingGeo = solrServerGeo.ping();
pingVocab = solrServerVocab.ping();
} catch (SolrServerException e) {
LOGGER.error("Solr Server didn't respond to ping from MatcherFactory", e);
isStarted = false;
return;
} catch (IOException e) {
LOGGER.error("Solr Server didn't respond to ping from MatcherFactory", e);
isStarted = false;
return;
}
// started and got good ping
if (pingGeo.getStatus() == 0) {
isStarted = true;
} else {
LOGGER.error("Solr Server (Geo) responded with error code from ping from MatcherFactory. Got code:"
+ pingGeo.getStatus());
isStarted = false;
}
// started and got good ping
if (pingVocab.getStatus() == 0) {
isStarted = true;
} else {
LOGGER.error("Solr Server (Vocab) responded with error code from ping from MatcherFactory. Got code:"
+ pingVocab.getStatus());
isStarted = false;
}
// do warmup here?
return;
}
/** Set the value for solr home. */
private static boolean setHome(String home) {
String foundIt = home;
// explicit value given?
if (foundIt != null && foundIt.length() > 0) {
homeLocation = foundIt;
LOGGER.info("Explicit solr home found. Using " + foundIt);
return true;
} else {
LOGGER.debug("No explicit value given for solr home. Checking for system property");
}
// system property?
foundIt = System.getProperty(envParam);
if (foundIt != null && foundIt.length() > 0) {
homeLocation = foundIt;
LOGGER.info("System property for solr home found. Using " + foundIt);
return true;
} else {
LOGGER.debug("No " + envParam + " system property for solr home. Checking for env variable");
}
// environment variable?
foundIt = System.getenv(envParam);
if (foundIt != null && foundIt.length() > 0) {
homeLocation = foundIt;
LOGGER.info("Environment variable for solr home found. Using " + foundIt);
return true;
} else {
LOGGER.debug("No " + envParam + " environment variable for solr home");
}
// TODO add some sort of default location?
LOGGER.error("Tried everything and no value for solr home found");
return false;
}
private static boolean setLocalorRemote() {
if (validRemoteURL(homeLocation)) {
isRemote = true;
return true;
}
if (validFileURL(homeLocation)) {
isRemote = false;
return true;
}
if (validFile(homeLocation)) {
isRemote = false;
return true;
}
return false;
}
/**
* Get a PlacenameMatcher.
*
* @return a PlacenameMatcher
*/
public static PlacenameMatcher getMatcher() {
// if started/configed etc
if (isConfigured) {
if (isStarted) {
PlacenameMatcher tmp = new PlacenameMatcher(solrServerGeo, matchParams);
matchers.put(tmp, true);
return tmp;
} else {
// configured but not started
start();
LOGGER.debug("Autostarting MatcherFactory");
PlacenameMatcher tmp = new PlacenameMatcher(solrServerGeo, matchParams);
matchers.put(tmp, true);
return tmp;
}
} else {
// not configured
// try default config
LOGGER.debug("Trying default config and autostarting Matcher Factory");
config("");
if (isConfigured) {
LOGGER.debug("Default config worked. Try to start");
start();
PlacenameMatcher tmp = new PlacenameMatcher(solrServerGeo, matchParams);
matchers.put(tmp, true);
return tmp;
} else {
LOGGER.error("MatcherFactory not configured and default config didn't work");
return null;
}
}
}
/**
* Get a PlacenameSearcher.
*
* @return a PlacenameSearcher
*/
public static PlacenameSearcher getSearcher() {
// if started/configed etc
if (isConfigured) {
if (isStarted) {
PlacenameSearcher tmp = new PlacenameSearcher(solrServerGeo, searchParams);
searchers.put(tmp, true);
return tmp;
} else {
// configured but not started
start();
LOGGER.debug("Autostarting MatcherFactory");
PlacenameSearcher tmp = new PlacenameSearcher(solrServerGeo, searchParams);
searchers.put(tmp, true);
return tmp;
}
} else {
// not configured
// try default config
LOGGER.debug("Trying default config and autostarting Matcher Factory");
config("");
if (isConfigured) {
LOGGER.debug("Default config worked. Try to start");
start();
PlacenameSearcher tmp = new PlacenameSearcher(solrServerGeo, searchParams);
searchers.put(tmp, true);
return tmp;
} else {
LOGGER.error("MatcherFactory not configured and default config did'nt work");
return null;
}
}
}
/**
* Get a VocabMatcher.
*
* @return a VocabMatcher
*/
public static VocabMatcher getVocabMatcher() {
// if started/configed etc
if (isConfigured) {
if (isStarted) {
VocabMatcher tmp = new VocabMatcher(solrServerVocab, vocabParams);
vocabers.put(tmp, true);
return tmp;
} else {
// configured but not started
start();
LOGGER.debug("Autostarting MatcherFactory");
VocabMatcher tmp = new VocabMatcher(solrServerVocab, vocabParams);
vocabers.put(tmp, true);
return tmp;
}
} else {
// not configured
// try default config
LOGGER.debug("Trying default config and autostarting Matcher Factory");
config("");
if (isConfigured) {
LOGGER.debug("Default config worked. Try to start");
start();
VocabMatcher tmp = new VocabMatcher(solrServerVocab, vocabParams);
vocabers.put(tmp, true);
return tmp;
} else {
LOGGER.error("MatcherFactory not configured and default config did'nt work");
return null;
}
}
}
/**
* @param mtcher
* the matcher which is requesting the shutdown
*/
protected static void shutdown(PlacenameMatcher mtcher) {
matchers.remove(mtcher);
MatcherFactory.shutdown(false);
}
/**
* Request a shutdown.
*
* @param srcher
* the searcher which is request the shutdown
*/
protected static void shutdown(PlacenameSearcher srcher) {
searchers.remove(srcher);
MatcherFactory.shutdown(false);
}
protected static void shutdown(VocabMatcher vocabMatcher) {
vocabers.remove(vocabMatcher);
MatcherFactory.shutdown(false);
}
/**
* Shutdown the MatcherFactory.
*
* @param force
* if true, force a shutdown even if there are matchers and
* searchers still out there (rude)
*/
public static void shutdown(boolean force) {
if (force) {
try {
if (solrServerGeo != null) {
solrServerGeo.close();
}
if (solrServerVocab != null) {
solrServerVocab.close();
}
} catch (IOException e) {
LOGGER.error("Error trying close MatcherFactory" + e.getMessage());
}
isStarted = false;
} else {
try {
if (solrServerGeo != null && !factoryInUse()) {
solrServerGeo.close();
isStarted = false;
}
if (solrServerVocab != null && !factoryInUse()) {
solrServerVocab.close();
isStarted = false;
}
} catch (IOException e) {
LOGGER.error("Error trying close MatcherFactory" + e.getMessage());
}
}
}
private static boolean factoryInUse() {
return !matchers.isEmpty() || !searchers.isEmpty();
}
/**
* Check if a URL is valid.
*
* @param url
* the URL to check
* @return
*/
private static boolean validRemoteURL(String url) {
URL solrURL = null;
try {
solrURL = new URL(url);
} catch (MalformedURLException e) {
// eat the exception and return not valid
return false;
}
return "http".equalsIgnoreCase(solrURL.getProtocol());
}
private static boolean validFileURL(String url) {
URL solrURL = null;
try {
solrURL = new URL(url);
} catch (MalformedURLException e) {
// eat the exception and return not valid
return false;
}
// some sort of URL, check to see if its is a file URL
if ("file".equalsIgnoreCase(solrURL.getProtocol())) {
// see if points to something
File tmp;
try {
tmp = new File(solrURL.toURI());
} catch (URISyntaxException e) {
// can't convert to file.
LOGGER.error("Cannot use " + url + " as solr home. Doesn't appear to be valid file URL", e);
return false;
}
// check if valid file
return validFile(tmp.getAbsolutePath());
}
return false;
}
private static boolean validFile(String file) {
File tmp = new File(file);
if (!tmp.exists()) {
// file doesn't exist
return false;
}
if (!tmp.isDirectory()) {
// not a directory
return false;
}
File solrXML = new File(tmp, "solr.xml");
return solrXML.exists();
}
public static boolean isConfigured() {
return isConfigured;
}
public static boolean isStarted() {
return isStarted;
}
public static String getHomeLocation() {
return homeLocation;
}
protected static String getGazetteerFieldNames() {
return gazetteerFieldNames;
}
protected static String getGazetteerFieldNamesLoader() {
return gazetteerFieldNamesLoader;
}
protected static String getVocabFieldNames() {
return vocabFieldNames;
}
protected static SolrClient getSolrServerGeo() {
return solrServerGeo;
}
protected static SolrClient getSolrServerVocab() {
return solrServerVocab;
}
/**
* Get an integer from a record.
*/
protected static int getInteger(SolrDocument d, String f) {
Object obj = d.getFieldValue(f);
if (obj == null) {
return 0;
}
if (obj instanceof Integer) {
return ((Integer) obj).intValue();
} else {
Integer v = Integer.parseInt(obj.toString());
return v.intValue();
}
}
/**
* Get a floating point object from a record.
*/
protected static Float getFloat(SolrDocument d, String f) {
Object obj = d.getFieldValue(f);
if (obj != null) {
return (Float) obj;
} else {
return 0F;
}
}
/**
* Get a Date object from a record
*
* @throws java.text.ParseException
*/
protected static Date getDate(SolrDocument d, String f) throws java.text.ParseException {
if (d == null || f == null) {
return null;
}
Object obj = d.getFieldValue(f);
if (obj == null) {
return null;
}
if (obj instanceof Date) {
return (Date) obj;
} else if (obj instanceof String) {
return DateUtil.parseDate((String) obj);
}
return null;
}
protected static char getChar(SolrDocument solrDoc, String name) {
String result = getString(solrDoc, name);
if (result == null) {
return 0;
}
if (result.isEmpty()) {
return 0;
}
return result.charAt(0);
}
/**
* Get a String object from a record.
*/
protected static String getString(SolrDocument solrDoc, String name) {
Object result = solrDoc.getFirstValue(name);
if (result != null) {
return result.toString();
}
return null;
}
/**
* Get a double from a record.
*/
protected static double getDouble(SolrDocument solrDoc, String name) {
Object result = solrDoc.getFirstValue(name);
if (result == null) {
throw new IllegalStateException("Blank: " + name + " in " + solrDoc);
}
if (result instanceof Number) {
Number number = (Number) result;
return number.doubleValue();
} else {
return Double.parseDouble(result.toString());
}
}
/**
* Parse XY pair stored in Solr Spatial4J record. No validation is done.
*
* @return XY double array, [lat, lon]
*/
protected static double[] getCoordinate(SolrDocument solrDoc, String field) {
String xy = (String) solrDoc.getFirstValue(field);
if (xy == null) {
throw new IllegalStateException("Blank: " + field + " in " + solrDoc);
}
final double[] xyPair = { 0.0, 0.0 };
String[] latLon = xy.split(",", 2);
xyPair[0] = Double.parseDouble(latLon[0]);
xyPair[1] = Double.parseDouble(latLon[1]);
return xyPair;
}
/**
* Parse XY pair stored in Solr Spatial4J record. No validation is done.
*
* @return XY double array, [lat, lon]
*/
protected static double[] getCoordinate(String xy) {
final double[] xyPair = { 0.0, 0.0 };
String[] latLon = xy.split(",", 2);
xyPair[0] = Double.parseDouble(latLon[0]);
xyPair[1] = Double.parseDouble(latLon[1]);
return xyPair;
}
/**
* @param in
* a string to be interned
* @return the interned string
*/
protected static String internString(String in) {
if (in != null) {
return in.intern();
}
return in;
}
/**
* Create a Place object from a Solr document.
*
* @param gazEntry
* a solr document describing a Place
* @return the Place created from the solr Document
*/
protected static Place createPlace(SolrDocument gazEntry) {
// create the basic Place
Place place = new Place(getString(gazEntry, "place_id"), getString(gazEntry, "name"));
// add the expanded name
place.setExpandedPlaceName(getString(gazEntry, "name_expanded"));
// set name type and nameTypeSystem
place.setNameType(internString(getString(gazEntry, "name_type")));
place.setNameTypeSystem(internString(getString(gazEntry, "name_type_system")));
// set country coude using the cc (ISO2) value
place.setCountryCode(internString(getString(gazEntry, "cc")));
// Set the admin values
place.setAdmin1(internString(getString(gazEntry, "adm1")));
place.setAdmin2(internString(getString(gazEntry, "adm2")));
// map and set the feature class and code
place.setFeatureClass(internString(featureCodeMap.get(getString(gazEntry, "feat_class"))));
place.setFeatureCode(internString(getString(gazEntry, "feat_code")));
// set the source
place.setSource(internString(getString(gazEntry, "source")));
// set the geo
double[] xy = getCoordinate(gazEntry, "geo");
place.setLatitude(xy[0]);
place.setLongitude(xy[1]);
// set the bias values
place.setNameBias(getDouble(gazEntry, "name_bias"));
place.setIdBias(getDouble(gazEntry, "id_bias"));
return place;
}
protected static Vocab createVocab(SolrDocument solrDoc) {
Vocab v = new Vocab();
// get and set the fixed attributes
v.setId(getString(solrDoc, "id"));
v.setVocabMatch(getString(solrDoc, "phrase"));
// TODO add "collection" to schema and loader
v.setCollection("Generic");
v.setCategory(internString(getString(solrDoc, "category")));
v.setTaxonomy(internString(getString(solrDoc, "taxonomy")));
// set any other atttributes from the solrdoc into the vocabMatches
// attributes HashMap
String[] pieces = vocabFieldNames.split(",");
List<String> handled = new ArrayList<String>();
for (String s : pieces) {
handled.add(s);
}
Map<String, Object> tmpMap = solrDoc.getFieldValueMap();
Map<String, Object> others = new HashMap<String, Object>();
for (String s : tmpMap.keySet()) {
if (!handled.contains(s)) {
others.put(s, tmpMap.get(s));
}
}
v.setAttributes(others);
return v;
}
}