/*******************************************************************************
* Copyright (c) 2011 Miami-Dade County.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the GNU Public License v2.0
* which accompanies this distribution, and is available at
* http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
*
* Contributors:
* Miami-Dade County - initial API and implementation
******************************************************************************/
package org.sharegov.cirm.search.solr;
import static mjson.Json.array;
import static mjson.Json.object;
import static org.sharegov.cirm.search.solr.SearchQuery.not;
import static org.sharegov.cirm.search.solr.SearchQuery.and;
import static org.sharegov.cirm.search.solr.SearchQuery.boost;
import static org.sharegov.cirm.search.solr.SearchQuery.field;
import static org.sharegov.cirm.search.solr.SearchQuery.oneOf;
import static org.sharegov.cirm.search.solr.SearchQuery.or;
import static org.sharegov.cirm.search.solr.SearchQuery.quote;
import static org.sharegov.cirm.search.solr.SearchQuery.requiredField;
import static org.sharegov.cirm.utils.GenUtils.ko;
import static org.sharegov.cirm.utils.GenUtils.ok;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import mjson.Json;
import org.semanticweb.owlapi.model.OWLClass;
import org.semanticweb.owlapi.model.OWLNamedIndividual;
import org.semanticweb.owlapi.reasoner.OWLReasoner;
import org.sharegov.cirm.OWL;
import org.sharegov.cirm.owl.Model;
import org.sharegov.cirm.utils.Mapping;
/**
 * Builds Solr queries from a free-text question plus a JSON map of search
 * criteria, executes them through {@link SolrClient} and decorates the
 * returned documents with an agency label.
 */
public class SolrSearchEngine
{
    static final String noJurisdiction = Model.upper("NO_JURISDICTION").toString();
    static final String fedGov = Model.upper("Federal_Government").toString();
    static final String stateGov = Model.upper("State_of_Florida").toString();
    static final String countyGov = Model.upper("Miami-Dade_County").toString();

    public static final String ESTIMATED_RESULT_COUNT = "estimatedResultCount";
    /** Name of the {@code meta} property holding the index of the first row to return. */
    public static final String START_PAGE = "start";
    /** Name of the {@code meta} property holding the maximum number of rows to return. */
    public static final String ROWS = "rows";
    /** Name of the {@code meta} property holding the field to sort on. */
    public static final String SORT_BY = "sortBy";
    /** Name of the {@code meta} property holding the sort direction. */
    public static final String SORT_DIR = "sortDir";

    OWLClass countyCl = OWL.owlClass("County_Organization"),
             cityCl = OWL.owlClass("City_Organization");

    /**
     * Sets the {@code agencyLabel} property of a result document based on the
     * entries of its {@code ontology} array: {@code "COM"} for the first entry
     * that ends with {@code City_of_Miami} or contains {@code COM_},
     * {@code "MDC"} for the first entry the reasoner types as a county
     * organization. Entries are examined in order and the first match wins.
     * Documents without an {@code ontology} array, or with no matching entry,
     * are left unlabeled.
     *
     * @param doc      the result document to decorate (modified in place)
     * @param reasoner reasoner used to look up the types of each ontology entry
     */
    private void setAgencyLabel(Json doc, OWLReasoner reasoner)
    {
        Json ontology = doc.at("ontology");
        // A document lacking the field must not abort the whole search.
        if (ontology == null || !ontology.isArray())
            return;
        String agencyLabel = null;
        for (Json entry : ontology.asJsonList())
        {
            String iri = entry.asString();
            if (iri.endsWith("City_of_Miami") || iri.contains("COM_"))
            {
                agencyLabel = "COM";
                break;
            }
            OWLNamedIndividual ind = OWL.individual(iri);
            if (reasoner.getTypes(ind, false).containsEntity(countyCl))
            {
                agencyLabel = "MDC";
                break;
            }
        }
        if (agencyLabel != null)
            doc.set("agencyLabel", agencyLabel);
    }

    /**
     * Searches the index.
     *
     * <p>Paging and sorting are read from the {@code meta} property of
     * {@code params} ({@link #ROWS} default 25, {@link #START_PAGE} default 0,
     * {@link #SORT_BY} default {@code "score"}, {@link #SORT_DIR} default
     * {@code "desc"}). The {@code agency} property is expanded into a
     * jurisdiction restriction; every other property becomes an additional
     * required field criterion (array values match any of their elements).
     *
     * <p>When {@code filter} is non-null, successive pages are fetched until
     * {@code rows} documents accepted by the filter have been collected or the
     * index returns an empty page.
     *
     * @param question free text or a URL to look for; {@code null} or blank
     *                 matches every document whose {@code url} starts with "h"
     * @param params   JSON object with the search criteria described above
     * @param filter   optional predicate applied to each document, may be
     *                 {@code null}
     * @return an {@code ok} object with {@code docs} and {@code total}, or a
     *         {@code ko} object carrying the Solr {@code responseHeader} when
     *         the engine reports a non-zero status
     */
    public Json find(String question, Json params, Mapping<Json, Boolean> filter)
    {
        SolrClient cl = new SolrClient();
        SearchQuery query = new SearchQuery();
        SearchResultTransform transform = new SearchResultTransform();

        Json meta = params.at("meta", object());
        int rows = meta.at(ROWS, 25).asInteger();
        int start = meta.at(START_PAGE, 0).asInteger();
        query.setRows(rows);
        query.setStart(start);
        query.setSortBy(meta.at(SORT_BY, "score").asString());
        query.setSortDir(meta.at(SORT_DIR, "desc").asString());

        QueryExpression expression = questionExpression(question);
        for (Map.Entry<String, Json> e : params.asJsonMap().entrySet())
        {
            String name = e.getKey();
            if ("meta".equals(name))
                continue;
            if ("agency".equals(name))
                expression = restrictToAgency(expression, e.getValue());
            else
                expression = restrictToField(expression, name, e.getValue());
        }
        query.setExpression(expression);

        Json result = ok().set("docs", array());
        if (filter == null)
        {
            Json answer = cl.search(query, transform);
            if (!succeeded(answer))
                return failure(answer);
            result.set("docs", answer.at("response").at("docs"))
                  .set("total", answer.at("response").at("numFound"));
        }
        else
        {
            Json docs = result.at("docs");
            while (docs.asJsonList().size() < rows)
            {
                query.setStart(start);
                Json answer = cl.search(query, transform);
                // Check the status before touching "response": a failed call
                // is not guaranteed to carry one.
                if (!succeeded(answer))
                    return failure(answer);
                Json response = answer.at("response");
                if (!result.has("total"))
                    result.set("total", response.at("numFound"));
                Json page = response.at("docs");
                if (page.asJsonList().isEmpty())
                    break;
                for (Json x : page.asJsonList())
                {
                    if (filter.eval(x))
                        docs.add(x);
                    if (docs.asJsonList().size() >= rows)
                        break;
                }
                start += rows;
            }
        }

        OWLReasoner reasoner = OWL.reasoner();
        for (Json x : result.at("docs").asJsonList())
            setAgencyLabel(x, reasoner);
        return result;
    }

    /** Builds the base expression for the question: match-all, exact URL, or boosted text search. */
    private static QueryExpression questionExpression(String question)
    {
        if (question == null || question.trim().length() == 0)
            return requiredField("url", "h*");
        if (question.startsWith("http://") || question.startsWith("https://"))
            return requiredField("url", '"' + question + '"');
        String phrase = '"' + question + '"';
        // Phrase matches are boosted above term matches; title above keywords above text.
        return or(boost(field("title", phrase), 5000.0f),
                  boost(field("title", question), 20.0f),
                  boost(field("keywords", phrase), 300.0f),
                  boost(field("keywords", question), 100.0f),
                  boost(field("text", phrase), 50.0f),
                  field("text", question));
    }

    /** Narrows the expression to documents applicable to the agency and the government levels added for it. */
    private static QueryExpression restrictToAgency(QueryExpression expression, Json agencyValue)
    {
        String agency = agencyValue.asString();
        Set<String> ocrit = new HashSet<String>();
        ocrit.add(noJurisdiction);
        ocrit.add(agency);
        if (agency.equals(countyGov))
        {
            ocrit.add(stateGov);
            ocrit.add(fedGov);
        }
        else if (agency.equals(stateGov))
        {
            ocrit.add(fedGov);
        }
        else if (!agency.equals(fedGov))
        {
            // Any other agency also gets the county, state and federal levels.
            ocrit.add(countyGov);
            ocrit.add(stateGov);
            ocrit.add(fedGov);
        }
        QueryExpression restricted = and(expression, oneOf("ontology", ocrit));
        return and(restricted, not(field("notRelevantFor", agencyValue.toString())));
    }

    /** Adds a criterion on one field: any-of for array values, a (quoted, if string) single value otherwise. */
    private static QueryExpression restrictToField(QueryExpression expression, String name, Json value)
    {
        if (value.isArray())
            return and(expression, oneOf(name, value.asList()));
        Object v = value.getValue();
        if (v instanceof String)
            v = quote(v);
        return and(expression, field(name, v));
    }

    /** Tells whether the Solr answer reports status 0 in its response header. */
    private static boolean succeeded(Json answer)
    {
        return answer.at("responseHeader").is("status", 0L);
    }

    /** Builds the error object returned to callers when Solr reports a failure. */
    private static Json failure(Json answer)
    {
        return ko("Search engine failed.").set("responseHeader", answer.at("responseHeader"));
    }
}