/* Copyright (2006-2012) Schibsted ASA
* This file is part of Possom.
*
* Possom is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Possom is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Possom. If not, see <http://www.gnu.org/licenses/>.
*
* WhoWhereSplitter.java
*
* Created on 22 February 2007, 14:04
*
*/
package no.sesat.search.query.finder;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import no.sesat.search.query.AndNotClause;
import no.sesat.search.query.Clause;
import no.sesat.search.query.DefaultOperatorClause;
import no.sesat.search.query.BinaryClause;
import no.sesat.search.query.LeafClause;
import no.sesat.search.query.NotClause;
import no.sesat.search.query.UnaryClause;
import no.sesat.search.query.QueryContext;
import no.sesat.search.query.XorClause;
import no.sesat.commons.visitor.AbstractReflectionVisitor;
import no.sesat.search.query.token.Categories;
import no.sesat.search.query.token.TokenPredicate;
/** Essentially a QueryTransformer, the similarity is also evident in the context required.
* <br/>
* But because it splits the one query into two it doesn't fit into the
* command's query transformation chain, and is used by commands manually instead.
* <br/>
* This class splits the query provided in the context into a who and where components.
* These are returned as strings in the WhoWhereSplit class and the user is expected to parse each into separate
* query objects if required.
* <br/>
* The specifications of the split are complicated and were originally given by sesam.se's HittaSearchCommand.
* <br/>
* If a query contains multiple fullnames and/or companynames then both who and where will be returned blank.<br/>
* If just one fullname or companyname is found none of its terms are moved to the where component.<br/>
* Otherwise terms which match a geographical tokenPredicate and do not match a name or phoneNumber tokenPredicate
* are moved to the where component.<br/>
*
* <br/>
* It is guaranteed that all terms in the original query can be found in either the who or where components except
* the case when both components are blank.<br/>
*
* <br/>
* The usefulness of this class is heavily dependent on the TokenPredicates:
* FULLNAME, COMPANYENRICHMENT, COMPANY_KEYWORD, FIRSTNAME, LASTNAME, PHONENUMBER, GEOGLOBAL, and GEOLOCAL;
* being kept available and up to date.<br/>
*
*
*
* @version <tt>$Id$</tt>
*/
public final class WhoWhereSplitter extends AbstractReflectionVisitor{
/**
 * Context this class requires to work within.
 * <br/>
 * On top of the query supplied through QueryContext, the splitter needs the textual form of every term
 * and the list of applications (white and/or yellow logic) it is allowed to apply.
 */
public interface Context extends QueryContext{
/** Get the terms with their current transformed representations.
 * <br/>
 * The splitter looks up each leaf clause it visits in this map and appends the value found
 * to either the who or the where component.
 * NOTE(review): a leaf clause without an entry would be appended as the text "null"
 * (StringBuilder behaviour) - presumably the map always holds every leaf of the query; confirm with callers.
 *
 * @return map from clause to the string that represents it in the split output
 */
Map<Clause,String> getTransformedTerms();
/** Get the applications the splitter may use.
 * <br/>
 * Application.WHITE switches on the checks against the FULLNAME, FIRSTNAME and LASTNAME predicates,
 * Application.YELLOW switches on the checks against the COMPANYENRICHMENT and COMPANY_KEYWORD predicates.
 * Only contains(..) is called on the returned list.
 *
 * @return the applications enabled for this split
 */
List<Application> getApplications();
}
// Constants -----------------------------------------------------
// Attributes ----------------------------------------------------
/** Supplies the query, the transformed terms and the enabled applications. */
private final Context context;
/** Buffer for the who component; created by the first call to getWhoWhereSplit(). */
private StringBuilder who;
/** Buffer for the where component; null until the first call to getWhoWhereSplit(), which uses it as the
 * "already computed" marker. */
private StringBuilder where;
/** Written by FullnameOrCompanyFinder: a clause matching COMPANYENRICHMENT or COMPANY_KEYWORD was found
 * (only with Application.YELLOW). */
private boolean hasCompany = false;
/** Written by FullnameOrCompanyFinder: a DefaultOperatorClause matching FULLNAME was found
 * (only with Application.WHITE). */
private boolean hasFullname = false;
/** Written by FullnameOrCompanyFinder: a company term was met when a company had already been recorded. */
private boolean multipleCompany = false;
/** Written by FullnameOrCompanyFinder: a fullname was met when a fullname had already been recorded. */
private boolean multipleFullname = false;
/** Cleared by visitImpl(LeafClause) when the split must be aborted; while false the who component is
 * returned blank and unary/binary clauses are no longer descended into. */
private boolean validQuery = true;
/** First-pass visitor that fills in the hasXxx/multipleXxx flags before the split itself is performed. */
private final FullnameOrCompanyFinder fullnameOrCompanyFinder = new FullnameOrCompanyFinder();
/** Ancestors matching GEOLOCAL or GEOGLOBAL of which at least one term has already been appended to who;
 * they are ignored when deciding whether later terms are geographic. */
private final Set<UnaryClause> invalidatedPlaces = new HashSet<UnaryClause>();
// Static --------------------------------------------------------
// Constructors --------------------------------------------------
/** Creates a new instance of WhoWhereSplitter.
 * <br/>
 * Nothing is computed here; the query is only visited on the first call to getWhoWhereSplit().
 *
 * @param context supplies the query to split, its transformed terms and the enabled applications.
 *        It is stored as is, without a null check.
 */
public WhoWhereSplitter(final Context context) {
this.context = context;
}
// Public --------------------------------------------------------
/**
 * Splits the context's query into its who and where components.
 * <br/>
 * The query is visited only on the first call; later calls build a new result from the same buffers.
 * The split itself is skipped, leaving both components blank, when the first pass finds both a company
 * and a fullname, or more than one of either.
 * The who component is also returned blank when the split was aborted part way through.
 *
 * @return the trimmed who and where strings, never null
 */
public WhoWhereSplit getWhoWhereSplit(){

    if(null == where){

        who = new StringBuilder();
        where = new StringBuilder();

        // first pass: find out which kinds of names the query holds
        fullnameOrCompanyFinder.visit(context.getQuery().getRootClause());

        final boolean ambiguous = (hasCompany && hasFullname) || multipleCompany || multipleFullname;
        if(!ambiguous){
            // second pass: distribute the terms
            visit(context.getQuery().getRootClause());
        }
    }

    final String whoPart = validQuery ? who.toString().trim() : "";
    final String wherePart = where.toString().trim();
    return new WhoWhereSplit(whoPart, wherePart);
}
// Package protected ---------------------------------------------
// Protected -----------------------------------------------------
/**
 * Routes a single term into either the who or the where buffer.
 * <br/>
 * A term goes to where when it is geographic (it matches GEOLOCAL or GEOGLOBAL itself, or lies inside an
 * ancestor that does and that has not been invalidated), carries no field, and is not claimed by a name:
 * if the query holds a fullname or a company the term must lie outside of both, otherwise the term must
 * not itself be a firstname, lastname or phone number.
 * <br/>
 * Any other term goes to who, and every geographic ancestor of it is invalidated for later terms.
 * The exception is a loose name or number next to a company (or a multiple fullname/company flag):
 * then nothing is appended and the query is marked invalid.
 *
 * @param clause the term to place
 */
protected void visitImpl(final LeafClause clause) {

    final List<UnaryClause> ancestors = context.getQuery().getParentFinder()
            .getAncestors(context.getQuery().getRootClause(), clause);

    // a place whose terms have partly been spent on who no longer makes its remaining terms geographic
    final List<UnaryClause> placeAncestors = new ArrayList<UnaryClause>(ancestors);
    placeAncestors.removeAll(invalidatedPlaces);

    final boolean geographic
            = clause.getKnownPredicates().contains(Categories.GEOLOCAL)
            || clause.getKnownPredicates().contains(Categories.GEOGLOBAL)
            || ParentFinder.insideOf(placeAncestors, Categories.GEOLOCAL)
            || ParentFinder.insideOf(placeAncestors, Categories.GEOGLOBAL);

    // fielded terms are never moved to where
    final boolean whereCandidate = geographic && null == clause.getField();

    final boolean white = context.getApplications().contains(Application.WHITE);
    final boolean yellow = context.getApplications().contains(Application.YELLOW);

    // does any ancestor of this term match the fullname predicate?
    final boolean insideFullname = white && ParentFinder.insideOf(ancestors, Categories.FULLNAME);

    final boolean looseName = white
            && (clause.getKnownPredicates().contains(Categories.FIRSTNAME)
                || clause.getKnownPredicates().contains(Categories.LASTNAME));
    final boolean nameOrNumber = looseName || clause.getKnownPredicates().contains(Categories.PHONENUMBER);

    // does the term, or any ancestor of it, match a company predicate?
    final boolean companyMatch
            = clause.getKnownPredicates().contains(Categories.COMPANYENRICHMENT)
            || clause.getKnownPredicates().contains(Categories.COMPANY_KEYWORD)
            || ParentFinder.insideOf(ancestors, Categories.COMPANYENRICHMENT)
            || ParentFinder.insideOf(ancestors, Categories.COMPANY_KEYWORD);
    final boolean isOrInsideCompany = yellow && companyMatch;

    // without any fullname or company in the query a plain firstname, lastname or number claims the term
    final boolean claimedByName = (hasCompany || hasFullname)
            ? insideFullname || isOrInsideCompany
            : nameOrNumber;

    if(whereCandidate && !claimedByName){
        // add this term to the geo query string
        where.append(context.getTransformedTerms().get(clause));
        return;
    }

    // this is a company query but this clause isn't the company but a loose name.
    // abort this hitta search, see SEARCH-966 - hitta enrichment
    // OR there are multiple fullnames or company names.
    final boolean looseNameBesideCompany = hasCompany && !isOrInsideCompany && nameOrNumber;
    if(looseNameBesideCompany || multipleCompany || multipleFullname){
        validQuery = false;
        return;
    }

    who.append(context.getTransformedTerms().get(clause));

    // invalidate any parent geo term since part of it has now been used in the who field
    for(UnaryClause ancestor : ancestors){
        final boolean place = ancestor.getKnownPredicates().contains(Categories.GEOLOCAL)
                || ancestor.getKnownPredicates().contains(Categories.GEOGLOBAL);
        if(place){
            invalidatedPlaces.add(ancestor);
        }
    }
}
/**
 * Descends into the only child of a unary clause, unless the split has already been aborted.
 *
 * @param clause the clause whose child is visited
 */
protected void visitImpl(final UnaryClause clause) {

    if(!validQuery){
        // the split was aborted, nothing below can change the outcome
        return;
    }
    clause.getFirstClause().accept(this);
}
/**
 * Descends into both children of a binary clause, unless the split has already been aborted on entry.
 * <br/>
 * A blank is written to both buffers between the two children so that neighbouring terms stay separated
 * whichever buffer they end up in; surplus blanks at the ends are trimmed by getWhoWhereSplit().
 *
 * @param clause the clause whose children are visited, first then second
 */
protected void visitImpl(final BinaryClause clause) {

    if(!validQuery){
        return;
    }

    clause.getFirstClause().accept(this);

    where.append(' ');
    who.append(' ');

    clause.getSecondClause().accept(this);
}
/**
 * Deliberately does nothing: a NOT clause is not descended into, so the terms below it are written
 * to neither the who nor the where component.
 *
 * @param clause the negated clause, ignored
 */
protected void visitImpl(final NotClause clause) {
// intentionally empty
}
/**
 * Deliberately does nothing: an ANDNOT clause is not descended into, so the terms below it are written
 * to neither the who nor the where component.
 *
 * @param clause the negated clause, ignored
 */
protected void visitImpl(final AndNotClause clause) {
// intentionally empty
}
/**
 * Visits exactly one of the two alternatives of an XOR clause, chosen by the clause's hint.
 * <ul>
 * <li>NUMBER_GROUP_ON_LEFT: the second alternative is used.</li>
 * <li>PHONE_NUMBER_ON_LEFT: the first alternative is used when it matches the PHONENUMBER predicate,
 *     otherwise the second alternative is used.</li>
 * <li>any other hint: the first alternative is used.</li>
 * </ul>
 * Only one alternative may be visited: each visited term is appended to the who or where buffer,
 * so visiting both alternatives would write the same part of the query twice.
 *
 * @param clause the XOR clause to resolve
 */
@SuppressWarnings("fallthrough")
protected void visitImpl(final XorClause clause) {

    switch(clause.getHint()){
        case NUMBER_GROUP_ON_LEFT:
            clause.getSecondClause().accept(this);
            break;
        case PHONE_NUMBER_ON_LEFT:
            if( !clause.getFirstClause().getKnownPredicates().contains(Categories.PHONENUMBER) ){
                // the left side is no phone number after all, use the right side instead of it
                clause.getSecondClause().accept(this);
                break;
            }
            // intentionally fall through to default!
        default:
            clause.getFirstClause().accept(this);
            break;
    }
}
// Private -------------------------------------------------------
// Inner classes -------------------------------------------------
/**
 * First pass over the query. Records in the enclosing splitter's flags whether the query holds a
 * fullname and/or a company, and whether it holds more than one of either.
 * <br/>
 * The multipleCompany and multipleFullname flags only ever go from false to true during the pass.
 * A sibling term visited after the second match must not be able to clear them again, otherwise the
 * outcome would depend on where in the query the matches happen to sit.
 */
private final class FullnameOrCompanyFinder extends AbstractReflectionVisitor{

    /**
     * @return true while the outcome is still open, that is neither "company and fullname" nor
     *         "multiple companies" nor "multiple fullnames" has been established
     */
    private boolean undecided(){
        return !(hasCompany && hasFullname) && !multipleCompany && !multipleFullname;
    }

    /**
     * Counts a term matching COMPANYENRICHMENT or COMPANY_KEYWORD as a company, provided
     * Application.YELLOW is enabled and the term does not sit inside a fullname.
     * A company term met when a company has already been recorded flags the query as multiple company.
     * <br/>
     * NOTE(review): unlike the DefaultOperatorClause handler this does not check whether the term sits
     * inside a clause that was itself already counted as the company - confirm that this is intended.
     *
     * @param clause the term to inspect
     */
    protected void visitImpl(final LeafClause clause) {

        final Set<TokenPredicate> predicates = clause.getKnownPredicates();

        final boolean insideFullname = context.getApplications().contains(Application.WHITE)
                && ParentFinder.insideOf(context.getQuery().getParentFinder().getAncestors(
                    context.getQuery().getRootClause(), clause),
                Categories.FULLNAME);

        if(!insideFullname){

            boolean company = context.getApplications().contains(Application.YELLOW);
            company &= predicates.contains(Categories.COMPANYENRICHMENT)
                    || predicates.contains(Categories.COMPANY_KEYWORD);

            // accumulate: a later non-company term must not reset an already detected second company
            multipleCompany |= hasCompany && company;
            hasCompany |= company;
        }
    }

    /**
     * Descends into the only child while the outcome is still open.
     *
     * @param clause the clause whose child is visited
     */
    protected void visitImpl(final UnaryClause clause) {

        if(undecided()){
            clause.getFirstClause().accept(this);
        }
    }

    /**
     * Descends into both children when the outcome is still open on entry.
     *
     * @param clause the clause whose children are visited
     */
    protected void visitImpl(final BinaryClause clause) {

        if(undecided()){
            clause.getFirstClause().accept(this);
            clause.getSecondClause().accept(this);
        }
    }

    /**
     * Inspects a clause joined by the default operator, which is where a fullname can be recognised.
     * Clauses already inside a fullname or a company are skipped altogether, children included.
     * Otherwise a FULLNAME match (with Application.WHITE) is recorded as a fullname, and a
     * COMPANYENRICHMENT or COMPANY_KEYWORD match that is not a fullname (with Application.YELLOW)
     * as a company. A fullname met when one has already been recorded flags the query as multiple fullname.
     *
     * @param clause the clause to inspect
     */
    protected void visitImpl(final DefaultOperatorClause clause) {

        final List<UnaryClause> parents
                = context.getQuery().getParentFinder().getAncestors(context.getQuery().getRootClause(), clause);

        final boolean insideFullname = context.getApplications().contains(Application.WHITE)
                && ParentFinder.insideOf(parents, Categories.FULLNAME);

        boolean insideCompany = context.getApplications().contains(Application.YELLOW);
        insideCompany &= ParentFinder.insideOf(parents, Categories.COMPANYENRICHMENT)
                || ParentFinder.insideOf(parents, Categories.COMPANY_KEYWORD);

        if(!insideFullname && !insideCompany){

            final Set<TokenPredicate> predicates = clause.getKnownPredicates();

            boolean fullname = context.getApplications().contains(Application.WHITE)
                    && predicates.contains(Categories.FULLNAME);

            // accumulate: a later clause that is no fullname must not reset an already detected second fullname
            multipleFullname |= fullname && hasFullname;
            hasFullname |= fullname;

            hasCompany |= !fullname && context.getApplications().contains(Application.YELLOW)
                && (predicates.contains(Categories.COMPANYENRICHMENT)
                    || predicates.contains(Categories.COMPANY_KEYWORD));

            // a clause that is no fullname is always searched further,
            // a fullname only while the outcome is still open
            if(!fullname || undecided()){
                clause.getFirstClause().accept(this);
                clause.getSecondClause().accept(this);
            }
        }
    }

    /**
     * Deliberately does nothing: negated terms take no part in the detection.
     *
     * @param clause ignored
     */
    protected void visitImpl(final NotClause clause) {
    }

    /**
     * Deliberately does nothing: negated terms take no part in the detection.
     *
     * @param clause ignored
     */
    protected void visitImpl(final AndNotClause clause) {
    }

    /**
     * Descends into the first alternative only, while the outcome is still open.
     * The multiple flags are sticky and on their own already blank the split, so stopping on them as
     * well does not change the result of the split.
     *
     * @param clause the XOR clause whose first alternative is visited
     */
    protected void visitImpl(final XorClause clause) {

        if(undecided()){
            clause.getFirstClause().accept(this);
        }
    }
}
/**
 * Immutable holder for the outcome of a split: the who and the where component, each as a string.
 * The values are kept exactly as handed to the constructor.
 */
public static final class WhoWhereSplit{

    private final String whoPart;
    private final String wherePart;

    /**
     * @param who the who component
     * @param where the where component
     */
    public WhoWhereSplit(final String who, final String where){
        whoPart = who;
        wherePart = where;
    }

    /**
     * @return the who component as passed to the constructor
     */
    public String getWho(){
        return whoPart;
    }

    /**
     * @return the where component as passed to the constructor
     */
    public String getWhere(){
        return wherePart;
    }
}
/**
 * The kinds of logic a WhoWhereSplitter can be asked to apply.
 * The splitter's context lists the ones that are enabled; both may be enabled at once.
 */
public enum Application {

    /**
     * White logic: person names.
     * Enables the checks against the fullname, firstname and lastname lists.
     */
    WHITE,

    /**
     * Yellow logic: companies.
     * Enables the checks against the companyenrich list.
     */
    YELLOW
}
}