/*
* Carrot2 project.
*
* Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński.
* All rights reserved.
*
* Refer to the full license file "carrot2.LICENSE"
* in the root folder of the repository checkout or at:
* http://www.carrot2.org/carrot2.LICENSE
*/
package org.carrot2.source.idol;
import java.net.URL;
import java.net.URLEncoder;
import java.util.concurrent.Callable;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.carrot2.core.Document;
import org.carrot2.core.IDocumentSource;
import org.carrot2.core.ProcessingException;
import org.carrot2.core.attribute.Init;
import org.carrot2.core.attribute.Processing;
import org.carrot2.source.MultipageSearchEngine;
import org.carrot2.source.MultipageSearchEngineMetadata;
import org.carrot2.source.SearchEngineResponse;
import org.carrot2.source.opensearch.RomeFetcherUtils;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.AttributeLevel;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Group;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Label;
import org.carrot2.util.attribute.Level;
import org.carrot2.util.attribute.Required;
import org.carrot2.util.attribute.constraint.IntRange;
import org.slf4j.Logger;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import com.sun.syndication.fetcher.FeedFetcher;
import com.sun.syndication.fetcher.impl.HttpURLFeedFetcher;
/**
* A {@link IDocumentSource} fetching {@link Document}s (search results) from an IDOL
* Search Engine. Please note that you will need to install an XSLT stylesheet in your
* IDOL instance that transforms the search results into the OpenSearch format. The XSLT
* stylesheet is available under the <tt>org.carrot2.source.idol</tt> package, next to
* the binaries of this class.
*
* <p>
* Based on code donated by Julien Nioche. Autonomy IDOL support contributed by James
* Sealey.
* </p>
*
* @see "http://www.autonomy.com/content/Products/products-idol-server/index.en.html"
*/
@Bindable(prefix = "IdolDocumentSource")
public class IdolDocumentSource extends MultipageSearchEngine
{
/** Logger for this class. */
final static Logger logger = org.slf4j.LoggerFactory
.getLogger(IdolDocumentSource.class);
/**
* Maximum concurrent threads from all instances of this component.
*/
private static final int MAX_CONCURRENT_THREADS = 10;
/**
* URL of the IDOL Server.
*/
@Input
@Processing
@Init
@Attribute
@Required
@Label("IDOL server address")
@Level(AttributeLevel.BASIC)
@Group(SERVICE)
public String idolServerName;
/**
* IDOL Server Port.
*/
@Input
@Processing
@Init
@Attribute
@Required
@Label("IDOL server port")
@Level(AttributeLevel.BASIC)
@Group(SERVICE)
public int idolServerPort;
/**
* IDOL XSL Template Name. The Reference of an IDOL XSL template that outputs the
* results in OpenSearch format.
*/
@Input
@Processing
@Init
@Attribute
@Required
@Label("IDOL XSL template name")
@Level(AttributeLevel.ADVANCED)
@Group(SERVICE)
public String xslTemplateName;
/**
* Any other search attributes (separated by &) from the Autonomy Query Search
* API's Ensure all the attributes are entered to satisfy XSL that will be applied.
*/
@Input
@Processing
@Init
@Attribute
@Label("Other IDOLSearch attributes")
@Level(AttributeLevel.ADVANCED)
@Group(SERVICE)
public String otherSearchAttributes;
/**
* Results per page. The number of results per page the document source will expect
* the feed to return.
*/
@Input
@Processing
@Init
@Attribute
@Required
@IntRange(min = 1)
@Label("Results per page")
@Level(AttributeLevel.ADVANCED)
@Group(SERVICE)
public int resultsPerPage = 50;
/**
* Minimum IDOL Score. The minimum score of the results returned by IDOL.
*/
@Input
@Processing
@Init
@Attribute
@IntRange(min = 1)
@Label("Minimum score")
@Level(AttributeLevel.BASIC)
@Group(SERVICE)
public int minScore = 50;
/**
* Maximum number of results. The maximum number of results the document source can
* deliver.
*/
@Input
@Processing
@Init
@Attribute
@IntRange(min = 1)
@Label("Maximum results")
@Level(AttributeLevel.BASIC)
@Group(SERVICE)
public int maximumResults = 100;
/**
* User agent header. The contents of the User-Agent HTTP header to use when making
* requests to the feed URL. If empty or <code>null</code> value is provided, the
* following User-Agent will be sent:
* <code>Rome Client (http://tinyurl.com/64t5n) Ver: UNKNOWN</code>.
*/
@Input
@Init
@Processing
@Attribute
@Label("User agent")
@Level(AttributeLevel.ADVANCED)
@Group(SERVICE)
public String userAgent = null;
/**
* User name to use for authentication.
*/
@Input
@Processing
@Attribute
@Label("User name")
@Level(AttributeLevel.MEDIUM)
@Group(SERVICE)
public String userName;
/**
* Search engine metadata create upon initialization.
*/
private MultipageSearchEngineMetadata metadata;
/** Fetcher for OpenSearch feed. */
private FeedFetcher feedFetcher;
@Override
public void beforeProcessing()
{
this.metadata = new MultipageSearchEngineMetadata(resultsPerPage, maximumResults,
false);
this.feedFetcher = new HttpURLFeedFetcher();
if (org.apache.commons.lang.StringUtils.isNotBlank(this.userAgent))
{
this.feedFetcher.setUserAgent(this.userAgent);
}
}
@Override
public void process() throws ProcessingException
{
super.process(metadata,
getSharedExecutor(MAX_CONCURRENT_THREADS, this.getClass()));
}
@Override
protected Callable<SearchEngineResponse> createFetcher(final SearchRange bucket)
{
return new SearchEngineResponseCallable()
{
public SearchEngineResponse search() throws Exception
{
final String url = getURL();
logger.debug("Fetching URL: " + url);
return RomeFetcherUtils.fetchUrl(url, feedFetcher);
}
};
}
// Set the URL string using the Autonomy connection
private String getURL()
{
// Set stringbuilder to create the url string
StringBuilder url = new StringBuilder();
// Append Server Address
url.append("http://");
url.append(this.idolServerName);
url.append(":");
url.append(this.idolServerPort);
// build query parameters
url.append("/action=Query&");
url.append("Text=" + query + "&");
url.append("MinScore=" + this.minScore + "&");
url.append("maxresults=" + this.maximumResults + "&");
url.append("template=" + this.xslTemplateName + "&");
url.append(this.otherSearchAttributes);
// Add the security token if the username has been set
if (userName != null)
{
String securityToken = getSecurityToken();
if (securityToken != "")
{
url.append("&SecurityInfo=" + securityToken);
}
}
// return the URL to an IDOL OPEN Search results page
return url.toString();
}
// get the security token using the username
private String getSecurityToken()
{
String rtn = "";
try
{
String url = "http://" + this.idolServerName + ":" + this.idolServerPort
+ "/" + "action=userread&username=" + userName + "&securityinfo=true";
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilder db = dbf.newDocumentBuilder();
org.w3c.dom.Document doc = db.parse(new URL(url).openStream());
rtn = URLEncoder.encode(getSecurityInfo(doc), "UTF-8");
}
catch (Exception e)
{
logger.error("Could not get security token", e);
}
return rtn;
}
// extract the token from the XML document
private String getSecurityInfo(org.w3c.dom.Document document)
{
String rtn = "";
Element e = document.getDocumentElement();
NodeList nodeList = e.getElementsByTagName("responsedata");
for (int temp = 0; temp < nodeList.getLength(); temp++)
{
Node nNode = nodeList.item(temp);
if (nNode.getNodeType() == Node.ELEMENT_NODE)
{
Element eElement = (Element) nNode;
rtn = getTagValue("autn:securityinfo", eElement);
}
}
return rtn;
}
// get the tag value
private static String getTagValue(String sTag, Element eElement)
{
NodeList nlList = eElement.getElementsByTagName(sTag).item(0).getChildNodes();
Node nValue = (Node) nlList.item(0);
return nValue.getNodeValue();
}
}