package org.apache.lucene.queryparser.flexible.aqp.builders;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.mlt.MoreLikeThisQuery;
import org.apache.lucene.queryparser.flexible.aqp.NestedParseException;
import org.apache.lucene.queryparser.flexible.aqp.config.AqpAdsabsQueryConfigHandler;
import org.apache.lucene.queryparser.flexible.aqp.config.AqpRequestParams;
import org.apache.lucene.queryparser.flexible.aqp.parser.AqpSubqueryParser;
import org.apache.lucene.queryparser.flexible.aqp.parser.AqpSubqueryParserFull;
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
import org.apache.lucene.queryparser.flexible.core.config.QueryConfigHandler;
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.LuceneCacheWrapper;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.SecondOrderCollector;
import org.apache.lucene.search.SecondOrderCollector.FinalValueType;
import org.apache.lucene.search.SecondOrderCollectorAdsClassicScoringFormula;
import org.apache.lucene.search.SecondOrderCollectorCitedBy;
import org.apache.lucene.search.SecondOrderCollectorCites;
import org.apache.lucene.search.SecondOrderCollectorCitesRAM;
import org.apache.lucene.search.SecondOrderCollectorCitingTheMostCited;
import org.apache.lucene.search.SecondOrderCollectorOperatorExpertsCiting;
import org.apache.lucene.search.SecondOrderCollectorTopN;
import org.apache.lucene.search.SecondOrderQuery;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.search.SolrCacheWrapper;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.join.JoinUtil;
import org.apache.lucene.search.join.ScoreMode;
import org.apache.lucene.search.spans.SpanPositionRangeQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.MultiMapSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.AqpFunctionQParser;
import org.apache.solr.search.BoostQParserPlugin;
import org.apache.solr.search.CitationLRUCache;
import org.apache.solr.search.DisMaxQParserPlugin;
import org.apache.solr.search.ExtendedDismaxQParserPlugin;
import org.apache.solr.search.FieldQParserPlugin;
import org.apache.solr.search.FunctionQParser;
import org.apache.solr.search.FunctionQParserPlugin;
import org.apache.solr.search.FunctionRangeQParserPlugin;
import org.apache.solr.search.LuceneQParserPlugin;
import org.apache.solr.search.NestedQParserPlugin;
import org.apache.solr.search.OldLuceneQParserPlugin;
import org.apache.solr.search.PrefixQParserPlugin;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.search.RawQParserPlugin;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SortSpec;
import org.apache.solr.search.SortSpecParsing;
import org.apache.solr.search.SpatialBoxQParserPlugin;
import org.apache.solr.search.SpatialFilterQParserPlugin;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.servlet.SolrRequestParsers;
import org.apache.solr.uninverting.UninvertingReader;
/**
* I know this is confusing. This is called in the building phase;
* by that time all of the parsing has already been done. All the parsers
* registered here return a QUERY.
*
* @see AqpFunctionQueryBuilderProvider
*/
public class AqpAdsabsSubQueryProvider implements
AqpFunctionQueryBuilderProvider {
public static Map<String, AqpSubqueryParser> parsers = new HashMap<String, AqpSubqueryParser>();
//TODO: make configurable
static String[] citationSearchIdField = new String[]{"bibcode", "alternate_bibcode"};
static String citationSearchRefField = "reference";
private static LuceneCacheWrapper<NumericDocValues> getLuceneCache(FunctionQParser fp, String fieldname) throws SyntaxError {
LuceneCacheWrapper<NumericDocValues> cacheWrapper;
SchemaField field = fp.getReq().getSchema().getField(fieldname);
try {
cacheWrapper = LuceneCacheWrapper.getFloatCache(
"cite_read_boost", UninvertingReader.Type.SORTED_SET_FLOAT,
fp.getReq().getSearcher().getSlowAtomicReader());
} catch (IOException e) {
throw new SyntaxError("Naughty, naughty server error", e);
}
return cacheWrapper;
}
static {
/* @api.doc
*
* def lucene(query):
* """
* Default Lucene query parser
* """
*/
parsers.put(LuceneQParserPlugin.NAME, new AqpSubqueryParser() {
public Query parse(FunctionQParser fp) throws SyntaxError {
QParser q = fp.subQuery(fp.getString(), LuceneQParserPlugin.NAME);
return q.getQuery();
}
});
/**
* comment XXX
*/
parsers.put(OldLuceneQParserPlugin.NAME, new AqpSubqueryParser() {
public Query parse(FunctionQParser fp) throws SyntaxError {
QParser q = fp.subQuery(fp.getString(), OldLuceneQParserPlugin.NAME);
return q.getQuery();
}
});
parsers.put(FunctionQParserPlugin.NAME, new AqpSubqueryParser() {
public Query parse(FunctionQParser fp) throws SyntaxError {
QParser q = fp.subQuery(fp.getString(), FunctionQParserPlugin.NAME);
return q.getQuery();
}
});
parsers.put(PrefixQParserPlugin.NAME, new AqpSubqueryParser() {
public Query parse(FunctionQParser fp) throws SyntaxError {
QParser q = fp.subQuery(fp.getString(), PrefixQParserPlugin.NAME);
return q.getQuery();
}
});
parsers.put(BoostQParserPlugin.NAME, new AqpSubqueryParser() {
public Query parse(FunctionQParser fp) throws SyntaxError {
QParser q = fp.subQuery(fp.getString(), BoostQParserPlugin.NAME);
return q.getQuery();
}
});
parsers.put(DisMaxQParserPlugin.NAME, new AqpSubqueryParserFull() {
public Query parse(FunctionQParser fp) throws SyntaxError {
QParser q = fp.subQuery(fp.getString(), DisMaxQParserPlugin.NAME);
return simplify(q.getQuery());
}
});
parsers.put(ExtendedDismaxQParserPlugin.NAME, new AqpSubqueryParserFull() {
public Query parse(FunctionQParser fp) throws SyntaxError {
QParser q = fp.subQuery(fp.getString(), ExtendedDismaxQParserPlugin.NAME);
return simplify(q.getQuery());
}
});
parsers.put(FieldQParserPlugin.NAME, new AqpSubqueryParser() {
public Query parse(FunctionQParser fp) throws SyntaxError {
QParser q = fp.subQuery(fp.getString(), FieldQParserPlugin.NAME);
return q.getQuery();
}
});
parsers.put(RawQParserPlugin.NAME, new AqpSubqueryParser() {
public Query parse(FunctionQParser fp) throws SyntaxError {
String qstr = fp.getString();
if (!qstr.substring(0,2).equals("{!")) {
throw new SyntaxError(
"Raw query parser requires you to specify local params, eg: raw({!f=field}"+fp.getString()+")");
}
QParser q = fp.subQuery(qstr, RawQParserPlugin.NAME);
return q.getQuery();
}
});
parsers.put(NestedQParserPlugin.NAME, new AqpSubqueryParser() {
public Query parse(FunctionQParser fp) throws SyntaxError {
QParser q = fp.subQuery(fp.getString(), NestedQParserPlugin.NAME);
return q.getQuery();
}
});
parsers.put(FunctionRangeQParserPlugin.NAME, new AqpSubqueryParser() {
public Query parse(FunctionQParser fp) throws SyntaxError {
QParser q = fp.subQuery(fp.getString(), FunctionRangeQParserPlugin.NAME);
return q.getQuery();
}
});
parsers.put(SpatialFilterQParserPlugin.NAME, new AqpSubqueryParser() {
public Query parse(FunctionQParser fp) throws SyntaxError {
QParser q = fp.subQuery(fp.getString(), SpatialFilterQParserPlugin.NAME);
return q.getQuery();
}
});
parsers.put(SpatialBoxQParserPlugin.NAME, new AqpSubqueryParser() {
public Query parse(FunctionQParser fp) throws SyntaxError {
QParser q = fp.subQuery(fp.getString(), SpatialBoxQParserPlugin.NAME);
return q.getQuery();
}
});
/* @api.doc
*
* def trending(query):
* """
* Finds the 200 most interesting papers first, then uses
* this initial set to collect *all* readers of these papers
* and then finds other docs these readers read.
*
* Technical note: we are using modified MoreLikeThis
* functionality, with the following parameters:
*
* - setMinTermFrequency(0)
* - setMinDocFreq(2)
* - setMaxQueryTerms(200)
* - setBoost(2.0f)
* - setPercentTermsToMatch(0.0f)
*
* @since 40.2.0.0
*
* """
* return "trending(%s)" % query
*/
// coreads(Q) - what people read: MoreLikeThese(topn(200,classic_relevance(Q)))
parsers.put("trending", new AqpSubqueryParserFull() {
public Query parse(FunctionQParser fp) throws SyntaxError {
QParser aqp = fp.subQuery(fp.getString(), "aqp");
Query innerQuery = aqp.parse();
SolrQueryRequest req = fp.getReq();
SolrIndexSearcher searcher = req.getSearcher();
// find the 200 most interesting papers and collect their readers
SecondOrderQuery discoverMostReadQ = new SecondOrderQuery(innerQuery,
new SecondOrderCollectorTopN(200));
discoverMostReadQ.getcollector().setFinalValueType(FinalValueType.ABS_COUNT);
final StringBuilder readers = new StringBuilder();
final HashSet<String> fieldsToLoad = new HashSet<String>();
final String fieldName = "reader";
fieldsToLoad.add(fieldName);
try {
searcher.search(discoverMostReadQ, new SimpleCollector() {
private Document d;
private LeafReader reader;
private boolean firstPassed = false;
@Override
public void setScorer(Scorer scorer) throws IOException {
//pass
}
@Override
public void collect(int doc) throws IOException {
d = reader.document(doc, fieldsToLoad);
for (String val: d.getValues(fieldName)) {
if (firstPassed)
readers.append(" ");
readers.append(val);
firstPassed = true;
}
}
@Override
public void doSetNextReader(LeafReaderContext context)
throws IOException {
this.reader = context.reader();
}
@Override
public boolean needsScores() {
return true;
}
});
} catch (IOException e) {
throw new SyntaxError(e.getMessage(), e);
}
MoreLikeThisQuery mlt = new MoreLikeThisQuery(readers.toString(), new String[] {fieldName},
new WhitespaceAnalyzer(), fieldName);
// configurable params
mlt.setMinTermFrequency(0);
mlt.setMinDocFreq(2);
mlt.setMaxQueryTerms(200);
mlt.setPercentTermsToMatch(0.0f);
//try {
// Query q = mlt.rewrite(req.getSearcher().getIndexReader());
// System.out.println(q);
//} catch (IOException e) {
//}
return new BoostQuery(mlt, 2.0f);
}
});
/* @api.doc
*
* def pos(query, start, end=None):
* """
* Positional search; returns only documents that
* are in the given position (range).
*
* Example:
*
* ```pos(author:accomazzi, 1)``` finds the papers
* where 'accomazzi' is the first author
*
* ```pos(author:accomazzi, 1, 1)``` finds the papers
* where 'accomazzi' is the only author
*
* ```pos(author:accomazzi, 1, 5)``` finds the papers
* where 'accomazzi' is listed as 1st-5th author
*
* Technical note:
*
* This query will work only for indexes that contain
* positional information, such as: title, author. It
* will not work for other indexes, such as bibcode,
* keyword. Though we'll still allow you to query
* them (even if it is useless).
*
*
* Syntax note:
*
* The old ADS Classic syntax was: ```^accomazzi$```
* where ```^``` means *first* and ```$``` means *last*.
* ADS Classic cannot search for position ranges, but
* the new system cannot search for the last (yet). It
* is low priority now.
*
* @since 40.2.0.0
*
* """
* return "pos(%s, %s, %s)" % (query, start, end or start)
*/
parsers.put("pos", new AqpSubqueryParserFull() {
@Override
public Query parse(FunctionQParser fp) throws SyntaxError {
Query query = fp.parseNestedQuery();
int start = fp.parseInt();
int end = start;
if (fp.hasMoreArguments()) {
end = fp.parseInt();
}
if (fp.hasMoreArguments()) {
throw new NestedParseException("Wrong number of arguments");
}
assert start > 0;
assert start <= end;
SpanConverter converter = new SpanConverter();
converter.setWrapNonConvertible(true);
// a field can have a different positionIncrementGap
int positionIncrementGap = 1;
if (fp.getReq() != null) {
IndexSchema schema = fp.getReq().getSchema();
SchemaField field = schema.getFieldOrNull(query.toString().split(":")[0]);
if (field != null) {
FieldType fType = field.getType();
//if (!fType.isMultiValued()) {
// throw new SyntaxError("The positional search doesn't make sense for: " + query);
//}
positionIncrementGap = fType.getIndexAnalyzer().getPositionIncrementGap(field.getName());
if (positionIncrementGap == 0)
positionIncrementGap = 1;
}
}
SpanQuery spanQuery;
try {
spanQuery = converter.getSpanQuery(new SpanConverterContainer(query, 1, true));
} catch (QueryNodeException e) {
SyntaxError ex = new SyntaxError(e.getMessage(), e);
ex.setStackTrace(e.getStackTrace());
throw ex;
}
return new SpanPositionRangeQuery(spanQuery, (start-1)*positionIncrementGap , end*positionIncrementGap); //lucene counts from zeroes
}
});
/* @api.doc
*
* def classic_relevance(query, ratio=0.5):
* """
* Toy-implementation of the ADS Classic relevance score
* algorithm. You can wrap any query and obtain the
* hits sorted in the ADS Classic ways (sort of)
*
* Technical note:
*
* This is inefficient and not to be used in production.
* We apply the **boost factor** that was computed beforehand
* by ADS Classic to each document that matches. (We are not
* scoring docs that are not selected by Lucene).
* The boost factor is inside ```cite_read_boost``` field -
* we'll use cache to retrieve these values fast,
* but it is still inefficient
*
*
* ADS Classic score is implemented as:
*
* ```new_score = (0.5 * norm(lucene_score)) + (0.5 * cite_read_boost)```
*
* where:
*
* norm(LS) = normalized score (in this case it will be a Lucene
* score, normalized to be in the range 1-0, where
* 1 = the first, best hit; LS/MaximumLuceneScore
*
* cite_read_boost = the document boosts are combination of
* normalized reads and cites:
* ```cite_read_boost = log(1 + cites + norm_reads)```
*
* where:
*
* ```norm_reads``` are normalized values for
* reads over the past two years
*
*
*
* @experimental
* @synonym cr()
* @since 40.2.2.0
* @since 40.3.0.1 - added parameter to configure ratio
*
* """
* return "classic_relevance(%s, %0.2f)" % (query,ratio)
*/
parsers.put("classic_relevance", new AqpSubqueryParserFull() {
public Query parse(FunctionQParser fp) throws SyntaxError {
Query innerQuery = fp.parseNestedQuery();
float ratio = 0.5f;
if (fp.hasMoreArguments()) {
ratio = fp.parseFloat();
}
if (ratio < 0 || ratio > 1.0f) {
throw new SyntaxError("The ratio must be in the range 0.0-1.0");
}
@SuppressWarnings("unchecked")
SolrCacheWrapper<CitationLRUCache<Object, Integer>> citationsWrapper = new SolrCacheWrapper.CitationsCache(
(CitationLRUCache<Object, Integer>) fp.getReq().getSearcher().getCache("citations-cache"));
LuceneCacheWrapper<NumericDocValues> boostWrapper = getLuceneCache(fp, "cite_read_boost");
return new SecondOrderQuery(innerQuery,
new SecondOrderCollectorAdsClassicScoringFormula(citationsWrapper, boostWrapper, ratio));
}
});
parsers.put("cr", parsers.get("classic_relevance"));
/* @api.doc
*
* def topn(max, query, spec=None):
* """
* Limit results to the best top N (by their ranking or sort order)
*
* @param max
* - integer, how many results should be considered
* @param query
* - query object
* @param spec
* - str, can be either 'relevance' or
* sort specification in the SOLR format
*
* Example:
*
* ```topn(200, title:hubble)``` returns only the
* first 200 papers based on the relevancy score
*
* ```topn(200, citations(title:hubble), citation_count desc)```
* returns only the
* first 200 papers, but because the results are
* sorted by number of citations, you will get the first
* 200 most cited papers
*
*
* Technical note:
*
* We do not impose limit of hits that you can return with
* this operator. But you must be aware that the query is
* going to be slower than normal queries.
*
* @since 40.2.2.0
* """
* return "topn(%s, %s, '%s')" % (int(max), query, spec or 'score')
*
*/
parsers.put("topn", new AqpSubqueryParserFull() {
public Query parse(FunctionQParser fp) throws SyntaxError {
int topN = -1;
try {
topN = fp.parseInt();
}
catch (NumberFormatException e) {
throw new SyntaxError("The function signature is topn(int, query, [sort order]). Error: " + e.getMessage());
}
if (topN < 1) { //|| topN > 50000 - previously, i was limiting the fields
throw new SyntaxError("Hmmm, the first argument of your operator must be a positive number.");
}
QParser eqp = fp.subQuery(fp.parseId(), "aqp");
Query innerQuery = eqp.getQuery();
if (innerQuery == null) {
throw new SyntaxError("This query is empty: " + eqp.getString());
}
String sortOrRank = "score";
if (fp.hasMoreArguments()) {
sortOrRank = fp.parseId();
}
sortOrRank = sortOrRank.toLowerCase();
if (sortOrRank.contains("\"") || sortOrRank.contains("\'")) {
sortOrRank = sortOrRank.substring(1, sortOrRank.length()-1);
}
if (sortOrRank.equals("score")) {
return new SecondOrderQuery(innerQuery,
new SecondOrderCollectorTopN(topN));
}
else {
SortSpec sortSpec = SortSpecParsing.parseSortSpec(sortOrRank, fp.getReq());
SolrIndexSearcher searcher = fp.getReq().getSearcher();
TopFieldCollector collector;
try {
collector = TopFieldCollector.create(searcher.weightSort(sortSpec.getSort()), topN, false, true, true);
} catch (IOException e) {
throw new SyntaxError("I am sorry, you can't use " + sortOrRank + " for topn() sorting. Reason: " + e.getMessage());
}
return new SecondOrderQuery(innerQuery,
new SecondOrderCollectorTopN(sortOrRank, topN, collector));
}
}
});
/* @api.doc
*
* def citations(query):
* """
* Finds set of papers that have **P** in their reference list
*
* 'P' is the set of papers that will be selected by the query
*
* Example:
*
* ```citations(title:hubble)``` returns papers (potentionally
* hundreds of thousands!) that are citing papers P
*
*
* ```citations(citations(author:huchra))``` returns papers
* (potentionally millions!) that are citing papers that
* are citing papers written by 'huchra'
*
*
* Technical note:
*
* We have optimized this query so that it works well with
* millions of hits. But don't expect miracles. 0.5M hits
* takes few hundred milliseconds; 2M hits will take seconds
* (but less than 10s, since that is the speed the old desktop
* did it)
*
*
* @since 40.1.0.0
* """
* return "citations(%s)" % (query,)
*
*/
parsers.put("citations", new AqpSubqueryParserFull() {
public Query parse(FunctionQParser fp) throws SyntaxError {
Query innerQuery = fp.parseNestedQuery();
@SuppressWarnings("unchecked")
SolrCacheWrapper<CitationLRUCache<Object, Integer>> citationsWrapper = new SolrCacheWrapper.CitationsCache(
(CitationLRUCache<Object, Integer>) fp.getReq().getSearcher().getCache("citations-cache"));
return new SecondOrderQuery(innerQuery,
new SecondOrderCollectorCitedBy(citationsWrapper), false);
}
});
/* @api.doc
*
* def references(query):
* """
* Finds set of papers that **are** in the references list of **P**
*
* 'P' is the set of papers that will be selected by the query
*
* Example:
*
* ```references(title:hubble)``` returns papers (potentionally
* few hundred) that are **cited by** papers that have 'hubble'
* in their title
*
*
* ```references(author:huchra)``` returns papers
* that your favorite author cites
*
*
* Technical note:
*
* The same caveats as citations()
*
*
* @since 40.1.0.0
* """
* return "references(%s)" % (query,)
*
*/
parsers.put("references", new AqpSubqueryParserFull() {
public Query parse(FunctionQParser fp) throws SyntaxError {
Query innerQuery = fp.parseNestedQuery();
@SuppressWarnings("unchecked")
SolrCacheWrapper<CitationLRUCache<Object, Integer>> referencesWrapper = new SolrCacheWrapper.ReferencesCache(
(CitationLRUCache<Object, Integer>) fp.getReq().getSearcher().getCache("citations-cache"));
return new SecondOrderQuery(innerQuery,
new SecondOrderCollectorCitesRAM(referencesWrapper), false);
}
});
/* @api.doc
*
* def joincitations(query):
* """
* Equivalent of citations() but implemented using lucene block-join
*
*
* @experimental
* @access devel
* @since 40.1.0.0
* """
* return "joincitations(%s)" % (query,)
*/
parsers.put("joincitations", new AqpSubqueryParserFull() {
public Query parse(FunctionQParser fp) throws SyntaxError {
Query innerQuery = fp.parseNestedQuery();
SolrQueryRequest req = fp.getReq();
try {
// XXX: not sure if i can use several fields: citationSearchIdField
return JoinUtil.createJoinQuery("bibcode", false, "reference", innerQuery,
req.getSearcher(), ScoreMode.Avg);
} catch (IOException e) {
throw new SyntaxError(e.getMessage());
}
}
});
/* @api.doc
*
* def joinreferences(query):
* """
* Equivalent of references() but implemented using lucene block-join
*
*
* @experimental
* @access devel
* @since 40.1.0.0
* """
* return "joinreferences(%s)" % (query,)
*
*/
parsers.put("joinreferences", new AqpSubqueryParserFull() {
public Query parse(FunctionQParser fp) throws SyntaxError {
Query innerQuery = fp.parseNestedQuery();
SolrQueryRequest req = fp.getReq();
try {
return JoinUtil.createJoinQuery("reference", true, "bibcode", innerQuery,
req.getSearcher(), ScoreMode.None); // will not work properly iff mode=Avg|Max
} catch (IOException e) {
throw new SyntaxError(e.getMessage());
}
}
});
/* @api.doc
*
* def useful(query):
* """
* What experts are citing; this mimics the ADS Classic implementation
* ```references(topn(200, classic_relevance(Q)))```
*
* In other words, this will first find papers using the inner query,
* it will re-score them using the ADS classic ranking formula,
* then selects 200 top papers. And then get **references from** these
* 200 papers.
*
* @experimental
* @since 40.2.0.0
* """
* return "useful(%s)" % (query,)
*
*/
parsers.put("useful", new AqpSubqueryParserFull() { // this function values can be analyzed
public Query parse(FunctionQParser fp) throws SyntaxError {
Query innerQuery = fp.parseNestedQuery();
@SuppressWarnings("unchecked")
SolrCacheWrapper<CitationLRUCache<Object, Integer>> referencesWrapper = new SolrCacheWrapper.ReferencesCache(
(CitationLRUCache<Object, Integer>) fp.getReq().getSearcher().getCache("citations-cache"));
LuceneCacheWrapper<NumericDocValues> boostWrapper = getLuceneCache(fp, "cite_read_boost");
SecondOrderQuery outerQuery = new SecondOrderQuery( // references
new SecondOrderQuery( // topn
new SecondOrderQuery(innerQuery, // classic_relevance
new SecondOrderCollectorAdsClassicScoringFormula(referencesWrapper, boostWrapper)),
new SecondOrderCollectorTopN(200)),
new SecondOrderCollectorCitesRAM(referencesWrapper));
outerQuery.getcollector().setFinalValueType(FinalValueType.ABS_COUNT_NORM);
return outerQuery;
};
});
/* @api.doc
*
* def useful2(query):
* """
* What experts are citing; original implementation of useful()
* -- using special collector
*
* Technical details:
*
* This function will add the cite_read_boost factor (from the
* 1st order set) to the score (of the 2nd order result set).
* If no boost factor is available, doc will be penalized by
* having its score lowered by 20%
*
* @access devel
* @experimental
* @since 40.1.2.0
* """
* return "useful2(%s)" % (query,)
*
*/
parsers.put("useful2", new AqpSubqueryParserFull() { // this function values can be analyzed
public Query parse(FunctionQParser fp) throws SyntaxError {
Query innerQuery = fp.parseNestedQuery();
@SuppressWarnings("unchecked")
SolrCacheWrapper<CitationLRUCache<Object, Integer>> citationsWrapper = new SolrCacheWrapper.CitationsCache(
(CitationLRUCache<Object, Integer>) fp.getReq().getSearcher().getCache("citations-cache"));
//TODO: make configurable the name of the field
LuceneCacheWrapper<NumericDocValues> boostWrapper = getLuceneCache(fp, "cite_read_boost");
return new SecondOrderQuery(innerQuery,
new SecondOrderCollectorOperatorExpertsCiting(citationsWrapper, boostWrapper));
}
});
/* @api.doc
*
* def reviews(query):
* """
* What is cited by experts; this mimics the ADS Classic implementation
* is: ```citations(topn(200, classic_relevance(Q)))```
*
* In other words, this will first find papers using the query,
* it will re-score them using the ADS classic ranking formula,
* then selects 200 top papers. And then get **citations for** these
* 200 papers.
*
* @experimental
* @since 40.2.0.0
* """
* return "reviews(%s)" % (query,)
*
*/
parsers.put("reviews", new AqpSubqueryParserFull() { // this function values can be analyzed
public Query parse(FunctionQParser fp) throws SyntaxError {
Query innerQuery = fp.parseNestedQuery();
@SuppressWarnings("unchecked")
SolrCacheWrapper<CitationLRUCache<Object, Integer>> citationsWrapper = new SolrCacheWrapper.CitationsCache(
(CitationLRUCache<Object, Integer>) fp.getReq().getSearcher().getCache("citations-cache"));
LuceneCacheWrapper<NumericDocValues> boostWrapper = getLuceneCache(fp, "cite_read_boost");
SecondOrderQuery outerQuery = new SecondOrderQuery( // citations
new SecondOrderQuery( // topn
new SecondOrderQuery(innerQuery, // classic_relevance
new SecondOrderCollectorAdsClassicScoringFormula(citationsWrapper, boostWrapper)),
new SecondOrderCollectorTopN(200)),
new SecondOrderCollectorCitedBy(citationsWrapper));
outerQuery.getcollector().setFinalValueType(FinalValueType.ABS_COUNT);
return outerQuery;
};
});
/* @api.doc
*
* def instructive(query):
* """
* The synonym of @see reviews
* """
* return reviews(query)
*/
parsers.put("instructive", parsers.get("reviews"));
// original impl of reviews() = find papers that cite the most cited papers
parsers.put("reviews2", new AqpSubqueryParserFull() { // this function values can be analyzed
public Query parse(FunctionQParser fp) throws SyntaxError {
Query innerQuery = fp.parseNestedQuery();
@SuppressWarnings("unchecked")
SolrCacheWrapper<CitationLRUCache<Object, Integer>> citationsWrapper = new SolrCacheWrapper.CitationsCache(
(CitationLRUCache<Object, Integer>) fp.getReq().getSearcher().getCache("citations-cache"));
LuceneCacheWrapper<NumericDocValues> boostWrapper = getLuceneCache(fp, "cite_read_boost");
return new SecondOrderQuery(innerQuery,
new SecondOrderCollectorCitingTheMostCited(citationsWrapper, boostWrapper));
}
});
parsers.put("citis", new AqpSubqueryParserFull() {
public Query parse(FunctionQParser fp) throws SyntaxError {
Query innerQuery = fp.parseNestedQuery();
@SuppressWarnings("unchecked")
SolrCacheWrapper<CitationLRUCache<Object, Integer>> citationsWrapper = new SolrCacheWrapper.CitationsCache(
(CitationLRUCache<Object, Integer>) fp.getReq().getSearcher().getCache("citations-cache"));
return new SecondOrderQuery(innerQuery,
new SecondOrderCollectorCites(citationsWrapper, new String[] {citationSearchRefField}), false);
}
});
parsers.put("aqp", new AqpSubqueryParserFull() {
public Query parse(FunctionQParser fp) throws SyntaxError {
QParser q = fp.subQuery(fp.getString(), "aqp");
return q.getQuery();
}
});
parsers.put("adismax", new AqpSubqueryParserFull() {
public Query parse(FunctionQParser fp) throws SyntaxError {
QParser q = fp.subQuery(fp.getString(), "adismax");
return simplify(q.getQuery());
}
});
parsers.put("edismax_nonanalyzed", new AqpSubqueryParserFull() { // used for nodes that were already analyzed
public Query parse(FunctionQParser fp) throws SyntaxError {
final String original = fp.getString();
QParser ep = fp.subQuery("xxx", "adismax");
Query q = ep.getQuery();
QParser fakeParser = new QParser(original, null, null, null) {
@Override
public Query parse() throws SyntaxError {
String[] parts = getString().split(":");
return new TermQuery(new Term(parts[0], original));
}
};
return simplify(reParse(q, fakeParser, TermQuery.class));
}
});
parsers.put("edismax_combined_aqp", new AqpSubqueryParserFull() { // will decide whether new aqp() parse is needed
public Query parse(FunctionQParser fp) throws SyntaxError {
final String original = fp.getString();
QParser eqp = fp.subQuery(original, "adismax");
Query q = eqp.getQuery();
return simplify(q);
}
protected Query swimDeep(DisjunctionMaxQuery query) throws SyntaxError {
List<Query> parts = query.getDisjuncts();
for (int i=0;i<parts.size();i++) {
Query oldQ = parts.get(i);
String field = null;
if (oldQ instanceof TermQuery) {
field = toBeAnalyzedAgain(((TermQuery) oldQ));
}
else if(oldQ instanceof BooleanQuery) {
List<BooleanClause>clauses = ((BooleanQuery) oldQ).clauses();
if (clauses.size()>0) {
Query firstQuery = clauses.get(0).getQuery();
if (firstQuery instanceof TermQuery) {
field = toBeAnalyzedAgain(((TermQuery) firstQuery));
}
}
}
if (field!=null) {
parts.set(i, reAnalyze(field, getParser().getString(),
oldQ.getClass().isInstance(BoostQuery.class) ? ((BoostQuery)oldQ).getBoost() : null));
}
else {
parts.set(i, swimDeep(oldQ));
}
}
return query;
}
private String toBeAnalyzedAgain(TermQuery q) {
//String f = q.getTerm().field();
//if (f.equals("author")) {
// return "author";
//}
return null;
//return f; // always re-analyze
}
private Query reAnalyze(String field, String value, Float boost) throws SyntaxError {
QParser fParser = getParser();
System.out.println(field+ ":"+fParser.getString() + "|value=" + value);
QParser aqp = fParser.subQuery(field+ ":"+fParser.getString(), "aqp");
Query q = aqp.getQuery();
if (boost != null && boost != 1.0f) {
q = new BoostQuery(q, boost);
}
return q;
}
});
parsers.put("edismax_always_aqp", new AqpSubqueryParserFull() { // will use edismax to create top query, but the rest is done by aqp
public Query parse(FunctionQParser fp) throws SyntaxError {
final String original = fp.getString();
QParser eqp = fp.subQuery("xxx", "adismax");
fp.setString(original);
Query q = eqp.getQuery();
return simplify(reParse(q, fp, (Class<?>)null));
}
protected Query swimDeep(DisjunctionMaxQuery query) throws SyntaxError {
List<Query> parts = query.getDisjuncts();
for (int i=0;i<parts.size();i++) {
Query oldQ = parts.get(i);
String field = null;
if (oldQ instanceof TermQuery) {
field = ((TermQuery)oldQ).getTerm().field();
}
else if(oldQ instanceof BooleanQuery) {
List<BooleanClause>clauses = ((BooleanQuery) oldQ).clauses();
if (clauses.size()>0) {
Query firstQuery = clauses.get(0).getQuery();
if (firstQuery instanceof TermQuery) {
field = ((TermQuery) firstQuery).getTerm().field();
}
}
}
if (field!=null) {
parts.set(i, reAnalyze(field, getParser().getString(),
oldQ.getClass().isInstance(BoostQuery.class) ? ((BoostQuery)oldQ).getBoost() : null));
}
else {
parts.set(i, swimDeep(oldQ));
}
}
return query;
}
private Query reAnalyze(String field, String value, Float boost) throws SyntaxError {
QParser fParser = getParser();
QParser aqp = fParser.subQuery(field+ ":"+fParser.getString(), "aqp");
Query q = aqp.getQuery();
if (boost != null && boost != 1.0f) {
q = new BoostQuery(q, boost);
}
return q;
}
});
parsers.put("tweak", new AqpSubqueryParserFull() {
public Query parse(FunctionQParser fp) throws SyntaxError {
String configuration = fp.parseId();
Query q = fp.parseNestedQuery();
MultiMapSolrParams params = SolrRequestParsers.parseQueryString(configuration);
if (params.get("collector_final_value", null) != null) {
String cfv = params.get("collector_final_value", "avg");
if (q instanceof SecondOrderQuery) {
SecondOrderCollector collector = ((SecondOrderQuery) q).getcollector();
try {
collector.setFinalValueType(SecondOrderCollector.FinalValueType.valueOf(cfv));
}
catch (IllegalArgumentException e) {
throw new SyntaxError("Wrong parameter: " + e.getMessage(), e);
}
}
}
return q;
}
});
// helper method; SOLR is not warming up caches when index is opened first time
// so we have to do it ourselves
parsers.put("warm_cache", new AqpSubqueryParserFull() {
@SuppressWarnings("unchecked")
public Query parse(FunctionQParser fp) throws SyntaxError {
final SolrQueryRequest req = fp.getReq();
@SuppressWarnings("rawtypes")
final CitationLRUCache cache = (CitationLRUCache) req.getSearcher().getCache("citations-cache");
if (!cache.isWarmingOrWarmed()) {
cache.warm(req.getSearcher(), cache);
}
return new MatchNoDocsQuery();
}
});
};
/**
* comment ZZZZZ
*/
public AqpFunctionQueryBuilder getBuilder(String funcName, QueryNode node, QueryConfigHandler config)
throws QueryNodeException {
AqpSubqueryParser provider = parsers.get(funcName);
if (provider == null)
return null;
AqpRequestParams reqAttr = config.get(AqpAdsabsQueryConfigHandler.ConfigurationKeys.SOLR_REQUEST);
SolrQueryRequest req = reqAttr.getRequest();
if (req == null)
return null;
SolrParams localParams = reqAttr.getLocalParams();
if (localParams == null) {
localParams = new ModifiableSolrParams();
}
else {
localParams = new ModifiableSolrParams(localParams);
}
if (localParams.get(QueryParsing.DEFTYPE, null) == null) {
((ModifiableSolrParams) localParams).set(QueryParsing.DEFTYPE, "aqp");
}
AqpFunctionQParser parser = new AqpFunctionQParser("", localParams,
reqAttr.getParams(), req);
return new AqpSubQueryTreeBuilder(provider, parser);
}
/*
private void getSpan(QueryNode node, Integer[] span) {
List<QueryNode> children = node.getChildren();
swimDeep(children.get(0), span);
swimDeep(children.get(children.size()-1), span);
}
private void swimDeep(QueryNode node, Integer[] span) {
if (node instanceof AqpANTLRNode) {
int i = ((AqpANTLRNode) node).getTokenStart();
int j = ((AqpANTLRNode) node).getTokenEnd();
if(j>i) {
if (i != -1 && i < span[0]) {
span[0] = i;
}
if (j != -1 && j > span[1]) {
span[1] = j;
}
}
}
if (!node.isLeaf()) {
for (QueryNode child: node.getChildren()) {
swimDeep(child, span);
}
}
}
*/
}