AqpAdsabsSubQueryProvider.java example

Explorer
montysolr-master
- contrib
package org.apache.lucene.queryparser.flexible.aqp.builders;

import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.mlt.MoreLikeThisQuery;
import org.apache.lucene.queryparser.flexible.aqp.NestedParseException;
import org.apache.lucene.queryparser.flexible.aqp.config.AqpAdsabsQueryConfigHandler;
import org.apache.lucene.queryparser.flexible.aqp.config.AqpRequestParams;
import org.apache.lucene.queryparser.flexible.aqp.parser.AqpSubqueryParser;
import org.apache.lucene.queryparser.flexible.aqp.parser.AqpSubqueryParserFull;
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
import org.apache.lucene.queryparser.flexible.core.config.QueryConfigHandler;
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.LuceneCacheWrapper;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.SecondOrderCollector;
import org.apache.lucene.search.SecondOrderCollector.FinalValueType;
import org.apache.lucene.search.SecondOrderCollectorAdsClassicScoringFormula;
import org.apache.lucene.search.SecondOrderCollectorCitedBy;
import org.apache.lucene.search.SecondOrderCollectorCites;
import org.apache.lucene.search.SecondOrderCollectorCitesRAM;
import org.apache.lucene.search.SecondOrderCollectorCitingTheMostCited;
import org.apache.lucene.search.SecondOrderCollectorOperatorExpertsCiting;
import org.apache.lucene.search.SecondOrderCollectorTopN;
import org.apache.lucene.search.SecondOrderQuery;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.search.SolrCacheWrapper;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.join.JoinUtil;
import org.apache.lucene.search.join.ScoreMode;
import org.apache.lucene.search.spans.SpanPositionRangeQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.MultiMapSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.AqpFunctionQParser;
import org.apache.solr.search.BoostQParserPlugin;
import org.apache.solr.search.CitationLRUCache;
import org.apache.solr.search.DisMaxQParserPlugin;
import org.apache.solr.search.ExtendedDismaxQParserPlugin;
import org.apache.solr.search.FieldQParserPlugin;
import org.apache.solr.search.FunctionQParser;
import org.apache.solr.search.FunctionQParserPlugin;
import org.apache.solr.search.FunctionRangeQParserPlugin;
import org.apache.solr.search.LuceneQParserPlugin;
import org.apache.solr.search.NestedQParserPlugin;
import org.apache.solr.search.OldLuceneQParserPlugin;
import org.apache.solr.search.PrefixQParserPlugin;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.search.RawQParserPlugin;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SortSpec;
import org.apache.solr.search.SortSpecParsing;
import org.apache.solr.search.SpatialBoxQParserPlugin;
import org.apache.solr.search.SpatialFilterQParserPlugin;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.servlet.SolrRequestParsers;
import org.apache.solr.uninverting.UninvertingReader;


/**
 * I know this is confusing. This is called in the building phase,
 * by that time all the parsing was already done. All the parsers
 * here return a QUERY
 * 
 * @see AqpFunctionQueryBuilderProvider
 */
public class AqpAdsabsSubQueryProvider implements
AqpFunctionQueryBuilderProvider {

	
	/**
	 * Registry of the sub-query parsers, keyed by the name under which each
	 * parser is registered in the static initializer of this class.
	 */
	public static Map<String, AqpSubqueryParser> parsers = new HashMap<String, AqpSubqueryParser>();

	//TODO: make configurable
	// Names of the identifier fields and of the reference field used for citation
	// searches. NOTE(review): in the part of the file visible here only
	// citationSearchRefField is read (by the "citis" parser).
	static String[] citationSearchIdField = new String[]{"bibcode", "alternate_bibcode"};
	static String citationSearchRefField = "reference";
	
	/**
	 * Creates a float cache wrapper for the given field, backed by the
	 * slow atomic reader of the request's searcher.
	 *
	 * @param fp        parser that gives access to the current request
	 * @param fieldname name of the field whose values should be cached
	 * @return the cache wrapper for {@code fieldname}
	 * @throws SyntaxError when the cache cannot be created because of an I/O error
	 */
	private static LuceneCacheWrapper<NumericDocValues> getLuceneCache(FunctionQParser fp, String fieldname) throws SyntaxError {
		// Schema lookup kept from the original code; presumably it rejects
		// field names that are not defined in the schema - TODO confirm.
		fp.getReq().getSchema().getField(fieldname);
		try {
			// use the requested field (the name used to be hard-coded to
			// "cite_read_boost", which silently ignored the parameter)
			return LuceneCacheWrapper.getFloatCache(
					fieldname, UninvertingReader.Type.SORTED_SET_FLOAT,
					fp.getReq().getSearcher().getSlowAtomicReader());
		} catch (IOException e) {
			throw new SyntaxError("Naughty, naughty server error", e);
		}
	}
	
	static {
		
		/* @api.doc
		 * 
		 * def lucene(query):
		 * 		"""
		 *    Default Lucene query parser
		 * 		"""
		 */
		parsers.put(LuceneQParserPlugin.NAME, new AqpSubqueryParser() {
			// hands the raw argument string over to the default lucene parser
			public Query parse(FunctionQParser fp) throws SyntaxError {
				return fp.subQuery(fp.getString(), LuceneQParserPlugin.NAME).getQuery();
			}
		});
		/*
		 * The parsers below all follow the same pattern: the argument string
		 * of the function is passed to the Solr parser registered under the
		 * given name and the resulting query is returned (the dismax flavours
		 * additionally pass the result through simplify()).
		 */
		parsers.put(OldLuceneQParserPlugin.NAME, new AqpSubqueryParser() {
			public Query parse(FunctionQParser fp) throws SyntaxError {
				return fp.subQuery(fp.getString(), OldLuceneQParserPlugin.NAME).getQuery();
			}
		});
		parsers.put(FunctionQParserPlugin.NAME, new AqpSubqueryParser() {
			public Query parse(FunctionQParser fp) throws SyntaxError {
				return fp.subQuery(fp.getString(), FunctionQParserPlugin.NAME).getQuery();
			}
		});
		parsers.put(PrefixQParserPlugin.NAME, new AqpSubqueryParser() {
			public Query parse(FunctionQParser fp) throws SyntaxError {
				return fp.subQuery(fp.getString(), PrefixQParserPlugin.NAME).getQuery();
			}
		});
		parsers.put(BoostQParserPlugin.NAME, new AqpSubqueryParser() {
			public Query parse(FunctionQParser fp) throws SyntaxError {
				return fp.subQuery(fp.getString(), BoostQParserPlugin.NAME).getQuery();
			}
		});
		parsers.put(DisMaxQParserPlugin.NAME, new AqpSubqueryParserFull() {
			public Query parse(FunctionQParser fp) throws SyntaxError {
				Query parsed = fp.subQuery(fp.getString(), DisMaxQParserPlugin.NAME).getQuery();
				return simplify(parsed);
			}
		});
		parsers.put(ExtendedDismaxQParserPlugin.NAME, new AqpSubqueryParserFull() {
			public Query parse(FunctionQParser fp) throws SyntaxError {
				Query parsed = fp.subQuery(fp.getString(), ExtendedDismaxQParserPlugin.NAME).getQuery();
				return simplify(parsed);
			}
		});
		parsers.put(FieldQParserPlugin.NAME, new AqpSubqueryParser() {
			public Query parse(FunctionQParser fp) throws SyntaxError {
				return fp.subQuery(fp.getString(), FieldQParserPlugin.NAME).getQuery();
			}
		});
		parsers.put(RawQParserPlugin.NAME, new AqpSubqueryParser() {
			/**
			 * Passes the argument to the raw parser; the argument must start
			 * with local params ("{!...}").
			 *
			 * @throws SyntaxError when the argument does not start with "{!"
			 */
			public Query parse(FunctionQParser fp) throws SyntaxError {
				String qstr = fp.getString();
				// startsWith() is safe for empty/one-character input, where
				// substring(0,2) used to throw StringIndexOutOfBoundsException
				if (qstr == null || !qstr.startsWith("{!")) {
					throw new SyntaxError(
							"Raw query parser requires you to specify local params, eg: raw({!f=field}"+qstr+")");
				}
				QParser q = fp.subQuery(qstr, RawQParserPlugin.NAME);
				return q.getQuery();
			}
		});
		// Same delegation pattern as above: argument string in, query of the
		// named Solr parser out.
		parsers.put(NestedQParserPlugin.NAME, new AqpSubqueryParser() {
			public Query parse(FunctionQParser fp) throws SyntaxError {
				return fp.subQuery(fp.getString(), NestedQParserPlugin.NAME).getQuery();
			}
		});
		parsers.put(FunctionRangeQParserPlugin.NAME, new AqpSubqueryParser() {
			public Query parse(FunctionQParser fp) throws SyntaxError {
				return fp.subQuery(fp.getString(), FunctionRangeQParserPlugin.NAME).getQuery();
			}
		});
		parsers.put(SpatialFilterQParserPlugin.NAME, new AqpSubqueryParser() {
			public Query parse(FunctionQParser fp) throws SyntaxError {
				return fp.subQuery(fp.getString(), SpatialFilterQParserPlugin.NAME).getQuery();
			}
		});
		parsers.put(SpatialBoxQParserPlugin.NAME, new AqpSubqueryParser() {
			public Query parse(FunctionQParser fp) throws SyntaxError {
				return fp.subQuery(fp.getString(), SpatialBoxQParserPlugin.NAME).getQuery();
			}
		});

		/* @api.doc
		 * 
		 * def trending(query):
		 * 		"""
		 *    Finds the 200 most interesting papers first, then uses
		 *    this initial set to collect *all* readers of these papers
		 *    and then finds other docs these readers read.
		 *    
		 *    Technical note: we are using modified MoreLikeThis
		 *    functionality, with the following parameters:
		 *    
		 *     - setMinTermFrequency(0)
		 *		 - setMinDocFreq(2)
		 *		 - setMaxQueryTerms(200)
		 *     - setBoost(2.0f)
		 *     - setPercentTermsToMatch(0.0f)
		 *     
		 *    @since 40.2.0.0 
		 * 		
		 * 		"""
		 *    return "trending(%s)" % query
		 */
		// coreads(Q) - what people read: MoreLikeThese(topn(200,classic_relevance(Q)))
		parsers.put("trending", new AqpSubqueryParserFull() {
			/**
			 * Parses the argument with the "aqp" parser, limits it to the top
			 * 200 hits, gathers the values of the "reader" field of those hits
			 * and returns a boosted MoreLikeThis query built from them.
			 */
			public Query parse(FunctionQParser fp) throws SyntaxError {
				Query innerQuery = fp.subQuery(fp.getString(), "aqp").parse();
				SolrIndexSearcher searcher = fp.getReq().getSearcher();

				// the 200 best papers; their readers are collected below
				SecondOrderQuery topPapers = new SecondOrderQuery(innerQuery,
						new SecondOrderCollectorTopN(200));
				topPapers.getcollector().setFinalValueType(FinalValueType.ABS_COUNT);

				final String readerField = "reader";
				final HashSet<String> storedFields = new HashSet<String>();
				storedFields.add(readerField);
				final StringBuilder readerIds = new StringBuilder();

				try {
					searcher.search(topPapers, new SimpleCollector() {
						private LeafReader currentLeaf;
						private boolean needsSeparator = false;

						@Override
						public void doSetNextReader(LeafReaderContext context)
						throws IOException {
							currentLeaf = context.reader();
						}

						@Override
						public void setScorer(Scorer scorer) throws IOException {
							// the scorer is not used
						}

						@Override
						public void collect(int doc) throws IOException {
							// append every reader value, separated by single spaces
							Document stored = currentLeaf.document(doc, storedFields);
							for (String readerId : stored.getValues(readerField)) {
								if (needsSeparator) {
									readerIds.append(" ");
								}
								readerIds.append(readerId);
								needsSeparator = true;
							}
						}

						@Override
						public boolean needsScores() {
							return true;
						}
					});
				} catch (IOException e) {
					throw new SyntaxError(e.getMessage(), e);
				}

				MoreLikeThisQuery similar = new MoreLikeThisQuery(readerIds.toString(),
						new String[] {readerField}, new WhitespaceAnalyzer(), readerField);

				// configurable params
				similar.setMinTermFrequency(0);
				similar.setMinDocFreq(2);
				similar.setMaxQueryTerms(200);
				similar.setPercentTermsToMatch(0.0f);

				return new BoostQuery(similar, 2.0f);
			}
		});

		
		/* @api.doc
		 * 
		 * def pos(query, start, end=None):
		 * 		"""
		 *    Positional search; returns only documents that
		 *    are in the given position (range).
		 *    
		 *    Example: 
		 *    
		 *    	```pos(author:accomazzi, 1)``` finds the papers
		 *    			where 'accomazzi' is the first author
		 *      
		 *      ```pos(author:accomazzi, 1, 1)``` finds the papers
		 *          where 'accomazzi' is the only author
		 *          
		 *      ```pos(author:accomazzi, 1, 5)``` finds the papers
		 *          where 'accomazzi' is listed as 1st-5th author
		 *      
		 *    Technical note:
		 *    
		 *    This query will work only for indexes that contain
		 *    positional information, such as: title, author. It
		 *    will not work for other indexes, such as bibcode,
		 *    keyword. Though we'll still allow you to query 
		 *    them (even if it is useless).
		 *    
		 *    
		 *    Syntax note:
		 *    
		 *    The old ADS Classic syntax was: ```^accomazzi$```
		 *    where ```^``` means *first* and ```$``` means *last*.
		 *    ADS Classic cannot search for position ranges, but
		 *    the new system cannot search for the last (yet). It
		 *    is low priority now. 
		 *     
		 *    @since 40.2.0.0 
		 * 		
		 * 		"""
		 *    return "pos(%s, %s, %s)" % (query, start, end or start)
		 */
		parsers.put("pos", new AqpSubqueryParserFull() {
			/**
			 * Builds a positional (span range) query: pos(query, start[, end]).
			 * Positions are 1-based in the function; when {@code end} is not
			 * given it equals {@code start}.
			 *
			 * @throws SyntaxError when there are too many arguments, when
			 *         start is smaller than 1, when end is smaller than start,
			 *         or when the query cannot be converted into a span query
			 */
			@Override
			public Query parse(FunctionQParser fp) throws SyntaxError {
				Query query = fp.parseNestedQuery();
				int start = fp.parseInt();
				int end = start;

				if (fp.hasMoreArguments()) {
					end = fp.parseInt();
				}

				if (fp.hasMoreArguments()) {
					throw new NestedParseException("Wrong number of arguments");
				}

				// Explicit validation: 'assert' is skipped unless the JVM runs
				// with -ea, which let invalid positions through silently.
				if (start < 1) {
					throw new SyntaxError("The start position must be a positive number, got: " + start);
				}
				if (end < start) {
					throw new SyntaxError("The end position (" + end
							+ ") must not be smaller than the start position (" + start + ")");
				}

				SpanConverter converter = new SpanConverter();
				converter.setWrapNonConvertible(true);

				// a field can have a different positionIncrementGap
				int positionIncrementGap = 1;
				if (fp.getReq() != null) {
					IndexSchema schema = fp.getReq().getSchema();
					// the field name is taken from the text before the first ':'
					// of the query's string form
					SchemaField field = schema.getFieldOrNull(query.toString().split(":")[0]);
					if (field != null) {
						FieldType fType = field.getType();
						positionIncrementGap = fType.getIndexAnalyzer().getPositionIncrementGap(field.getName());
						if (positionIncrementGap == 0)
							positionIncrementGap = 1;
					}
				}

				SpanQuery spanQuery;
				try {
					spanQuery = converter.getSpanQuery(new SpanConverterContainer(query, 1, true));
				} catch (QueryNodeException e) {
					SyntaxError ex = new SyntaxError(e.getMessage(), e);
					ex.setStackTrace(e.getStackTrace());
					throw ex;
				}

				return new SpanPositionRangeQuery(spanQuery, (start-1)*positionIncrementGap , end*positionIncrementGap); //lucene counts from zeroes
			}
		});

		/* @api.doc
		 * 
		 * def classic_relevance(query, ratio=0.5):
		 * 		"""
		 *    Toy-implementation of the ADS Classic relevance score
		 *    algorithm. You can wrap any query and obtain the 
		 *    hits sorted in the ADS Classic ways (sort of)
		 *    
		 *    Technical note:
		 *    
		 *    This is inefficient and not to be used in production. 
		 *    We apply the **boost factor** that was computed beforehand
		 *    by ADS Classic to each document that matches. (We are not
		 *    scoring docs that are not selected by Lucene). 
		 *    The boost factor is inside ```cite_read_boost``` field - 
		 *    we'll use cache to retrieve these values fast, 
		 *    but it is still inefficient 
		 *    
		 *
		 *    ADS Classic score is implemented as:
		 * 
		 *    ```new_score = (0.5 * norm(lucene_score)) + (0.5 * cite_read_boost)```
		 * 
		 *    where:
		 *    
		 *       norm(LS) = normalized score (in this case it will be a Lucene
		 *                  score, normalized to be in the range 1-0, where 
		 *                  1 = the first, best hit; LS/MaximumLuceneScore
		 *                  
		 *       cite_read_boost = the document boosts are combination of 
		 *                  normalized reads and cites: 
		 *                  ```cite_read_boost = log(1 + cites + norm_reads)```
		 *                  
		 *                  where:
		 *                  
		 *                  	```norm_reads``` are normalized values for 
		 *                    reads over the past two years
		 *                    
		 *                    
		 *     
		 *    @experimental
		 *    @synonym cr()
		 *    @since 40.2.2.0
		 *    @since 40.3.0.1 - added parameter to configure ratio
		 * 		
		 * 		"""
		 *    return "classic_relevance(%s, %0.2f)" % (query,ratio)
		 */
		parsers.put("classic_relevance", new AqpSubqueryParserFull() {
			/**
			 * classic_relevance(query[, ratio]): wraps the nested query into a
			 * second order query that uses the ADS Classic scoring collector.
			 * The ratio defaults to 0.5 and must lie in the range 0.0-1.0.
			 */
			public Query parse(FunctionQParser fp) throws SyntaxError {
				Query innerQuery = fp.parseNestedQuery();

				// the second argument is optional
				float ratio = fp.hasMoreArguments() ? fp.parseFloat() : 0.5f;
				if (ratio > 1.0f || ratio < 0) {
					throw new SyntaxError("The ratio must be in the range 0.0-1.0");
				}

				@SuppressWarnings("unchecked")
				SolrCacheWrapper<CitationLRUCache<Object, Integer>> citationsWrapper =
						new SolrCacheWrapper.CitationsCache(
								(CitationLRUCache<Object, Integer>) fp.getReq().getSearcher().getCache("citations-cache"));
				LuceneCacheWrapper<NumericDocValues> boostWrapper = getLuceneCache(fp, "cite_read_boost");

				SecondOrderCollectorAdsClassicScoringFormula scoring =
						new SecondOrderCollectorAdsClassicScoringFormula(citationsWrapper, boostWrapper, ratio);
				return new SecondOrderQuery(innerQuery, scoring);
			}
		});
		// "cr" is registered as a second name for the very same parser instance
		parsers.put("cr", parsers.get("classic_relevance"));


		/* @api.doc
		 * 
		 * def topn(max, query, spec=None):
		 * 		"""
		 *    Limit results to the best top N (by their ranking or sort order)
		 *    
		 *    @param max
		 *    	- integer, how many results should be considered
		 *    @param query
		 *    	- query object
		 *    @param spec
		 *    	- str, can be either 'relevance' or
		 *        sort specification in the SOLR format
		 *    
		 *    Example: 
		 *    
		 *    	```topn(200, title:hubble)``` returns only the
		 *         first 200 papers based on the relevancy score
		 *         
		 *      ```topn(200, citations(title:hubble), citation_count desc)``` 
		 *         returns only the
		 *         first 200 papers, but because the results are 
		 *         sorted by number of citations, you will get the first
		 *         200 most cited papers
		 *         
		 *      
		 *    Technical note:
		 *    
		 *    We do not impose limit of hits that you can return with 
		 *    this operator. But you must be aware that the query is
		 *    going to be slower than normal queries.
		 *    
		 *    @since 40.2.2.0
		 *    """
		 *    return "topn(%s, %s, '%s')" % (int(max), query, spec or 'score')
		 *    
		 */
		parsers.put("topn", new AqpSubqueryParserFull() {
			/**
			 * topn(int, query[, sort]): limits the nested query to the best N
			 * hits, ranked either by score (default) or by the given sort
			 * specification.
			 *
			 * @throws SyntaxError when N is not a positive integer, when the
			 *         nested query is empty, or when the sort cannot be used
			 */
			public Query parse(FunctionQParser fp) throws SyntaxError {
				int topN;
				try {
					topN = fp.parseInt();
				}
				catch (NumberFormatException e) {
					// keep the cause so that the original failure stays visible
					throw new SyntaxError("The function signature is topn(int, query, [sort order]). Error: " + e.getMessage(), e);
				}

				if (topN < 1) {  //|| topN > 50000 - previously, i was limiting the fields
					throw new SyntaxError("Hmmm, the first argument of your operator must be a positive number.");
				}

				QParser eqp = fp.subQuery(fp.parseId(), "aqp");
				Query innerQuery = eqp.getQuery();

				if (innerQuery == null) {
					throw new SyntaxError("This query is empty: " + eqp.getString());
				}

				String sortOrRank = "score";
				if (fp.hasMoreArguments()) {
					sortOrRank = fp.parseId();
				}

				// Locale.ROOT: lower-casing must not depend on the server's
				// default locale (e.g. the Turkish dotless i)
				sortOrRank = unquote(sortOrRank.toLowerCase(java.util.Locale.ROOT));

				if (sortOrRank.equals("score")) {
					return new SecondOrderQuery(innerQuery,
							new SecondOrderCollectorTopN(topN));
				}

				SortSpec sortSpec = SortSpecParsing.parseSortSpec(sortOrRank, fp.getReq());
				SolrIndexSearcher searcher = fp.getReq().getSearcher();

				TopFieldCollector collector;
				try {
					collector = TopFieldCollector.create(searcher.weightSort(sortSpec.getSort()), topN, false, true, true);
				} catch (IOException e) {
					throw new SyntaxError("I am sorry, you can't use " + sortOrRank + " for topn() sorting. Reason: " + e.getMessage(), e);
				}

				return new SecondOrderQuery(innerQuery,
						new SecondOrderCollectorTopN(sortOrRank, topN, collector));
			}

			/**
			 * Removes one pair of matching surrounding quotes (single or
			 * double). Values that are not wrapped in a matching pair are
			 * returned unchanged; previously a quote anywhere in the value
			 * caused the first and last characters to be cut off, which
			 * mangled the value or failed on a one-character argument.
			 */
			private String unquote(String value) {
				if (value.length() >= 2) {
					char first = value.charAt(0);
					char last = value.charAt(value.length() - 1);
					if ((first == '"' || first == '\'') && first == last) {
						return value.substring(1, value.length() - 1);
					}
				}
				return value;
			}
		});

		/* @api.doc
		 * 
		 * def citations(query):
		 * 		"""
		 *    Finds set of papers that have **P** in their reference list
		 *    
		 *    'P' is the set of papers that will be selected by the query
		 *    
		 *    Example: 
		 *    
		 *    	```citations(title:hubble)``` returns papers (potentially
		 *         hundreds of thousands!) that are citing papers P
		 *         
		 *         
		 *      ```citations(citations(author:huchra))``` returns papers 
		 *         (potentially millions!) that are citing papers that
		 *         are citing papers written by 'huchra'
		 *         
		 *      
		 *    Technical note:
		 *    
		 *    We have optimized this query so that it works well with 
		 *    millions of hits. But don't expect miracles. 0.5M hits
		 *    takes few hundred milliseconds; 2M hits will take seconds
		 *    (but less than 10s, since that is the speed the old desktop
		 *    did it)
		 *    
		 *    
		 *    @since 40.1.0.0
		 *    """
		 *    return "citations(%s)" % (query,)
		 *    
		 */
		parsers.put("citations", new AqpSubqueryParserFull() {
			/**
			 * citations(query): wraps the nested query into a second order
			 * query that uses the cited-by collector over "citations-cache".
			 */
			public Query parse(FunctionQParser fp) throws SyntaxError {
				Query innerQuery = fp.parseNestedQuery();

				@SuppressWarnings("unchecked")
				SolrCacheWrapper<CitationLRUCache<Object, Integer>> citationsWrapper =
						new SolrCacheWrapper.CitationsCache(
								(CitationLRUCache<Object, Integer>) fp.getReq().getSearcher().getCache("citations-cache"));

				SecondOrderCollectorCitedBy citedBy = new SecondOrderCollectorCitedBy(citationsWrapper);
				return new SecondOrderQuery(innerQuery, citedBy, false);
			}
		});


		/* @api.doc
		 * 
		 * def references(query):
		 * 		"""
		 *    Finds set of papers that **are** in the references list of **P** 
		 *    
		 *    'P' is the set of papers that will be selected by the query
		 *    
		 *    Example: 
		 *    
		 *    	```references(title:hubble)``` returns papers (potentially
		 *         few hundred) that are **cited by** papers that have 'hubble'
		 *         in their title
		 *         
		 *         
		 *      ```references(author:huchra)``` returns papers 
		 *         that your favorite author cites
		 *         
		 *      
		 *    Technical note:
		 *    
		 *    The same caveats as citations()
		 *    
		 *    
		 *    @since 40.1.0.0
		 *    """
		 *    return "references(%s)" % (query,)
		 *    
		 */
		parsers.put("references", new AqpSubqueryParserFull() {
			/**
			 * references(query): wraps the nested query into a second order
			 * query that uses the in-memory cites collector over
			 * "citations-cache".
			 */
			public Query parse(FunctionQParser fp) throws SyntaxError {
				Query innerQuery = fp.parseNestedQuery();

				@SuppressWarnings("unchecked")
				SolrCacheWrapper<CitationLRUCache<Object, Integer>> referencesWrapper =
						new SolrCacheWrapper.ReferencesCache(
								(CitationLRUCache<Object, Integer>) fp.getReq().getSearcher().getCache("citations-cache"));

				SecondOrderCollectorCitesRAM cites = new SecondOrderCollectorCitesRAM(referencesWrapper);
				return new SecondOrderQuery(innerQuery, cites, false);
			}
		});

		/* @api.doc
		 * 
		 * def joincitations(query):
		 * 		"""
		 *    Equivalent of citations() but implemented using lucene block-join 
		 *    
		 *    
		 *    @experimental
		 *    @access devel
		 *    @since 40.1.0.0
		 *    """
		 *    return "joincitations(%s)" % (query,) 
		 */
		parsers.put("joincitations", new AqpSubqueryParserFull() {
			/**
			 * joincitations(query): join from the "bibcode" field of the
			 * documents matched by the nested query to the "reference" field,
			 * with average score mode.
			 *
			 * @throws SyntaxError when the join query cannot be created
			 */
			public Query parse(FunctionQParser fp) throws SyntaxError {
				Query innerQuery = fp.parseNestedQuery();
				SolrQueryRequest req = fp.getReq();
				try {
					// XXX: not sure if i can use several fields: citationSearchIdField
					return JoinUtil.createJoinQuery("bibcode", false, "reference", innerQuery,
							req.getSearcher(), ScoreMode.Avg);
				} catch (IOException e) {
					// keep the cause, otherwise the I/O stack trace is lost
					throw new SyntaxError(e.getMessage(), e);
				}
			}
		});

		/* @api.doc
		 * 
		 * def joinreferences(query):
		 * 		"""
		 *    Equivalent of references() but implemented using lucene block-join 
		 *    
		 *    
		 *    @experimental
		 *    @access devel
		 *    @since 40.1.0.0
		 *    """
		 *    return "joinreferences(%s)" % (query,)
		 *     
		 */
		parsers.put("joinreferences", new AqpSubqueryParserFull() {
			/**
			 * joinreferences(query): join from the (multi-valued) "reference"
			 * field of the documents matched by the nested query to the
			 * "bibcode" field, without scoring.
			 *
			 * @throws SyntaxError when the join query cannot be created
			 */
			public Query parse(FunctionQParser fp) throws SyntaxError {
				Query innerQuery = fp.parseNestedQuery();
				SolrQueryRequest req = fp.getReq();
				try {
					return JoinUtil.createJoinQuery("reference", true, "bibcode", innerQuery,
							req.getSearcher(), ScoreMode.None); // will not work properly iff mode=Avg|Max
				} catch (IOException e) {
					// keep the cause, otherwise the I/O stack trace is lost
					throw new SyntaxError(e.getMessage(), e);
				}
			}
		});


		/* @api.doc
		 * 
		 * def useful(query):
		 * 		"""
		 *    What experts are citing; this mimics the ADS Classic implementation
		 *    ```references(topn(200, classic_relevance(Q)))```
		 *    
		 *    In other words, this will first find papers using the inner query, 
		 *    it will re-score them using the ADS classic ranking formula,
		 *    then selects 200 top papers. And then get **references from** these
		 *    200 papers.
		 *    
		 *    @experimental
		 *    @since 40.2.0.0
		 *    """
		 *    return "useful(%s)" % (query,)
		 *     
		 */
		parsers.put("useful", new AqpSubqueryParserFull() { // this function values can be analyzed
			/**
			 * useful(query): three nested second order queries -
			 * classic scoring, then top 200, then the cites collector.
			 */
			public Query parse(FunctionQParser fp) throws SyntaxError {
				Query innerQuery = fp.parseNestedQuery();

				@SuppressWarnings("unchecked")
				SolrCacheWrapper<CitationLRUCache<Object, Integer>> referencesWrapper =
						new SolrCacheWrapper.ReferencesCache(
								(CitationLRUCache<Object, Integer>) fp.getReq().getSearcher().getCache("citations-cache"));
				LuceneCacheWrapper<NumericDocValues> boostWrapper = getLuceneCache(fp, "cite_read_boost");

				// classic_relevance
				// NOTE(review): the scoring collector receives the references
				// wrapper here, whereas "classic_relevance" and "reviews" pass
				// a citations wrapper - confirm this is intended
				SecondOrderQuery rescored = new SecondOrderQuery(innerQuery,
						new SecondOrderCollectorAdsClassicScoringFormula(referencesWrapper, boostWrapper));
				// topn
				SecondOrderQuery best = new SecondOrderQuery(rescored,
						new SecondOrderCollectorTopN(200));
				// references
				SecondOrderQuery outerQuery = new SecondOrderQuery(best,
						new SecondOrderCollectorCitesRAM(referencesWrapper));

				outerQuery.getcollector().setFinalValueType(FinalValueType.ABS_COUNT_NORM);
				return outerQuery;
			}
		});

		/* @api.doc
		 * 
		 * def useful2(query):
		 * 		"""
		 *    What experts are citing; original implementation of useful() 
		 *    -- using special collector
		 *    
		 *    Technical details:
		 *    
		 *    This function will add the cite_read_boost factor (from the
		 *    1st order set) to the score (of the 2nd order result set).
		 *    If no boost factor is available, doc will be penalized by 
		 *    having its score lowered by 20%
		 *    
		 *    @access devel
		 *    @experimental
		 *    @since 40.1.2.0
		 *    """
		 *    return "useful2(%s)" % (query,)
		 *     
		 */
		parsers.put("useful2", new AqpSubqueryParserFull() { // this function values can be analyzed
			/**
			 * useful2(query): wraps the nested query into a second order query
			 * that uses the "experts citing" collector.
			 */
			public Query parse(FunctionQParser fp) throws SyntaxError {
				Query innerQuery = fp.parseNestedQuery();

				@SuppressWarnings("unchecked")
				SolrCacheWrapper<CitationLRUCache<Object, Integer>> citationsWrapper =
						new SolrCacheWrapper.CitationsCache(
								(CitationLRUCache<Object, Integer>) fp.getReq().getSearcher().getCache("citations-cache"));

				//TODO: make configurable the name of the field
				LuceneCacheWrapper<NumericDocValues> boostWrapper = getLuceneCache(fp, "cite_read_boost");

				SecondOrderCollectorOperatorExpertsCiting expertsCiting =
						new SecondOrderCollectorOperatorExpertsCiting(citationsWrapper, boostWrapper);
				return new SecondOrderQuery(innerQuery, expertsCiting);
			}
		});


		/* @api.doc
		 * 
		 * def reviews(query):
		 * 		"""
		 *    What is cited by experts; this mimics the ADS Classic implementation:
		 *    ```citations(topn(200, classic_relevance(Q)))```
		 *    
		 *    In other words, this will first find papers using the query, 
		 *    it will re-score them using the ADS classic ranking formula,
		 *    then selects 200 top papers. And then get **citations for** these
		 *    200 papers.
		 *    
		 *    @experimental
		 *    @since 40.2.0.0
		 *    """
		 *    return "reviews(%s)" % (query,)
		 *     
		 */
		parsers.put("reviews", new AqpSubqueryParserFull() { // this function values can be analyzed
			/**
			 * reviews(query): three nested second order queries -
			 * classic scoring, then top 200, then the cited-by collector.
			 */
			public Query parse(FunctionQParser fp) throws SyntaxError {
				Query innerQuery = fp.parseNestedQuery();

				@SuppressWarnings("unchecked")
				SolrCacheWrapper<CitationLRUCache<Object, Integer>> citationsWrapper =
						new SolrCacheWrapper.CitationsCache(
								(CitationLRUCache<Object, Integer>) fp.getReq().getSearcher().getCache("citations-cache"));
				LuceneCacheWrapper<NumericDocValues> boostWrapper = getLuceneCache(fp, "cite_read_boost");

				// classic_relevance
				SecondOrderQuery rescored = new SecondOrderQuery(innerQuery,
						new SecondOrderCollectorAdsClassicScoringFormula(citationsWrapper, boostWrapper));
				// topn
				SecondOrderQuery best = new SecondOrderQuery(rescored,
						new SecondOrderCollectorTopN(200));
				// citations
				SecondOrderQuery outerQuery = new SecondOrderQuery(best,
						new SecondOrderCollectorCitedBy(citationsWrapper));

				outerQuery.getcollector().setFinalValueType(FinalValueType.ABS_COUNT);
				return outerQuery;
			}
		});
		
		
		/* @api.doc
		 * 
		 * def instructive(query):
		 * 		"""
		 *    The synonym of @see reviews
		 *    """
		 *    return reviews(query)
		 */
		// alias: registers the parser instance stored under "reviews" under a second name
		parsers.put("instructive", parsers.get("reviews"));

		// original impl of reviews() = find papers that cite the most cited papers
		parsers.put("reviews2", new AqpSubqueryParserFull() { // this function values can be analyzed
			/**
			 * reviews2(query): wraps the nested query into a second order query
			 * that uses the "citing the most cited" collector.
			 */
			public Query parse(FunctionQParser fp) throws SyntaxError {
				Query innerQuery = fp.parseNestedQuery();

				@SuppressWarnings("unchecked")
				SolrCacheWrapper<CitationLRUCache<Object, Integer>> citationsWrapper =
						new SolrCacheWrapper.CitationsCache(
								(CitationLRUCache<Object, Integer>) fp.getReq().getSearcher().getCache("citations-cache"));
				LuceneCacheWrapper<NumericDocValues> boostWrapper = getLuceneCache(fp, "cite_read_boost");

				SecondOrderCollectorCitingTheMostCited citingMostCited =
						new SecondOrderCollectorCitingTheMostCited(citationsWrapper, boostWrapper);
				return new SecondOrderQuery(innerQuery, citingMostCited);
			}
		});
		parsers.put("citis", new AqpSubqueryParserFull() {
			/**
			 * citis(query): wraps the nested query into a second order query
			 * that uses the cites collector reading the reference field.
			 */
			public Query parse(FunctionQParser fp) throws SyntaxError {
				Query innerQuery = fp.parseNestedQuery();

				@SuppressWarnings("unchecked")
				SolrCacheWrapper<CitationLRUCache<Object, Integer>> citationsWrapper =
						new SolrCacheWrapper.CitationsCache(
								(CitationLRUCache<Object, Integer>) fp.getReq().getSearcher().getCache("citations-cache"));

				SecondOrderCollectorCites cites =
						new SecondOrderCollectorCites(citationsWrapper, new String[] {citationSearchRefField});
				return new SecondOrderQuery(innerQuery, cites, false);
			}
		});
		parsers.put("aqp", new AqpSubqueryParserFull() {
			// passes the argument string to the "aqp" parser
			public Query parse(FunctionQParser fp) throws SyntaxError {
				return fp.subQuery(fp.getString(), "aqp").getQuery();
			}
		});

		parsers.put("adismax", new AqpSubqueryParserFull() {
			// passes the argument string to the "adismax" parser and simplifies the result
			public Query parse(FunctionQParser fp) throws SyntaxError {
				Query parsed = fp.subQuery(fp.getString(), "adismax").getQuery();
				return simplify(parsed);
			}
		});

		parsers.put("edismax_nonanalyzed", new AqpSubqueryParserFull() { // used for nodes that were already analyzed
			/**
			 * Builds an "adismax" query from the placeholder string "xxx" and
			 * passes it to reParse() together with a parser that produces term
			 * queries carrying the original (non-analyzed) input.
			 */
			public Query parse(FunctionQParser fp) throws SyntaxError {
				final String original = fp.getString();
				Query template = fp.subQuery("xxx", "adismax").getQuery();

				QParser termFactory = new QParser(original, null, null, null) {
					@Override
					public Query parse() throws SyntaxError {
						// field = text before the first ':', value = the whole original input
						String fieldName = getString().split(":")[0];
						return new TermQuery(new Term(fieldName, original));
					}
				};

				Query reparsed = reParse(template, termFactory, TermQuery.class);
				return simplify(reparsed);
			}
		});
		// "edismax_combined_aqp": will decide whether new aqp() parse is needed
		parsers.put("edismax_combined_aqp", new AqpSubqueryParserFull() {
			/**
			 * Parses the function's string with the "adismax" sub-parser and
			 * returns the result after it has been passed through simplify().
			 *
			 * @param fp parser holding the function's argument string
			 * @return the simplified adismax query
			 * @throws SyntaxError if the sub-parser rejects the input
			 */
			public Query parse(FunctionQParser fp) throws SyntaxError {
				final String original = fp.getString();
				QParser eqp = fp.subQuery(original, "adismax");
				Query q = eqp.getQuery();
				return simplify(q);
			}

			/**
			 * Visits every disjunct of the given query. A disjunct that is a
			 * TermQuery, or a BooleanQuery whose first clause is a TermQuery, is
			 * replaced by the output of reAnalyze() when toBeAnalyzedAgain()
			 * names a field for it; every other disjunct is replaced by the
			 * result of swimDeep() on that disjunct.
			 *
			 * NOTE(review): the list returned by getDisjuncts() is modified in
			 * place; confirm it is modifiable in the Lucene version in use.
			 *
			 * @param query the disjunction whose parts are rewritten in place
			 * @return the same query instance that was passed in
			 * @throws SyntaxError if re-parsing a disjunct fails
			 */
			protected Query swimDeep(DisjunctionMaxQuery query) throws SyntaxError {
				List<Query> parts = query.getDisjuncts();
				for (int i = 0; i < parts.size(); i++) {
					Query oldQ = parts.get(i);
					String field = null;
					if (oldQ instanceof TermQuery) {
						field = toBeAnalyzedAgain((TermQuery) oldQ);
					}
					else if (oldQ instanceof BooleanQuery) {
						List<BooleanClause> clauses = ((BooleanQuery) oldQ).clauses();
						if (!clauses.isEmpty()) {
							Query firstQuery = clauses.get(0).getQuery();
							if (firstQuery instanceof TermQuery) {
								field = toBeAnalyzedAgain((TermQuery) firstQuery);
							}
						}
					}
					if (field != null) {
						// Carry over the boost of the replaced query. The previous test,
						// oldQ.getClass().isInstance(BoostQuery.class), asked whether a
						// Class object is an instance of oldQ's class and was always false.
						// NOTE(review): field is only set for TermQuery/BooleanQuery, so a
						// BoostQuery-wrapped disjunct currently takes the swimDeep() branch.
						Float boost = null;
						if (oldQ instanceof BoostQuery) {
							boost = ((BoostQuery) oldQ).getBoost();
						}
						parts.set(i, reAnalyze(field, getParser().getString(), boost));
					}
					else {
						parts.set(i, swimDeep(oldQ));
					}
				}
				return query;
			}

			/**
			 * Decides whether a term query should be parsed again by aqp.
			 * Re-analysis is currently switched off: the method always returns
			 * null (the alternatives are kept below, commented out).
			 *
			 * @param q the term query under consideration
			 * @return the field to re-analyze against, or null for "leave as is"
			 */
			private String toBeAnalyzedAgain(TermQuery q) {
				//String f = q.getTerm().field();
				//if (f.equals("author")) {
				//  return "author";
				//}
				return null;
				//return f; // always re-analyze
			}

			/**
			 * Parses "field:" + the current parser string with the "aqp"
			 * sub-parser and wraps the result in a BoostQuery when a boost other
			 * than 1.0 is supplied.
			 *
			 * @param field field name prepended to the parser string
			 * @param value not read; the text is taken from getParser().getString()
			 * @param boost boost to apply, or null for none
			 * @return the re-parsed (and possibly boosted) query
			 * @throws SyntaxError if the aqp sub-parser rejects the input
			 */
			private Query reAnalyze(String field, String value, Float boost) throws SyntaxError {
				QParser fParser = getParser();
				QParser aqp = fParser.subQuery(field + ":" + fParser.getString(), "aqp");
				Query q = aqp.getQuery();
				if (boost != null && boost != 1.0f) {
					q = new BoostQuery(q, boost);
				}
				return q;
			}
		});
		// "edismax_always_aqp": will use edismax to create top query, but the rest is done by aqp
		parsers.put("edismax_always_aqp", new AqpSubqueryParserFull() {
			/**
			 * Builds a query with the "adismax" sub-parser from the literal
			 * string "xxx", then passes it to reParse() together with fp (whose
			 * string is reset to the original input) and simplifies the result.
			 *
			 * @param fp parser holding the function's argument string
			 * @return the simplified, re-parsed query
			 * @throws SyntaxError if any of the parsers rejects the input
			 */
			public Query parse(FunctionQParser fp) throws SyntaxError {
				final String original = fp.getString();
				QParser eqp = fp.subQuery("xxx", "adismax");
				// put the original string back on fp before it is handed to reParse()
				// (presumably so reAnalyze() sees the real input; confirm)
				fp.setString(original);
				Query q = eqp.getQuery();
				return simplify(reParse(q, fp, (Class<?>) null));
			}

			/**
			 * Visits every disjunct of the given query. A disjunct that is a
			 * TermQuery, or a BooleanQuery whose first clause is a TermQuery, is
			 * replaced by the output of reAnalyze() for that term's field; every
			 * other disjunct is replaced by the result of swimDeep() on it.
			 *
			 * NOTE(review): the list returned by getDisjuncts() is modified in
			 * place; confirm it is modifiable in the Lucene version in use.
			 *
			 * @param query the disjunction whose parts are rewritten in place
			 * @return the same query instance that was passed in
			 * @throws SyntaxError if re-parsing a disjunct fails
			 */
			protected Query swimDeep(DisjunctionMaxQuery query) throws SyntaxError {
				List<Query> parts = query.getDisjuncts();
				for (int i = 0; i < parts.size(); i++) {
					Query oldQ = parts.get(i);
					String field = null;
					if (oldQ instanceof TermQuery) {
						field = ((TermQuery) oldQ).getTerm().field();
					}
					else if (oldQ instanceof BooleanQuery) {
						List<BooleanClause> clauses = ((BooleanQuery) oldQ).clauses();
						if (!clauses.isEmpty()) {
							Query firstQuery = clauses.get(0).getQuery();
							if (firstQuery instanceof TermQuery) {
								field = ((TermQuery) firstQuery).getTerm().field();
							}
						}
					}
					if (field != null) {
						// Carry over the boost of the replaced query. The previous test,
						// oldQ.getClass().isInstance(BoostQuery.class), asked whether a
						// Class object is an instance of oldQ's class and was always false.
						// NOTE(review): field is only set for TermQuery/BooleanQuery, so a
						// BoostQuery-wrapped disjunct currently takes the swimDeep() branch.
						Float boost = null;
						if (oldQ instanceof BoostQuery) {
							boost = ((BoostQuery) oldQ).getBoost();
						}
						parts.set(i, reAnalyze(field, getParser().getString(), boost));
					}
					else {
						parts.set(i, swimDeep(oldQ));
					}
				}
				return query;
			}

			/**
			 * Parses "field:" + the current parser string with the "aqp"
			 * sub-parser and wraps the result in a BoostQuery when a boost other
			 * than 1.0 is supplied.
			 *
			 * @param field field name prepended to the parser string
			 * @param value not read; the text is taken from getParser().getString()
			 * @param boost boost to apply, or null for none
			 * @return the re-parsed (and possibly boosted) query
			 * @throws SyntaxError if the aqp sub-parser rejects the input
			 */
			private Query reAnalyze(String field, String value, Float boost) throws SyntaxError {
				QParser fParser = getParser();
				QParser aqp = fParser.subQuery(field + ":" + fParser.getString(), "aqp");
				Query q = aqp.getQuery();
				if (boost != null && boost != 1.0f) {
					q = new BoostQuery(q, boost);
				}
				return q;
			}
		});

		// "tweak": tweak(<config>, <query>) parses <config> as a URL-style query
		// string and applies the recognised options to the nested query.
		parsers.put("tweak", new AqpSubqueryParserFull() {
			public Query parse(FunctionQParser fp) throws SyntaxError {
				final String configuration = fp.parseId();
				final Query nested = fp.parseNestedQuery();
				final MultiMapSolrParams options = SolrRequestParsers.parseQueryString(configuration);

				// only option handled here: the collector's final value type,
				// and only when the nested query is a SecondOrderQuery
				final String finalValueName = options.get("collector_final_value", null);
				if (finalValueName == null || !(nested instanceof SecondOrderQuery)) {
					return nested;
				}

				final SecondOrderCollector collector = ((SecondOrderQuery) nested).getcollector();
				try {
					collector.setFinalValueType(SecondOrderCollector.FinalValueType.valueOf(finalValueName));
				}
				catch (IllegalArgumentException e) {
					// an unknown enum constant name is reported as a query syntax problem
					throw new SyntaxError("Wrong parameter: " + e.getMessage(), e);
				}
				return nested;
			}
		});
		
		// Helper function: Solr does not warm up the caches when the index is
		// opened for the first time, so we have to do it ourselves.
		parsers.put("warm_cache", new AqpSubqueryParserFull() {
			/**
			 * Starts warming of the searcher's "citations-cache" unless the cache
			 * reports that it is already warming or warmed. The returned query
			 * never matches anything; the function is called for its side effect.
			 *
			 * @param fp parser giving access to the current request and searcher
			 * @return a MatchNoDocsQuery
			 * @throws IllegalStateException if the searcher has no cache
			 *         registered under the name "citations-cache"
			 */
			@SuppressWarnings("unchecked")
			public Query parse(FunctionQParser fp) throws SyntaxError {

				final SolrQueryRequest req = fp.getReq();
				@SuppressWarnings("rawtypes")
				final CitationLRUCache cache = (CitationLRUCache) req.getSearcher().getCache("citations-cache");
				if (cache == null) {
					// fail with a clear message instead of a bare NullPointerException
					throw new IllegalStateException(
							"Cache 'citations-cache' is not configured for this searcher; cannot warm it");
				}
				if (!cache.isWarmingOrWarmed()) {
					cache.warm(req.getSearcher(), cache);
				}
				return new MatchNoDocsQuery();
			}
		});
			
	};
	
	/**
	 * Creates the query builder for the function registered under the given
	 * name.
	 *
	 * <p>The builder pairs the registered sub-query parser with a fresh
	 * {@code AqpFunctionQParser} that is given a copy of the request's local
	 * params (with {@code defType} set to "aqp" when it is not already set),
	 * the request params and the Solr request itself.
	 *
	 * @param funcName name of the function as registered in {@code parsers}
	 * @param node     the query node being built; not read by this method
	 * @param config   configuration expected to carry the Solr request
	 *                 parameters under {@code ConfigurationKeys.SOLR_REQUEST}
	 * @return the builder, or {@code null} when no parser is registered for
	 *         {@code funcName}, when the configuration carries no request
	 *         parameters, or when those parameters hold no Solr request
	 * @throws QueryNodeException declared by the signature; not thrown
	 *         directly by the code in this method
	 */
	public AqpFunctionQueryBuilder getBuilder(String funcName, QueryNode node, QueryConfigHandler config) 
	throws QueryNodeException {

		AqpSubqueryParser provider = parsers.get(funcName);
		if (provider == null) {
			return null;
		}

		AqpRequestParams reqAttr = config.get(AqpAdsabsQueryConfigHandler.ConfigurationKeys.SOLR_REQUEST);
		if (reqAttr == null) {
			// nothing was stored under the key; treat it like a missing request
			return null;
		}

		SolrQueryRequest req = reqAttr.getRequest();
		if (req == null) {
			return null;
		}

		// always work on a private, modifiable copy so the defType default
		// below never leaks into the params held by reqAttr
		SolrParams requestLocalParams = reqAttr.getLocalParams();
		ModifiableSolrParams localParams;
		if (requestLocalParams == null) {
			localParams = new ModifiableSolrParams();
		}
		else {
			localParams = new ModifiableSolrParams(requestLocalParams);
		}

		if (localParams.get(QueryParsing.DEFTYPE, null) == null) {
			localParams.set(QueryParsing.DEFTYPE, "aqp");
		}

		AqpFunctionQParser parser = new AqpFunctionQParser("", localParams, 
				reqAttr.getParams(), req);
		return new AqpSubQueryTreeBuilder(provider, parser);

	}

	/*
	private void getSpan(QueryNode node, Integer[] span) {
		List<QueryNode> children = node.getChildren();
		swimDeep(children.get(0), span);
		swimDeep(children.get(children.size()-1), span);
	}
	

	private void swimDeep(QueryNode node, Integer[] span) {

		if (node instanceof AqpANTLRNode) {
			int i = ((AqpANTLRNode) node).getTokenStart();
			int j = ((AqpANTLRNode) node).getTokenEnd();

			if(j>i) {
				if (i != -1 && i < span[0]) {
					span[0] = i;
				}
				if (j != -1 && j > span[1]) {
					span[1] = j;
				}
			}
		}
		if (!node.isLeaf()) {
			for (QueryNode child: node.getChildren()) {
				swimDeep(child, span);
			}
		}

	}
	*/

}