package org.apache.lucene.chunk;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.WeakHashMap;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreDocComparator;
import org.apache.lucene.search.SortComparatorSource;
import org.apache.lucene.search.SortField;
/**
 * Similar to a normal string sort comparator, except that it is optimized
 * for the case where most documents (or chunks, in our case) have no value
 * in the sort field. Thus, instead of a huge array indexed by document ID,
 * we keep a list of entries sorted by document ID and binary search it.
 */
public class SparseStringComparator implements SortComparatorSource
{
  /**
   * Optional modifier appended to a field name. When present, documents that
   * have no term in the field sort before all others instead of after them.
   */
  private static final String FLIP_EMPTY_SUFFIX = ":flipEmpty";

  /**
   * Comparators built so far: reader -> (field name -> SparseComp). Keyed
   * weakly on the reader. The map is static, hence shared by every instance
   * of this class; all access must hold the monitor of <code>cache</code>
   * because WeakHashMap is not thread-safe.
   */
  private static final WeakHashMap cache = new WeakHashMap();

  /** Orders entries by ascending document ID. */
  private static final EntryComparator entryComparator = new EntryComparator();

  /**
   * Make (or fetch from the cache) a comparator for the given field using
   * the given reader.
   *
   * @param reader    index reader whose terms are scanned
   * @param fieldName field to sort on, optionally suffixed with
   *                  <code>:flipEmpty</code> to sort documents lacking a
   *                  value first rather than last
   * @return comparator for the field; the same instance is returned for
   *         repeated calls with the same reader and field name
   * @throws IOException      if reading the index fails
   * @throws RuntimeException if some document has more than one term in
   *                          the field
   */
  public ScoreDocComparator newComparator(IndexReader reader, String fieldName)
    throws IOException
  {
    // Fast path: already built for this reader and field.
    synchronized (cache) {
      Map readerCache = (Map)cache.get(reader);
      if (readerCache != null) {
        SparseComp cached = (SparseComp)readerCache.get(fieldName);
        if (cached != null)
          return cached;
      }
    }

    // Build outside the lock so a slow index scan does not block lookups
    // for other readers or fields.
    SparseComp built = new SparseComp(reader, fieldName);

    synchronized (cache) {
      Map readerCache = (Map)cache.get(reader);
      if (readerCache == null) {
        readerCache = new HashMap();
        cache.put(reader, readerCache);
      }

      // If another thread stored a comparator in the meantime, keep that
      // one so every caller sees a single instance per reader and field.
      SparseComp winner = (SparseComp)readerCache.get(fieldName);
      if (winner == null) {
        readerCache.put(fieldName, built);
        winner = built;
      }
      return winner;
    }
  } // newComparator()

  /**
   * Comparator backed by a table holding one entry per document that has a
   * term in the field, sorted by document ID. Static so that cached
   * instances do not keep a reference to the enclosing comparator source.
   * The table is not modified after construction.
   */
  private static class SparseComp implements ScoreDocComparator
  {
    /** Entry objects, sorted by ascending docId once construction ends. */
    final ArrayList entries = new ArrayList(500);

    /** True if documents without a value sort first rather than last. */
    final boolean flipEmpty;

    /**
     * Scan every term of the field and record, for each document containing
     * it, the term text and the term's ordinal in enumeration order.
     *
     * @param reader index reader to scan
     * @param field  field name, optionally ending in <code>:flipEmpty</code>
     * @throws IOException      if reading the index fails
     * @throws RuntimeException if a document has more than one term in
     *                          the field
     */
    SparseComp(IndexReader reader, String field)
      throws IOException
    {
      // Grab the flipEmpty modifier if present. Only the trailing suffix is
      // removed; the same text elsewhere in the name is left untouched.
      boolean flip = field.endsWith(FLIP_EMPTY_SUFFIX);
      if (flip)
        field = field.substring(0, field.length() - FLIP_EMPTY_SUFFIX.length());
      flipEmpty = flip;

      // Maps docId (Integer) -> term text already seen for that document,
      // used only to detect documents with more than one term.
      HashMap docs = new HashMap();

      // Each resource gets its own try/finally so that a failure while
      // opening or closing one can never leave the other open.
      TermDocs termDocs = reader.termDocs();
      try
      {
        TermEnum termEnum = reader.terms(new Term(field, ""));
        try
        {
          int t = 0; // current term number
          do
          {
            // Stop at the end of the enumeration or when it moves past
            // this field. A field without terms simply yields an empty
            // table: every document is then treated as having no value.
            Term term = termEnum.term();
            if (term == null || !field.equals(term.field()))
              break;

            String termText = term.text();
            termDocs.seek(termEnum);
            while (termDocs.next())
            {
              int docId = termDocs.doc();

              // Ensure that there is only one term in this field per
              // document.
              Object previous = docs.put(Integer.valueOf(docId), termText);
              if (previous != null) {
                throw new RuntimeException(
                  "A document has more than one term ('" + termText + "', '" +
                  (String)previous + "') in field " + field);
              }

              Entry ent = new Entry();
              ent.docId = docId;
              ent.termText = termText;
              ent.order = t;
              entries.add(ent);
            }
            t++;
          } while (termEnum.next());
        }
        finally {
          termEnum.close();
        }
      }
      finally {
        termDocs.close();
      }

      // Entries were appended term by term; re-sort them by document ID so
      // that findEntry() can binary search, and drop unused capacity.
      Collections.sort(entries, entryComparator);
      entries.trimToSize();
    } // constructor

    /**
     * Retrieve the entry for a given document, or null if not found. Uses
     * a binary search over the docId-sorted table and allocates nothing.
     */
    private Entry findEntry(int docId)
    {
      int lo = 0;
      int hi = entries.size() - 1;
      while (lo <= hi)
      {
        int mid = (lo + hi) >>> 1; // unsigned shift: no overflow on the sum
        Entry candidate = (Entry)entries.get(mid);
        if (candidate.docId < docId)
          lo = mid + 1;
        else if (candidate.docId > docId)
          hi = mid - 1;
        else
          return candidate;
      }
      return null;
    } // findEntry()

    /**
     * Sort key for a document: the ordinal of its term, or a sentinel that
     * places documents without a term last (first when flipEmpty is set).
     */
    private int orderOf(int docId)
    {
      Entry ent = findEntry(docId);
      if (ent != null)
        return ent.order;
      return flipEmpty ? Integer.MIN_VALUE : Integer.MAX_VALUE;
    }

    /**
     * Compares two ScoreDoc objects and returns a result indicating their
     * sort order. Two documents that both lack a value compare as equal.
     * @param d1 First ScoreDoc
     * @param d2 Second ScoreDoc
     * @return <code>-1</code> if <code>d1</code> should come before
     *         <code>d2</code><br><code>1</code> if <code>d1</code> should
     *         come after <code>d2</code><br><code>0</code> if they are equal
     * @see java.util.Comparator
     */
    public int compare(ScoreDoc d1, ScoreDoc d2)
    {
      int o1 = orderOf(d1.doc);
      int o2 = orderOf(d2.doc);
      if (o1 < o2)
        return -1;
      else if (o1 > o2)
        return 1;
      else
        return 0;
    }

    /**
     * Returns the value used to sort the given document: the text of its
     * term in the field, or the empty string if it has none.
     * @param i Document
     * @return the term text, or <code>""</code> when the document has no
     *         entry in the table
     */
    public Comparable sortValue(ScoreDoc i) {
      Entry ent = findEntry(i.doc);
      if (ent != null)
        return ent.termText;
      return "";
    }

    /**
     * Returns the type of sort.
     * @return always <code>SortField.CUSTOM</code>
     * @see SortField
     */
    public int sortType() {
      return SortField.CUSTOM;
    }
  } // class SparseComp

  /**
   * A single entry in the sorting table. Static so that entries do not each
   * carry a reference to the enclosing instance.
   */
  static class Entry {
    /** Document the entry belongs to. */
    int docId;
    /** Text of the document's term in the sort field. */
    String termText;
    /** Ordinal of that term in term enumeration order. */
    int order;
  }

  /** Compare two entries by document ID for sorting and searching purposes */
  static class EntryComparator implements Comparator
  {
    public int compare(Object o1, Object o2) {
      int d1 = ((Entry)o1).docId;
      int d2 = ((Entry)o2).docId;
      if (d1 < d2)
        return -1;
      else if (d1 > d2)
        return 1;
      else
        return 0;
    }
  } // class EntryComparator
} // class SparseStringComparator