package org.cdlib.xtf.textEngine.facet;
/**
* Copyright (c) 2006, Regents of the University of California
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the University of California nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Acknowledgements:
*
* A significant amount of new and/or modified code in this module
* was made possible by a grant from the Andrew W. Mellon Foundation,
* as part of the Melvyl Recommender Project.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Set;
import java.util.StringTokenizer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.IntList;
import org.apache.lucene.util.Prime;
import org.cdlib.xtf.util.FloatList;
import org.cdlib.xtf.util.TagChars;
import org.cdlib.xtf.util.Trace;
/**
* Implements a dynamic mapping from document to a FRBR-style title/author key.
*
* @author Martin Haye
*/
public class FRBRGroupData extends DynamicGroupData
{
/** Original parameter string, exactly as passed to init() */
@SuppressWarnings("unused")
private String params;
/** Tag/doc data for the specified fields, fetched by init() */
private FRBRData data;
/** IDs of matching documents, in the order they were collected */
private IntList docs = new IntList();
/** Highest doc ID encountered; used to size docGroups */
private int maxDoc = 0;
/** Score of each matching document, parallel to docs */
private FloatList docScores = new FloatList();
/** Mapping of documents to groups, indexed by doc ID; -1 means no group assigned */
private IntList docGroups;
/** First document in each group (for sorting purposes), indexed by group ID */
private IntList groupDocs;
/** Number of documents in each group, indexed by group ID */
private IntList groupDocCounts;
/** Score of each group (highest score among its documents), indexed by group ID */
private FloatList groupScores;
/** Number of groups created so far */
private int nGroups = 1; // group 0 is always the root
/** Primary field to sort by (one of the FRBRData.TYPE_* constants) */
private int primarySort = FRBRData.TYPE_TITLE;
/** Whether primary sort is in reverse order */
private boolean reversePrimarySort = false;
/**
 * Parses the parameter string and reads in the FRBR data for the fields it
 * names.
 *
 * Tokens are separated by spaces, tabs, commas, semicolons or vertical bars.
 * A token starting with '[' is a control marker that selects the primary
 * sort: [sort=title], [sort=author], [sort=date], [sort=-date] (date in
 * reverse order) or [sort=id]. Every other token is taken as a field name.
 * When several sort markers are given, the last one wins, including its
 * direction.
 *
 * @param indexReader  reader handed to FRBRData.getCachedTags
 * @param tokFields    names of tokenized fields; naming one of them in
 *                     params is rejected
 * @param params       delimited list of field names and control markers
 * @throws IOException if fetching the tag data fails
 * @throws RuntimeException on an unknown control marker, or when a named
 *                     field is tokenized
 */
public void init(IndexReader indexReader, Set tokFields, String params)
  throws IOException
{
  // Record the input
  this.params = params;

  // Start from the defaults so a repeated call cannot inherit sort settings
  // left behind by an earlier parameter string.
  primarySort = FRBRData.TYPE_TITLE;
  reversePrimarySort = false;

  // Break the string of parameters into a list of fields.
  StringTokenizer tokenizer = new StringTokenizer(params, " \t,;|");
  ArrayList<String> fields = new ArrayList<String>(tokenizer.countTokens());
  while (tokenizer.hasMoreTokens())
  {
    String tok = tokenizer.nextToken();
    if (tok.startsWith("["))
    {
      // Every sort marker decides both the field and the direction, so that
      // "[sort=-date] [sort=title]" does not end up as a reversed title sort.
      boolean reverse = false;
      if (tok.equals("[sort=title]"))
        primarySort = FRBRData.TYPE_TITLE;
      else if (tok.equals("[sort=author]"))
        primarySort = FRBRData.TYPE_AUTHOR;
      else if (tok.equals("[sort=date]"))
        primarySort = FRBRData.TYPE_DATE;
      else if (tok.equals("[sort=-date]")) {
        primarySort = FRBRData.TYPE_DATE;
        reverse = true;
      }
      else if (tok.equals("[sort=id]"))
        primarySort = FRBRData.TYPE_ID;
      else
        throw new RuntimeException("Unknown control marker: " + tok);
      reversePrimarySort = reverse;
    }
    else
    {
      // Our algorithms fail badly on tokenized fields, so flag that.
      if (tokFields.contains(tok))
        throw new RuntimeException("XTF's FRBR algorithms cannot work with tokenized fields, e.g. '" + tok + "'");

      // Field is okay, add it to our list.
      fields.add(tok);
    }
  }

  // And fetch the doc/tag data for those fields.
  data = FRBRData.getCachedTags(indexReader, fields.toArray(new String[fields.size()]));
}
/**
 * Records one document that matched the query, together with its score.
 * The ID and the score go into parallel lists, and the highest ID seen so
 * far is tracked. Documents are expected in strictly increasing ID order
 * (checked by assertion only).
 *
 * @param doc    ID of the matching document
 * @param score  score of that document
 */
public void collect(int doc, float score)
{
  assert docs.isEmpty() || docs.getLast() < doc : "docs out of order";

  if (doc > maxDoc)
    maxDoc = doc;
  docs.add(doc);
  docScores.add(score);
} // collect()
/**
 * Form the final FRBR groups for the document set.
 *
 * First every collected document is assigned a group (documents already
 * pulled into a group by an earlier document are left alone). Then the
 * per-group tables are built: first document, number of documents, and
 * highest score. Group 0, the root, receives the total document count and
 * the highest score overall.
 */
public void finish()
{
  Trace.debug("Building FRBR groups for " + docs.size() + " docs...");
  Trace.tab();

  // Save space in the document and score lists.
  docs.compact();
  docScores.compact();

  // Figure out a group for each document; -1 marks "no group yet".
  docGroups = new IntList(maxDoc + 1);
  docGroups.fill(-1);
  final int nDocs = docs.size();
  for (int i = 0; i < nDocs; i++)
  {
    int doc = docs.get(i);

    // Only go looking for docs that don't already have a group assigned.
    if (docGroups.get(doc) < 0)
      findGroup(doc);
  }
  Trace.debug(nGroups + " groups. Inverting map...");

  // Form the count and score lists.
  groupDocs = new IntList(nGroups);
  groupDocCounts = new IntList(nGroups);
  groupScores = new FloatList(nGroups);
  for (int i = 0; i < nDocs; i++)
  {
    int doc = docs.get(i);
    float score = docScores.get(i);
    int group = docGroups.get(doc);
    assert group >= 0 : "group should have been assigned";

    // The group's first document is the one seen while its count is still
    // zero. Testing the count (rather than "groupDocs entry is 0") keeps doc
    // ID 0 from being mistaken for "not set yet" and then overwritten by a
    // later member. This relies on a fresh list reading as 0, which the
    // count update below depends on anyway.
    int hitsSoFar = groupDocCounts.get(group);
    if (hitsSoFar == 0)
      groupDocs.set(group, doc);
    groupDocCounts.set(group, hitsSoFar + 1);

    // Each group, and the root, keeps the best score of its documents.
    groupScores.set(group, Math.max(groupScores.get(group), score));
    groupScores.set(0, Math.max(groupScores.get(0), score));
  }

  // The root group counts every document.
  groupDocCounts.set(0, docs.size());
  Trace.debug("Done.");
  Trace.untab();
} // finish()
/**
 * Opens a new group for the given document, then tries to pull other
 * documents into it by way of each of the document's titles. If nothing
 * else matches, the group stays a singleton.
 *
 * @param mainDoc Document to put into a group
 */
private void findGroup(int mainDoc)
{
  // The document gets the next free group number up front.
  docGroups.set(mainDoc, nGroups++);

  // Every title tag of the document serves as a starting point.
  for (int pos = data.docTags.firstPos(mainDoc); pos >= 0;
       pos = data.docTags.nextPos(pos))
  {
    int tag = data.docTags.getValue(pos);
    if (data.tags.getType(tag) == FRBRData.TYPE_TITLE)
      matchTitleNeighbors(mainDoc, tag);
  }
} // findGroup()

/**
 * Walks the title tags outward from the given title, first forward (starting
 * with the title itself, since other documents may carry exactly that title)
 * and then backward, stopping in each direction at the first title that
 * matchOnTitle rejects.
 *
 * @param mainDoc   document being grouped
 * @param mainTitle one of that document's title tags
 */
private void matchTitleNeighbors(int mainDoc, int mainTitle)
{
  for (int t = mainTitle; t >= 0; t = data.tags.next(t)) {
    if (!matchOnTitle(mainDoc, mainTitle, t))
      break;
  }

  for (int t = data.tags.prev(mainTitle); t >= 0; t = data.tags.prev(t)) {
    if (!matchOnTitle(mainDoc, mainTitle, t))
      break;
  }
} // matchTitleNeighbors()
/**
 * Checks whether a candidate title is close enough to the main title (equal,
 * or a partial match per matchPartialTitle) and, if so, tries to add the
 * documents carrying the candidate title to the main document's group.
 *
 * @param mainDoc    main document being matched
 * @param mainTitle  main doc's title tag
 * @param compTitle  title tag to compare
 * @return true if title iteration should continue (the titles matched, even
 *         if no document did); false if the titles did not match
 */
private boolean matchOnTitle(int mainDoc, int mainTitle, int compTitle)
{
  boolean sameTitle = (mainTitle == compTitle);
  if (!sameTitle && !matchPartialTitle(mainTitle, compTitle))
    return false;

  // Look at every document carrying the comparable title.
  for (int pos = data.tagDocs.firstPos(compTitle); pos >= 0;
       pos = data.tagDocs.nextPos(pos))
  {
    int candidate = data.tagDocs.getValue(pos);

    // A candidate joins the group only if it is not the main doc itself, is
    // part of the query result set, has no group yet, and its fields match
    // closely enough. The checks short-circuit in that order.
    boolean joins = candidate != mainDoc
                    && docs.binarySearch(candidate) >= 0
                    && docGroups.get(candidate) < 0
                    && multiFieldMatch(mainDoc, candidate);
    if (joins)
      docGroups.set(candidate, docGroups.get(mainDoc));
  }

  // The title matched, so the caller should keep scanning titles.
  return true;
} // matchOnTitle()
// Scratch lists reused by multiFieldMatch() to avoid re-allocation for each
// iteration: they hold the tags of the type currently being compared, for the
// first and second document respectively.
private IntList matchTags1 = new IntList();
private IntList matchTags2 = new IntList();
/**
 * Decides whether two documents belong in the same FRBR group by scoring
 * their tags one type at a time (title, author, date, id) and summing the
 * scores. A type is scored only if at least one of the two documents has a
 * tag of that type; otherwise its score stays 0.
 *
 * @param doc1 First document
 * @param doc2 Second document
 * @return true if the total score reaches 150
 */
private boolean multiFieldMatch(int doc1, int doc2)
{
  // Type value used once a document has run out of tags.
  final int noType = 99;

  int titleScore = 0;
  int authorScore = 0;
  int dateScore = 0;
  int idScore = 0;

  // Cursor over the first document's tags.
  int posA = data.docTags.firstPos(doc1);
  int tagA = -1;
  int typeA = noType;
  if (posA >= 0) {
    tagA = data.docTags.getValue(posA);
    typeA = data.tags.getType(tagA);
  }

  // Cursor over the second document's tags.
  int posB = data.docTags.firstPos(doc2);
  int tagB = -1;
  int typeB = noType;
  if (posB >= 0) {
    tagB = data.docTags.getValue(posB);
    typeB = data.tags.getType(tagB);
  }

  // Work through the types in turn until both documents are exhausted.
  while (posA >= 0 || posB >= 0)
  {
    // The lower of the two pending types is handled next.
    int curType = (typeA < typeB) ? typeA : typeB;
    assert curType != 99;

    // Gather the first document's tags of this type.
    matchTags1.clear();
    while (typeA == curType)
    {
      matchTags1.add(tagA);
      posA = data.docTags.nextPos(posA);
      if (posA >= 0) {
        tagA = data.docTags.getValue(posA);
        typeA = data.tags.getType(tagA);
      }
      else {
        tagA = -1;
        typeA = noType;
      }
    }

    // Gather the second document's tags of the same type.
    matchTags2.clear();
    while (typeB == curType)
    {
      matchTags2.add(tagB);
      posB = data.docTags.nextPos(posB);
      if (posB >= 0) {
        tagB = data.docTags.getValue(posB);
        typeB = data.tags.getType(tagB);
      }
      else {
        tagB = -1;
        typeB = noType;
      }
    }

    // Score this type with the matching scorer.
    if (curType == FRBRData.TYPE_TITLE) {
      debugFieldMatch("title", doc1, doc2);
      titleScore = scoreTitleMatch(matchTags1, matchTags2);
    }
    else if (curType == FRBRData.TYPE_AUTHOR) {
      debugFieldMatch("author", doc1, doc2);
      authorScore = scoreAuthorMatch(matchTags1, matchTags2);
    }
    else if (curType == FRBRData.TYPE_DATE) {
      debugFieldMatch("date", doc1, doc2);
      dateScore = scoreDateMatch(matchTags1, matchTags2);
    }
    else if (curType == FRBRData.TYPE_ID) {
      debugFieldMatch("id", doc1, doc2);
      idScore = scoreIdMatch(matchTags1, matchTags2);
    }
  } // while
  assert posA < 0 && posB < 0;

  int totalScore = titleScore + authorScore + dateScore + idScore;

  // Debug aid, switched off: dumps both documents and the score breakdown.
  final boolean traceScores = false;
  if (traceScores) {
    outputDisplayKey("Match: ", doc1);
    outputDisplayKey(" vs: ", doc2);
    Trace.debug(
      " = " + titleScore + "t + " + authorScore + "a + " + dateScore +
      "d + " + idScore + "i = " + totalScore);
  }

  // Is the total score high enough?
  return totalScore >= 150;
}
/**
 * Debug aid that would trace the tags currently held in matchTags1 and
 * matchTags2. The leading 'true' in the guard makes it return immediately,
 * so nothing is ever traced and the output level is never even queried.
 *
 * @param field label of the field type being compared
 * @param doc1  first document
 * @param doc2  second document
 */
private void debugFieldMatch(String field, int doc1, int doc2)
{
  if (true || Trace.getOutputLevel() != Trace.debug)
    return;

  Trace.debug("Match " + field + ":");
  Trace.tab();
  traceDocTags(doc1, matchTags1);
  traceDocTags(doc2, matchTags2);
  Trace.untab();
}

/**
 * Traces a document ID followed by an indented line per tag, giving the
 * tag's string and its number.
 *
 * @param doc  document the tags belong to
 * @param tags tags to list
 */
private void traceDocTags(int doc, IntList tags)
{
  Trace.debug("Doc " + doc);
  Trace.tab();
  for (int i = 0; i < tags.size(); i++) {
    int tag = tags.get(i);
    Trace.debug(data.tags.getString(tag) + " {tag=" + tag + "}");
  }
  Trace.untab();
}
/**
 * Debug aid: traces a document's tags as fixed-width columns, one column per
 * tag type. If the document has several tags of one type, further lines are
 * written for the second, third, ... tag of each type; those lines carry
 * blanks in place of the title so the columns stay aligned.
 *
 * Padding is generated to the required width. The previous implementation
 * cut it from a one-character string, which throws
 * StringIndexOutOfBoundsException as soon as more than one blank is needed.
 *
 * @param title label that prefixes the first line
 * @param doc   document whose tags are shown
 */
private void outputDisplayKey(String title, int doc)
{
  int nToSkip = 0;

  // Column width per tag type; the array is indexed by the type value.
  // NOTE(review): assumes the types run from 1 to 4 - confirm against FRBRData.
  int[] fieldMax = { 0, 50, 40, 4, 30 };
  int found = 0;
  do
  {
    StringBuffer buf = new StringBuffer();
    found = 0;
    for (int t = FRBRData.FIRST_TYPE; t <= FRBRData.LAST_TYPE; t++)
    {
      // Pick the tag of this type at index nToSkip, if there is one.
      int skipped = 0;
      String value = "";
      for (int pos = data.docTags.firstPos(doc); pos >= 0;
           pos = data.docTags.nextPos(pos))
      {
        int tag = data.docTags.getValue(pos);
        int type = data.tags.getType(tag);
        int subType = data.tags.getSubType(tag);
        if (type != t)
          continue;
        if (skipped++ == nToSkip) {
          value = data.tags.getString(tag) + " [" + subType + "]";
          found++;
        }
      }

      // Truncate or pad the value to the column width.
      int lenToKeep = Math.min(value.length(), fieldMax[t]);
      if (buf.length() > 0)
        buf.append(" | ");
      buf.append(value.substring(0, lenToKeep));
      appendBlanks(buf, fieldMax[t] - lenToKeep);
    } // for

    // Always write the first line; write later ones only if they hold a tag.
    if (found > 0 || nToSkip == 0) {
      Trace.debug(title + buf);

      // Continuation lines get blanks as wide as the title.
      StringBuffer blankTitle = new StringBuffer();
      appendBlanks(blankTitle, title.length());
      title = blankTitle.toString();
      ++nToSkip;
    }
  } while (found > 0);
} // outputDisplayKey()

/**
 * Appends the given number of blanks to a buffer; a count of zero or less
 * appends nothing.
 *
 * @param buf   buffer to extend
 * @param count number of blanks wanted
 */
private static void appendBlanks(StringBuffer buf, int count)
{
  for (int i = 0; i < count; i++)
    buf.append(' ');
}
// Scratch character buffers, filled via data.tags.getChars() by the partial
// title/author/id matchers and by the date scorer. matchPartialAuthor() may
// swap the two references.
private TagChars chars1 = new TagChars();
private TagChars chars2 = new TagChars();
/**
 * Scores how well two lists of title tags agree.
 *
 * The lists are walked in parallel. Two tags pair up when they have the same
 * sub-type and are either the same tag or a partial match per
 * matchPartialTitle; otherwise the lower-numbered tag is skipped.
 *
 * @param list1 title tags of the first document
 * @param list2 title tags of the second document
 * @return 0 if both lists are empty; 100 if every tag paired up exactly;
 *         80 if every tag paired up but at least one only partially, or if
 *         one list was fully paired while the other has leftovers; -100
 *         otherwise
 */
private int scoreTitleMatch(IntList list1, IntList list2)
{
  // Nothing on either side: no foul, no score.
  if (list1.isEmpty() && list2.isEmpty())
    return 0;

  final int n1 = list1.size();
  final int n2 = list2.size();
  int i1 = 0;
  int i2 = 0;
  int paired = 0;
  int unpaired1 = 0;
  int unpaired2 = 0;
  boolean allExact = true;

  while (i1 < n1 && i2 < n2)
  {
    final int a = list1.get(i1);
    final int b = list2.get(i2);

    // Only tags of the same sub-type are candidates for pairing.
    boolean exact = false;
    boolean partial = false;
    if (data.tags.getSubType(a) == data.tags.getSubType(b)) {
      exact = (a == b);
      partial = !exact && matchPartialTitle(a, b);
    }

    if (exact || partial) {
      ++paired;
      ++i1;
      ++i2;
      if (partial)
        allExact = false;
    }
    else if (a < b) {
      ++unpaired1;
      ++i1;
    }
    else {
      ++unpaired2;
      ++i2;
    }
  }

  // Whatever remains in either list never found a partner.
  unpaired1 += n1 - i1;
  unpaired2 += n2 - i2;

  // Everything paired up: full marks, less if any pairing was partial.
  if (unpaired1 == 0 && unpaired2 == 0) {
    assert paired > 0;
    return allExact ? 100 : 80;
  }

  // One list is wholly covered by the other.
  if (paired > 0 && (unpaired1 == 0 || unpaired2 == 0))
    return 80;

  // Leftovers on both sides, or no pairing at all.
  return -100;
} // scoreTitleMatch()
/**
 * Checks whether one title equals the other with its subtitle (the part
 * from a colon onward) cut short or left off.
 *
 * The titles must agree on at least their first 10 characters. Beyond that,
 * one title must be a complete prefix of the other, and must reach at least
 * as far as a colon that sits at position 10 or later in the longer one.
 * Both directions are tried, so the answer does not depend on the order of
 * the arguments even when both titles contain a colon.
 *
 * @param tag1 first title tag
 * @param tag2 second title tag
 * @return true if the titles match in this partial sense
 */
private boolean matchPartialTitle(int tag1, int tag2)
{
  data.tags.getChars(tag1, chars1);
  data.tags.getChars(tag2, chars2);

  // If at least 10 chars don't match, don't even try.
  // NOTE(review): prefixMatch is taken to be the common prefix length - confirm in TagChars.
  int prefixMatch = chars1.prefixMatch(chars2);
  if (prefixMatch < 10)
    return false;

  // Is the second title a whole prefix of the first, up to its colon or beyond?
  int colonPos = chars1.indexOf(':');
  if (colonPos >= 10 && prefixMatch == chars2.length() && prefixMatch >= colonPos)
    return true;

  // Otherwise try the roles reversed. Previously this was skipped whenever
  // the first title had a colon, which made the result order-dependent.
  colonPos = chars2.indexOf(':');
  return colonPos >= 10 && prefixMatch == chars1.length() && prefixMatch >= colonPos;
}
/**
 * Scores how well two lists of author tags agree.
 *
 * The lists are walked in parallel. Two tags pair up when they have the same
 * sub-type and are either the same tag or a keyword match per
 * matchPartialAuthor; otherwise the lower-numbered tag is skipped.
 *
 * @param list1 author tags of the first document
 * @param list2 author tags of the second document
 * @return 75 if both lists are empty; 100 if every tag paired up exactly;
 *         80 if every tag paired up but at least one only partially, or if
 *         one list was fully paired while the other has leftovers; -100
 *         otherwise
 */
private int scoreAuthorMatch(IntList list1, IntList list2)
{
  // No authors on either side counts as a bit of good.
  if (list1.isEmpty() && list2.isEmpty())
    return 75;

  final int n1 = list1.size();
  final int n2 = list2.size();
  int i1 = 0;
  int i2 = 0;
  int paired = 0;
  int unpaired1 = 0;
  int unpaired2 = 0;
  boolean allExact = true;

  while (i1 < n1 && i2 < n2)
  {
    final int a = list1.get(i1);
    final int b = list2.get(i2);

    // Only tags of the same sub-type are candidates for pairing.
    boolean exact = false;
    boolean partial = false;
    if (data.tags.getSubType(a) == data.tags.getSubType(b)) {
      exact = (a == b);
      partial = !exact && matchPartialAuthor(a, b);
    }

    if (exact || partial) {
      ++paired;
      ++i1;
      ++i2;
      if (partial)
        allExact = false;
    }
    else if (a < b) {
      ++unpaired1;
      ++i1;
    }
    else {
      ++unpaired2;
      ++i2;
    }
  }

  // Whatever remains in either list never found a partner.
  unpaired1 += n1 - i1;
  unpaired2 += n2 - i2;

  // Everything paired up: full marks, less if any pairing was partial.
  if (unpaired1 == 0 && unpaired2 == 0) {
    assert paired > 0;
    return allExact ? 100 : 80;
  }

  // One list is wholly covered by the other.
  if (paired > 0 && (unpaired1 == 0 || unpaired2 == 0))
    return 80;

  // Leftovers on both sides, or no pairing at all.
  return -100;
} // scoreAuthorMatch()
// Generation stamp for wordHash: matchPartialAuthor() increments it on every
// call, so slots written during earlier calls no longer compare equal and the
// table never has to be cleared.
private int wordHashKey = 0;
// Number of slots in the word hash table: a prime found after 1,000,000.
private static final int WORD_HASH_SIZE = Prime.findAfter(1000000);
// Word hash table: a slot holds the wordHashKey of the call that last stored
// a word hashing to it.
private int[] wordHash = new int[WORD_HASH_SIZE];
// Character classification table, indexed by char value. 'p' marks a
// character that separates words; every other entry keeps the default '\0'.
private static final char[] charType = new char[0x10000];
static
{
// Whitespace
charType[' '] = 'p';
charType['\t'] = 'p';
charType['\n'] = 'p';
charType['\r'] = 'p';
charType['\f'] = 'p';
// Punctuation
charType['\''] = 'p';
charType['"'] = 'p';
charType['.'] = 'p';
charType['&'] = 'p';
charType['@'] = 'p';
charType['-'] = 'p';
charType['/'] = 'p';
charType[','] = 'p';
charType[':'] = 'p';
charType[';'] = 'p';
charType['('] = 'p';
charType[')'] = 'p';
charType['['] = 'p';
charType[']'] = 'p';
};
/**
 * Checks whether the keywords of the shorter of two author names all occur
 * in the longer one, regardless of order.
 *
 * Words are runs of characters between separators (see charType); only words
 * longer than three characters with a non-zero hash count. Presence is
 * tested through a hash table, so two different words that land in the same
 * slot are treated as equal.
 *
 * @param tag1 first author tag
 * @param tag2 second author tag
 * @return true if the shorter name has at least two counted words and all
 *         of them were found in the longer name
 */
private boolean matchPartialAuthor(int tag1, int tag2)
{
  // Load both names and make chars1 the longer of the two.
  data.tags.getChars(tag1, chars1);
  data.tags.getChars(tag2, chars2);
  if (chars2.length() > chars1.length())
  {
    TagChars swap = chars1;
    chars1 = chars2;
    chars2 = swap;
  }

  // A fresh stamp tells this call's hash entries apart from older ones.
  final int stamp = ++wordHashKey;

  // Pass 1: stamp the slot of every counted word in the longer name.
  final int longLen = chars1.length();
  int pos = 0;
  while (pos < longLen)
  {
    int hash = 0;
    int wordLen = 0;
    while (pos < longLen)
    {
      char c = chars1.charAt(pos++);
      if (charType[c] == 'p')
        break;
      hash = (hash * 31) + c;
      ++wordLen;
    }
    if (hash != 0 && wordLen > 3)
      wordHash[(hash & 0x7FFFFFFF) % WORD_HASH_SIZE] = stamp;
  }

  // Pass 2: count the shorter name's words, and how many of them hit a slot
  // stamped in pass 1.
  final int shortLen = chars2.length();
  int counted = 0;
  int present = 0;
  pos = 0;
  while (pos < shortLen)
  {
    int hash = 0;
    int wordLen = 0;
    while (pos < shortLen)
    {
      char c = chars2.charAt(pos++);
      if (charType[c] == 'p')
        break;
      hash = (hash * 31) + c;
      ++wordLen;
    }
    if (hash != 0 && wordLen > 3) {
      ++counted;
      if (wordHash[(hash & 0x7FFFFFFF) % WORD_HASH_SIZE] == stamp)
        ++present;
    }
  } // while

  // Good only if there were at least two words and every one was present.
  return (counted >= 2 && counted == present);
} // matchPartialAuthor()
/**
 * Scores how well the dates of two documents agree.
 *
 * @param list1 date tags of the first document (at most one expected)
 * @param list2 date tags of the second document (at most one expected)
 * @return 0 if either document has no date or a year cannot be parsed from
 *         either tag; 50 if both carry the same tag; -20 if the tags differ
 *         but the parsed years are equal; -40 if the years differ by at most
 *         2; -60 otherwise
 */
@SuppressWarnings("unused")
private int scoreDateMatch(IntList list1, IntList list2)
{
  // A missing date is not held against the pair.
  if (list1.isEmpty() || list2.isEmpty())
    return 0;

  // Since at the moment we're using sort-year, there should be only one.
  assert list1.size() == 1;
  assert list2.size() == 1;
  final int tagA = list1.get(0);
  final int tagB = list2.get(0);

  // Same tag on both sides: the best case.
  if (tagA == tagB)
    return 50;

  // Different tags, so compare the years found inside them.
  data.tags.getChars(tagA, chars1);
  data.tags.getChars(tagB, chars2);
  final int yearA = parseYear(chars1);
  final int yearB = parseYear(chars2);
  if (yearA < 0 || yearB < 0)
    return 0;

  // Equal years are only slightly bad; the penalty grows with the distance.
  final int gap = Math.abs(yearA - yearB);
  if (gap == 0)
    return -20;
  return (gap <= 2) ? -40 : -60;
} // scoreDateMatch
/**
 * Scans the characters for a year. Consecutive digits are accumulated into a
 * number (any non-digit resets it), and the number is returned as soon as it
 * lies strictly between 1800 and 2100.
 *
 * @param chars characters to scan
 * @return the year found, or -99 if there is none
 */
private int parseYear(TagChars chars)
{
  final int len = chars.length();
  int value = 0;
  for (int i = 0; i < len; i++)
  {
    final int digit = chars.charAt(i) - '0';
    if (digit < 0 || digit > 9) {
      // Not a digit: the run is broken, start over.
      value = 0;
      continue;
    }
    value = (value * 10) + digit;
    if (value > 1800 && value < 2100)
      return value;
  }
  return -99;
}
/**
 * Scores how well two lists of identifier tags agree.
 *
 * The lists are walked in parallel. Two tags pair up when they have the same
 * sub-type and are either the same tag or a partial match per
 * matchPartialId; otherwise the lower-numbered tag is skipped.
 *
 * @param list1 identifier tags of the first document
 * @param list2 identifier tags of the second document
 * @return 100 if every tag paired up exactly; 80 if every tag paired up but
 *         at least one only partially, or if one list was fully paired while
 *         the other has leftovers; 0 if both lists are empty or there is any
 *         other mismatch
 */
private int scoreIdMatch(IntList list1, IntList list2)
{
  // Nothing on either side: no foul, no score.
  if (list1.isEmpty() && list2.isEmpty())
    return 0;

  final int n1 = list1.size();
  final int n2 = list2.size();
  int i1 = 0;
  int i2 = 0;
  int paired = 0;
  int unpaired1 = 0;
  int unpaired2 = 0;
  boolean allExact = true;

  while (i1 < n1 && i2 < n2)
  {
    final int a = list1.get(i1);
    final int b = list2.get(i2);

    // Only tags of the same sub-type are candidates for pairing.
    boolean exact = false;
    boolean partial = false;
    if (data.tags.getSubType(a) == data.tags.getSubType(b)) {
      exact = (a == b);
      partial = !exact && matchPartialId(a, b);
    }

    if (exact || partial) {
      ++paired;
      ++i1;
      ++i2;
      if (partial)
        allExact = false;
    }
    else if (a < b) {
      ++unpaired1;
      ++i1;
    }
    else {
      ++unpaired2;
      ++i2;
    }
  }

  // Whatever remains in either list never found a partner.
  unpaired1 += n1 - i1;
  unpaired2 += n2 - i2;

  // Everything paired up: full marks, less if any pairing was partial.
  if (unpaired1 == 0 && unpaired2 == 0) {
    assert paired > 0;
    return allExact ? 100 : 80;
  }

  // One list is wholly covered by the other.
  if (paired > 0 && (unpaired1 == 0 || unpaired2 == 0))
    return 80;

  // Mismatches are pretty common with identifiers, so they are not counted
  // as a negative.
  return 0;
} // scoreIdMatch()
/**
 * Checks whether one identifier equals the other with its parenthesized
 * suffix cut short or left off.
 *
 * The identifiers must agree on at least their first 6 characters. Beyond
 * that, one must be a complete prefix of the other, and must reach at least
 * as far as an opening parenthesis that sits at position 6 or later in the
 * longer one. Both directions are tried, so the answer does not depend on
 * the order of the arguments even when both identifiers contain a
 * parenthesis.
 *
 * @param tag1 first identifier tag
 * @param tag2 second identifier tag
 * @return true if the identifiers match in this partial sense
 */
private boolean matchPartialId(int tag1, int tag2)
{
  data.tags.getChars(tag1, chars1);
  data.tags.getChars(tag2, chars2);

  // If at least 6 chars don't match, don't even try.
  // NOTE(review): prefixMatch is taken to be the common prefix length - confirm in TagChars.
  int prefixMatch = chars1.prefixMatch(chars2);
  if (prefixMatch < 6)
    return false;

  // Is the second id a whole prefix of the first, up to its paren or beyond?
  int parenPos = chars1.indexOf('(');
  if (parenPos >= 6 && prefixMatch == chars2.length() && prefixMatch >= parenPos)
    return true;

  // Otherwise try the roles reversed. Previously this was skipped whenever
  // the first id had a parenthesis, which made the result order-dependent.
  parenPos = chars2.indexOf('(');
  return parenPos >= 6 && prefixMatch == chars1.length() && prefixMatch >= parenPos;
}
/**
 * Reports the field name for this group data. The name is synthetic: it is
 * a fixed label, not one of the fields passed to init().
 *
 * @return always "dynamicFRBR"
 */
public String field()
{
  return "dynamicFRBR";
}
/**
 * Builds the name of a group: the prefix "group-" followed by the group's
 * number in decimal.
 *
 * @param groupId number of the group
 * @return the group's name
 */
public String name(int groupId)
{
  return "group-".concat(Integer.toString(groupId));
}
/**
 * Translates a group name of the form "group-N" back into the group number
 * N. Any name that does not have that form yields -1, including one whose
 * suffix is not a valid integer or is negative (previously such a suffix
 * raised NumberFormatException or returned a negative number).
 *
 * No check is made that the number refers to an existing group.
 *
 * @param name group name to look up
 * @return the group number, or -1 if the name is not a valid group name
 */
public int findGroup(String name)
{
  final String prefix = "group-";
  if (!name.startsWith(prefix))
    return -1;
  try {
    int groupId = Integer.parseInt(name.substring(prefix.length()));
    return (groupId >= 0) ? groupId : -1;
  }
  catch (NumberFormatException e) {
    // Not a number after the prefix: treat it like any other unknown name.
    return -1;
  }
}
/**
 * Gives the first child of a group. Only the root (group 0) has children,
 * and its first child is group 1, provided any group besides the root
 * exists.
 *
 * @param groupId group to query
 * @return 1 for the root when there are other groups, -1 otherwise
 */
public int child(int groupId)
{
  boolean rootWithChildren = (groupId == 0) && (nGroups > 1);
  if (rootWithChildren)
    return 1;
  return -1;
}
/**
 * Gives the next sibling of a group. The non-root groups form a single
 * chain in numeric order; the root and the last group have no sibling.
 *
 * @param groupId group to query
 * @return groupId + 1, or -1 for the root and for the last group
 */
public int sibling(int groupId)
{
  if (groupId == 0)
    return -1;
  if (groupId == nGroups - 1)
    return -1;
  return groupId + 1;
}
/**
 * Gives the parent of a group. Every group other than the root hangs
 * directly off the root (group 0).
 *
 * @param groupId group to query
 * @return -1 for the root, 0 for any other group
 */
public int parent(int groupId)
{
  if (groupId == 0)
    return -1;
  return 0;
}
/**
 * Counts the children of a group. The root's children are all the other
 * groups; no other group has children.
 *
 * @param groupId group to query
 * @return nGroups - 1 for the root, 0 for any other group
 */
public int nChildren(int groupId)
{
  if (groupId != 0)
    return 0;
  return nGroups - 1;
}
/**
 * Gives the first (and only) link of a document. The link ID is simply the
 * entry recorded for the document in the doc-to-group map.
 *
 * @param docId document to query
 * @return the document's entry in the doc-to-group map
 */
public int firstLink(int docId)
{
  final int link = docGroups.get(docId);
  return link;
}
/**
 * Gives the link following the given one. There never is one here.
 *
 * @param linkId link to advance from (ignored)
 * @return always -1
 */
public int nextLink(int linkId)
{
  return -1;
}
/**
 * Gives the group a link refers to. Link IDs and group IDs are the same
 * numbers here, so the argument is returned unchanged.
 *
 * @param linkId link to resolve
 * @return the same value as linkId
 */
public int linkGroup(int linkId)
{
  return linkId;
}
/**
 * Reports the number of groups created so far, the root included.
 *
 * @return current group count
 */
public int nGroups()
{
  return this.nGroups;
}
/**
 * Tells whether this group data is dynamic.
 *
 * @return always true
 */
public boolean isDynamic()
{
  return true;
}
/**
 * Reports how many documents were counted for a group. The counts are built
 * by finish().
 *
 * @param groupId group to query
 * @return number of documents recorded for the group
 */
public int nDocHits(int groupId)
{
  return this.groupDocCounts.get(groupId);
}
/**
 * Reports the score recorded for a group. The scores are built by finish().
 *
 * @param groupId group to query
 * @return score recorded for the group
 */
public float score(int groupId)
{
  return this.groupScores.get(groupId);
}
/**
 * Orders two groups by comparing the first document of each: first on the
 * primary sort field (honoring the reverse flag), then on every other field
 * type in ascending type order, never reversed.
 *
 * @param group1 first group
 * @param group2 second group
 * @return 0 if the groups are the same or no field differs; otherwise the
 *         result of the first field comparison that is not 0
 */
public final int compare(int group1, int group2)
{
  // A group always equals itself.
  if (group1 == group2)
    return 0;

  // Each group is represented by its first document.
  final int docA = groupDocs.get(group1);
  final int docB = groupDocs.get(group2);

  // The primary field decides if it can...
  int result = compareField(primarySort, docA, docB, reversePrimarySort);

  // ...otherwise the remaining fields break the tie, in order.
  for (int type = FRBRData.FIRST_TYPE;
       result == 0 && type <= FRBRData.LAST_TYPE; ++type)
  {
    if (type != primarySort)
      result = compareField(type, docA, docB, false);
  }
  return result;
}
/**
 * Looks up the string of the first title tag of a document.
 *
 * @param doc document to query
 * @return the title string, or "" if the document has no title tag
 */
@SuppressWarnings("unused")
private String docTitle(int doc)
{
  int pos = data.docTags.firstPos(doc);
  while (pos >= 0)
  {
    int tag = data.docTags.getValue(pos);
    if (data.tags.getType(tag) == FRBRData.TYPE_TITLE)
      return data.tags.getString(tag);
    pos = data.docTags.nextPos(pos);
  }
  return "";
}
/**
 * Compares two documents on one field type by comparing the numbers of their
 * first tags of that type. A document without such a tag sorts at the end,
 * whichever direction is used.
 *
 * @param type    field type to compare on
 * @param doc1    first document
 * @param doc2    second document
 * @param reverse true to invert the order
 * @return -1, 0 or +1 as doc1 sorts before, with, or after doc2
 */
private int compareField(int type, int doc1, int doc2, boolean reverse)
{
  // Stand-in for a missing tag, chosen so that it lands last in the
  // requested direction.
  final int missing = reverse ? Integer.MIN_VALUE : Integer.MAX_VALUE;

  int tagA = firstTagOfType(doc1, type);
  if (tagA == 0)
    tagA = missing;

  int tagB = firstTagOfType(doc2, type);
  if (tagB == 0)
    tagB = missing;

  // From here a plain numeric comparison of the tags will do.
  if (tagA == tagB)
    return 0;
  boolean firstWins = reverse ? (tagA > tagB) : (tagA < tagB);
  return firstWins ? -1 : +1;
} // compareField

/**
 * Finds the first non-zero tag of the given type on a document.
 *
 * @param doc  document to search
 * @param type tag type wanted
 * @return the tag, or 0 if the document has none
 */
private int firstTagOfType(int doc, int type)
{
  for (int pos = data.docTags.firstPos(doc); pos >= 0;
       pos = data.docTags.nextPos(pos))
  {
    int tag = data.docTags.getValue(pos);
    if (data.tags.getType(tag) == type && tag != 0)
      return tag;
  }
  return 0;
} // firstTagOfType
} // class FRBRGroupData