package org.cdlib.xtf.textEngine;
/**
* Copyright (c) 2009, Regents of the University of California
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the University of California nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
import java.io.File;
import java.io.IOException;
import javax.servlet.ServletException;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.cdlib.xtf.crossQuery.test.TestableCrossQuery;
import org.cdlib.xtf.dynaXML.test.TestableDynaXML;
import org.cdlib.xtf.util.EasyNode;
import org.cdlib.xtf.util.Trace;
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
/**
* This class performs the validation steps for a specified index, checking that
* the results are acceptable. This is used at index time to decide whether
* to rotate in a new index, and also by the servlets to "warm up" a new index
* before presenting it to the user.
*
* @author Martin Haye
*/
public class IndexValidator
{
private TestableCrossQuery crossQuery;
private TestableDynaXML dynaXML;
private int nErrs;
/**
* Run validations for the given index.
*
* @param baseDir XTF home directory
* @param indexPath path to the index data
* @param indexReader Lucene reader for the index
* @return true iff all validations passed
* @throws IOException if the index can't be read
*/
public boolean validate(String baseDir, String indexPath, IndexReader indexReader)
throws IOException
{
// Create the servlets we'll use for testing.
try {
crossQuery = new TestableCrossQuery(baseDir);
crossQuery.overrideIndexDir(indexPath);
dynaXML = new TestableDynaXML(baseDir);
dynaXML.overrideIndexDir(indexPath);
} catch (ServletException e) {
throw new IOException(e.getMessage());
}
// Fetch the index information chunk.
Hits match = new IndexSearcher(indexReader).search(new TermQuery(new Term("indexInfo", "1")));
if (match.length() == 0)
throw new IOException("Index missing indexInfo doc");
assert match.id(0) == 0 : "indexInfo chunk must be first in index";
Document doc = match.doc(0);
// If no validation is specified, we're done.
String validationName = doc.get("validation");
if (validationName == null || validationName.length() == 0)
return true;
// Let the user know we're validating now.
Trace.info(String.format("Validating Index:"));
Trace.tab();
try
{
// Read the validation file.
File validationFile = new File(indexPath, validationName);
EasyNode root = EasyNode.readXMLFile(validationFile);
// Run the validation.
nErrs = 0;
traverse(root, 0);
// If any errors, abort.
if (nErrs > 0) {
Trace.untab();
Trace.error(String.format("Validation failed: %d error(s)", nErrs));
return false;
}
// All checks passed... the index is golden.
Trace.untab();
Trace.info("Done.");
return true;
}
catch (NumberFormatException err) {
Trace.untab();
Trace.error("Validation failed: non-numeric attribute found in validation specification");
return false;
}
catch (Exception err) {
Trace.untab();
Trace.error("Validation failed: " + err.getMessage());
return false;
}
}
/**
* Traverse the validation specification document, visiting each node.
*/
private void traverse(EasyNode node, int level)
throws ValidationError, ServletException, IOException
{
visit(node, level);
for (EasyNode child : node.children())
traverse(child, level+1);
}
/**
* Process one node of the validation specification document.
*/
private void visit(EasyNode node, int level)
throws ValidationError, ServletException, IOException
{
// Forget the outer doc wrapper
if (level == 0)
return;
// Check the root level
if (level == 1) {
String lookFor = "index-validation";
if (!node.name().equals(lookFor))
throw new ValidationError("Root element of validation file must be '<index-validation>'");
return;
}
// The meat is at the second level, so there shouldn't be things at any other level.
else if (level > 2) {
if (!node.isText())
throw new ValidationError("Element '%s' not recognized at level %s", node.name(), level);
}
// Validate the attributes.
int minHits = 0;
for (String attrName : node.attrNames())
{
if (attrName.equals("minHits"))
minHits = Integer.parseInt(node.attrValue(attrName));
else
throw new ValidationError("Attribute '%s' not recognized on '%s' element", attrName, node.name());
}
// And perform the validation (use the appropriate servlet).
String url = node.toString();
int nHits = 0;
int prevTraceLevel = Trace.getOutputLevel();
if (node.name().equals("crossQuery")) {
Trace.info("crossQuery: [%s] ...", url);
Trace.setOutputLevel(Trace.warnings);
crossQuery.service(url);
nHits = crossQuery.nHits();
}
else if (node.name().equals("dynaXML")) {
Trace.info("dynaXML: [%s] ...", url);
Trace.setOutputLevel(Trace.warnings);
dynaXML.service(url);
nHits = dynaXML.nHits();
}
else if (node.isText())
return;
Trace.setOutputLevel(prevTraceLevel);
if (minHits != 0 && nHits < minHits) {
Trace.more(Trace.info, " Failed:");
Trace.error(" Validation required at least %d hits, but query returned %d", minHits, nHits);
++nErrs;
}
else
Trace.more(Trace.info, " Done.");
}
/** Internal exception for quickly passing errors up the call chain. */
public static class ValidationError extends Exception {
ValidationError(String msg, Object ... args) {
super(String.format(msg, args));
}
}
}