package edu.indiana.lib.twinpeaks.search.singlesearch;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import edu.indiana.lib.twinpeaks.util.*;
public class CqlParser extends org.xml.sax.helpers.DefaultHandler {
private static org.apache.commons.logging.Log _log = LogUtils.getLog(CqlParser.class);
//
// Index mappings (CQL -> Sirsi)
//
private static final java.util.Map INDEX_MAP = new java.util.HashMap();
static {
INDEX_MAP.put("keyword", " ");
INDEX_MAP.put("title", ":TITLE");
INDEX_MAP.put("author", ":CREATOR");
INDEX_MAP.put("subject", ":SUBJECT");
INDEX_MAP.put("year", ":DATE");
}
//
// Boolean mappings (CQL -> Sirsi)
//
private static final java.util.Map BOOL_RELATION_MAP = new java.util.HashMap();
static
{
BOOL_RELATION_MAP.put("and", " AND ");
BOOL_RELATION_MAP.put("or", " OR ");
}
//
// SAX Parsing
//
SAXParser saxParser;
StringBuilder textBuffer;
StringBuilder searchClause;
boolean inSearchClause;
java.util.Stack cqlStack;
//
// Treat all non-keyword fields as phrases?
//
final static boolean TREAT_ALL_FIELDS_AS_PHRASE = true;
//
// Are we currently parsing a keyword field?
//
boolean inKeyword;
/**
* Constructor
*/
public CqlParser()
{
// initialize stack
cqlStack = new java.util.Stack();
// initialize SAX Parser
SAXParserFactory factory;
factory = SAXParserFactory.newInstance();
factory.setNamespaceAware(true);
try
{
saxParser = factory.newSAXParser();
}
catch (org.xml.sax.SAXException e)
{
_log.error("SAX exception: " + e);
}
catch (ParserConfigurationException e)
{
_log.error("Parse failed: " + e);
}
}
/**
* Converts a CQL-formatted search query into a format that the Web2 Bridge
* can understand. Uses org.z3950.zing.cql.CQLNode.toXCQL() and SAX Parsing
* to convert the cqlSearchQuery into a find_command.
*
* @param cqlSearchQuery CQL-formatted search query.
* @return X-Server find_command or null if cqlSearchQuery is null or empty.
* @see org.z3950.zing.cql.CQLNode.toXCQL()
*/
public String doCQL2MetasearchCommand( String cqlSearchQuery )
{
if ( cqlSearchQuery == null || cqlSearchQuery.equals( "" ) )
{
return null;
}
org.z3950.zing.cql.CQLParser parser = new org.z3950.zing.cql.CQLParser();
org.z3950.zing.cql.CQLNode root = null;
try
{
// parse the criteria
root = parser.parse( cqlSearchQuery );
}
catch( java.io.IOException ioe )
{
_log.error("CQL parse exception: " + ioe);
}
catch( org.z3950.zing.cql.CQLParseException e )
{
_log.error("CQL parse exception: " + e);
}
if (root == null)
{
return null;
}
String cqlXml = root.toXCQL( 0 );
_log.debug("CQL XML:");
_log.debug(cqlXml);
// get cqlXml as a stream
java.io.ByteArrayInputStream byteInputStream = null;
try
{
byteInputStream = new java.io.ByteArrayInputStream(cqlXml.getBytes( "UTF8" ));
}
catch( java.io.UnsupportedEncodingException uee )
{
_log.error("Encoding exception: " + uee);
}
if (byteInputStream == null)
{
return null;
}
// clear the stack
cqlStack.removeAllElements();
// run the parser
try
{
saxParser.parse( byteInputStream, this );
byteInputStream.close();
}
catch( java.io.IOException ioe )
{
_log.error("IO exception: " + ioe);
}
catch( org.xml.sax.SAXException spe )
{
_log.error("SAX exception: " + spe);
}
String cqlResult = ( String ) cqlStack.pop();
return cqlResult.trim();
}
//----------------------------------
// DEFAULT HANDLER IMPLEMENTATIONS -
//----------------------------------
/**
* Receive notification of the beginning of an element.
*
* @see org.xml.sax.helpers.DefaultHandler
*/
public void startElement( String namespaceURI,
String sName,
String qName,
Attributes attrs ) throws SAXException
{
// set flags to avoid overwriting duplicate tag data
if( qName.equals( "searchClause" ) )
{
inSearchClause = true;
inKeyword = false;
}
}
/**
* Receive notification of the end of an element.
*
* @see org.xml.sax.helpers.DefaultHandler
*/
public void endElement( String namespaceURI, String sName, String qName )
throws SAXException
{
// extract data
extractDataFromText( qName );
// clear flags
if( qName.equals( "searchClause" ) )
{
inSearchClause = false;
inKeyword = false;
}
}
/**
* Receive notification of character data inside an element.
*
* @see org.xml.sax.helpers.DefaultHandler
*/
public void characters( char[] buf, int offset, int len )
throws SAXException
{
// store character data
String text = new String( buf, offset, len );
if( textBuffer == null ) {
textBuffer = new StringBuilder( text );
} else {
textBuffer.append( text );
}
}
//-------------------------
// PRIVATE HELPER METHODS -
//-------------------------
private void extractDataFromText(String element)
{
if (textBuffer == null)
{
return;
}
String text = textBuffer.toString().trim();
if (text.equals("") && !element.equals("triple"))
{
return;
}
//
// check for a boolean relation value
//
if (!inSearchClause && element.equals( "value" ))
{
cqlStack.push(text);
}
//
// Construct a search clause
//
if (inSearchClause)
{
if (searchClause == null)
{
searchClause = new StringBuilder();
}
//
// General syntax: title=macbeth
//
// (title is the index, = is the value, macbeth is the term)
//
if (element.equals("index"))
{
String field = translateIndex(text);
inKeyword = ((String) INDEX_MAP.get("keyword")).equals(field);
searchClause.append(field);
}
else if (element.equals("value"))
{
//
// The relation value is always supplied as '='. The Muse syntax employed
// by the Web2 bridge doesn't need it. Use a space instead.
//
searchClause.append(' ');
}
else if (element.equals("term"))
{
//
// Search term processing:
//
// * Honor '+' encoding for embedded spaces
// * Add double quotes
//
// Example:
// aa+bb+cc <becomes> "aa bb cc"
//
if (TREAT_ALL_FIELDS_AS_PHRASE || inKeyword)
{
searchClause.append('"');
}
searchClause.append(text.replaceAll("\\+", " ").trim());
if (TREAT_ALL_FIELDS_AS_PHRASE || inKeyword)
{
searchClause.append('"');
}
cqlStack.push(searchClause.toString().trim());
searchClause = null;
}
}
//
// evaluate expression so far if we hit a </triple>
//
if( element.equals( "triple" ) )
{
String rightOperand = ( String ) cqlStack.pop();
String leftOperand = ( String ) cqlStack.pop();
String booleanRelation = ( String ) cqlStack.pop();
cqlStack.push(leftOperand.replaceAll("\\+", " ").trim()
+ translateBooleanRelation(booleanRelation)
+ rightOperand.replaceAll("\\+", " ").trim());
}
textBuffer = null;
}
/**
* Translate a CQL index to the appropriate Sirsi/Muse field name
* @param cqlIndex CQL index name
* @return Sirsi/Muse field name
*/
private String translateIndex(String cqlIndex)
{
String sirsiIndex = ( String ) INDEX_MAP.get(cqlIndex);
if (sirsiIndex == null || sirsiIndex.equals( "" ))
{
_log.error("translateIndex(): bad index, using KEYWORD");
sirsiIndex = (String) INDEX_MAP.get("keyword");
}
return sirsiIndex;
}
/**
* Translate a CQL boolean term to the appropriate Sirsi/Muse syntax
* @param booleanRelation CQL boolean term
* @return Sirsi/Muse boolean operation
*/
private String translateBooleanRelation(String booleanRelation)
{
String sirsiBoolean = (String) BOOL_RELATION_MAP.get(booleanRelation);
if (sirsiBoolean == null || sirsiBoolean.equals( "" ))
{
_log.error("translateBooleanRelation(): bad boolean relation, using AND" );
sirsiBoolean = (String) BOOL_RELATION_MAP.get("and");
}
return sirsiBoolean;
}
/**
* Main(): test
*/
public static void main(String[] args)
{
CqlParser parser = new CqlParser();
String query;
query = "title=\"My Title\"";
System.out.println();
System.out.println("CQL: " + query);
System.out.println("Sirsi: " + parser.doCQL2MetasearchCommand(query));
query = "title=\"\\\"My Title\\\"\"";
System.out.println();
System.out.println("CQL: " + query);
System.out.println("Sirsi: " + parser.doCQL2MetasearchCommand(query));
query = "title=\"My Title\" and keyword=\"some keywords\"";
System.out.println();
System.out.println("CQL: " + query);
System.out.println("Sirsi: " + parser.doCQL2MetasearchCommand(query));
query = "title=\"My Title\" and keyword=\"some keywords\" and year=\"1999\"";
System.out.println();
System.out.println("CQL: " + query);
System.out.println("Sirsi: " + parser.doCQL2MetasearchCommand(query));
}
}