package edu.indiana.lib.twinpeaks.search.sru;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import edu.indiana.lib.twinpeaks.util.*;
/**
 * Translates a CQL search query into the query syntax used by 360 Search.
 *
 * The CQL text is parsed with org.z3950.zing.cql.CQLParser, rendered as XCQL
 * (XML), and that XML is then walked with SAX. Search clauses and boolean
 * operators are collected on a stack and combined each time a
 * <code>&lt;/triple&gt;</code> element is closed.
 *
 * Instances keep per-parse state in fields (text buffer, clause builder,
 * flags, stack), so a single instance must not be used by several threads
 * at the same time.
 */
public class CqlParser extends org.xml.sax.helpers.DefaultHandler
{
  private static final org.apache.commons.logging.Log _log = LogUtils.getLog(CqlParser.class);

  // XCQL element names this handler reacts to
  private static final String ELEMENT_SEARCH_CLAUSE = "searchClause";
  private static final String ELEMENT_TRIPLE = "triple";
  private static final String ELEMENT_INDEX = "index";
  private static final String ELEMENT_VALUE = "value";
  private static final String ELEMENT_TERM = "term";

  // Default keys used when an index or boolean cannot be translated
  private static final String KEYWORD_INDEX = "keyword";
  private static final String DEFAULT_BOOLEAN = "and";

  //
  // Index mappings (CQL -> 360 Search) (this is pretty much CQL to CQL)
  //
  private static final java.util.Map<String, String> INDEX_MAP = new java.util.HashMap<String, String>();
  static
  {
    INDEX_MAP.put(KEYWORD_INDEX, " ");
    INDEX_MAP.put("title", "cs.title=");
    INDEX_MAP.put("author", "cs.author=");
    INDEX_MAP.put("subject", "cs.subject=");
    INDEX_MAP.put("year", "cs.date=");
  }

  //
  // Boolean mappings (CQL -> 360 Search)
  //
  private static final java.util.Map<String, String> BOOL_RELATION_MAP = new java.util.HashMap<String, String>();
  static
  {
    BOOL_RELATION_MAP.put(DEFAULT_BOOLEAN, " and ");
    BOOL_RELATION_MAP.put("or", " or ");
    BOOL_RELATION_MAP.put("not", " not ");
  }

  //
  // SAX Parsing
  //
  SAXParser saxParser;
  StringBuilder textBuffer;
  StringBuilder searchClause;
  boolean inSearchClause;
  java.util.Stack<String> cqlStack;

  //
  // Treat all non-keyword fields as phrases?
  //
  final static boolean TREAT_ALL_FIELDS_AS_PHRASE = true;

  //
  // Are we currently parsing a keyword field?
  //
  boolean inKeyword;

  /**
   * Constructor. Creates the operand stack and a namespace-aware SAX parser.
   *
   * If the SAX parser cannot be created the failure is logged and the parser
   * field stays null; doCQL2MetasearchCommand() then returns null.
   */
  public CqlParser()
  {
    // initialize stack
    cqlStack = new java.util.Stack<String>();

    // initialize SAX Parser
    SAXParserFactory factory = SAXParserFactory.newInstance();
    factory.setNamespaceAware(true);
    try
    {
      saxParser = factory.newSAXParser();
    }
    catch (org.xml.sax.SAXException e)
    {
      _log.error("SAX exception: " + e, e);
    }
    catch (ParserConfigurationException e)
    {
      _log.error("Parse failed: " + e, e);
    }
  }

  /**
   * Converts a CQL-formatted search query into a format that 360 Search
   * can understand. Uses org.z3950.zing.cql.CQLNode.toXCQL() and SAX Parsing
   * to convert the cqlSearchQuery into an almost identical query parameter.
   *
   * @param cqlSearchQuery CQL-formatted search query.
   * @return A 360 Search query, or null when the query is null or empty,
   *         when it cannot be parsed, or when no result could be produced
   * @see org.z3950.zing.cql.CQLNode#toXCQL
   */
  public String doCQL2MetasearchCommand( String cqlSearchQuery )
  {
    if (cqlSearchQuery == null || cqlSearchQuery.equals( "" ))
    {
      return null;
    }

    // The constructor may have failed to build a parser
    if (saxParser == null)
    {
      _log.error("No SAX parser available, cannot translate CQL query");
      return null;
    }

    org.z3950.zing.cql.CQLParser parser = new org.z3950.zing.cql.CQLParser();
    org.z3950.zing.cql.CQLNode root = null;

    // Parse the user criteria
    try
    {
      root = parser.parse(cqlSearchQuery);
    }
    catch (Exception exception)
    {
      _log.error("Exception in CQL parse: " + exception, exception);
    }
    if (root == null)
    {
      return null;
    }

    String cqlXml = root.toXCQL( 0 );
    _log.debug("CQL XML:");
    _log.debug(cqlXml);

    // Open the CQL XML string as a stream
    java.io.ByteArrayInputStream byteInputStream = null;
    try
    {
      byteInputStream = new java.io.ByteArrayInputStream(cqlXml.getBytes("UTF8"));
    }
    catch (Exception exception)
    {
      _log.error("Exception opening input stream: " + exception, exception);
    }
    if (byteInputStream == null)
    {
      return null;
    }

    // Start from a clean slate: a previous parse that was aborted half way
    // could otherwise leak its buffer, clause, flags or stack entries into
    // this translation.
    resetParseState();

    // Parse (closing a ByteArrayInputStream has no effect, so there is
    // nothing to release afterwards)
    try
    {
      saxParser.parse(byteInputStream, this);
    }
    catch (Exception exception)
    {
      _log.error("Exception in parse: " + exception, exception);
      // Whatever is left on the stack is a partial result - do not use it
      return null;
    }

    if (cqlStack.isEmpty())
    {
      _log.error("CQL translation produced no result for query: " + cqlSearchQuery);
      return null;
    }
    return cqlStack.pop().trim();
  }

  //----------------------------------
  // DEFAULT HANDLER IMPLEMENTATIONS -
  //----------------------------------

  /**
   * Receive notification of the beginning of an element.
   *
   * Only <code>searchClause</code> is of interest here: it marks the start
   * of a new clause and resets the keyword flag.
   *
   * @see org.xml.sax.helpers.DefaultHandler
   */
  @Override
  public void startElement(String namespaceURI,
                           String sName,
                           String qName,
                           Attributes attrs) throws SAXException
  {
    // set flags to avoid overwriting duplicate tag data
    if (ELEMENT_SEARCH_CLAUSE.equals(qName))
    {
      inSearchClause = true;
      inKeyword = false;
    }
  }

  /**
   * Receive notification of the end of an element.
   *
   * The text collected so far is interpreted for the element being closed;
   * afterwards the clause flags are cleared when a search clause ends.
   *
   * @see org.xml.sax.helpers.DefaultHandler
   */
  @Override
  public void endElement(String namespaceURI, String sName, String qName)
                         throws SAXException
  {
    // extract data
    extractDataFromText(qName);

    // clear flags
    if (ELEMENT_SEARCH_CLAUSE.equals(qName))
    {
      inSearchClause = false;
      inKeyword = false;
    }
  }

  /**
   * Receive notification of character data inside an element.
   *
   * Character data may arrive in several chunks, so it is accumulated until
   * the next element end consumes it.
   *
   * @see org.xml.sax.helpers.DefaultHandler
   */
  @Override
  public void characters(char[] buf, int offset, int len)
                         throws SAXException
  {
    // store character data
    if (textBuffer == null)
    {
      textBuffer = new StringBuilder();
    }
    textBuffer.append(buf, offset, len);
  }

  //-------------------------
  // PRIVATE HELPER METHODS -
  //-------------------------

  /**
   * Clear every piece of per-parse state so a new translation cannot be
   * affected by an earlier one.
   */
  private void resetParseState()
  {
    cqlStack.removeAllElements();
    textBuffer = null;
    searchClause = null;
    inSearchClause = false;
    inKeyword = false;
  }

  /**
   * Interpret the buffered text for the element that has just been closed.
   *
   * <ul>
   * <li>A <code>value</code> outside a search clause is pushed as a boolean
   *     operator.</li>
   * <li>Inside a search clause, <code>index</code> starts the clause with the
   *     translated field name, <code>value</code> (the relation) is ignored,
   *     and <code>term</code> completes the clause and pushes it.</li>
   * <li><code>triple</code> combines the top three stack entries into one
   *     expression.</li>
   * </ul>
   *
   * Elements with no (non-blank) text are ignored and leave the text buffer
   * untouched, except for <code>triple</code>, which is always evaluated.
   *
   * @param element Qualified name of the element being closed
   * @throws SAXException if a triple is closed with fewer than three
   *         entries on the stack
   */
  private void extractDataFromText(String element) throws SAXException
  {
    boolean closingTriple = ELEMENT_TRIPLE.equals(element);
    String text = (textBuffer == null) ? "" : textBuffer.toString().trim();

    // A triple has to be evaluated even when no character data (not even
    // whitespace) precedes its end tag; everything else needs real text.
    if (text.equals("") && !closingTriple)
    {
      return;
    }

    //
    // check for a boolean relation value
    //
    if (!inSearchClause && ELEMENT_VALUE.equals(element))
    {
      cqlStack.push(text);
    }

    //
    // Construct a search clause
    //
    if (inSearchClause)
    {
      appendToSearchClause(element, text);
    }

    //
    // evaluate expression so far if we hit a </triple>
    //
    if (closingTriple)
    {
      evaluateTriple();
    }
    textBuffer = null;
  }

  /**
   * Add the text of an element found inside a search clause to the clause
   * under construction.
   *
   * General syntax: title=macbeth
   * (title is the index, = is the value, macbeth is the term)
   *
   * @param element Qualified name of the element being closed
   * @param text Trimmed text content collected for the element
   */
  private void appendToSearchClause(String element, String text)
  {
    if (searchClause == null)
    {
      searchClause = new StringBuilder();
    }

    if (ELEMENT_INDEX.equals(element))
    {
      String field = translateIndex(text);
      inKeyword = INDEX_MAP.get(KEYWORD_INDEX).equals(field);
      searchClause.append(field);
    }
    else if (ELEMENT_VALUE.equals(element))
    {
      //
      // The relation value is always supplied as '='.
      //
      // We don't need it - do nothing.
      //
    }
    else if (ELEMENT_TERM.equals(element))
    {
      //
      // Search term processing:
      //
      // * Honor '+' encoding for embedded spaces
      // * Add double quotes
      //
      // Example:
      // aa+bb+cc <becomes> "aa bb cc"
      //
      boolean quoteTerm = TREAT_ALL_FIELDS_AS_PHRASE || inKeyword;
      if (quoteTerm)
      {
        searchClause.append('"');
      }
      searchClause.append(text.replaceAll("\\+", " ").trim());
      if (quoteTerm)
      {
        searchClause.append('"');
      }
      // The term completes the clause: push it and start afresh
      cqlStack.push(searchClause.toString().trim());
      searchClause = null;
    }
  }

  /**
   * Replace the top three stack entries with a single combined expression.
   *
   * The stack is expected to hold, from top to bottom: the right operand,
   * the left operand and the boolean operator.
   *
   * @throws SAXException if fewer than three entries are on the stack
   */
  private void evaluateTriple() throws SAXException
  {
    if (cqlStack.size() < 3)
    {
      throw new SAXException("Malformed XCQL: </triple> reached with only "
                             + cqlStack.size() + " item(s) on the stack");
    }
    String rightOperand = cqlStack.pop();
    String leftOperand = cqlStack.pop();
    String booleanRelation = cqlStack.pop();

    cqlStack.push(leftOperand.replaceAll("\\+", " ").trim()
                  + translateBooleanRelation(booleanRelation)
                  + rightOperand.replaceAll("\\+", " ").trim());
  }

  /**
   * Translate a CQL index to the appropriate 360 Search field name
   * @param cqlIndex CQL index name
   * @return 360 Search field name (the keyword mapping if the index is
   *         unknown)
   */
  private String translateIndex(String cqlIndex)
  {
    String s360Index = INDEX_MAP.get(cqlIndex);

    if (s360Index == null || s360Index.equals( "" ))
    {
      _log.error("translateIndex(): bad index, using KEYWORD");
      s360Index = INDEX_MAP.get(KEYWORD_INDEX);
    }
    return s360Index;
  }

  /**
   * Translate a CQL boolean term to the appropriate 360 Search syntax
   * @param booleanRelation CQL boolean term
   * @return A boolean operation (the "and" mapping if the term is unknown)
   */
  private String translateBooleanRelation(String booleanRelation)
  {
    String s360Boolean = BOOL_RELATION_MAP.get(booleanRelation);

    if (s360Boolean == null || s360Boolean.equals( "" ))
    {
      _log.error("translateBooleanRelation(): bad boolean relation, using AND" );
      s360Boolean = BOOL_RELATION_MAP.get(DEFAULT_BOOLEAN);
    }
    return s360Boolean;
  }

  /**
   * Main(): test
   */
  public static void main(String[] args)
  {
    CqlParser parser = new CqlParser();
    String query;

    query = "title=\"My Title\"";
    System.out.println();
    System.out.println("CQL: " + query);
    System.out.println("360: " + parser.doCQL2MetasearchCommand(query));

    query = "title=\"\\\"My Title\\\"\"";
    System.out.println();
    System.out.println("CQL: " + query);
    System.out.println("360: " + parser.doCQL2MetasearchCommand(query));

    query = "title=\"My Title\" and keyword=\"some keywords\"";
    System.out.println();
    System.out.println("CQL: " + query);
    System.out.println("360: " + parser.doCQL2MetasearchCommand(query));

    query = "title=\"My Title\" and keyword=\"some keywords\" and year=\"1999\"";
    System.out.println();
    System.out.println("CQL: " + query);
    System.out.println("360: " + parser.doCQL2MetasearchCommand(query));
  }
}