/******************************************************************************* * Copyright (c) 2013 aegif. * * This file is part of NemakiWare. * * NemakiWare is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * NemakiWare is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with NemakiWare. * If not, see <http://www.gnu.org/licenses/>. * * Contributors: * linzhixing(https://github.com/linzhixing) - initial API and implementation ******************************************************************************/ package jp.aegif.nemaki.cmis.aspect.query.solr; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import jp.aegif.nemaki.businesslogic.ContentService; import jp.aegif.nemaki.model.Folder; import org.antlr.runtime.tree.Tree; import org.apache.chemistry.opencmis.commons.PropertyIds; import org.apache.chemistry.opencmis.commons.definitions.PropertyDefinition; import org.apache.chemistry.opencmis.commons.definitions.TypeDefinition; import org.apache.chemistry.opencmis.commons.enums.Cardinality; import org.apache.chemistry.opencmis.commons.enums.PropertyType; import org.apache.chemistry.opencmis.commons.exceptions.CmisRuntimeException; import org.apache.chemistry.opencmis.server.support.query.CmisQlStrictLexer; import org.apache.chemistry.opencmis.server.support.query.CmisSelector; import org.apache.chemistry.opencmis.server.support.query.ColumnReference; import org.apache.chemistry.opencmis.server.support.query.QueryObject; import org.apache.chemistry.opencmis.server.support.query.TextSearchLexer; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.util.BytesRef; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.util.ClientUtils; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; /** * CMIS to Solr parser class for WHERE clause of query * * @author linzhixing * */ public class SolrPredicateWalker{ private final String repositoryId; private final SolrUtil solrUtil; private final QueryObject queryObject; private final ContentService contentService; public static final String FLD = "field"; public static final String CND = "cond"; public SolrPredicateWalker(String repositoryId, QueryObject queryObject, SolrUtil solrUtil, ContentService contentService) { this.repositoryId = repositoryId; this.queryObject = queryObject; this.solrUtil = solrUtil; this.contentService = contentService; } public Query walkPredicate(Tree node) { switch (node.getType()) { // Boolean walks case CmisQlStrictLexer.NOT: return walkNot(node.getChild(0)); case CmisQlStrictLexer.AND: return walkAnd(node.getChild(0), node.getChild(1)); case CmisQlStrictLexer.OR: return walkOr(node.getChild(0), node.getChild(1)); // Comparison walks case CmisQlStrictLexer.EQ: return walkEquals(node.getChild(0), node.getChild(1)); case CmisQlStrictLexer.NEQ: return walkNotEquals(node.getChild(0), node.getChild(1)); case CmisQlStrictLexer.GT: return walkGreaterThan(node.getChild(0), node.getChild(1)); case CmisQlStrictLexer.GTEQ: return walkGreaterOrEquals(node.getChild(0), node.getChild(1)); case CmisQlStrictLexer.LT: return walkLessThan(node.getChild(0), node.getChild(1)); case CmisQlStrictLexer.LTEQ: return walkLessOrEquals(node.getChild(0), node.getChild(1)); case CmisQlStrictLexer.LIKE: return walkLike(node.getChild(0), node.getChild(1)); case CmisQlStrictLexer.NOT_LIKE: return walkNotLike(node.getChild(0), node.getChild(1)); // Multiple value type walks case CmisQlStrictLexer.IN: return walkIn(node.getChild(0), node.getChild(1)); case CmisQlStrictLexer.NOT_IN: return walkNotIn(node.getChild(0), node.getChild(1)); case CmisQlStrictLexer.ANY: return walkInAny(node.getChild(0), node.getChild(1)); case CmisQlStrictLexer.NOT_IN_ANY: return walkNotInAny(node.getChild(0), node.getChild(1)); case CmisQlStrictLexer.IS_NULL: return walkIsNull(node.getChild(0)); case CmisQlStrictLexer.IS_NOT_NULL: return walkIsNotNull(node.getChild(0)); // GetChildren type walks case CmisQlStrictLexer.IN_FOLDER: if (node.getChildCount() == 1) { return walkInFolder(null, node.getChild(0)); } else { return walkInFolder(node.getChild(0), node.getChild(1)); } case CmisQlStrictLexer.IN_TREE: if (node.getChildCount() == 1) { return walkInTree(null, node.getChild(0), repositoryId); } else { return walkInTree(node.getChild(0), node.getChild(1), repositoryId); } // Full-text search type walk case CmisQlStrictLexer.CONTAINS: if (node.getChildCount() == 1) { return walkContains(null, node.getChild(0)); } else { return walkContains(node.getChild(0), node.getChild(1)); } default: return null; } } // ////////////////////////////////////////////////////////////////////////////// // Definition of Boolean walks // ////////////////////////////////////////////////////////////////////////////// private BooleanQuery walkNot(Tree node) { BooleanQuery q = new BooleanQuery(); q.add(walkPredicate(node), Occur.MUST_NOT); return q; } private BooleanQuery walkOr(Tree leftNode, Tree rightNode) { BooleanQuery q = new BooleanQuery(); q.add(walkPredicate(leftNode), Occur.SHOULD); q.add(walkPredicate(rightNode), Occur.SHOULD); return q; } private BooleanQuery walkAnd(Tree leftNode, Tree rightNode) { BooleanQuery q = new BooleanQuery(); q.add(walkPredicate(leftNode), Occur.MUST); q.add(walkPredicate(rightNode), Occur.MUST); return q; } // ////////////////////////////////////////////////////////////////////////////// // Definition of Comparison walks // ////////////////////////////////////////////////////////////////////////////// private Query walkEquals(Tree leftNode, Tree rightNode) { HashMap<String, String> map = walkCompareInternal(leftNode, rightNode); Term term = new Term(map.get(FLD), map.get(CND)); Query q = new TermQuery(term); return q; } private Query walkNotEquals(Tree leftNode, Tree rightNode) { BooleanQuery q = new BooleanQuery(); q.add(walkEquals(leftNode, rightNode), Occur.MUST_NOT); return q; } private Query walkGreaterThan(Tree leftNode, Tree rightNode) { HashMap<String, String> map = walkCompareInternal(leftNode, rightNode); TermRangeQuery t = new TermRangeQuery(map.get(FLD), convertToBytesRef(map.get(CND)), null, false, false); return t; } private Query walkGreaterOrEquals(Tree leftNode, Tree rightNode) { HashMap<String, String> map = walkCompareInternal(leftNode, rightNode); TermRangeQuery t = new TermRangeQuery(map.get(FLD), convertToBytesRef(map.get(CND)), null, true, false); return t; } private Query walkLessThan(Tree leftNode, Tree rightNode) { HashMap<String, String> map = walkCompareInternal(leftNode, rightNode); TermRangeQuery t = new TermRangeQuery(map.get(FLD), null, convertToBytesRef(map.get(CND)), false, false); return t; } private Query walkLessOrEquals(Tree leftNode, Tree rightNode) { HashMap<String, String> map = walkCompareInternal(leftNode, rightNode); TermRangeQuery t = new TermRangeQuery(map.get(FLD), null, convertToBytesRef(map.get(CND)), false, true); return t; } /** * TODO Implement check for each kind of literal * Parse field name & condition value. Field name is prepared for Solr * query. * * @param leftNode * @param rightNode * @return */ private HashMap<String, String> walkCompareInternal(Tree leftNode, Tree rightNode) { HashMap<String, String> map = new HashMap<String, String>(); String left = solrUtil.convertToString(leftNode); String right = walkExpr(rightNode).toString(); map.put(FLD, ClientUtils.escapeQueryChars(solrUtil.getPropertyNameInSolr(left))); map.put(CND, right); return map; } private Query walkLike(Tree colNode, Tree stringNode) { // Check for CMIS SQL specification Object rVal = walkExpr(stringNode); if (!(rVal instanceof String)) { throw new IllegalStateException( "LIKE operator requires String literal on right hand side."); } ColumnReference colRef = getColumnReference(colNode); String colRefName = colRef.getName(); TypeDefinition td = colRef.getTypeDefinition(); Map<String, PropertyDefinition<?>> pds = td.getPropertyDefinitions(); PropertyDefinition<?> pd = pds.get(colRefName); PropertyType propType = pd.getPropertyType(); if (propType != PropertyType.STRING && propType != PropertyType.HTML && propType != PropertyType.ID && propType != PropertyType.URI) { throw new IllegalStateException("Property type " + propType.value() + " is not allowed FOR LIKE"); } if (pd.getCardinality() != Cardinality.SINGLE) { throw new IllegalStateException( "LIKE is not allowed for multi-value properties "); } // Build a statement String field = solrUtil.getPropertyNameInSolr(solrUtil.convertToString(colNode)); String pattern = translatePattern((String) rVal); // Solr wildcard // expression Term t = new Term(field, pattern); TermQuery q = new TermQuery(t); return q; } private Query walkNotLike(Tree colNode, Tree stringNode) { BooleanQuery q = new BooleanQuery(); q.add(walkLike(colNode, stringNode), Occur.MUST); return q; } // ////////////////////////////////////////////////////////////////////////////// // Definition of multiple value type walks // ////////////////////////////////////////////////////////////////////////////// private Query walkIn(Tree colNode, Tree listNode) { // Check for CMIS SQL specification ColumnReference colRef = getColumnReference(colNode); // Build a statement // Combine queries with "OR" because Solr doesn't have "IN" syntax BooleanQuery q = new BooleanQuery(); String field = solrUtil.getPropertyNameInSolr(colRef.getPropertyQueryName().toString()); List<?> list = (List<?>) walkExpr(listNode); for (Object elm : list) { Term t = new Term(field, elm.toString()); TermQuery tq = new TermQuery(t); q.add(tq, Occur.SHOULD); } return q; } private Query walkNotIn(Tree colNode, Tree listNode) { BooleanQuery q = new BooleanQuery(); q.add(walkIn(colNode, listNode), Occur.MUST_NOT); return q; } private Query walkInAny(Tree leftNode, Tree rightNode) { // Check for CMIS SQL specification ColumnReference colRef = getColumnReference(leftNode); PropertyDefinition<?> pd = colRef.getPropertyDefinition(); if (pd.getCardinality() != Cardinality.MULTI) { throw new IllegalStateException( "Operator ANY...IN only is allowed on multi-value properties "); } // Build a statement // TODO Just set multiValued flag ON on Solr. Syntax is common as that // of wakEquals. Query q = walkEquals(leftNode, rightNode); return q; } private Query walkNotInAny(Tree leftNode, Tree rightNode) { Query q = walkNotEquals(leftNode, rightNode); return q; } private Query walkIsNull(Tree colNode) { String field = walkExpr(colNode).toString(); BooleanQuery q = new BooleanQuery(); TermRangeQuery q1 = new TermRangeQuery(field, null, null, false, false); q.add(q1, Occur.MUST_NOT); return q; } private Query walkIsNotNull(Tree colNode) { String field = walkExpr(colNode).toString(); TermRangeQuery q = new TermRangeQuery(field, null, null, false, false); return q; } // ////////////////////////////////////////////////////////////////////////////// // Definition of getChildren type walks // ////////////////////////////////////////////////////////////////////////////// private Query walkInFolder(Tree qualNode, Tree paramNode) { // Check for CMIS SQL specification Object lit = walkExpr(paramNode); if (!(lit instanceof String)) { throw new IllegalStateException( "Folder id in IN_FOLDER must be of type String"); } // Build a statement String folderId = (String) walkExpr(paramNode); Term t = new Term(solrUtil.getPropertyNameInSolr(PropertyIds.PARENT_ID), folderId); Query q = new TermQuery(t); if (qualNode != null) { // When a table alias exists String qualifier = walkExpr(qualNode).toString(); Term tQual = new Term("type", buildQualField(qualifier)); Query qQual = new TermQuery(tQual); BooleanQuery bq = new BooleanQuery(); bq.add(qQual, Occur.MUST); bq.add(q, Occur.MUST); return bq; } return q; } private Query walkInTree(Tree qualNode, Tree paramNode, String repositoryId) { // Check for CMIS SQL specification Object lit = walkExpr(paramNode); if (!(lit instanceof String)) { throw new IllegalStateException( "Folder id in IN_FOLDER must be of type String"); } // Build a Statement Query q = walkInTreeInternal(paramNode, repositoryId); if (qualNode != null) { String qualifier = walkExpr(qualNode).toString(); Term tQual = new Term("type", buildQualField(qualifier)); Query qQual = new TermQuery(tQual); BooleanQuery bq = new BooleanQuery(); bq.add(qQual, Occur.MUST); bq.add(q, Occur.MUST); return bq; } return q; } private Query walkInTreeInternal(Tree paramNode, String repositoryId) { //Build first query for descendant folders BooleanQuery query1 = new BooleanQuery(); String s = paramNode.getText(); String folderId = s.substring(1, s.length() - 1); Folder folder = contentService.getFolder(repositoryId, folderId); String folderPath = contentService.calculatePath(repositoryId, folder); String _folderPath = folderPath.replaceAll("\\/", "\\\\/"); //escape in Solr query if(contentService.isRoot(repositoryId, folder)){ Term t = new Term(solrUtil.getPropertyNameInSolr(PropertyIds.PATH), _folderPath + "*"); query1.add(new TermQuery(t), Occur.MUST); }else{ String _folderId = folderId.replaceAll("\\/", "\\\\/"); //escape in Solr query Term t1 = new Term(solrUtil.getPropertyNameInSolr(PropertyIds.PARENT_ID), _folderId); String path = folderPath + "/*"; String _path = path.replaceAll("\\/", "\\\\/"); //escape in Solr query Term t2 = new Term(solrUtil.getPropertyNameInSolr(PropertyIds.PATH), _path); query1.add(new TermQuery(t1), Occur.SHOULD); query1.add(new TermQuery(t2), Occur.SHOULD); } // Set Solr server SolrServer solrServer = solrUtil.getSolrServer(); // Get all the descending folder objectIds(including direct children) List<String> descendantIds = new ArrayList<String>(); SolrDocumentList children = null; try { QueryResponse resp = solrServer.query(new SolrQuery(query1.toString())); children = resp.getResults(); } catch (SolrServerException e) { // TODO Auto-generated catch block e.printStackTrace(); } if(children != null && !children.isEmpty()){ Iterator<SolrDocument> it = children.iterator(); while(it.hasNext()){ SolrDocument sd = it.next(); String id = (String) sd.getFieldValue("id"); descendantIds.add(id); } } // Build the second query for getting all the objects under descending folders Iterator<String> iterator = descendantIds.iterator(); BooleanQuery query2 = new BooleanQuery(); while (iterator.hasNext()) { String descendantId = iterator.next(); String _descendantId = descendantId.replaceAll("\\/", "\\\\/"); Term t = new Term(solrUtil.getPropertyNameInSolr(PropertyIds.PARENT_ID), _descendantId); TermQuery tq = new TermQuery(t); query2.add(tq, Occur.SHOULD); } return query2; } // ////////////////////////////////////////////////////////////////////////////// // Definition of full-text search type walk // ////////////////////////////////////////////////////////////////////////////// // Wildcards of CONTAINS() is the same as those of Solr, so leave them as they are. private Query walkContains(Tree qualNode, Tree queryNode) { if (qualNode != null) { //Qualifier isn't needed as long as JOIN isn't supported //String qualifier = walkExpr(qualNode).toString(); //Term tQual = new Term("type", buildQualField(qualifier)); //Query qQual = new TermQuery(tQual); BooleanQuery q = new BooleanQuery(); //q.add(qQual, Occur.MUST); q.add(walkSearchExpr(queryNode), Occur.MUST); return q; } return walkSearchExpr(queryNode); } private Query walkSearchExpr(Tree node) { int t = node.getType(); switch (node.getType()) { case TextSearchLexer.TEXT_AND: return walkTextAnd(node); case TextSearchLexer.TEXT_OR: return walkTextOr(node); case TextSearchLexer.TEXT_MINUS: return walkTextMinus(node); case TextSearchLexer.TEXT_SEARCH_WORD_LIT: return walkTextWord(node); case TextSearchLexer.TEXT_SEARCH_PHRASE_STRING_LIT: return walkTextPhrase(node); default: //walkOtherExpr(node); //return null; return walkTextPhrase(node); } } private Query walkTextAnd(Tree node) { BooleanQuery q = new BooleanQuery(); for (int i = 0; i < node.getChildCount(); i++) { Tree child = node.getChild(i); q.add(walkSearchExpr(child), Occur.MUST); } return q; } private Query walkTextOr(Tree node) { BooleanQuery q = new BooleanQuery(); for (int i = 0; i < node.getChildCount(); i++) { Tree child = node.getChild(i); q.add(walkSearchExpr(child), Occur.SHOULD); } return q; } private Query walkTextMinus(Tree node) { BooleanQuery q = new BooleanQuery(); for (int i = 0; i < node.getChildCount(); i++) { Tree child = node.getChild(i); q.add(walkSearchExpr(child), Occur.MUST); } return q; } private Query walkTextWord(Tree node) { Term term = new Term("text", node.toString()); TermQuery q = new TermQuery(term); return q; } private Query walkTextPhrase(Tree node) { Term term = new Term("text", node.toString()); TermQuery q = new TermQuery(term); return q; } // ////////////////////////////////////////////////////////////////////////////// // Definition of walkExpr and its subwalks // These are used from various walks to evaluate a node value. // ////////////////////////////////////////////////////////////////////////////// private Object walkExpr(Tree node) { switch (node.getType()) { case CmisQlStrictLexer.BOOL_LIT: return walkBoolean(node); case CmisQlStrictLexer.NUM_LIT: return walkNumber(node); case CmisQlStrictLexer.STRING_LIT: return walkString(node); case CmisQlStrictLexer.TIME_LIT: return walkTimestamp(node); case CmisQlStrictLexer.IN_LIST: return walkList(node); case CmisQlStrictLexer.COL: return walkCol(node); case CmisQlStrictLexer.ID: return walkId(node); default: return walkOtherExpr(node); } } private Object walkBoolean(Tree node) { String s = node.getText(); return Boolean.valueOf(s); } private Object walkNumber(Tree node) { String s = node.getText(); if (s.contains(".") || s.contains("e") || s.contains("E")) { return Double.valueOf(s); } else { return Long.valueOf(s); } } private Object walkString(Tree node) { String s = node.getText(); s = s.substring(1, s.length() - 1); //return "\"" + ClientUtils.escapeQueryChars(s) + "\""; return ClientUtils.escapeQueryChars(s); } private Object walkTimestamp(Tree node) { String s = node.getText(); s = s.substring(s.indexOf('\'') + 1, s.length() - 1); return s; } private Object walkList(Tree node) { int n = node.getChildCount(); List<Object> res = new ArrayList<Object>(n); for (int i = 0; i < n; i++) { res.add(walkExpr(node.getChild(i))); } return res; } private Object walkCol(Tree node) { return null; } private Object walkId(Tree node) { String s; s = node.toStringTree(); return s; } private Object walkOtherExpr(Tree node) { throw new CmisRuntimeException("Unknown node type: " + node.getType() + " (" + node.getText() + ")"); } // ////////////////////////////////////////////////////////////////////////////// /** * Utility methods */ /** * Convert String to BytesRef for Lucene TermRangeQuery * * @param s * @return */ private BytesRef convertToBytesRef(String s) { byte[] bytes = s.getBytes(); BytesRef bytesRef = new BytesRef(bytes); return bytesRef; } /** * Translate a full-text search expression from SQL style to Solr style * * @param wildcardString * @return */ private static String translatePattern(String wildcardString) { int index = 0; int start = 0; StringBuffer res = new StringBuffer(); while (index >= 0) { index = wildcardString.indexOf('%', start); if (index < 0) { res.append(wildcardString.substring(start)); } else if (index == 0 || index > 0 && wildcardString.charAt(index - 1) != '\\') { res.append(wildcardString.substring(start, index)); res.append("*"); } else { res.append(wildcardString.substring(start, index + 1)); } start = index + 1; } wildcardString = res.toString(); index = 0; start = 0; res = new StringBuffer(); while (index >= 0) { index = wildcardString.indexOf('_', start); if (index < 0) { res.append(wildcardString.substring(start)); } else if (index == 0 || index > 0 && wildcardString.charAt(index - 1) != '\\') { res.append(wildcardString.substring(start, index)); res.append("?"); // } else { res.append(wildcardString.substring(start, index + 1)); } start = index + 1; } return res.toString(); } private ColumnReference getColumnReference(Tree columnNode) { CmisSelector sel = queryObject.getColumnReference(columnNode .getTokenStartIndex()); if (null == sel) { throw new IllegalStateException("Unknown property query name " + columnNode.getChild(0)); } else if (sel instanceof ColumnReference) { return (ColumnReference) sel; } else { throw new IllegalStateException( "Unexpected numerical value function in where clause"); } } /** * Look up a Solr name of a table from alias * * @param alias * @return */ private String buildQualField(String alias) { String cmisName = queryObject.getTypeQueryName(alias); String solrName = solrUtil.getPropertyNameInSolr(cmisName); return solrName; } /** * Get all subfolder ids by connecting to Solr recursively * * @param folderId * @param solrServer * @return */ private List<String> getDescendantFolderId(String folderId, SolrServer solrServer) { List<String> list = new ArrayList<String>(); list.add(folderId); // Add oneself to the list in advance SolrQuery query = new SolrQuery(); query.setQuery(solrUtil.getPropertyNameInSolr(PropertyIds.PARENT_ID) + ":" + folderId + " AND " + solrUtil.getPropertyNameInSolr(PropertyIds.BASE_TYPE_ID) + ":cmis\\:folder"); // only "folder" nodes // Connect to SolrServer and add subfolder ids to the list try { QueryResponse resp = solrServer.query(query); SolrDocumentList children = resp.getResults(); // END NODE case: Do nothing but return oneself if (children.getNumFound() == 0) { return list; // Other than END NODE case: collect descendants values // recursively } else { Iterator<SolrDocument> iterator = resp.getResults().iterator(); while (iterator.hasNext()) { SolrDocument child = iterator.next(); String childId = (String) child.getFieldValue("id"); // Recursive call to this method List<String> l = getDescendantFolderId(childId, solrServer); list.addAll(l); } return list; } } catch (SolrServerException e) { e.printStackTrace(); return null; } } }