/* See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * Esri Inc. licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.esri.gpt.catalog.lucene;

import com.esri.gpt.framework.util.Val;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.queryParser.ParseException;

/**
 * Resolves a term through a chain of proxies. Each proxy can change the
 * passed term into an equivalent Lucene term. An example use of the resolver
 * is an ontology service.
 * <p>
 * A chain is written as one or more service names, each followed by a pipe,
 * directly in front of the term, e.g. <code>serviceA|serviceB|term</code>.
 */
/**package*/class TermResolver {

  /** The Logger. */
  private static final Logger LOGGER = Logger.getLogger(TermResolver.class.getName());

  /** Matches a service chain: one or more <code>name|</code> segments. */
  private static final Pattern STREAM_PATTERN = Pattern.compile("([a-zA-Z0-9_]+\\|)+");

  /**
   * Matches a double quote which is not directly preceded by a backslash.
   * A look-behind is used so the preceding character is not consumed, which
   * lets consecutive matches be found with plain {@link Matcher#find()}.
   */
  private static final Pattern QUOTE_PATTERN = Pattern.compile("(?<!\\\\)\"");

  /** map of proxies keyed by service name */
  private final Map<String, IParserProxy> proxies;

  /**
   * Creates instance of the resolver.
   * @param proxies proxies keyed by service name; <code>null</code> is treated
   * as an empty map
   */
  public TermResolver(Map<String, IParserProxy> proxies) {
    this.proxies = proxies != null ? proxies : new HashMap<String, IParserProxy>();
  }

  /**
   * Resolves term with equivalent lucene syntax compliant string.
   * <p>
   * If no proxies are registered the argument is returned untouched.
   * Otherwise every service chain found outside of quoted text is replaced,
   * together with the term following it, by the text produced by the proxies.
   * Finally a pair of parentheses enclosing the entire result is removed.
   * @param termStr term string to alter
   * @return lucene syntax compliant string
   * @throws org.apache.lucene.queryParser.ParseException if altering term fails
   */
  public String resolve(String termStr) throws ParseException {
    if (proxies.isEmpty()) {
      return termStr;
    }

    termStr = Val.chkStr(termStr);
    final String orgTermStr = termStr;

    // Sections are delivered last-to-first, so replacing one of them never
    // shifts the offsets of the sections which are still to be processed.
    Sections sections = createSections(termStr);

    // alter each section with the string received from the external parsing
    // mechanism
    for (Section group : sections) {
      List<String> chain = Arrays.asList(group.toArray());
      Section termSection = advanceTerm(termStr, group.end);
      String term = termSection.toString();
      termStr = termStr.substring(0, group.start)
              + translate(chain, term)
              + termStr.substring(termSection.end);
    }

    // Strip parentheses only when the leading '(' is really closed by the
    // trailing ')'. Blindly trimming both ends would turn "(a) OR (b)" into
    // the unbalanced "a) OR (b".
    while (hasEnclosingParentheses(termStr)) {
      termStr = termStr.substring(1, termStr.length() - 1);
    }

    if (LOGGER.isLoggable(Level.FINE)) {
      LOGGER.fine("Streaming: "+orgTermStr+" -> "+termStr);
    }

    return termStr;
  }

  /**
   * Checks if the whole string is wrapped in a single matching pair of
   * parentheses. Escaped characters and quoted text are ignored while
   * matching.
   * @param str string to check
   * @return <code>true</code> if the first character is an opening parenthesis
   * whose matching closing parenthesis is the last character
   */
  private static boolean hasEnclosingParentheses(String str) {
    int last = str.length() - 1;
    if (last < 1 || str.charAt(0) != '(' || str.charAt(last) != ')') {
      return false;
    }
    int depth = 0;
    boolean escaped = false;
    boolean quoted = false;
    for (int i = 0; i <= last; i++) {
      char c = str.charAt(i);
      if (escaped) {
        escaped = false;
      } else if (c == '\\') {
        escaped = true;
      } else if (c == '\"') {
        quoted = !quoted;
      } else if (!quoted) {
        if (c == '(') {
          depth++;
        } else if (c == ')') {
          depth--;
          if (depth == 0) {
            // the very first '(' has just been closed
            return i == last;
          }
        }
      }
    }
    return false;
  }

  /**
   * Advances term to the end.
   * <p>
   * Leading whitespace is skipped first. The term then extends up to the first
   * whitespace found outside of quotes and brackets, or up to an unescaped
   * closing bracket which has no matching opening bracket inside the term, or
   * up to the end of the string.
   * @param termStr term to advance
   * @param start starting point of advancing
   * @return section describing term
   */
  private Section advanceTerm(String termStr, int start) {
    boolean escapeMode = false;
    boolean quotationMode = false;
    // closing brackets still awaited, innermost on top
    Stack<Character> expectedBrackets = new Stack<Character>();

    while (start < termStr.length() && Character.isWhitespace(termStr.charAt(start))) {
      start++;
    }

    int index = start;
    scan:
    for (; index < termStr.length(); index++) {
      char c = termStr.charAt(index);

      if (Character.isWhitespace(c)) {
        if (!quotationMode && expectedBrackets.empty()) {
          break;
        }
        // whitespace inside quotes/brackets belongs to the term; note it
        // leaves the escape flag untouched
        continue;
      }
      if (escapeMode) {
        // character following a backslash is taken literally
        escapeMode = false;
        continue;
      }
      if (c == '\\') {
        escapeMode = true;
        continue;
      }
      if (quotationMode) {
        if (c == '\"') {
          quotationMode = false;
        }
        continue;
      }
      if (c == '\"') {
        quotationMode = true;
        continue;
      }
      if (!expectedBrackets.empty() && c == expectedBrackets.peek()) {
        expectedBrackets.pop();
        continue;
      }
      switch (c) {
        case '(':
          expectedBrackets.push(')');
          break;
        case '<':
          expectedBrackets.push('>');
          break;
        case '{':
          expectedBrackets.push('}');
          break;
        case ')':
        case '>':
        case '}':
          // closing bracket which is not the awaited one ends the term
          break scan;
        default:
          break;
      }
    }

    return new Section(termStr, start, index);
  }

  /**
   * Finds quoted sections of the string.
   * <p>
   * Unescaped double quotes are paired in the order of appearance; each pair
   * yields one section spanning from the opening quote up to and including
   * the closing quote. A trailing quote without a partner is ignored.
   * @param termStr term to translate
   * @return quoted sections
   */
  private Sections findQuotedSections(String termStr) {
    Sections quotedSections = new Sections();
    Matcher quoteMatcher = QUOTE_PATTERN.matcher(termStr);

    while (quoteMatcher.find()) {
      int start = quoteMatcher.start();
      if (!quoteMatcher.find()) {
        break;
      }
      quotedSections.add(new Section(termStr, start, quoteMatcher.end()));
    }

    return quotedSections;
  }

  /**
   * Creates replaceable sections of expressions to replace by external parser.
   * <p>
   * A replaceable section is a service chain which does not lie within
   * quoted text.
   * @param termStr term to translate
   * @return collection of sections, ordered from the last one in the string
   * to the first one
   */
  private Sections createSections(String termStr) {
    Sections quotedSections = findQuotedSections(termStr);
    Sections replaceableSections = new Sections();

    // find all sections having service chain definition within the string
    Matcher streamMatcher = STREAM_PATTERN.matcher(termStr);
    while (streamMatcher.find()) {
      Section section = new Section(termStr, streamMatcher.start(), streamMatcher.end());
      if (!quotedSections.enclosesSection(section)) {
        replaceableSections.add(section);
      }
    }

    // reverse so callers can substitute from the end of the string backwards
    Collections.reverse(replaceableSections);

    return replaceableSections;
  }

  /**
   * Delegates term to the chain of service proxies.
   * <p>
   * The last service name in the chain is applied first, then the remaining
   * names are applied recursively to its result. A double quote at the very
   * start or end of the term is removed before the term is handed to a proxy.
   * Whenever a step changes the term, the result is wrapped in parentheses.
   * @param serviceNames chain of service names
   * @param termStr term to translate
   * @return translated term
   * @throws org.apache.lucene.queryParser.ParseException if unable to delegate term
   */
  private String translate(List<String> serviceNames, String termStr) throws ParseException {
    if (serviceNames.size() > 0) {
      int lastIndex = serviceNames.size() - 1;
      String serviceName = serviceNames.get(lastIndex);
      List<String> remaining = serviceNames.subList(0, lastIndex);

      termStr = Val.chkStr(termStr).replaceAll("^\"|\"$", "");
      String newTermStr = translate(remaining, translate(serviceName, termStr));
      if (!newTermStr.equals(termStr)) {
        termStr = "(" + newTermStr + ")";
      }
    }
    return termStr;
  }

  /**
   * Translates term by the service name. Sends a term into translating service
   * through the corresponding proxy.
   * @param serviceName service name
   * @param term term to translate
   * @return translated term
   * @throws ParseException if service name is empty, no proxy is registered
   * under that name, or the proxy is unable to translate term
   */
  private String translate(String serviceName, String term) throws ParseException {
    serviceName = Val.chkStr(serviceName);
    if (serviceName.length() == 0) {
      throw new ParseException("Invalid service name: \""+Val.chkStr(serviceName)+"\".");
    }
    IParserProxy serviceProxy = proxies.get(serviceName);
    if (serviceProxy == null) {
      throw new ParseException("Unrecognized service name: \""+Val.chkStr(serviceName)+"\".");
    }
    return serviceProxy.parse(term);
  }

  /**
   * Section of the string.
   */
  private static class Section {
    /** complete string */
    final String str;
    /** start of section (inclusive) */
    final int start;
    /** end of section (exclusive) */
    final int end;

    /**
     * Creates instance of the section.
     * @param str complete string from which a section is being cut off
     * @param start start of section (inclusive)
     * @param end end of section (exclusive)
     */
    Section(String str, int start, int end) {
      this.str = str;
      this.start = start;
      this.end = end;
    }

    /**
     * Checks if section contains another section.
     * @param section another section which might be contained within the current section
     * @return <code>true</code> if current section contains given section
     */
    boolean encloses(Section section) {
      return start <= section.start && end >= section.end;
    }

    @Override
    public String toString() {
      return str.substring(start, end);
    }

    /**
     * Changes into array of strings. The section text is split on pipes after
     * a single leading and a single trailing pipe have been removed.
     * @return array of strings
     */
    public String[] toArray() {
      return toString().replaceAll("^\\||\\|$", "").split("\\|+");
    }
  }

  /**
   * Collection of sections.
   */
  private static class Sections extends ArrayList<Section> {
    private static final long serialVersionUID = 1L;

    /**
     * Checks if there is any section which contains given section.
     * @param section section to check
     * @return <code>true</code> if there is at least one section which contains given section
     */
    boolean enclosesSection(Section section) {
      for (Section s : this) {
        if (s.encloses(section)) {
          return true;
        }
      }
      return false;
    }
  }
}