/* * Copyright 2010 Bizosys Technologies Limited * * Licensed to the Bizosys Technologies Limited (Bizosys) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The Bizosys licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.bizosys.hsearch; import java.io.IOException; import java.io.Writer; import java.net.MalformedURLException; import java.net.URL; import java.text.ParseException; import java.util.ArrayList; import java.util.Hashtable; import java.util.List; import java.util.Map; import org.apache.commons.lang.StringEscapeUtils; import org.apache.log4j.Logger; import com.bizosys.hsearch.common.AccessDefn; import com.bizosys.hsearch.common.Account; import com.bizosys.hsearch.common.BucketIsFullException; import com.bizosys.hsearch.common.Field; import com.bizosys.hsearch.common.HDocument; import com.bizosys.hsearch.common.SField; import com.bizosys.hsearch.common.WhoAmI; import com.bizosys.hsearch.common.Account.AccountInfo; import com.bizosys.hsearch.dictionary.DictEntry; import com.bizosys.hsearch.dictionary.DictionaryManager; import com.bizosys.hsearch.index.Doc; import com.bizosys.hsearch.index.IndexReader; import com.bizosys.hsearch.index.IndexWriter; import com.bizosys.hsearch.lang.Stemmer; import com.bizosys.hsearch.loader.RowEventProcessor; import com.bizosys.hsearch.loader.DataLoader; import com.bizosys.hsearch.loader.RowEventProcessorHSearch; import com.bizosys.hsearch.query.QueryContext; import com.bizosys.hsearch.query.QueryResult; import com.bizosys.hsearch.schema.SchemaManager; import com.bizosys.oneline.ApplicationFault; import com.bizosys.oneline.SystemFault; import com.bizosys.oneline.conf.Configuration; import com.bizosys.oneline.pipes.PipeIn; import com.bizosys.oneline.services.InvalidRequestException; import com.bizosys.oneline.services.Request; import com.bizosys.oneline.services.Response; import com.bizosys.oneline.services.Service; import com.bizosys.oneline.services.ServiceMetaData; import com.bizosys.oneline.util.StringUtils; import com.bizosys.oneline.util.XmlUtils; public class SearchService implements Service { public static Logger l = Logger.getLogger(SearchService.class.getName()); Configuration conf = null; public boolean init(Configuration conf, ServiceMetaData meta) { this.conf = conf; try { l.info("Initializing Scheme."); SchemaManager.getInstance().init(conf, meta); l.info("Scheme initialized."); XmlUtils.xstream.alias("hdoc", HDocument.class); XmlUtils.xstream.alias("field", SField.class); XmlUtils.xstream.alias("aword", DictEntry.class); XmlUtils.xstream.alias("user", WhoAmI.class); XmlUtils.xstream.alias("access", AccessDefn.class); return true; } catch (Exception ex) { l.fatal("Search Service Initialization Failed."); return false; } } public void stop() { } public String getName() { return "SearchService"; } public void process(Request req, Response res) { l.info(">> Searchservice ENTER"); String action = req.action; try { AccountInfo account = Account.getActiveAccountInfo(req, res); if ( null == account) return; if ( "document.add".equals(action) ) { this.addDocument(req, res, account); } else if ( "document.get".equals(action) ) { this.getDocuemnt(req, res, account); } else if ( "document.addXml".equals(action) ) { this.addXmlDocument(req, res, account); } else if ( "document.load".equals(action) ) { this.loadDocument(req, res, account); } else if ( "document.batch".equals(action) ) { this.doBatchAdd(req, res, account); } else if ( "document.delete".equals(action) ) { deleteDocument(req, res, account); } else if ( "document.deletes".equals(action) ) { deleteDocuments(req, res, account); } else if ( "document.search".equals(action) ) { this.searchDocument(req, res, account); } else if ( "dictionary.lookup".equals(action) ) { this.lookupDictionary(req,res, account); } else if ( "dictionary.refresh".equals(action) ) { this.refreshDictionary(req,res, account); } else if ( "dictionary.list".equals(action) ) { this.listDictionary(req,res, account); } else if ( "dictionary.add".equals(action) ) { this.addDictionary(req,res, account); } else if ( "dictionary.addAll".equals(action) ) { this.addBatchDictionary(req,res, account); } else if ( "dictionary.spell".equals(action) ) { this.spell(req,res, account); } else if ( "dictionary.regex".equals(action) ) { this.regex(req,res, account); } else if ( "dictionary.delete".equals(action) ) { this.delete(req,res, account); } else if ( "dictionary.delete-special".equals(action) ) { this.deleteSpecial(req,res, account); } else { res.error("Failed Unknown operation : " + action); } } catch (InvalidRequestException ix) { l.fatal("SearchService > ", ix); res.error( ix.getMessage()); } catch (Exception ix) { l.fatal("SearchService > ", ix); res.error( ix.getMessage()); } } /** * Gets a document given the {id} * @param req * @param res * @throws ApplicationFault * @throws SystemFault */ private void getDocuemnt(Request req, Response res, AccountInfo account) throws ApplicationFault, SystemFault{ String docId = req.getString("id", true, true, false); Doc d = IndexReader.getInstance().get(account.name, docId); try { d.toXml(res.getWriter()); } catch (IOException ex) { throw new SystemFault(ex); } } /** * Indexes a String. For indexing along with File Upload * happens through fileuploadservlet.xml * @param req * @param res * @throws ApplicationError * @throws ApplicationFault * @throws IOException * @throws ParseException */ private void addDocument(Request req, Response res, AccountInfo acc) throws SystemFault, ApplicationFault, BucketIsFullException { String hDocXml = req.getString("hdoc", true, true, false); HDocument hdoc = (HDocument) XmlUtils.xstream.fromXML(StringEscapeUtils.unescapeXml(hDocXml)); hdoc.tenant = acc.name; String runPlan = req.getString("runplan", false,true,true); if ( null != hdoc) { if ( StringUtils.isEmpty(hdoc.ipAddress)) { hdoc.ipAddress = req.clientIp; } } if ( StringUtils.isEmpty(runPlan) ) { IndexWriter.getInstance().insert(hdoc,acc,true); } else { IndexWriter.getInstance().insert(hdoc, acc, IndexWriter.getInstance().getPipes(runPlan),true); } res.writeXml("<id>OK</id>"); } /** */ private void addXmlDocument(Request req, Response res, AccountInfo acc) throws SystemFault, ApplicationFault, BucketIsFullException { String hDocXml = req.getString("hdoc", true, true, false); HDocument hdoc = (HDocument) XmlUtils.xstream.fromXML(StringEscapeUtils.unescapeXml(hDocXml)); String xmlDoc = req.getString("xmldoc", true, true, false); String titleFields = req.getString("title.fields", false, true, false); String runPlan = req.getString("runplan", false,true,true); String separator = req.getString("separator", false,true,true); boolean generateCacheText = req.getBoolean("textify", false); separator = parseSeparator(separator); if ( null != hdoc) { if ( StringUtils.isEmpty(hdoc.ipAddress)) { hdoc.ipAddress = req.clientIp; } } StringBuilder sb = null; if ( generateCacheText ) sb = new StringBuilder(); XmlToFields xtf = new XmlToFields(separator); Map<String, String> flds = xtf.toMap(xmlDoc); if ( StringUtils.isEmpty(hdoc.docType)) hdoc.docType = xtf.recordType; boolean isFirst = true; if ( null != flds) { if ( null == hdoc.fields ) hdoc.fields = new ArrayList<Field>(flds.size()); for (String key : flds.keySet()) { Field fld = new SField(key, flds.get(key)); hdoc.fields.add(fld); if (generateCacheText) { if (isFirst) isFirst = false; else sb.append(separator); sb.append(flds.get(key)); } } } if ( generateCacheText ) hdoc.cacheText = sb.toString(); if ( ! StringUtils.isEmpty(titleFields)) { StringBuilder titleSb = new StringBuilder(); List<String> lstTitleFields = StringUtils.fastSplit(titleFields, ','); isFirst = true; for (String aFld : lstTitleFields) { String fldValue = flds.get(aFld); if ( StringUtils.isEmpty(fldValue)) continue; if ( isFirst ) isFirst = false; else titleSb.append(separator); titleSb.append(fldValue); } } if ( StringUtils.isEmpty(runPlan) ) { IndexWriter.getInstance().insert(hdoc,acc,true); } else { IndexWriter.getInstance().insert(hdoc, acc, IndexWriter.getInstance().getPipes(runPlan),true); } res.writeXml("<id>OK</id>"); } /** * Loads a File * @param req * @param res * @param acc * @throws SystemFault * @throws ApplicationFault * @throws BucketIsFullException */ @SuppressWarnings("unchecked") private void loadDocument(Request req, Response res, AccountInfo acc) throws SystemFault, ApplicationFault, BucketIsFullException { String hDocXml = req.getString("document.prestine", true, true, false); HDocument pristineDoc = (HDocument) XmlUtils.xstream.fromXML(StringEscapeUtils.unescapeXml(hDocXml)); pristineDoc.tenant = acc.name; String docUrlStr = req.getString("document.url", true, true, false); URL docUrl = null; try { docUrl = new URL(docUrlStr); } catch (MalformedURLException ex) { throw new ApplicationFault("Bad File Url : " + docUrlStr); } String docType = req.getString("document.type", true, true, false); String idPrefix = req.getString("id.prefix", false,true,true); if ( StringUtils.isEmpty(idPrefix) ) idPrefix = null; int idColumn = (StringUtils.isEmpty(req.getString("id.column", false,true,true)) ) ? -1 : req.getInteger("id.column", false); String separator = req.getString("columns.separator", false,true,true); separator = parseSeparator(separator); String linebreak = req.getString("linebreak", false,true,true); String colFormats = req.getString("columns.format", true,true,false); String[] colFormatsA = StringUtils.getStrings(colFormats, ","); int colFormatsT = colFormatsA.length; int [] columnFormats = new int[colFormatsT]; for ( int i=0; i<colFormatsT; i++ ) { columnFormats[i] = new Integer(colFormatsA[i]); } String nonEmptyCells = req.getString("columns.nonempty", false,true,true); String[] nonEmptyCellsA = StringUtils.getStrings(nonEmptyCells, ","); int nonEmptyCellsT = nonEmptyCellsA.length; int[] nonEmptyCellsI = new int[nonEmptyCellsT]; for ( int i=0; i<nonEmptyCellsT; i++ ) { nonEmptyCellsI[i] = new Integer(nonEmptyCellsA[i]); } String titleCells = req.getString("columns.title", false,true,true); int[] titleCellsI = null; if ( StringUtils.isEmpty(titleCells)) { titleCellsI = new int[0]; } else { String[] titleCellsA = StringUtils.getStrings(titleCells, ","); int titleCellsT = titleCellsA.length; titleCellsI = new int[titleCellsT]; for ( int i=0; i<titleCellsT; i++ ) { titleCellsI[i] = new Integer(titleCellsA[i]); } } int keywordColumn = (StringUtils.isEmpty(req.getString("keyword.column", false,true,true)) ) ? -1 : req.getInteger("keyword.column", false); int urlColumn = (StringUtils.isEmpty(req.getString("url.column", false,true,true)) ) ? -1 : req.getInteger("url.column", false); int weightColumn = (StringUtils.isEmpty(req.getString("weight.column", false,true,true)) ) ? -1 : req.getInteger("weight.column", false); String previewCells = req.getString("columns.preview", false,true,true); int[] previewCellsI = null; if ( ! StringUtils.isEmpty(previewCells)) { String[] previewCellsA = StringUtils.getStrings(previewCells, ","); int previewCellsT = previewCellsA.length; previewCellsI = new int[previewCellsT]; for ( int i=0; i<previewCellsT; i++ ) { previewCellsI[i] = new Integer(previewCellsA[i]); } } String descCells = req.getString("columns.desc", false,true,true); int[] descCellsI = null; if ( ! StringUtils.isEmpty(descCells)) { String[] descCellsA = StringUtils.getStrings(descCells, ","); int descCellsT = descCellsA.length; descCellsI = new int[descCellsT]; for ( int i=0; i<descCellsT; i++ ) { descCellsI[i] = new Integer(descCellsA[i]); } } String strColumnsAllowed = req.getString("columns.values.allowed", false,true,true); Object optionCheckO = null; if (! StringUtils.isEmpty(strColumnsAllowed)) { optionCheckO = req.getObject("columns.values.allowed", false); } Map<Integer, String[]> optionalCheck = ( null == optionCheckO ) ? null : (Map<Integer, String[]>) optionCheckO; String strColumnsMax = req.getString("columns.values.max", false,true,true); Object maxCheckO = null; if (! StringUtils.isEmpty(strColumnsMax)) { maxCheckO = req.getObject("columns.values.max", false); } Map<Integer, Double> maxCheck = ( null == maxCheckO ) ? null : (Map<Integer, Double>) maxCheckO; String strColumnsMin = req.getString("columns.values.min", false,true,true); Object minCheckO = null; if (! StringUtils.isEmpty(strColumnsMin)) { minCheckO = req.getObject("columns.values.min", false); } Map<Integer, Double> minCheck = ( null == minCheckO ) ? null : (Map<Integer, Double>) minCheckO; String indexableCells = req.getString("columns.indexable", false,true,true); String[] indexableCellsA = StringUtils.getStrings(indexableCells, ","); int indexableCellsT = indexableCellsA.length; int[] indexableCellsI = new int[indexableCellsT]; for ( int i=0; i<indexableCellsT; i++ ) { indexableCellsI[i] = new Integer(indexableCellsA[i]); } String runPlan = req.getString("index.runplan", false,true,true); List<PipeIn> runSteps = ( StringUtils.isEmpty(runPlan) ) ? null : IndexWriter.getInstance().getPipes(runPlan); int startIndex = req.getInteger("index.start", 0); Boolean xmlPreview = req.getBoolean("index.preview.xml", false); if ( null == xmlPreview) xmlPreview = true; int endIndex = (StringUtils.isEmpty(req.getString("index.end", false,true,true)) ) ? -1 : req.getInteger("index.end", -1); int batchSize = (StringUtils.isEmpty(req.getString("index.batch.size", false,true,true)) ) ? 300 : req.getInteger("index.batch.size", 300); RowEventProcessor handler = new RowEventProcessorHSearch( acc, pristineDoc, runSteps, idPrefix, idColumn, urlColumn,weightColumn, titleCellsI,keywordColumn, previewCellsI, descCellsI, docType, indexableCellsI, startIndex,endIndex,batchSize, xmlPreview, res.getWriter(), linebreak); try { res.writeHeader(); DataLoader.load(docUrl, true, handler, separator, columnFormats, nonEmptyCellsI, optionalCheck, minCheck, maxCheck); } catch (Exception ex) { res.writeText("Error> " + StringUtils.stringifyException(ex)); } finally { res.writeFooter(); } } /** * Update a specific Field * @param req * @throws Exception * @throws ApplicationFault * @throws ParseException * @throws ApplicationError */ @SuppressWarnings("unchecked") private void doBatchAdd(Request req, Response res, AccountInfo acc) throws SystemFault, ApplicationFault, BucketIsFullException { String hDocsXml = req.getString("hdocs", true, true, false); List<HDocument> hdocs = (List<HDocument>) XmlUtils.xstream.fromXML(StringEscapeUtils.unescapeXml(hDocsXml)); String runPlan = req.getString("runplan", false,true,true); boolean concurrency = true; for (HDocument hdoc : hdocs) { if ( null == hdoc) continue; hdoc.tenant = acc.name; if ( StringUtils.isEmpty(hdoc.ipAddress)) { hdoc.ipAddress = req.clientIp; } } if ( StringUtils.isEmpty(runPlan) ) { IndexWriter.getInstance().insertBatch(hdocs, acc, concurrency); } else { IndexWriter.getInstance().insertBatch(hdocs, acc, IndexWriter.getInstance().getPipes(runPlan), concurrency); } res.writeXml("OK"); } /** * Deletes a document for the given Id. * @param req * @param res * @throws ApplicationFault */ private void deleteDocument(Request req, Response res, AccountInfo acc) throws SystemFault, ApplicationFault{ String id = req.getString("key", true, true, false); boolean concurrency = true; IndexWriter.getInstance().delete(acc.name, id, concurrency); res.writeXml("OK"); } /** * Deletes a document for the given Id. * @param req * @param res * @throws ApplicationFault */ private void deleteDocuments(Request req, Response res, AccountInfo acc) throws SystemFault, ApplicationFault{ String ids = req.getString("keys", true, true, false); List<String> allIds = StringUtils.fastSplit(ids, ','); IndexWriter.getInstance().delete(acc.name, allIds); res.writeXml("OK"); } /** * Searches for a query * @param req * @param res * @throws ApplicationError * @throws ApplicationFault * @throws IOException */ private void searchDocument(Request req, Response res, AccountInfo acc) throws ApplicationFault, SystemFault { String query = req.getString("query", true, true, false); QueryContext ctx = new QueryContext(acc, query); Boolean matchTags = req.getBoolean("tags", false); if ( null != matchTags ) ctx.matchTags = true; if (! StringUtils.isEmpty(req.getString("user", false,true,true)) ) { Object userObj = req.getObject("user", false); if ( null != userObj) ctx.user = (WhoAmI) userObj; } if ( ! StringUtils.isEmpty(req.clientIp) ) ctx.ipAddress = req.clientIp; QueryResult results = null; results = IndexReader.getInstance().search(ctx); int size = ( null == results) ? 0 : ( null == results.teasers) ? 0 : results.teasers.length; if ( 0 == size) { res.writeXml("<list></list>"); return; } results.toXml(res.getWriter()); } private void lookupDictionary(Request req, Response res, AccountInfo acc) throws ApplicationFault, SystemFault { String word = req.getString("word", true, true, false); word = word.toLowerCase(); String stemWord = Stemmer.getInstance().stem(word); DictEntry entry = DictionaryManager.getInstance().get(acc.name, stemWord); if ( null == entry ) { entry = DictionaryManager.getInstance().get(acc.name, word); if ( null == entry ) { res.writeXml("<r>none</r>"); return; } } try { entry.toXml(res.getWriter()); } catch (IOException ex) { throw new SystemFault(ex); } } private void refreshDictionary(Request req, Response res, AccountInfo acc) throws ApplicationFault, SystemFault { DictionaryManager.getInstance().refresh(acc.name); res.writeXml("OK"); } private void listDictionary(Request req, Response res, AccountInfo acc) throws ApplicationFault, SystemFault { String indexLetter = req.getString("index.letter", false, false, true); if ( StringUtils.isEmpty(indexLetter)) { indexLetter = StringUtils.Empty; } Writer writer = res.getWriter(); res.writeHeader(); DictionaryManager.getInstance().getKeywords(acc.name,indexLetter, writer); res.writeFooter(); } private void addDictionary(Request req, Response res, AccountInfo acc) throws ApplicationFault, SystemFault { DictEntry entry = (DictEntry) req.getObject("entry", true); String separator = req.getString("separator", false,true,false); separator = parseSeparator(separator); entry.type = entry.type.replace(separator, DictEntry.TYPE_SEPARATOR); DictionaryManager.getInstance().add(acc.name, entry); res.writeXml("OK"); } @SuppressWarnings("unchecked") private void addBatchDictionary(Request req, Response res, AccountInfo acc) throws ApplicationFault, SystemFault { String entriesXml = req.getString("entries", true, true, false); List<DictEntry> entries = (List<DictEntry>) XmlUtils.xstream.fromXML(StringEscapeUtils.unescapeXml(entriesXml)); Hashtable<String, DictEntry> hashEntries = new Hashtable<String, DictEntry>(entries.size()); for (DictEntry aEntry : entries) { hashEntries.put(aEntry.word, aEntry); } DictionaryManager.getInstance().add(acc.name, hashEntries); res.writeXml("OK"); } private void spell(Request req, Response res, AccountInfo acc) throws ApplicationFault, SystemFault { String word = req.getString("word", true, true, false); List<String> words = DictionaryManager.getInstance().getSpelled(acc.name, word); if ( null == words ) { res.writeXml("<r>none</r>"); return; } res.writeXml(words); } private void regex(Request req, Response res, AccountInfo acc) throws ApplicationFault, SystemFault { String word = req.getString("word", true, true, false); List<String> words = DictionaryManager.getInstance().getWildCard(acc.name, word); if ( null == words ) { res.writeXml("<r>none</r>"); return; } res.writeXml(words); } private void delete(Request req, Response res, AccountInfo acc) throws ApplicationFault, SystemFault { String words = req.getString("words", true, true, false); List<String> wordL = StringUtils.fastSplit(words, ','); DictionaryManager.getInstance().delete(acc.name, wordL); res.writeXml("OK"); } private void deleteSpecial(Request req, Response res, AccountInfo acc) throws ApplicationFault, SystemFault { String word = req.getString("word", true, true, false); List<String> words = DictionaryManager.getInstance().getWildCard(acc.name, word); DictionaryManager.getInstance().delete(acc.name, words); res.writeXml("OK"); } private String parseSeparator(String separator) { if ( StringUtils.isEmpty(separator) ) return ", "; String separatorLower = separator.toLowerCase(); if ( "tab".equals(separatorLower)) separator = "\t"; else if ( "newline".equals(separatorLower)) separator = "\n"; return separator; } }