/* * Copyright 2010 Bizosys Technologies Limited * * Licensed to the Bizosys Technologies Limited (Bizosys) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The Bizosys licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.bizosys.hsearch; import org.apache.log4j.Logger; import com.bizosys.hsearch.common.HDocument; import com.bizosys.hsearch.index.IndexWriter; import com.bizosys.oneline.ApplicationFault; import com.bizosys.oneline.SystemFault; import com.bizosys.oneline.conf.Configuration; import com.bizosys.oneline.services.Request; import com.bizosys.oneline.services.Response; import com.bizosys.oneline.services.Service; import com.bizosys.oneline.services.ServiceMetaData; import com.bizosys.oneline.util.StringUtils; public class CrawlerService implements Service { public static Logger l = Logger.getLogger(CrawlerService.class.getName()); Configuration conf = null; public boolean init(Configuration conf, ServiceMetaData meta) { this.conf = conf; l.info("Initializing Tab Fetcher."); return true; } public void stop() { } public String getName() { return "TabfetcherService"; } public void process(Request req, Response res) { String action = req.action; try { if ( "tabfile".equals(action) ) { this.indexTabfile(req, res); } else { res.error("Failed Unknown operation : " + action); } } catch (Exception ix) { l.fatal("SearchService > ", ix); res.error("Failure : SearchService:" + action + " " + ix.getMessage()); } } /** * Gets a document given the {id} * @param req * @param res * @throws ApplicationFault * @throws SystemFault */ private void indexTabfile(Request req, Response res) throws ApplicationFault, SystemFault{ String file = req.getString("file", true, true, false); Object pristineO = req.getObject("hdoc", false); HDocument hdoc = ( null == pristineO) ? new HDocument() : (HDocument) pristineO; String idFldName = req.getString("idFldName", true, true, false); String titleFldNames = req.getString("titleFldNames", true, true, false); String previewFields = req.getString("previewFields", true, true, false); String runPlan = req.getString("runPlan", true, true, false); int startIndex = req.getInteger("startIndex", true); int endIndex = req.getInteger("endIndex", true); int batchSize = req.getInteger("batchSize", true); TabFileCrawler crawler = new TabFileCrawler( "anonymous", file,hdoc,idFldName, StringUtils.getStrings(titleFldNames, ","), StringUtils.getStrings(previewFields, ","), IndexWriter.getInstance().getPipes(runPlan), startIndex, endIndex, batchSize); crawler.fetchAndIndex(); } }