/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.solbase;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.ResourceBundle;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicLong;

import javax.naming.OperationNotSupportedException;

import net.rubyeye.xmemcached.exception.MemcachedException;

import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.update.CommitUpdateCommand;
import org.apache.solr.update.DeleteUpdateCommand;
import org.apache.solr.update.MergeIndexesCommand;
import org.apache.solr.update.RollbackUpdateCommand;
import org.apache.solr.update.UpdateHandler;
import org.solbase.cache.CachedObjectWrapper;
import org.solbase.cache.LayeredCache;
import org.solbase.common.SolbaseException;
import org.solbase.indexer.ParsedDoc;
import org.solbase.indexer.SolbaseIndexUtil;
import org.solbase.lucenehbase.ReaderCache;
import org.solbase.lucenehbase.TermDocMetadata;
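/**
 * Solr {@link UpdateHandler} for Solbase. Rather than writing to a local Lucene index,
 * add/edit/delete commands are parsed into {@link ParsedDoc}s and applied to the
 * HBase-backed index through {@link ReaderCache}, optionally persisting to the store
 * (the "updateStore" flag) and invalidating the schema-info cache via a background flusher.
 */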
public class SolbaseIndexWriter extends UpdateHandler {
    // To manage cached reads
    private static final LinkedBlockingQueue<String> flushQueue = new LinkedBlockingQueue<String>();
    private final ExecutorService flushMonitor = Executors.newSingleThreadExecutor();

    private final org.solbase.lucenehbase.IndexWriter writer;

    private final static Logger logger = Logger.getLogger(SolbaseIndexWriter.class);

    private static SolbaseIndexUtil indexUtil;

    // stats
    AtomicLong addCommands = new AtomicLong();
    AtomicLong addCommandsCumulative = new AtomicLong();
    AtomicLong deleteByIdCommands = new AtomicLong();
    AtomicLong deleteByIdCommandsCumulative = new AtomicLong();
    AtomicLong deleteByQueryCommands = new AtomicLong();
    AtomicLong deleteByQueryCommandsCumulative = new AtomicLong();
    AtomicLong expungeDeleteCommands = new AtomicLong();
    AtomicLong mergeIndexesCommands = new AtomicLong();
    AtomicLong commitCommands = new AtomicLong();
    AtomicLong optimizeCommands = new AtomicLong();
    AtomicLong rollbackCommands = new AtomicLong();
    AtomicLong numDocsPending = new AtomicLong();
    AtomicLong numErrors = new AtomicLong();
    AtomicLong numErrorsCumulative = new AtomicLong();

    public SolbaseIndexWriter(SolrCore core) {
        super(core);

        // set SolbaseIndexUtil
        if (ResourceBundle.getBundle("solbase") != null) {
            String className = ResourceBundle.getBundle("solbase").getString("class.solbaseIndexUtil");
            if (className != null) {
                try {
                    indexUtil = (SolbaseIndexUtil) Class.forName(className).newInstance();
                } catch (InstantiationException e) {
                    e.printStackTrace();
                } catch (IllegalAccessException e) {
                    e.printStackTrace();
                } catch (ClassNotFoundException e) {
                    e.printStackTrace();
                }
            }
        }

        try {
            writer = new org.solbase.lucenehbase.IndexWriter();

            flushMonitor.execute(new Runnable() {
                public void run() {
                    Map<String, Long> lastCoreFlush = new HashMap<String, Long>();
                    while (true) {
                        try {
                            String core = flushQueue.take();
                            Long lastFlush = lastCoreFlush.get(core);
                            if (lastFlush == null || lastFlush <= (System.currentTimeMillis() - SolbaseUtil.cacheInvalidationInterval)) {
                                flush(core);
                                lastCoreFlush.put(core, System.currentTimeMillis());
                                logger.info("Flushed cache: " + core);
                            }
                        } catch (InterruptedException e) {
                            continue;
                        } catch (IOException e) {
                            logger.error(e);
                        }
                    }
                }

                private void flush(String core) throws IOException {
                    HTableInterface table = SolbaseUtil.getSchemaInfoTable();
                    try {
                        Put schemaPut = new Put(Bytes.toBytes("cache"));
                        schemaPut.add(Bytes.toBytes("info"), Bytes.toBytes("schema"), Bytes.toBytes(""));
                        table.put(schemaPut);
                    } finally {
                        SolbaseUtil.releaseTable(table);
                    }
                }
            });
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    public void setIndexUtil(SolbaseIndexUtil util) {
        indexUtil = util;
    }
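    /**
     * Adds a document to the Solbase index. The Lucene document is parsed into term/doc
     * metadata and pushed through {@link ReaderCache}; if the document carries an "edit"
     * field it is treated as a modification and routed to {@link #editDoc} instead of a
     * blind add. Returns 1 on success, -1 otherwise.
     */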
    public int addDoc(AddUpdateCommand cmd) throws IOException {
        addCommands.incrementAndGet();
        addCommandsCumulative.incrementAndGet();
        int rc = -1;

        // no duplicates allowed
        SchemaField uniqueField = core.getSchema().getUniqueKeyField();
        if (uniqueField == null)
            throw new IOException("Solbase requires a unique field");

        // if there is no ID field, use allowDups
        if (idField == null) {
            throw new IOException("Solbase requires a unique field");
        }

        try {
            String indexName = core.getName();
            writer.setIndexName(indexName);

            Document doc = cmd.getLuceneDocument(schema);
            String idFieldName = idTerm.field();

            // solbase specific fields; should be removed once consumed
            boolean updateStore = false;
            String updateVal = doc.get("updateStore");
            if (updateVal != null) {
                // update hbase after the cache is updated
                updateStore = true;
            }

            int docNumber = Integer.parseInt(doc.get(idFieldName));

            // if the edit field is present, this is a modification instead of a blind add
            String editVal = doc.get("edit");

            // we don't need the following fields; they are only used by the update api
            doc.removeField("docId");
            doc.removeField("edit");
            doc.removeField("updateStore");

            // set indexUtil on the writer
            writer.setIndexUtil(indexUtil);

            String globalId = doc.getField("global_uniq_id").stringValue();
            int shardNum = SolbaseShardUtil.getShardNum(indexName);
            int startDocId = SolbaseShardUtil.getStartDocId(shardNum);
            int endDocId = SolbaseShardUtil.getEndDocId(shardNum);

            if (editVal != null) {
                logger.info("updating doc: " + docNumber);
                if (editDoc(doc, indexName, docNumber, updateStore)) {
                    rc = 1;
                }
            } else {
                try {
                    logger.info("adding doc: " + docNumber);

                    ParsedDoc parsedDoc = writer.parseDoc(doc, schema.getAnalyzer(), indexName, docNumber, indexUtil.getSortFieldNames());
                    List<TermDocMetadata> termDocMetas = parsedDoc.getTermDocMetadatas();

                    // TODO: possible problem
                    // doc is not in cache, cluster isn't responsible for update store
                    // doc never gets updated in hbase, nor cache
                    // the for loop below will update the term vectors with this new doc.
                    // when searched, it will throw a null pointer exception on this doc
                    // therefore, update the store first when adding a doc (replication can still cause this issue if backed up)
                    ReaderCache.updateDocument(docNumber, parsedDoc, indexName, writer, LayeredCache.ModificationType.ADD, updateStore, startDocId, endDocId);

                    for (TermDocMetadata termDocMeta : termDocMetas) {
                        ReaderCache.updateTermDocsMetadata(termDocMeta.getTerm(), termDocMeta, indexName, writer, LayeredCache.ModificationType.ADD, updateStore, startDocId, endDocId);
                    }

                    rc = 1;
                    logger.info("added doc: " + docNumber);
                } catch (NumberFormatException e) {
                    logger.info("adding doc failed: " + docNumber);
                    logger.info(e.toString());
                } catch (InterruptedException e) {
                    logger.info("adding doc failed: " + docNumber);
                    logger.info(e.toString());
                } catch (MemcachedException e) {
                    logger.info("adding doc failed: " + docNumber);
                    logger.info(e.toString());
                } catch (TimeoutException e) {
                    logger.info("adding doc failed: " + docNumber);
                    logger.info(e.toString());
                } catch (SolbaseException e) {
                    logger.info("adding doc failed: " + docNumber);
                    logger.info(e.toString());
                }
            }
        } finally {
            if (rc != 1) {
                numErrors.incrementAndGet();
                numErrorsCumulative.incrementAndGet();
            }
        }

        return rc;
    }
    /**
     * Edit logic. Instead of blindly inserting, we need to compare the new doc with the
     * old doc and apply the appropriate modifications to the term vectors and the doc.
     *
     * @param newDoc
     * @param indexName
     * @param docNumber
     * @param updateStore
     * @return
     */
    public boolean editDoc(Document newDoc, String indexName, int docNumber, boolean updateStore) {
        try {
            CachedObjectWrapper<Document, Long> cachedObj = ReaderCache.getDocument(docNumber, null, indexName, 0, 0);

            if (cachedObj == null || cachedObj.getValue() == null) {
                // document doesn't exist, so let's just bail out here
                return true;
            }

            ParsedDoc parsedDoc = new ParsedDoc(newDoc);
            parsedDoc.setIndexName(indexName);
            parsedDoc.setIndexUtil(indexUtil);
            parsedDoc.setIndexWriter(writer);
            parsedDoc.setUpdateStore(updateStore);

            int shardNum = SolbaseShardUtil.getShardNum(indexName);
            int startDocId = SolbaseShardUtil.getStartDocId(shardNum);
            int endDocId = SolbaseShardUtil.getEndDocId(shardNum);

            ReaderCache.updateDocument(docNumber, parsedDoc, indexName, writer, LayeredCache.ModificationType.UPDATE, updateStore, startDocId, endDocId);

            return true;
        } catch (IOException e) {
            logger.info("edit doc failed: " + docNumber);
            logger.info(e.toString());
        } catch (InterruptedException e) {
            logger.info("edit doc failed: " + docNumber);
            logger.info(e.toString());
        } catch (MemcachedException e) {
            logger.info("edit doc failed: " + docNumber);
            logger.info(e.toString());
        } catch (TimeoutException e) {
            logger.info("edit doc failed: " + docNumber);
            logger.info(e.toString());
        } catch (SolbaseException e) {
            logger.info("edit doc failed: " + docNumber);
            logger.info(e.toString());
        }
        return false;
    }

    public void close() throws IOException {
        // no-op
    }

    public void commit(CommitUpdateCommand cmd) throws IOException {
        // no-op
    }

    public void delete(DeleteUpdateCommand cmd) throws IOException {
        deleteByIdCommands.incrementAndGet();
        deleteByIdCommandsCumulative.incrementAndGet();

        if (!cmd.fromPending && !cmd.fromCommitted) {
            numErrors.incrementAndGet();
            numErrorsCumulative.incrementAndGet();
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "meaningless command: " + cmd);
        }
        if (!cmd.fromPending || !cmd.fromCommitted) {
            numErrors.incrementAndGet();
            numErrorsCumulative.incrementAndGet();
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "operation not supported: " + cmd);
        }

        // Delete all terms/fields/etc
        String indexName = core.getName();
        writer.setIndexName(indexName);
        writer.setIndexUtil(indexUtil);

        int docId = Integer.parseInt(cmd.id);
        logger.info("deleting doc: " + docId);

        try {
            CachedObjectWrapper<Document, Long> wrapper = ReaderCache.getDocument(docId, null, indexName, 0, 0);

            boolean updateStore = cmd.getUpdateStore();

            ParsedDoc parsedDoc = new ParsedDoc();
            parsedDoc.setIndexName(indexName);
            parsedDoc.setIndexUtil(indexUtil);
            parsedDoc.setIndexWriter(writer);
            parsedDoc.setUpdateStore(updateStore);

            int shardNum = SolbaseShardUtil.getShardNum(indexName);
            int startDocId = SolbaseShardUtil.getStartDocId(shardNum);
            int endDocId = SolbaseShardUtil.getEndDocId(shardNum);

            ReaderCache.updateDocument(docId, parsedDoc, indexName, writer, LayeredCache.ModificationType.DELETE, updateStore, startDocId, endDocId);
        } catch (InterruptedException e) {
            logger.info("delete doc failed: " + docId);
            logger.info(e.toString());
        } catch (MemcachedException e) {
            logger.info("delete doc failed: " + docId);
            logger.info(e.toString());
        } catch (TimeoutException e) {
            logger.info("delete doc failed: " + docId);
            logger.info(e.toString());
        } catch (SolbaseException e) {
            logger.info("delete doc failed: " + docId);
            logger.info(e.toString());
        }
    }
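    // Delete-by-query, index merging and rollback are not supported by Solbase:
    // deleteByQuery only records the unsupported call via a stack trace, mergeIndexes
    // returns 0, and rollback is a no-op pending implementation.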
    public void deleteByQuery(DeleteUpdateCommand cmd) throws IOException {
        try {
            throw new OperationNotSupportedException();
        } catch (OperationNotSupportedException e) {
            e.printStackTrace();
        }
    }

    public int mergeIndexes(MergeIndexesCommand cmd) throws IOException {
        return 0;
    }

    public void rollback(RollbackUpdateCommand cmd) throws IOException {
        // TODO - implement rollback in case of failure
    }

    public Category getCategory() {
        return Category.UPDATEHANDLER;
    }

    public String getDescription() {
        return "Update handler for Solbase";
    }

    public URL[] getDocs() {
        return null;
    }

    public String getName() {
        return SolbaseIndexWriter.class.getName();
    }

    public String getSource() {
        return null;
    }

    public String getSourceId() {
        return null;
    }

    public NamedList<Long> getStatistics() {
        NamedList<Long> lst = new SimpleOrderedMap<Long>();
        lst.add("rollbacks", rollbackCommands.get());
        lst.add("adds", addCommands.get());
        lst.add("deletesById", deleteByIdCommands.get());
        lst.add("deletesByQuery", deleteByQueryCommands.get());
        lst.add("errors", numErrors.get());
        lst.add("cumulative_adds", addCommandsCumulative.get());
        lst.add("cumulative_deletesById", deleteByIdCommandsCumulative.get());
        lst.add("cumulative_deletesByQuery", deleteByQueryCommandsCumulative.get());
        lst.add("cumulative_errors", numErrorsCumulative.get());
        return lst;
    }

    public String getVersion() {
        return core.getVersion();
    }

    @SuppressWarnings("unused")
    private void clearCache(String core) {
        SolbaseIndexWriter.flushQueue.add(core);
    }

    public static void main(String[] args) {
        try {
            @SuppressWarnings("deprecation")
            /*
            HBaseConfiguration conf = new HBaseConfiguration();
            //conf.set("hbase.zookeeper.quorum", "den2zksb001");
            conf.set("hbase.zookeeper.quorum", "den3dhdptk01.int.photobucket.com");
            conf.set("hbase.zookeeper.property.clientPort", "2181");
            conf.setInt("hbase.client.retries.number", 7);
            conf.setInt("ipc.client.connect.max.retries", 3);

            HTablePool hTablePool = new HTablePool(conf, 10);
            HTableInterface seq = hTablePool.getTable("DocKeyIdMap");

            String globalId = "0089210673:0000540572:309AB023-orig.jpg";
            Get get = new Get(Bytes.toBytes(globalId));
            Result result = seq.get(get);
            byte[] docId = result.getValue(Bytes.toBytes("docId"), Bytes.toBytes(""));

            int docNumber = 384900472;
            SolrInputDocument doc = new SolrInputDocument();

            if (docId != null) {
                // we've indexed this doc, so it is an edit
                System.out.println(Bytes.toInt(docId));
                docNumber = Bytes.toInt(docId);
                doc.addField("edit", true);
            }
            */

            // using a separate connector to leverage a different http thread pool for updates
            CommonsHttpSolrServer solbaseServer = new CommonsHttpSolrServer("http://den2sch21:8080/solbase/pbimages~4");

            /*
            doc.addField("docId", docNumber);
            doc.addField("global_uniq_id", globalId);
            doc.addField("title", "tom");
            doc.addField("description", "Uploaded with Snapbucket");
            doc.addField("tags", "Snapbucket");
            doc.addField("path", "/albums/tt262/koh_tester/309AB021-orig.jpg");
            doc.addField("subdomain", "i618");
            doc.addField("lastModified", new Integer(SolbaseUtil.getEpochSinceSolbase(System.currentTimeMillis() / 60000)).toString());
            doc.addField("media_type", new Integer(1).toString());
            doc.addField("total_view_count", new Long(10).toString());
            doc.addField("sevendays_view_count", new Integer(5).toString());
            doc.addField("total_likes_count", new Long(5).toString());
            doc.addField("sevendays_likes_count", new Integer(1).toString());
            doc.addField("total_comments_count", new Long(5).toString());
            doc.addField("sevendays_comments_count", new Integer(1).toString());
            doc.addField("contents", "audi tom solbase Uploaded with Snapbucket ");
with Snapbucket "); // whether we want to store to hbase or not doc.addField("updateStore", true); solbaseServer.add(doc); */ // for delete only List<String> ids = new ArrayList<String>(); ids.add(127995479 + ""); // term vector didn't get deleted doc id ids.add(134876977 + ""); // term vector did get deleted doc id solbaseServer.deleteById(ids, true); } catch (MalformedURLException e) { } catch (SolrServerException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }