/**********************************************************************************
 * $URL: https://source.sakaiproject.org/svn/search/trunk/search-impl/impl/src/java/org/sakaiproject/search/index/impl/ClusterFSIndexStorage.java $
 * $Id: ClusterFSIndexStorage.java 111640 2012-08-20 12:58:11Z david.horwitz@uct.ac.za $
 ***********************************************************************************
 *
 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009 The Sakai Foundation
 *
 * Licensed under the Educational Community License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *       http://www.opensource.org/licenses/ECL-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 **********************************************************************************/

package org.sakaiproject.search.index.impl;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.sakaiproject.search.index.ClusterFilesystem;
import org.sakaiproject.search.index.SegmentInfo;

/**
 * Implementation of IndexStorage using a cluster file system. This
 * implementation performs all index write operations in a new temporary
 * segment. On completion of the index operation the temporary segment is
 * merged with the current segment. If the current segment is larger than the
 * threshold, a new segment is created. Managing the segments and how they
 * relate to the cluster is delegated to the ClusterFilesystem.
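 * <p>
 * A minimal usage sketch (illustrative only: it assumes a configured
 * ClusterFilesystem named clusterFilesystem, and that the analyzer and other
 * BaseIndexStorage collaborators are already wired up, typically via Spring):
 * </p>
 *
 * <pre>
 * ClusterFSIndexStorage storage = new ClusterFSIndexStorage();
 * storage.setClusterFS(clusterFilesystem); // assumed, supplied by configuration
 * storage.init();
 *
 * IndexWriter writer = storage.getIndexWriter(true);
 * try
 * {
 * 	// add documents to the temporary segment here
 * }
 * finally
 * {
 * 	// closing merges the temporary segment into the current segment
 * 	storage.closeIndexWriter(writer);
 * }
 * </pre>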
 *
 * @author ieb
 */
public class ClusterFSIndexStorage extends BaseIndexStorage
{
	private static final Log log = LogFactory.getLog(ClusterFSIndexStorage.class);

	/**
	 * Maximum size of a segment on write (20MB).
	 */
	private long segmentThreshold = 1024 * 1024 * 20;

	private ClusterFilesystem clusterFS = null;

	/**
	 * Maximum size of a segment during merge, just short of 1.5GB.
	 */
	private long maxSegmentSize = 1024L * 1024L * 1500L;

	/**
	 * Maximum size of a segment considered for merge operations (1.2GB).
	 */
	private long maxMegeSegmentSize = 1024L * 1024L * 1200L;

	public void init()
	{
	}

	public IndexReader getIndexReader() throws IOException
	{
		return getIndexReader(true);
	}

	private IndexReader getIndexReader(boolean withLock) throws IOException
	{
		if (withLock)
		{
			clusterFS.getLock();
		}
		List<SegmentInfo> segments = clusterFS.updateSegments();
		if (log.isDebugEnabled()) log.debug("Found " + segments.size() + " segments ");
		IndexReader[] readers = new IndexReader[segments.size()];
		int j = 0;
		for (Iterator<SegmentInfo> i = segments.iterator(); i.hasNext();)
		{
			SegmentInfo segment = i.next();
			try
			{
				if (!segment.checkSegmentValidity(diagnostics, "getIndexReader "))
				{
					log.warn("Checksum Failed on " + segment);
					segment.checkSegmentValidity(true, "getIndexReader Failed");
				}
				readers[j] = IndexReader.open(FSDirectory.open(segment
						.getSegmentLocation()), false);
			}
			catch (Exception ex)
			{
				try
				{
					if (readers[j] != null)
					{
						try
						{
							readers[j].close();
							readers[j] = null;
						}
						catch (Exception e)
						{
							log.debug(e);
						}
					}
					if (log.isDebugEnabled()) log.debug("Invalid segment ", ex);
					log.warn("Found corrupted segment (" + segment.getName()
							+ ") in local store, attempting to recover from DB. Reason: "
							+ ex.getClass().getName() + ":" + ex.getMessage(), ex);
					clusterFS.recoverSegment(segment);
					readers[j] = IndexReader.open(FSDirectory.open(segment
							.getSegmentLocation()), false);
					log.warn("Recovery complete, resuming normal operations having "
							+ "restored; ignore previous problems with this segment "
							+ segment.getName());
				}
				catch (Exception e)
				{
					if (readers[j] != null)
					{
						try
						{
							readers[j].close();
							readers[j] = null;
						}
						catch (Exception ex2)
						{
							log.debug(ex2);
						}
					}
					log.error("---Problem recovering corrupted segment from the DB,\n"
							+ "--- it is probable that there has been a local hardware\n"
							+ "--- failure on this node or that the backup in the DB is missing\n"
							+ "--- or corrupt."
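	/**
	 * Opens an IndexWriter over a fresh temporary segment rather than over
	 * the current segment, so that an OutOfMemory error or crash during
	 * indexing cannot damage the live index; the temporary segment is merged
	 * into the current segment when closeIndexWriter() is called. Note that
	 * the create parameter is not used here: a new temporary segment is
	 * always created.
	 */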
							+ " To recover, remove the segment from the db and rebuild the index,\n"
							+ "--- e.g. delete from search_segments where name_ = '"
							+ segment.getName() + "';\n", ex);
				}
			}
			j++;
		}
		List<IndexReader> l = new ArrayList<IndexReader>();
		for (int i = 0; i < readers.length; i++)
		{
			if (readers[i] != null)
			{
				l.add(readers[i]);
			}
		}
		if (l.size() != readers.length)
		{
			log.warn("Opening index reader with a partial index set, this may result "
					+ "in a smaller search set than otherwise expected");
		}
		readers = l.toArray(new IndexReader[0]);
		if (readers.length > 0)
		{
			IndexReader indexReader = new MultiReader(readers);
			return indexReader;
		}
		throw new IOException("No Index available to open ");
	}
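	/**
	 * Merges the temporary segment produced by the last index writer cycle
	 * into the current segment. A new current segment is created when the
	 * existing one is missing, deleted, not a cluster segment, or larger
	 * than segmentThreshold. When more than 10 segments exist, smaller
	 * segments of a similar size are also merged together (Merge Phase 0
	 * below). Finally the temporary segment is removed and the segment list
	 * is saved back to the cluster.
	 *
	 * @param merge
	 *        if false, the merge is skipped and only the cleanup and save
	 *        steps run.
	 */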
	public IndexWriter getIndexWriter(boolean create) throws IOException
	{
		if (log.isDebugEnabled())
			log.debug("+++++++++++++++++Start Index Writer Cycle ");
		// To ensure that we don't damage the index due to OutOfMemory, should
		// it ever happen, we open a temporary index which is merged on
		// completion.
		File tempIndex = clusterFS.getTemporarySegment(true);
		IndexWriter indexWriter = new IndexWriter(FSDirectory.open(tempIndex),
				getAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
		indexWriter.setUseCompoundFile(true);
		// indexWriter.setInfoStream(System.out);
		indexWriter.setMaxMergeDocs(50);
		indexWriter.setMergeFactor(50);
		if (log.isDebugEnabled())
			log.debug("Using Temp Index Writer " + tempIndex.getPath());
		return indexWriter;
	}

	protected IndexSearcher getIndexSearcher() throws IOException
	{
		IndexSearcher indexSearcher = null;
		try
		{
			long reloadStart = System.currentTimeMillis();
			log.debug("Open Search");
			indexSearcher = new IndexSearcher(getIndexReader(false));
			long reloadEnd = System.currentTimeMillis();
			if (log.isDebugEnabled())
				log.debug("Reload Complete " + indexSearcher.maxDoc() + " in "
						+ (reloadEnd - reloadStart));
		}
		catch (FileNotFoundException e)
		{
			try
			{
				if (indexSearcher != null)
				{
					indexSearcher.close();
				}
			}
			catch (Exception ex)
			{
				log.debug(ex);
			}
			indexSearcher = null;
			log.error("There has been a major problem with the"
					+ " Search Index which has become corrupted ", e);
		}
		catch (IOException e)
		{
			try
			{
				if (indexSearcher != null)
				{
					indexSearcher.close();
				}
			}
			catch (Exception ex)
			{
				log.debug(ex);
			}
			indexSearcher = null;
			log.error("There has been a major problem with the "
					+ "Search Index which has become corrupted", e);
		}
		return indexSearcher;
	}

	public boolean indexExists()
	{
		List<SegmentInfo> segments = clusterFS.updateSegments();
		return (segments.size() > 0);
	}

	public void doPreIndexUpdate() throws IOException
	{
		if (log.isDebugEnabled()) log.debug("Start Index Cycle");
		// don't enable locks
		// FSDirectory.setDisableLocks(true);
	}

	public void doPostIndexUpdate() throws IOException
	{
	}

	private void mergeAndUpdate(boolean merge) throws IOException
	{
		if (merge)
		{
			// FSDirectory.setDisableLocks(true);
			// get the tmp index
			File tmpSegment = clusterFS.getTemporarySegment(false);
			Directory[] tmpDirectory = new Directory[1];
			tmpDirectory[0] = FSDirectory.open(tmpSegment);

			// Need to fix checksums before merging.... is that really true?
			List<SegmentInfo> segments = clusterFS.updateSegments();
			if (log.isDebugEnabled())
				log.debug("Merge Phase 1: Starting on " + segments.size() + " segments ");
			// merge it with the current index
			SegmentInfo currentSegment = null;
			if (log.isDebugEnabled()) log.debug("Found " + segments.size() + " segments ");
			if (segments.size() > 0)
			{
				currentSegment = segments.get(segments.size() - 1);
				if (currentSegment != null)
				{
					if (!currentSegment.isClusterSegment()
							|| (currentSegment.getTotalSize() > segmentThreshold)
							|| currentSegment.isDeleted())
					{
						if (diagnostics)
						{
							log.info("Current Segment not suitable, generating new segment "
									+ (currentSegment.isDeleted() ? "deleted," : "")
									+ (!currentSegment.isClusterSegment() ? "non-cluster," : "")
									+ ((currentSegment.getTotalSize() > segmentThreshold) ? "toobig,"
											: ""));
						}
						currentSegment = null;
					}
				}
			}
			if (currentSegment == null)
			{
				if (tmpDirectory[0].fileExists("segments.gen"))
				{
					currentSegment = clusterFS.saveTemporarySegment();
					/*
					 * We must add the new current segment to the list of
					 * segments so that if it gets merged in the next step it
					 * is not left out.
					 */
					segments.add(currentSegment);
					/*
					 * We should touch the segment to notify that it has been
					 * updated.
					 */
					currentSegment.touchSegment();
				}
				else
				{
					log.warn("No Segment Created during indexing process, this should not "
							+ "happen, although it is possible that the indexing operation "
							+ "did not find any files to index.");
				}
			}
			else
			{
				IndexWriter indexWriter = null;
				try
				{
					if (log.isDebugEnabled())
						log.debug("Using Existing Segment " + currentSegment.getName());
					currentSegment.touchSegment();
					Directory dir = FSDirectory.open(currentSegment.getSegmentLocation());
					indexWriter = new IndexWriter(dir, getAnalyzer(), false,
							IndexWriter.MaxFieldLength.UNLIMITED);
					indexWriter.setUseCompoundFile(true);
					// indexWriter.setInfoStream(System.out);
					indexWriter.setMaxMergeDocs(50);
					indexWriter.setMergeFactor(50);
					if (tmpDirectory[0].fileExists("segments.gen"))
					{
						if (log.isDebugEnabled())
							log.debug("Merging Temp segment " + tmpSegment.getPath()
									+ " with current segment "
									+ currentSegment.getSegmentLocation().getPath());
						indexWriter.addIndexesNoOptimize(tmpDirectory);
						indexWriter.optimize();
					}
					else
					{
						log.warn("No Merge performed, no tmp segment");
					}
				}
				finally
				{
					try
					{
						if (indexWriter != null)
						{
							indexWriter.close();
							currentSegment.touchSegment();
						}
					}
					catch (Exception ex)
					{
						// don't care if this fails
						log.debug(ex);
					}
				}
			}

			/*
			 * segments is now a list of all segments including the current
			 * segment
			 */
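			/*
			 * Merge Phase 0: sort the segments by size, walk the sorted list
			 * grouping segments whose sizes lie within an order of magnitude
			 * of one another, and record every group of two or more segments
			 * smaller than maxMegeSegmentSize as a candidate group to merge
			 * into a single new segment.
			 */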
			// create a size sorted list
			if (segments.size() > 10)
			{
				if (log.isDebugEnabled()) log.debug("Merge Phase 0 : Starting");
				for (Iterator<SegmentInfo> i = segments.iterator(); i.hasNext();)
				{
					i.next().loadSize();
				}
				Collections.sort(segments, new Comparator<SegmentInfo>()
				{
					public int compare(SegmentInfo o1, SegmentInfo o2)
					{
						long l = o1.getSize() - o2.getSize();
						if (l == 0)
						{
							return 0;
						}
						else if (l < 0)
						{
							return -1;
						}
						else
						{
							return 1;
						}
					}
				});
				long sizeBlock = 0;
				int ninblock = 0;
				int mergegroupno = 1;
				int[] mergegroup = new int[segments.size()];
				int[] groupstomerge = new int[segments.size()];
				mergegroup[0] = mergegroupno;
				{
					int j = 0;
					for (int i = 0; i < mergegroup.length; i++)
					{
						if (segments.get(i).getSize() < maxMegeSegmentSize)
						{
							groupstomerge[i] = 0;
							if (ninblock == 0)
							{
								sizeBlock = segments.get(0).getSize();
								ninblock = 1;
								if (log.isDebugEnabled()) log.debug("Order Size = " + sizeBlock);
							}
							if (segments.get(i).getSize() > sizeBlock / 10)
							{
								// count up blocks that have the same order of size
								ninblock++;
							}
							else
							{
								// if there are more than 2 in the block, force a merge
								if (ninblock >= 2)
								{
									groupstomerge[j++] = mergegroupno;
								}
								// reset for the next order of magnitude down
								ninblock = 1;
								mergegroupno++;
								sizeBlock = segments.get(i).getSize();
							}
							mergegroup[i] = mergegroupno;
						}
					}
					// catch the merge-all case
					if (ninblock >= 2)
					{
						groupstomerge[j++] = mergegroupno;
					}
					if (j > 0)
					{
						StringBuilder status = new StringBuilder();
						for (int i = 0; i < segments.size(); i++)
						{
							SegmentInfo si = segments.get(i);
							status.append("Segment ").append(i).append(" n").append(
									si.getName()).append(" s").append(si.getSize()).append(
									" g").append(mergegroup[i]).append("\n");
						}
						for (int i = 0; i < groupstomerge.length; i++)
						{
							status.append("Merge group ").append(i).append(" m").append(
									groupstomerge[i]).append("\n");
						}
						if (log.isDebugEnabled()) log.debug("Search Merge \n" + status);
					}
				}
				// groupstomerge contains a list of group numbers that need to
				// be merged; mergegroup marks each segment with a group number.
				for (int i = 0; i < groupstomerge.length; i++)
				{
					if (groupstomerge[i] != 0)
					{
						StringBuilder status = new StringBuilder();
						status.append("Group ").append(i).append(" Merge ").append(
								groupstomerge[i]).append("\n");
						// merge the old segments into a new segment
						SegmentInfo mergeSegment = clusterFS.newSegment();
						IndexWriter mergeIndexWriter = null;
						boolean mergeOk = false;
						try
						{
							mergeIndexWriter = new IndexWriter(FSDirectory.open(mergeSegment
									.getSegmentLocation()), getAnalyzer(), true,
									IndexWriter.MaxFieldLength.UNLIMITED);
							mergeIndexWriter.setUseCompoundFile(true);
							// mergeIndexWriter.setInfoStream(System.out);
							mergeIndexWriter.setMaxMergeDocs(50);
							mergeIndexWriter.setMergeFactor(50);
							List<Directory> indexes = new ArrayList<Directory>();
							long currentSize = 0L;
							for (int j = 0; j < mergegroup.length; j++)
							{
								// find if this segment is in the current merge group
								SegmentInfo si = segments.get(j);
								if (mergegroup[j] == groupstomerge[i])
								{
									// if we merge this segment, will the result
									// probably remain small enough?
									if (si.isDeleted())
									{
										status.append(" Skipped, Segment is already deleted ")
												.append(" ").append(si.getName()).append(" || ")
												.append(mergeSegment.getName()).append("\n");
									}
									else if ((currentSize + si.getSize()) < maxSegmentSize)
									{
										currentSize += si.getSize();
										Directory d = FSDirectory.open(si.getSegmentLocation());
										if (d.fileExists("segments.gen"))
										{
											status.append(" Merge ").append(si.getName()).append(
													" >> ").append(mergeSegment.getName()).append(
													"\n");
											indexes.add(d);
										}
										else
										{
											status.append(
													" Ignored segment as it does not exist ")
													.append(mergeSegment.getName()).append("\n");
										}
									}
									else
									{
										status.append(" Skipped, size > ").append(maxSegmentSize)
												.append(" ").append(si.getName()).append(" || ")
												.append(mergeSegment.getName()).append("\n");
										// don't merge this segment this time
										mergegroup[j] = -10;
									}
								}
							}
							// merge in the list of segments that we have waiting
							// to be merged
							if (diagnostics)
							{
								log.info("Merging \n" + status);
							}
							mergeIndexWriter.addIndexesNoOptimize(indexes
									.toArray(new Directory[indexes.size()]));
							mergeIndexWriter.optimize();
							if (diagnostics)
							{
								log.info("Merged Segment contains " + mergeIndexWriter.maxDoc()
										+ " documents ");
							}
							// mark the segment as created and ready for upload
							mergeSegment.setCreated();
							mergeSegment.touchSegment();
							if (log.isDebugEnabled()) log.debug("Done " + groupstomerge[i]);
							// remove old segments
							mergeOk = true;
						}
						catch (IOException ex)
						{
							log.error("Failed to merge search segments " + ex.getMessage());
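	/**
	 * Closes the supplied reader and then, under the cluster lock, runs a
	 * synchronise-only cycle (mergeAndUpdate(false)) so segment state is
	 * saved back to the cluster without performing a merge.
	 */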
							try
							{
								clusterFS.removeLocalSegment(mergeSegment);
							}
							catch (Exception ex2)
							{
								log.error("Failed to remove merge segment "
										+ mergeSegment.getName() + " " + ex2.getMessage());
							}
						}
						finally
						{
							try
							{
								if (mergeIndexWriter != null)
								{
									mergeIndexWriter.close();
								}
							}
							catch (Exception ex)
							{
								// ignore close failures
							}
						}
						if (mergeOk)
						{
							for (int j = 0; j < mergegroup.length; j++)
							{
								if (mergegroup[j] == groupstomerge[i])
								{
									clusterFS.removeLocalSegment(segments.get(j));
								}
							}
						}
					}
				}
			}
		}
		else
		{
			log.debug("Merge Not requested ");
		}
		clusterFS.removeTemporarySegment();
		clusterFS.saveSegments();
		if (log.isDebugEnabled())
			log.debug("+++++++++++++++++++++++++++++++++++++End Index Cycle");
	}

	public void setRecoverCorruptedIndex(boolean recover)
	{
	}

	/**
	 * @return Returns the clusterFS.
	 */
	public ClusterFilesystem getClusterFS()
	{
		return clusterFS;
	}

	/**
	 * @param clusterFS
	 *        The clusterFS to set.
	 */
	public void setClusterFS(ClusterFilesystem clusterFS)
	{
		this.clusterFS = clusterFS;
	}

	public long getLastUpdate()
	{
		return clusterFS.getLastUpdate();
	}

	public List getSegmentInfoList()
	{
		return clusterFS.getSegmentInfoList();
	}

	public void closeIndexReader(IndexReader indexReader) throws IOException
	{
		if (indexReader != null)
		{
			indexReader.close();
		}
		// only an update is required, no merge; the lock is acquired, the
		// index synchronised and the lock released
		clusterFS.getLock();
		mergeAndUpdate(false);
		clusterFS.releaseLock();
	}

	public void closeIndexWriter(IndexWriter indexWrite) throws IOException
	{
		if (indexWrite != null)
		{
			indexWrite.close();
		}
		// acquire a lock, merge in the index and sync
		clusterFS.getLock();
		mergeAndUpdate(true);
		clusterFS.releaseLock();
	}

	public boolean isMultipleIndexers()
	{
		return clusterFS.isMultipleIndexers();
	}

	public void closeIndexSearcher(IndexSearcher indexSearcher)
	{
		IndexReader indexReader = indexSearcher.getIndexReader();
		boolean closedAlready = false;
		try
		{
			if (indexReader != null)
			{
				indexReader.close();
				closedAlready = true;
			}
		}
		catch (Exception ex)
		{
			log.error("Failed to close Index Reader " + ex.getMessage());
		}
		try
		{
			indexSearcher.close();
		}
		catch (Exception ex)
		{
			if (closedAlready)
			{
				log.debug("Failed to close Index Searcher " + ex.getMessage());
			}
			else
			{
				log.error("Failed to close Index Searcher " + ex.getMessage());
			}
		}
	}

	/**
	 * @return the maxMegeSegmentSize
	 */
	public long getMaxMegeSegmentSize()
	{
		return maxMegeSegmentSize;
	}

	/**
	 * @param maxMegeSegmentSize
	 *        the maxMegeSegmentSize to set
	 */
	public void setMaxMegeSegmentSize(long maxMegeSegmentSize)
	{
		log.info("Max Segment Merge Size set to " + maxMegeSegmentSize);
		this.maxMegeSegmentSize = maxMegeSegmentSize;
	}

	/**
	 * @return the maxSegmentSize
	 */
	public long getMaxSegmentSize()
	{
		return maxSegmentSize;
	}

	/**
	 * @param maxSegmentSize
	 *        the maxSegmentSize to set
	 */
	public void setMaxSegmentSize(long maxSegmentSize)
	{
		log.info("Max Segment Size set to " + maxSegmentSize);
		this.maxSegmentSize = maxSegmentSize;
	}

	/**
	 * @return the segmentThreshold
	 */
	public long getSegmentThreshold()
	{
		return segmentThreshold;
	}

	/**
	 * @param segmentThreshold
	 *        the segmentThreshold to set
	 */
	public void setSegmentThreshold(long segmentThreshold)
	{
		log.info("New Segment Size threshold set to " + segmentThreshold);
		this.segmentThreshold = segmentThreshold;
	}

	/*
	 * (non-Javadoc)
	 *
	 * @see org.sakaiproject.search.index.IndexStorage#centralIndexExists()
	 */
	public boolean centralIndexExists()
	{
		return clusterFS.centralIndexExists();
	}

	public Directory getSpellDirectory()
	{
		// TODO Auto-generated method stub
		return null;
	}
}