/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.jackrabbit.core.query.lucene; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.LinkedList; import java.util.List; import java.util.Queue; import java.util.SortedMap; import java.util.TreeMap; import org.apache.jackrabbit.core.query.lucene.directory.DirectoryManager; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldSelector; import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.FilterIndexReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy; import org.apache.lucene.index.LogByteSizeMergePolicy; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermEnum; import org.apache.lucene.index.TermPositions; import org.apache.lucene.index.UpgradeIndexMergePolicy; import org.apache.lucene.store.Directory; import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.Version; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * <code>IndexMigration</code> implements a utility that migrates a Jackrabbit * 1.4.x index to version 1.5. Until version 1.4.x, indexes used the character * '\uFFFF' to separate the name of a property from the value. As of Lucene * 2.3 this does not work anymore. See LUCENE-1221. Jackrabbit >= 1.5 uses * the character '[' as a separator. Whenever an index is opened from disk, a * quick check is run to find out whether a migration is required. See also * JCR-1363 for more details. */ public class IndexMigration { /** * The logger instance for this class. */ private static final Logger log = LoggerFactory.getLogger(IndexMigration.class); /** * Checks if the given <code>index</code> needs to be migrated. * * @param index the index to check and migration if needed. * @param directoryManager the directory manager. * @param oldSeparatorChar the old separator char that needs to be replaced. * @throws IOException if an error occurs while migrating the index. */ public static void migrate(PersistentIndex index, DirectoryManager directoryManager, char oldSeparatorChar) throws IOException { Directory indexDir = index.getDirectory(); log.debug("Checking {} ...", indexDir); ReadOnlyIndexReader reader = index.getReadOnlyIndexReader(); try { if (IndexFormatVersion.getVersion(reader).getVersion() >= IndexFormatVersion.V3.getVersion()) { // index was created with Jackrabbit 1.5 or higher // no need for migration log.debug("IndexFormatVersion >= V3, no migration needed"); return; } // assert: there is at least one node in the index, otherwise the // index format version would be at least V3 TermEnum terms = reader.terms(new Term(FieldNames.PROPERTIES, "")); try { Term t = terms.term(); if (t.text().indexOf(oldSeparatorChar) == -1) { log.debug("Index already migrated"); return; } } finally { terms.close(); } } finally { reader.release(); index.releaseWriterAndReaders(); } // if we get here then the index must be migrated log.debug("Index requires migration {}", indexDir); String migrationName = index.getName() + "_v36"; if (directoryManager.hasDirectory(migrationName)) { directoryManager.delete(migrationName); } Directory migrationDir = directoryManager.getDirectory(migrationName); final IndexWriterConfig c = new IndexWriterConfig(Version.LUCENE_36, new JackrabbitAnalyzer()); c.setMergePolicy(new UpgradeIndexMergePolicy(new LogByteSizeMergePolicy())); c.setIndexDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()); try { IndexWriter writer = new IndexWriter(migrationDir, c); try { IndexReader r = new MigrationIndexReader(IndexReader.open(index.getDirectory()), oldSeparatorChar); try { writer.addIndexes(r); writer.forceMerge(1); writer.close(); } finally { r.close(); } } finally { writer.close(); } } finally { migrationDir.close(); } directoryManager.delete(index.getName()); if (!directoryManager.rename(migrationName, index.getName())) { throw new IOException("failed to move migrated directory " + migrationDir); } log.info("Migrated " + index.getName()); } //---------------------------< internal helper >---------------------------- /** * An index reader that migrates stored field values and term text on the * fly. */ private static class MigrationIndexReader extends FilterIndexReader { private final char oldSepChar; public MigrationIndexReader(IndexReader in, char oldSepChar) { super(in); this.oldSepChar = oldSepChar; } @Override public IndexReader[] getSequentialSubReaders() { return null; } @Override public FieldInfos getFieldInfos() { return ReaderUtil.getMergedFieldInfos(in); } @Override public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException { Document doc = super.document(n, fieldSelector); Fieldable[] fields = doc.getFieldables(FieldNames.PROPERTIES); if (fields != null) { doc.removeFields(FieldNames.PROPERTIES); for (Fieldable field : fields) { String value = field.stringValue(); value = value.replace(oldSepChar, '['); doc.add(new Field(FieldNames.PROPERTIES, false, value, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO)); } } return doc; } @Override public TermEnum terms() throws IOException { List<TermEnum> enums = new ArrayList<TermEnum>(); List<String> fieldNames = new ArrayList<String>(ReaderUtil.getIndexedFields(in)); Collections.sort(fieldNames); for (String fieldName : fieldNames) { if (fieldName.equals(FieldNames.PROPERTIES)) { addPropertyTerms(enums); } else { enums.add(new RangeScan(in, new Term(fieldName, ""), new Term(fieldName, "\uFFFF"))); } } return new MigrationTermEnum(new ChainedTermEnum(enums), oldSepChar); } @Override public TermPositions termPositions() throws IOException { return new MigrationTermPositions(in.termPositions(), oldSepChar); } private void addPropertyTerms(List<TermEnum> enums) throws IOException { SortedMap<String, TermEnum> termEnums = new TreeMap<String, TermEnum>( new Comparator<String>() { public int compare(String s1, String s2) { s1 = s1.replace(oldSepChar, '['); s2 = s2.replace(oldSepChar, '['); return s1.compareTo(s2); } }); // scan through terms and find embedded field names TermEnum terms = new RangeScan(in, new Term(FieldNames.PROPERTIES, ""), new Term(FieldNames.PROPERTIES, "\uFFFF")); String previous = null; while (terms.next()) { Term t = terms.term(); String name = t.text().substring(0, t.text().indexOf(oldSepChar) + 1); if (!name.equals(previous)) { termEnums.put(name, new RangeScan(in, new Term(FieldNames.PROPERTIES, name), new Term(FieldNames.PROPERTIES, name + "\uFFFF"))); } previous = name; } enums.addAll(termEnums.values()); } private static class MigrationTermEnum extends FilterTermEnum { private final char oldSepChar; public MigrationTermEnum(TermEnum in, char oldSepChar) { super(in); this.oldSepChar = oldSepChar; } public Term term() { Term t = super.term(); if (t == null) { return t; } if (t.field().equals(FieldNames.PROPERTIES)) { String text = t.text(); return t.createTerm(text.replace(oldSepChar, '[')); } else { return t; } } TermEnum unwrap() { return in; } } private static class MigrationTermPositions extends FilterTermPositions { private final char oldSepChar; public MigrationTermPositions(TermPositions in, char oldSepChar) { super(in); this.oldSepChar = oldSepChar; } public void seek(Term term) throws IOException { if (term.field().equals(FieldNames.PROPERTIES)) { char[] text = term.text().toCharArray(); text[term.text().indexOf('[')] = oldSepChar; super.seek(term.createTerm(new String(text))); } else { super.seek(term); } } public void seek(TermEnum termEnum) throws IOException { if (termEnum instanceof MigrationTermEnum) { super.seek(((MigrationTermEnum) termEnum).unwrap()); } else { super.seek(termEnum); } } } } static final class ChainedTermEnum extends TermEnum { private Queue<TermEnum> queue = new LinkedList<TermEnum>(); public ChainedTermEnum(Collection<TermEnum> enums) { super(); queue.addAll(enums); } public boolean next() throws IOException { boolean newEnum = false; for (;;) { TermEnum terms = queue.peek(); if (terms == null) { // no more enums break; } if (newEnum && terms.term() != null) { // need to check if enum is already positioned // at first term return true; } if (terms.next()) { return true; } else { queue.remove(); terms.close(); newEnum = true; } } return false; } public Term term() { TermEnum terms = queue.peek(); if (terms != null) { return terms.term(); } return null; } public int docFreq() { TermEnum terms = queue.peek(); if (terms != null) { return terms.docFreq(); } return 0; } public void close() throws IOException { // close remaining while (!queue.isEmpty()) { queue.remove().close(); } } } }