package org.apache.lucene.codecs.lucene3x; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.IOException; import java.util.Collections; import java.util.HashMap; import java.util.IdentityHashMap; import java.util.Map; import java.util.Set; import org.apache.lucene.codecs.PerDocProducer; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues.Source; import org.apache.lucene.index.DocValues.Type; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.StringHelper; /** * Reads Lucene 3.x norms format and exposes it via DocValues API * @lucene.experimental * @deprecated Only for reading existing 3.x indexes */ @Deprecated class Lucene3xNormsProducer extends PerDocProducer { /** norms header placeholder */ static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1}; /** Extension of norms file */ static final String NORMS_EXTENSION = "nrm"; /** Extension of separate norms file */ static final String SEPARATE_NORMS_EXTENSION = "s"; final Map<String,NormsDocValues> norms = new HashMap<String,NormsDocValues>(); // any .nrm or .sNN files we have open at any time. // TODO: just a list, and double-close() separate norms files? final Set<IndexInput> openFiles = Collections.newSetFromMap(new IdentityHashMap<IndexInput,Boolean>()); // points to a singleNormFile IndexInput singleNormStream; final int maxdoc; // note: just like segmentreader in 3.x, we open up all the files here (including separate norms) up front. // but we just don't do any seeks or reading yet. public Lucene3xNormsProducer(Directory dir, SegmentInfo info, FieldInfos fields, IOContext context) throws IOException { Directory separateNormsDir = info.dir; // separate norms are never inside CFS maxdoc = info.getDocCount(); String segmentName = info.name; boolean success = false; try { long nextNormSeek = NORMS_HEADER.length; //skip header (header unused for now) for (FieldInfo fi : fields) { if (fi.hasNorms()) { String fileName = getNormFilename(info, fi.number); Directory d = hasSeparateNorms(info, fi.number) ? separateNormsDir : dir; // singleNormFile means multiple norms share this file boolean singleNormFile = IndexFileNames.matchesExtension(fileName, NORMS_EXTENSION); IndexInput normInput = null; long normSeek; if (singleNormFile) { normSeek = nextNormSeek; if (singleNormStream == null) { singleNormStream = d.openInput(fileName, context); openFiles.add(singleNormStream); } // All norms in the .nrm file can share a single IndexInput since // they are only used in a synchronized context. // If this were to change in the future, a clone could be done here. normInput = singleNormStream; } else { normInput = d.openInput(fileName, context); openFiles.add(normInput); // if the segment was created in 3.2 or after, we wrote the header for sure, // and don't need to do the sketchy file size check. otherwise, we check // if the size is exactly equal to maxDoc to detect a headerless file. // NOTE: remove this check in Lucene 5.0! String version = info.getVersion(); final boolean isUnversioned = (version == null || StringHelper.getVersionComparator().compare(version, "3.2") < 0) && normInput.length() == maxdoc; if (isUnversioned) { normSeek = 0; } else { normSeek = NORMS_HEADER.length; } } NormsDocValues norm = new NormsDocValues(normInput, normSeek); norms.put(fi.name, norm); nextNormSeek += maxdoc; // increment also if some norms are separate } } // TODO: change to a real check? see LUCENE-3619 assert singleNormStream == null || nextNormSeek == singleNormStream.length() : singleNormStream != null ? "len: " + singleNormStream.length() + " expected: " + nextNormSeek : "null"; success = true; } finally { if (!success) { IOUtils.closeWhileHandlingException(openFiles); } } } @Override public DocValues docValues(String field) throws IOException { return norms.get(field); } @Override public void close() throws IOException { try { IOUtils.close(openFiles); } finally { norms.clear(); openFiles.clear(); } } private static String getNormFilename(SegmentInfo info, int number) { if (hasSeparateNorms(info, number)) { long gen = Long.parseLong(info.getAttribute(Lucene3xSegmentInfoFormat.NORMGEN_PREFIX + number)); return IndexFileNames.fileNameFromGeneration(info.name, SEPARATE_NORMS_EXTENSION + number, gen); } else { // single file for all norms return IndexFileNames.segmentFileName(info.name, "", NORMS_EXTENSION); } } private static boolean hasSeparateNorms(SegmentInfo info, int number) { String v = info.getAttribute(Lucene3xSegmentInfoFormat.NORMGEN_PREFIX + number); if (v == null) { return false; } else { assert Long.parseLong(v) != SegmentInfo.NO; return true; } } static final class NormSource extends Source { protected NormSource(byte[] bytes) { super(Type.FIXED_INTS_8); this.bytes = bytes; } final byte bytes[]; @Override public BytesRef getBytes(int docID, BytesRef ref) { ref.bytes = bytes; ref.offset = docID; ref.length = 1; return ref; } @Override public long getInt(int docID) { return bytes[docID]; } @Override public boolean hasArray() { return true; } @Override public Object getArray() { return bytes; } } private class NormsDocValues extends DocValues { private final IndexInput file; private final long offset; public NormsDocValues(IndexInput normInput, long normSeek) { this.file = normInput; this.offset = normSeek; } @Override public Source load() throws IOException { return new NormSource(bytes()); } @Override public Source getDirectSource() throws IOException { return getSource(); } @Override public Type getType() { return Type.FIXED_INTS_8; } byte[] bytes() throws IOException { byte[] bytes = new byte[maxdoc]; // some norms share fds synchronized(file) { file.seek(offset); file.readBytes(bytes, 0, bytes.length, false); } // we are done with this file if (file != singleNormStream) { openFiles.remove(file); file.close(); } return bytes; } @Override public int getValueSize() { return 1; } } }