/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.codecs.simpletext; import java.io.FileNotFoundException; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.text.DecimalFormat; import java.text.DecimalFormatSymbols; import java.text.ParseException; import java.util.Arrays; import java.util.Collection; import java.util.Locale; import org.apache.lucene.codecs.CompoundFormat; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.Lock; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.StringHelper; /** * plain text compound format. * <p> * <b>FOR RECREATIONAL USE ONLY</b> * @lucene.experimental */ public class SimpleTextCompoundFormat extends CompoundFormat { /** Sole constructor. */ public SimpleTextCompoundFormat() { } @Override public Directory getCompoundReader(Directory dir, SegmentInfo si, IOContext context) throws IOException { String dataFile = IndexFileNames.segmentFileName(si.name, "", DATA_EXTENSION); final IndexInput in = dir.openInput(dataFile, context); BytesRefBuilder scratch = new BytesRefBuilder(); // first get to TOC: DecimalFormat df = new DecimalFormat(OFFSETPATTERN, DecimalFormatSymbols.getInstance(Locale.ROOT)); long pos = in.length() - TABLEPOS.length - OFFSETPATTERN.length() - 1; in.seek(pos); SimpleTextUtil.readLine(in, scratch); assert StringHelper.startsWith(scratch.get(), TABLEPOS); long tablePos = -1; try { tablePos = df.parse(stripPrefix(scratch, TABLEPOS)).longValue(); } catch (ParseException e) { throw new CorruptIndexException("can't parse CFS trailer, got: " + scratch.get().utf8ToString(), in); } // seek to TOC and read it in.seek(tablePos); SimpleTextUtil.readLine(in, scratch); assert StringHelper.startsWith(scratch.get(), TABLE); int numEntries = Integer.parseInt(stripPrefix(scratch, TABLE)); final String fileNames[] = new String[numEntries]; final long startOffsets[] = new long[numEntries]; final long endOffsets[] = new long[numEntries]; for (int i = 0; i < numEntries; i++) { SimpleTextUtil.readLine(in, scratch); assert StringHelper.startsWith(scratch.get(), TABLENAME); fileNames[i] = si.name + IndexFileNames.stripSegmentName(stripPrefix(scratch, TABLENAME)); if (i > 0) { // files must be unique and in sorted order assert fileNames[i].compareTo(fileNames[i-1]) > 0; } SimpleTextUtil.readLine(in, scratch); assert StringHelper.startsWith(scratch.get(), TABLESTART); startOffsets[i] = Long.parseLong(stripPrefix(scratch, TABLESTART)); SimpleTextUtil.readLine(in, scratch); assert StringHelper.startsWith(scratch.get(), TABLEEND); endOffsets[i] = Long.parseLong(stripPrefix(scratch, TABLEEND)); } return new Directory() { private int getIndex(String name) throws IOException { int index = Arrays.binarySearch(fileNames, name); if (index < 0) { throw new FileNotFoundException("No sub-file found (fileName=" + name + " files: " + Arrays.toString(fileNames) + ")"); } return index; } @Override public String[] listAll() throws IOException { ensureOpen(); return fileNames.clone(); } @Override public long fileLength(String name) throws IOException { ensureOpen(); int index = getIndex(name); return endOffsets[index] - startOffsets[index]; } @Override public IndexInput openInput(String name, IOContext context) throws IOException { ensureOpen(); int index = getIndex(name); return in.slice(name, startOffsets[index], endOffsets[index] - startOffsets[index]); } @Override public void close() throws IOException { in.close(); } // write methods: disabled @Override public IndexOutput createOutput(String name, IOContext context) { throw new UnsupportedOperationException(); } @Override public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) { throw new UnsupportedOperationException(); } @Override public void sync(Collection<String> names) { throw new UnsupportedOperationException(); } @Override public void deleteFile(String name) { throw new UnsupportedOperationException(); } @Override public void rename(String source, String dest) { throw new UnsupportedOperationException(); } @Override public void syncMetaData() { throw new UnsupportedOperationException(); } @Override public Lock obtainLock(String name) { throw new UnsupportedOperationException(); } }; } @Override public void write(Directory dir, SegmentInfo si, IOContext context) throws IOException { String dataFile = IndexFileNames.segmentFileName(si.name, "", DATA_EXTENSION); int numFiles = si.files().size(); String names[] = si.files().toArray(new String[numFiles]); Arrays.sort(names); long startOffsets[] = new long[numFiles]; long endOffsets[] = new long[numFiles]; BytesRefBuilder scratch = new BytesRefBuilder(); try (IndexOutput out = dir.createOutput(dataFile, context)) { for (int i = 0; i < names.length; i++) { // write header for file SimpleTextUtil.write(out, HEADER); SimpleTextUtil.write(out, names[i], scratch); SimpleTextUtil.writeNewline(out); // write bytes for file startOffsets[i] = out.getFilePointer(); try (IndexInput in = dir.openInput(names[i], IOContext.READONCE)) { out.copyBytes(in, in.length()); } endOffsets[i] = out.getFilePointer(); } long tocPos = out.getFilePointer(); // write CFS table SimpleTextUtil.write(out, TABLE); SimpleTextUtil.write(out, Integer.toString(numFiles), scratch); SimpleTextUtil.writeNewline(out); for (int i = 0; i < names.length; i++) { SimpleTextUtil.write(out, TABLENAME); SimpleTextUtil.write(out, names[i], scratch); SimpleTextUtil.writeNewline(out); SimpleTextUtil.write(out, TABLESTART); SimpleTextUtil.write(out, Long.toString(startOffsets[i]), scratch); SimpleTextUtil.writeNewline(out); SimpleTextUtil.write(out, TABLEEND); SimpleTextUtil.write(out, Long.toString(endOffsets[i]), scratch); SimpleTextUtil.writeNewline(out); } DecimalFormat df = new DecimalFormat(OFFSETPATTERN, DecimalFormatSymbols.getInstance(Locale.ROOT)); SimpleTextUtil.write(out, TABLEPOS); SimpleTextUtil.write(out, df.format(tocPos), scratch); SimpleTextUtil.writeNewline(out); } } // helper method to strip strip away 'prefix' from 'scratch' and return as String private String stripPrefix(BytesRefBuilder scratch, BytesRef prefix) { return new String(scratch.bytes(), prefix.length, scratch.length() - prefix.length, StandardCharsets.UTF_8); } /** Extension of compound file */ static final String DATA_EXTENSION = "scf"; final static BytesRef HEADER = new BytesRef("cfs entry for: "); final static BytesRef TABLE = new BytesRef("table of contents, size: "); final static BytesRef TABLENAME = new BytesRef(" filename: "); final static BytesRef TABLESTART = new BytesRef(" start: "); final static BytesRef TABLEEND = new BytesRef(" end: "); final static BytesRef TABLEPOS = new BytesRef("table of contents begins at offset: "); final static String OFFSETPATTERN; static { int numDigits = Long.toString(Long.MAX_VALUE).length(); char pattern[] = new char[numDigits]; Arrays.fill(pattern, '0'); OFFSETPATTERN = new String(pattern); } }