/*
 * Copyright 2013 Websquared, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.fastcatsearch.ir.document;

import java.io.File;
import java.io.IOException;
import java.util.zip.DataFormatException;
import java.util.zip.Deflater;
import java.util.zip.Inflater;

import org.fastcatsearch.ir.common.IRException;
import org.fastcatsearch.ir.common.IndexFileNames;
import org.fastcatsearch.ir.io.BufferedFileInput;
import org.fastcatsearch.ir.io.BufferedFileOutput;
import org.fastcatsearch.ir.io.BytesDataInput;
import org.fastcatsearch.ir.io.IOUtil;
import org.fastcatsearch.ir.io.IndexInput;
import org.fastcatsearch.ir.io.IndexOutput;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Shrinks the stored-document file of an index directory to a smaller document count.
 *
 * <p>Layout as read and written by this class:
 * <ul>
 * <li>The stored file starts with the document count (int) followed by the block size (int).</li>
 * <li>The position file holds one long per block: the offset of that block in the stored file.</li>
 * <li>Each block in the stored file is {@code [int compressedLength][compressedLength bytes]},
 * where the bytes are a deflate stream.</li>
 * <li>An inflated block is a sequence of {@code [int docLength][docLength bytes]} records.</li>
 * </ul>
 *
 * <p>The position file is only read here; it is never modified.
 */
public class DocumentRestorer {
    private static final Logger logger = LoggerFactory.getLogger(DocumentRestorer.class);

    /** Size of the scratch buffers used to inflate and re-deflate a single block. */
    private static final int BUFFER_SIZE = 3 * 1024 * 1024;

    /** Number of bytes occupied by the int length prefix of a block. */
    private static final int SIZE_OF_INT = Integer.SIZE / Byte.SIZE;

    private final File dir;

    /**
     * @param dir directory that contains the stored-document and position files
     * @throws IOException declared for caller compatibility; not thrown by this constructor
     * @throws IRException declared for caller compatibility; not thrown by this constructor
     */
    public DocumentRestorer(File dir) throws IOException, IRException {
        this.dir = dir;
    }

    /**
     * Truncates the stored-document file so that it holds exactly {@code newSize} documents.
     * Does nothing when the file already holds {@code newSize} documents or fewer.
     *
     * <p>If the new last document ends a block, the file is cut right after that block.
     * Otherwise the block containing the new last document is inflated, cut after that
     * document, deflated again and written back in place of the old block.
     *
     * @param newSize number of documents to keep; must not be negative
     * @throws IllegalArgumentException if {@code newSize} is negative
     * @throws IOException if a file cannot be read or written, or its contents are inconsistent
     * @throws IRException declared for caller compatibility
     */
    public void setSize(int newSize) throws IRException, IOException {
        if (newSize < 0) {
            throw new IllegalArgumentException("newSize must not be negative: " + newSize);
        }

        long truncateAt = 0L;     // file length to cut the stored file to
        byte[] lastBlock = null;  // compressed bytes of the block that must be shortened, if any
        int docsToKeep = 0;       // number of leading documents to keep from lastBlock

        // Both inputs are opened up front (as before) and are always closed, including on the
        // "nothing to do" return and on any exception.
        IndexInput docInput = new BufferedFileInput(dir, IndexFileNames.docStored);
        try {
            IndexInput positionInput = new BufferedFileInput(dir, IndexFileNames.docPosition);
            try {
                // check document count and block size
                int prevDocumentCount = docInput.readInt();
                if (newSize >= prevDocumentCount) {
                    // do nothing
                    return;
                }
                logger.info("DocumentRestorer size ={} => {}", prevDocumentCount, newSize);

                int blockSize = docInput.readInt();
                if (blockSize <= 0) {
                    throw new IOException("Invalid block size " + blockSize + " in " + IndexFileNames.docStored);
                }

                // Block that contains the new last document (block 0 when no document is kept).
                int blockIndex = newSize == 0 ? 0 : (newSize - 1) / blockSize;
                // Widen before multiplying so that a large block index cannot overflow int.
                positionInput.seek((long) blockIndex * IOUtil.SIZE_OF_LONG);
                long blockStart = positionInput.readLong();

                if (newSize == 0) {
                    // No document survives: cut at the start of the first block.
                    // NOTE(review): assumes a stored file without any block is valid for
                    // readers/writers of this format - confirm against the writer.
                    truncateAt = blockStart;
                } else {
                    docInput.seek(blockStart);
                    int blockLength = docInput.readInt();
                    if (blockLength < 0) {
                        throw new IOException("Invalid block length " + blockLength + " at offset " + blockStart);
                    }
                    docsToKeep = newSize % blockSize;
                    if (docsToKeep == 0) {
                        // The block is kept whole: it ends after its int length prefix plus
                        // its payload. (The prefix was previously left out of this sum, which
                        // cut off the last bytes of the block.)
                        truncateAt = blockStart + SIZE_OF_INT + blockLength;
                    } else {
                        // The block must be shortened: load it and cut the file before it.
                        lastBlock = new byte[blockLength];
                        docInput.readBytes(lastBlock, 0, blockLength);
                        truncateAt = blockStart;
                    }
                }
            } finally {
                positionInput.close();
            }
        } finally {
            // Closed before the same file is reopened for writing below.
            docInput.close();
        }

        // Recompress before touching the file so that a failure leaves it unmodified.
        byte[] tail = lastBlock == null ? null : recompressLeadingDocuments(lastBlock, docsToKeep);

        IndexOutput out = new BufferedFileOutput(dir, IndexFileNames.docStored, true);
        try {
            out.seek(0);
            out.writeInt(newSize);
            out.setLength(truncateAt);
            if (tail != null) {
                out.seek(out.length()); // move to end of file
                out.writeInt(tail.length);
                out.writeBytes(tail, 0, tail.length);
            }
            logger.info("restored doc.stored file size() = {}", out.length());
        } finally {
            out.close();
        }
    }

    /**
     * Inflates a block, keeps its first {@code docCount} documents and deflates them again.
     *
     * @param compressedBlock deflate stream of one block (without the int length prefix)
     * @param docCount number of leading documents to keep
     * @return the deflated bytes of the shortened block, sized exactly
     * @throws IOException if the block cannot be inflated, is shorter than expected, or the
     *         shortened block does not fit the compression buffer
     * @throws IRException declared in case the underlying reader raises it
     */
    private byte[] recompressLeadingDocuments(byte[] compressedBlock, int docCount) throws IOException, IRException {
        byte[] inflated = new byte[BUFFER_SIZE];
        int inflatedLength;
        Inflater inflater = new Inflater();
        try {
            inflater.setInput(compressedBlock);
            inflatedLength = inflater.inflate(inflated);
        } catch (DataFormatException e) {
            // Keep the cause so the actual zlib error is visible in the stack trace.
            throw new IOException("DataFormatException", e);
        } finally {
            inflater.end();
        }

        // Walk over the [int docLength][bytes] records to find where the kept part ends.
        BytesDataInput blockInput = new BytesDataInput(inflated, 0, inflatedLength);
        for (int i = 0; i < docCount; i++) {
            int docLen = blockInput.readInt();
            blockInput.seek(blockInput.position() + docLen);
        }
        int keptLength = (int) blockInput.position();
        if (keptLength < 0 || keptLength > inflatedLength) {
            throw new IOException("Block holds fewer than " + docCount + " documents: kept length "
                    + keptLength + ", inflated length " + inflatedLength);
        }

        byte[] deflated = new byte[BUFFER_SIZE];
        int deflatedLength;
        Deflater deflater = new Deflater(Deflater.BEST_SPEED);
        try {
            deflater.setInput(inflated, 0, keptLength);
            deflater.finish();
            deflatedLength = deflater.deflate(deflated);
            // A single deflate() call is used; if the output did not fit, the result is
            // incomplete and must not be written to the file.
            if (!deflater.finished()) {
                throw new IOException("Compressed block does not fit into buffer of " + BUFFER_SIZE + " bytes");
            }
        } finally {
            // Release the native zlib resources.
            deflater.end();
        }

        byte[] result = new byte[deflatedLength];
        System.arraycopy(deflated, 0, result, 0, deflatedLength);
        return result;
    }
}