/*
 * Copyright 2013 Websquared, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.fastcatsearch.ir.document.merge;

import java.io.File;
import java.io.IOException;

import org.fastcatsearch.ir.common.IndexFileNames;
import org.fastcatsearch.ir.document.PrimaryKeyIndexBulkReader;
import org.fastcatsearch.ir.document.PrimaryKeyIndexBulkWriter;
import org.fastcatsearch.ir.io.BitSet;
import org.fastcatsearch.ir.io.BufferedFileOutput;
import org.fastcatsearch.ir.io.BytesBuffer;
import org.fastcatsearch.ir.io.IOUtil;
import org.fastcatsearch.ir.io.IndexOutput;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Merges two primary-key index files into a single new primary-key index.
 *
 * <p>
 * A primary key is limited to 1MB ({@link #KEY_MAX_SIZE}); the read buffers
 * used while merging are allocated with exactly that capacity.
 *
 * <p>
 * {@link #getKeyCount()} and {@link #getKeyIndexCount()} report the counters
 * of the writer created by the most recent {@code merge} call on this
 * instance.
 */
public class PrimaryKeyIndexMerger {
	private static final Logger logger = LoggerFactory.getLogger(PrimaryKeyIndexMerger.class);

	/** Capacity, in bytes, of the buffer that holds one primary key. */
	private static final int KEY_MAX_SIZE = 1024 * 1024;

	/** Writer created by the most recent merge; {@code null} until a merge has created one. */
	private PrimaryKeyIndexBulkWriter w;

	public PrimaryKeyIndexMerger() {
	}

	/**
	 * Merges {@code pkFile1} and {@code pkFile2} into {@code newPkFile} and its
	 * companion index file.
	 *
	 * <p>
	 * After incremental indexing, the primary keys of the previous revision
	 * ({@code pkFile1}) and of the new revision ({@code pkFile2}) are merged.
	 * When the same key is found in both, the document number of the previous
	 * revision is put into {@code deleteSet}. This is why a document that is
	 * indexed again incrementally does not show up twice in search results.
	 *
	 * <p>
	 * The companion index file is created in the parent directory of
	 * {@code newPkFile}, under the name returned by
	 * {@code IndexFileNames.getIndexFileName(newPkFile.getName())}. Both
	 * outputs are closed before this method returns, even if the merge or one
	 * of the close calls fails.
	 *
	 * @param pkFile1 primary-key file of the previous revision
	 * @param pkFile2 primary-key file of the new revision
	 * @param newPkFile file that receives the merged primary keys
	 * @param indexInterval interval handed to the bulk writer
	 * @param deleteSet receives the replaced document numbers of
	 *            {@code pkFile1}; may be {@code null}
	 * @return the number of document numbers put into {@code deleteSet}
	 *         (0 when {@code deleteSet} is {@code null})
	 * @throws IOException if opening, reading, writing or closing a file fails
	 */
	public int merge(File pkFile1, File pkFile2, File newPkFile, int indexInterval, BitSet deleteSet) throws IOException {
		IndexOutput output = null;
		IndexOutput indexOutput = null;
		try {
			output = new BufferedFileOutput(newPkFile);
			indexOutput = new BufferedFileOutput(newPkFile.getParentFile(), IndexFileNames.getIndexFileName(newPkFile.getName()));

			return merge(pkFile1, pkFile2, output, indexOutput, indexInterval, deleteSet);
		} finally {
			// Nested finally: a failure while closing the first output must
			// not prevent the second one from being closed.
			try {
				if (output != null) {
					output.close();
				}
			} finally {
				if (indexOutput != null) {
					indexOutput.close();
				}
			}
		}
	}

	/**
	 * Merges the two primary-key files without recording replaced documents.
	 * Equivalent to calling the six-argument overload with a {@code null}
	 * {@code deleteSet}.
	 *
	 * @param pkFile1 primary-key file of the previous revision
	 * @param pkFile2 primary-key file of the new revision
	 * @param pkmapOutput output that receives the merged primary keys
	 * @param pkmapIndexOutput output that receives the index of the merged keys
	 * @param indexInterval interval handed to the bulk writer
	 * @return always 0, because no delete set is supplied
	 * @throws IOException if reading or writing fails
	 */
	public int merge(File pkFile1, File pkFile2, IndexOutput pkmapOutput, IndexOutput pkmapIndexOutput, int indexInterval) throws IOException {
		return merge(pkFile1, pkFile2, pkmapOutput, pkmapIndexOutput, indexInterval, null);
	}

	/**
	 * Merges the two primary-key files into the given outputs, in ascending
	 * key order.
	 *
	 * <p>
	 * When a key exists in both files it is written once, with the document
	 * number from {@code pkFile2}, because the document of {@code pkFile1} was
	 * replaced by it. If {@code deleteSet} is not {@code null}, the replaced
	 * document number of {@code pkFile1} is set in it.
	 *
	 * <p>
	 * The outputs are not closed by this method; the readers it opens are.
	 * {@code done()} is called on the writer created by this call even when
	 * the merge fails part-way.
	 *
	 * @param pkFile1 primary-key file of the previous revision
	 * @param pkFile2 primary-key file of the new revision
	 * @param pkmapOutput output that receives the merged primary keys
	 * @param pkmapIndexOutput output that receives the index of the merged keys
	 * @param indexInterval interval handed to the bulk writer
	 * @param deleteSet receives the replaced document numbers of
	 *            {@code pkFile1}; may be {@code null}
	 * @return the number of document numbers put into {@code deleteSet}
	 *         (0 when {@code deleteSet} is {@code null})
	 * @throws IOException if reading or writing fails
	 */
	public int merge(File pkFile1, File pkFile2, IndexOutput pkmapOutput, IndexOutput pkmapIndexOutput, int indexInterval, BitSet deleteSet) throws IOException {
		// Incremented whenever a duplicate document is found between the
		// previous and the new revision within the same segment.
		int inSegmentDocUpdateCount = 0;

		PrimaryKeyIndexBulkReader r1 = null;
		PrimaryKeyIndexBulkReader r2 = null;
		// Track the writer of THIS call locally, so that the cleanup below
		// never finishes a stale writer left in the field by an earlier merge.
		PrimaryKeyIndexBulkWriter writer = null;
		try {
			r1 = new PrimaryKeyIndexBulkReader(pkFile1);
			r2 = new PrimaryKeyIndexBulkReader(pkFile2);
			writer = new PrimaryKeyIndexBulkWriter(pkmapOutput, pkmapIndexOutput, indexInterval);
			w = writer;

			BytesBuffer buf1 = new BytesBuffer(KEY_MAX_SIZE);
			BytesBuffer buf2 = new BytesBuffer(KEY_MAX_SIZE);

			// A negative document number means the reader has no more entries.
			int docNo1 = r1.next(buf1);
			int docNo2 = r2.next(buf2);

			// merge in ascending order
			while (docNo1 >= 0 && docNo2 >= 0) {
				int ret = BytesBuffer.compareBuffer(buf1, buf2);

				if (ret == 0) {
					// Same key in both files: doc1 was replaced by doc2, so
					// the doc2 number is written and the previous document
					// number goes into deleteSet.
					if (deleteSet != null) {
						deleteSet.set(docNo1);
						inSegmentDocUpdateCount++;
					}
					writer.write(buf1, docNo2);

					buf1.clear();
					docNo1 = r1.next(buf1);
					buf2.clear();
					docNo2 = r2.next(buf2);
				} else if (ret < 0) {
					writer.write(buf1, docNo1);

					buf1.clear();
					docNo1 = r1.next(buf1);
				} else {
					writer.write(buf2, docNo2);

					buf2.clear();
					docNo2 = r2.next(buf2);
				}
			}

			// At most one of the readers still has entries; copy them as-is.
			drain(r1, buf1, docNo1, writer);
			drain(r2, buf2, docNo2, writer);
		} finally {
			// Nested finally: each cleanup step runs even if an earlier one
			// throws. Order is unchanged: r1, r2, then the writer.
			try {
				if (r1 != null) {
					r1.close();
				}
			} finally {
				try {
					if (r2 != null) {
						r2.close();
					}
				} finally {
					if (writer != null) {
						writer.done();
					}
				}
			}
		}

		return inSegmentDocUpdateCount;
	}

	/**
	 * Writes the current entry and every remaining entry of {@code reader} to
	 * {@code writer}.
	 *
	 * @param reader reader to exhaust
	 * @param buf buffer holding the key of the current entry; reused for each
	 *            following entry
	 * @param docNo document number of the current entry, negative if the
	 *            reader is already exhausted
	 * @param writer destination of the entries
	 * @throws IOException if reading or writing fails
	 */
	private static void drain(PrimaryKeyIndexBulkReader reader, BytesBuffer buf, int docNo, PrimaryKeyIndexBulkWriter writer) throws IOException {
		while (docNo >= 0) {
			writer.write(buf, docNo);

			buf.clear();
			docNo = reader.next(buf);
		}
	}

	/**
	 * Returns the key count reported by the writer of the most recent merge.
	 * A {@code merge} method must have created a writer first; otherwise this
	 * throws {@link NullPointerException}.
	 */
	public int getKeyCount() {
		return w.getKeyCount();
	}

	/**
	 * Returns the key-index count reported by the writer of the most recent
	 * merge. A {@code merge} method must have created a writer first;
	 * otherwise this throws {@link NullPointerException}.
	 */
	public int getKeyIndexCount() {
		return w.getKeyIndexCount();
	}
}