/* * Copyright 2013 Websquared, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.fastcatsearch.ir.index; import org.fastcatsearch.ir.common.IRException; import org.fastcatsearch.ir.io.BytesBuffer; import org.fastcatsearch.ir.io.IOUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * * @author sangwook * */ public class PostingBuffer { protected static Logger logger = LoggerFactory.getLogger(PostingBuffer.class); protected int postingSize; protected int lastDocNo; protected BytesBuffer postingVector; protected int lastDocDelta; protected int lastDocFrequency; public PostingBuffer(){ postingVector = new BytesBuffer(32); this.postingSize = 0; this.lastDocNo = -1; IOUtil.writeInt(postingVector, postingSize); IOUtil.writeInt(postingVector, lastDocNo); } protected void ensurePostingVectorCapasity(int additionalSize){ if(postingVector.remaining() < additionalSize){ //2^n 승으로 증가. int newAdditionalSize = postingVector.size() << 1; while(newAdditionalSize < postingVector.pos() + additionalSize){ newAdditionalSize <<= 1; } byte[] newbuffer = new byte[newAdditionalSize]; System.arraycopy(postingVector.bytes, 0, newbuffer, 0, postingVector.pos()); postingVector.bytes = newbuffer; postingVector.length = newbuffer.length; } } public void addOne(int docNo, int position) throws IRException{ // logger.debug("add >> {}, {}", docNo, position); //동일한 문서번호면 freq를 올려준다. if(docNo == lastDocNo){ lastDocFrequency++; }else if(docNo > lastDocNo){ //문서번호가 증가하면 기록. if(postingSize == 0){ //첫 문서면 기록하지 않고 두번째문서부터만 이전 문서기록. lastDocDelta = docNo; }else{ writeLastDocInfo(); lastDocDelta = docNo - lastDocNo - 1; } lastDocNo = docNo; lastDocFrequency++; postingSize++; }else{ throw new IRException("Input docNo cannot less than lastDocNo. docNo="+docNo+", lastDocNo="+lastDocNo); } } protected void writeLastDocInfo(){ ensurePostingVectorCapasity(10); //lastDocDelta 기록. IOUtil.writeVInt(postingVector, lastDocDelta); //lastDocFrequency 기록. IOUtil.writeVInt(postingVector, lastDocFrequency); lastDocFrequency = 0; } public void finish(){ //flush 안된 남은 posting정보 기록. if(lastDocFrequency > 0){ writeLastDocInfo(); } postingVector.flip(); IOUtil.writeInt(postingVector, postingSize); IOUtil.writeInt(postingVector, lastDocNo); postingVector.pos(0); } public int size(){ return postingVector.limit(); } public int count(){ return postingSize; } public int lastDocNo(){ return lastDocNo; } public int firstDocNo(){ return IOUtil.readVInt(postingVector.bytes, 8); } public BytesBuffer buffer(){ return postingVector; } }