package org.apache.lucene.index;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Iterator;
import java.util.NoSuchElementException;

import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RAMFile;
import org.apache.lucene.store.RAMInputStream;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.BytesRef;

/**
 * Prefix codes term instances (prefixes are shared).
 *
 * <p>Each term is stored as one record, as written by {@link Builder#add(Term)}:
 * <ol>
 *   <li>a VInt header whose lowest bit is set when the field differs from the
 *       previous term's field, and whose remaining bits hold the number of
 *       leading bytes shared with the previous term;</li>
 *   <li>the field name as a string, only when the lowest header bit is set;</li>
 *   <li>a VInt holding the number of suffix bytes;</li>
 *   <li>the suffix bytes themselves.</li>
 * </ol>
 *
 * @lucene.experimental
 */
class PrefixCodedTerms implements Iterable<Term> {
  final RAMFile buffer;

  private PrefixCodedTerms(RAMFile buffer) {
    this.buffer = buffer;
  }

  /** @return size in bytes */
  public long getSizeInBytes() {
    return buffer.getSizeInBytes();
  }

  /** @return iterator over the terms, in the order they were added to the builder */
  @Override
  public Iterator<Term> iterator() {
    return new PrefixCodedTermsIterator();
  }

  /**
   * Decodes the terms from the enclosing instance's buffer one record at a time.
   *
   * <p>A single {@link Term} instance (and a single {@link BytesRef}) is reused
   * for every call to {@link #next()}; a caller that needs to keep a term past
   * the following {@code next()} call has to copy it.
   */
  class PrefixCodedTermsIterator implements Iterator<Term> {
    final IndexInput input;
    String field = "";
    BytesRef bytes = new BytesRef();
    Term term = new Term(field, bytes);

    PrefixCodedTermsIterator() {
      try {
        input = new RAMInputStream("PrefixCodedTermsIterator", buffer);
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    /** @return true while unread bytes remain in the buffer */
    @Override
    public boolean hasNext() {
      return input.getFilePointer() < input.length();
    }

    /**
     * Decodes and returns the next term.
     *
     * @return the shared, reused {@link Term} instance, updated to the next term
     * @throws NoSuchElementException if the buffer has been fully consumed
     * @throws RuntimeException wrapping any {@link IOException} raised while reading
     */
    @Override
    public Term next() {
      // Enforce the Iterator contract explicitly; an assert alone vanishes
      // when assertions are disabled.
      if (!hasNext()) {
        throw new NoSuchElementException();
      }
      try {
        int code = input.readVInt();
        if ((code & 1) != 0) {
          // new field
          field = input.readString();
        }
        int prefix = code >>> 1;
        int suffix = input.readVInt();
        bytes.grow(prefix + suffix);
        // The first `prefix` bytes are left over from the previous term;
        // only the suffix is read in behind them.
        input.readBytes(bytes.bytes, prefix, suffix);
        bytes.length = prefix + suffix;
        term.set(field, bytes);
        return term;
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    /**
     * Not supported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void remove() {
      throw new UnsupportedOperationException();
    }
  }

  /** Builds a PrefixCodedTerms: call add repeatedly, then finish. */
  public static class Builder {
    private final RAMFile buffer = new RAMFile();
    private final RAMOutputStream output = new RAMOutputStream(buffer);
    // Holds a copy of the most recently added term; starts out as the empty term.
    private final Term lastTerm = new Term("");

    /**
     * add a term
     *
     * <p>Terms must be added in strictly increasing order (checked by assertion
     * only), since each term is encoded relative to the one added before it.
     *
     * @param term the term to append
     * @throws RuntimeException wrapping any {@link IOException} raised while writing
     */
    public void add(Term term) {
      assert lastTerm.equals(new Term("")) || term.compareTo(lastTerm) > 0;

      try {
        int prefix = sharedPrefix(lastTerm.bytes, term.bytes);
        int suffix = term.bytes.length - prefix;
        if (term.field.equals(lastTerm.field)) {
          // Same field: low bit clear, no field name follows.
          output.writeVInt(prefix << 1);
        } else {
          // Field changed: low bit set, followed by the new field name.
          output.writeVInt(prefix << 1 | 1);
          output.writeString(term.field);
        }
        output.writeVInt(suffix);
        output.writeBytes(term.bytes.bytes, term.bytes.offset + prefix, suffix);
        // Copy rather than alias: the bytes are copied into lastTerm's own BytesRef.
        lastTerm.bytes.copyBytes(term.bytes);
        lastTerm.field = term.field;
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    /**
     * return finalized form
     *
     * @return a PrefixCodedTerms backed by the buffer written so far
     * @throws RuntimeException wrapping any {@link IOException} raised while
     *         closing the output
     */
    public PrefixCodedTerms finish() {
      try {
        output.close();
        return new PrefixCodedTerms(buffer);
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    /**
     * Counts the leading bytes that two byte sequences have in common.
     *
     * @return the index of the first differing byte, or the length of the
     *         shorter sequence when one is a prefix of the other
     */
    private static int sharedPrefix(BytesRef term1, BytesRef term2) {
      final int limit = Math.min(term1.length, term2.length);
      for (int i = 0; i < limit; i++) {
        if (term1.bytes[term1.offset + i] != term2.bytes[term2.offset + i]) {
          return i;
        }
      }
      return limit;
    }
  }
}