/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.util.fst; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.DataOutput; import org.apache.lucene.util.IntsRef; // javadocs import org.apache.lucene.util.RamUsageEstimator; /** * Wraps another Outputs implementation and encodes one or * more of its output values. You can use this when a single * input may need to map to more than one output, * maintaining order: pass the same input with a different * output by calling {@link Builder#add(IntsRef,Object)} multiple * times. The builder will then combine the outputs using * the {@link Outputs#merge(Object,Object)} method. * * <p>The resulting FST may not be minimal when an input has * more than one output, as this requires pushing all * multi-output values to a final state. * * <p>NOTE: the only way to create multiple outputs is to * add the same input to the FST multiple times in a row. This is * how the FST maps a single input to multiple outputs (e.g. you * cannot pass a List<Object> to {@link Builder#add}). If * your outputs are longs, and you need at most 2, then use * {@link UpToTwoPositiveIntOutputs} instead since it stores * the outputs more compactly (by stealing a bit from each * long value). * * <p>NOTE: this cannot wrap itself (ie you cannot make an * FST with List<List<Object>> outputs using this). * * @lucene.experimental */ // NOTE: i think we could get a more compact FST if, instead // of adding the same input multiple times with a different // output each time, we added it only once with a // pre-constructed List<T> output. This way the "multiple // values" is fully opaque to the Builder/FST. It would // require implementing the full algebra using set // arithmetic (I think?); maybe SetOfOutputs is a good name. @SuppressWarnings("unchecked") public final class ListOfOutputs<T> extends Outputs<Object> { private final Outputs<T> outputs; public ListOfOutputs(Outputs<T> outputs) { this.outputs = outputs; } @Override public Object common(Object output1, Object output2) { // These will never be a list: return outputs.common((T) output1, (T) output2); } @Override public Object subtract(Object object, Object inc) { // These will never be a list: return outputs.subtract((T) object, (T) inc); } @Override public Object add(Object prefix, Object output) { assert !(prefix instanceof List); if (!(output instanceof List)) { return outputs.add((T) prefix, (T) output); } else { List<T> outputList = (List<T>) output; List<T> addedList = new ArrayList<>(outputList.size()); for(T _output : outputList) { addedList.add(outputs.add((T) prefix, _output)); } return addedList; } } @Override public void write(Object output, DataOutput out) throws IOException { assert !(output instanceof List); outputs.write((T) output, out); } @Override public void writeFinalOutput(Object output, DataOutput out) throws IOException { if (!(output instanceof List)) { out.writeVInt(1); outputs.write((T) output, out); } else { List<T> outputList = (List<T>) output; out.writeVInt(outputList.size()); for(T eachOutput : outputList) { outputs.write(eachOutput, out); } } } @Override public Object read(DataInput in) throws IOException { return outputs.read(in); } @Override public void skipOutput(DataInput in) throws IOException { outputs.skipOutput(in); } @Override public Object readFinalOutput(DataInput in) throws IOException { int count = in.readVInt(); if (count == 1) { return outputs.read(in); } else { List<T> outputList = new ArrayList<>(count); for(int i=0;i<count;i++) { outputList.add(outputs.read(in)); } return outputList; } } @Override public void skipFinalOutput(DataInput in) throws IOException { int count = in.readVInt(); for(int i=0;i<count;i++) { outputs.skipOutput(in); } } @Override public Object getNoOutput() { return outputs.getNoOutput(); } @Override public String outputToString(Object output) { if (!(output instanceof List)) { return outputs.outputToString((T) output); } else { List<T> outputList = (List<T>) output; StringBuilder b = new StringBuilder(); b.append('['); for(int i=0;i<outputList.size();i++) { if (i > 0) { b.append(", "); } b.append(outputs.outputToString(outputList.get(i))); } b.append(']'); return b.toString(); } } @Override public Object merge(Object first, Object second) { List<T> outputList = new ArrayList<>(); if (!(first instanceof List)) { outputList.add((T) first); } else { outputList.addAll((List<T>) first); } if (!(second instanceof List)) { outputList.add((T) second); } else { outputList.addAll((List<T>) second); } //System.out.println("MERGE: now " + outputList.size() + " first=" + outputToString(first) + " second=" + outputToString(second)); //System.out.println(" return " + outputToString(outputList)); return outputList; } @Override public String toString() { return "OneOrMoreOutputs(" + outputs + ")"; } public List<T> asList(Object output) { if (!(output instanceof List)) { List<T> result = new ArrayList<>(1); result.add((T) output); return result; } else { return (List<T>) output; } } private static final long BASE_LIST_NUM_BYTES = RamUsageEstimator.shallowSizeOf(new ArrayList<Object>()); @Override public long ramBytesUsed(Object output) { long bytes = 0; if (output instanceof List) { bytes += BASE_LIST_NUM_BYTES; List<T> outputList = (List<T>) output; for(T _output : outputList) { bytes += outputs.ramBytesUsed(_output); } // 2 * to allow for ArrayList's oversizing: bytes += 2 * outputList.size() * RamUsageEstimator.NUM_BYTES_OBJECT_REF; } else { bytes += outputs.ramBytesUsed((T) output); } return bytes; } }