/*
* Copyright (C) 2014 Indeed Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the
* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.indeed.flamdex.writer;
import com.indeed.flamdex.api.DocIdStream;
import com.indeed.flamdex.reader.FlamdexMetadata;
import com.indeed.flamdex.api.FlamdexReader;
import com.indeed.flamdex.api.IntTermIterator;
import com.indeed.flamdex.api.StringTermIterator;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * {@link FlamdexWriter} that delegates creation of per-field writers to the supplied
 * {@link IntFieldWriterFactory} / {@link StringFieldWriterFactory} and keeps track of the
 * names of every int and string field a writer was requested for. {@link #close()} writes a
 * {@link FlamdexMetadata} listing those fields (sorted) to the output directory.
 *
 * @author jplaisance
 */
public final class GenericFlamdexWriter implements FlamdexWriter {

    /** Number of doc ids requested from the {@link DocIdStream} per call in {@link #writeFlamdex}. */
    private static final int DOC_ID_BUFFER_SIZE = 32;

    private final String outputDirectory;
    private final IntFieldWriterFactory intFieldWriterFactory;
    private final StringFieldWriterFactory stringFieldWriterFactory;
    /** Doc count handed to the field writer factories and written to the metadata on close. */
    private long maxDocs;
    private final int formatVersion;
    /** Names of the int fields registered so far (pre-populated from metadata when appending). */
    private final Set<String> intFields;
    /** Names of the string fields registered so far (pre-populated from metadata when appending). */
    private final Set<String> stringFields;

    /**
     * Creates a writer that starts with no registered fields; equivalent to the six-argument
     * constructor with {@code create == true}.
     *
     * @param outputDirectory directory passed to the field writer factories and the metadata writer
     * @param intFieldWriterFactory factory used by {@link #getIntFieldWriter(String, boolean)}
     * @param stringFieldWriterFactory factory used by {@link #getStringFieldWriter(String, boolean)}
     * @param numDocs initial doc count
     * @param formatVersion format version recorded in the metadata
     * @throws IOException declared for parity with the six-argument constructor
     */
    public GenericFlamdexWriter(String outputDirectory, IntFieldWriterFactory intFieldWriterFactory, StringFieldWriterFactory stringFieldWriterFactory, long numDocs, int formatVersion) throws IOException {
        this(outputDirectory, intFieldWriterFactory, stringFieldWriterFactory, numDocs, formatVersion, true);
    }

    /**
     * Creates a writer, either starting with no registered fields or continuing from the
     * metadata already present in {@code outputDirectory}.
     *
     * @param outputDirectory directory passed to the field writer factories and the metadata writer
     * @param intFieldWriterFactory factory used by {@link #getIntFieldWriter(String, boolean)}
     * @param stringFieldWriterFactory factory used by {@link #getStringFieldWriter(String, boolean)}
     * @param numDocs initial doc count; when {@code create} is false it must equal the
     *        {@code numDocs} stored in the existing metadata
     * @param formatVersion format version recorded in the metadata
     * @param create {@code true} to start with empty field sets; {@code false} to read the
     *        existing metadata and seed the field sets from it
     * @throws IOException if reading the existing metadata fails
     * @throws IllegalArgumentException if {@code create} is false and {@code numDocs} differs
     *         from the doc count in the existing metadata
     */
    public GenericFlamdexWriter(String outputDirectory, IntFieldWriterFactory intFieldWriterFactory, StringFieldWriterFactory stringFieldWriterFactory, long numDocs, int formatVersion, boolean create) throws IOException {
        this.outputDirectory = outputDirectory;
        this.intFieldWriterFactory = intFieldWriterFactory;
        this.stringFieldWriterFactory = stringFieldWriterFactory;
        this.maxDocs = numDocs;
        this.formatVersion = formatVersion;
        if (create) {
            intFields = new HashSet<String>();
            stringFields = new HashSet<String>();
        } else {
            final FlamdexMetadata metadata = FlamdexMetadata.readMetadata(outputDirectory);
            if (metadata.numDocs != numDocs) {
                throw new IllegalArgumentException("numDocs does not match numDocs in existing index");
            }
            intFields = new HashSet<String>(metadata.intFields);
            stringFields = new HashSet<String>(metadata.stringFields);
        }
    }

    /**
     * Same as {@link #getIntFieldWriter(String, boolean)} with {@code blowAway == false}.
     */
    @Override
    public IntFieldWriter getIntFieldWriter(String field) throws IOException {
        return getIntFieldWriter(field, false);
    }

    /**
     * Registers {@code field} as an int field and returns a writer for it from the int field
     * writer factory.
     *
     * @param field name of the int field
     * @param blowAway when {@code true}, a field name that is already registered is accepted
     *        instead of rejected (what happens to any data already written for it is up to the
     *        factory; not visible from this class)
     * @return writer created by the factory for the current output directory and doc count
     * @throws IllegalArgumentException if the field is already registered and
     *         {@code blowAway} is false
     * @throws IOException if the factory fails to create the writer
     */
    public IntFieldWriter getIntFieldWriter(String field, boolean blowAway) throws IOException {
        if (!blowAway && intFields.contains(field)) {
            throw new IllegalArgumentException("already added int field "+field);
        }
        intFields.add(field);
        return intFieldWriterFactory.create(outputDirectory, field, maxDocs);
    }

    /**
     * Same as {@link #getStringFieldWriter(String, boolean)} with {@code blowAway == false}.
     */
    @Override
    public StringFieldWriter getStringFieldWriter(String field) throws IOException {
        return getStringFieldWriter(field, false);
    }

    /**
     * Registers {@code field} as a string field and returns a writer for it from the string
     * field writer factory.
     *
     * @param field name of the string field
     * @param blowAway when {@code true}, a field name that is already registered is accepted
     *        instead of rejected (what happens to any data already written for it is up to the
     *        factory; not visible from this class)
     * @return writer created by the factory for the current output directory and doc count
     * @throws IllegalArgumentException if the field is already registered and
     *         {@code blowAway} is false
     * @throws IOException if the factory fails to create the writer
     */
    public StringFieldWriter getStringFieldWriter(String field, boolean blowAway) throws IOException {
        if (!blowAway && stringFields.contains(field)) {
            throw new IllegalArgumentException("already added string field "+field);
        }
        stringFields.add(field);
        return stringFieldWriterFactory.create(outputDirectory, field, maxDocs);
    }

    /** Returns the output directory this writer was constructed with. */
    @Override
    public String getOutputDirectory() {
        return this.outputDirectory;
    }

    /**
     * Replaces the doc count used for field writers created from now on and for the metadata
     * written by {@link #close()}.
     */
    @Override
    public void resetMaxDocs(long numDocs) {
        this.maxDocs = numDocs;
    }

    /**
     * Writes the metadata (doc count, sorted int field names, sorted string field names and
     * format version) to the output directory.
     *
     * @throws IllegalStateException if the current doc count does not fit in an {@code int},
     *         which is the type the metadata stores
     * @throws IOException if writing the metadata fails
     */
    @Override
    public void close() throws IOException {
        final int numDocs = (int) maxDocs;
        // A plain narrowing cast would silently truncate and record a wrong doc count.
        if (numDocs != maxDocs) {
            throw new IllegalStateException("maxDocs " + maxDocs + " does not fit in an int");
        }
        final List<String> intFieldsList = new ArrayList<String>(intFields);
        Collections.sort(intFieldsList);
        final List<String> stringFieldsList = new ArrayList<String>(stringFields);
        Collections.sort(stringFieldsList);
        final FlamdexMetadata metadata = new FlamdexMetadata(numDocs, intFieldsList, stringFieldsList, formatVersion);
        FlamdexMetadata.writeMetadata(outputDirectory, metadata);
    }

    /**
     * Copies the given int and string fields of {@code fdx} into {@code indexDir} using writers
     * from the supplied factories, then writes the metadata. Int fields are copied first, in
     * list order, followed by the string fields in list order.
     *
     * <p>The doc id stream, term iterators and field writers are closed even when copying
     * fails; the metadata is written only when every field was copied without an exception.
     *
     * @param indexDir output directory for the new index
     * @param fdx reader to copy terms and doc ids from
     * @param intFieldWriterFactory factory for int field writers
     * @param stringFieldWriterFactory factory for string field writers
     * @param formatVersion format version recorded in the metadata
     * @param intFields names of the int fields to copy
     * @param stringFields names of the string fields to copy
     * @throws IOException if writing any field or the metadata fails
     * @throws IllegalArgumentException if a field name occurs more than once in its list
     */
    public static void writeFlamdex(final String indexDir, final FlamdexReader fdx, final IntFieldWriterFactory intFieldWriterFactory, final StringFieldWriterFactory stringFieldWriterFactory, int formatVersion,
                                    final List<String> intFields, final List<String> stringFields) throws IOException {
        final DocIdStream dis = fdx.getDocIdStream();
        final GenericFlamdexWriter w;
        try {
            final int[] docIdBuf = new int[DOC_ID_BUFFER_SIZE];
            w = new GenericFlamdexWriter(indexDir, intFieldWriterFactory, stringFieldWriterFactory, fdx.getNumDocs(), formatVersion);
            for (final String intField : intFields) {
                copyIntField(fdx, w, dis, docIdBuf, intField);
            }
            for (final String stringField : stringFields) {
                copyStringField(fdx, w, dis, docIdBuf, stringField);
            }
        } finally {
            dis.close();
        }
        // Only reached when every field was copied, so a failed copy never gets metadata.
        w.close();
    }

    /** Copies every term and its doc ids of one int field from {@code fdx} into {@code w}. */
    private static void copyIntField(final FlamdexReader fdx, final GenericFlamdexWriter w, final DocIdStream dis, final int[] docIdBuf, final String field) throws IOException {
        final IntFieldWriter ifw = w.getIntFieldWriter(field);
        try {
            final IntTermIterator iter = fdx.getIntTermIterator(field);
            try {
                while (iter.next()) {
                    ifw.nextTerm(iter.term());
                    dis.reset(iter);
                    int n;
                    // A short (not completely filled) buffer marks the end of the term's docs.
                    do {
                        n = dis.fillDocIdBuffer(docIdBuf);
                        for (int i = 0; i < n; ++i) {
                            ifw.nextDoc(docIdBuf[i]);
                        }
                    } while (n >= docIdBuf.length);
                }
            } finally {
                iter.close();
            }
        } finally {
            ifw.close();
        }
    }

    /** Copies every term and its doc ids of one string field from {@code fdx} into {@code w}. */
    private static void copyStringField(final FlamdexReader fdx, final GenericFlamdexWriter w, final DocIdStream dis, final int[] docIdBuf, final String field) throws IOException {
        final StringFieldWriter sfw = w.getStringFieldWriter(field);
        try {
            final StringTermIterator iter = fdx.getStringTermIterator(field);
            try {
                while (iter.next()) {
                    sfw.nextTerm(iter.term());
                    dis.reset(iter);
                    int n;
                    // A short (not completely filled) buffer marks the end of the term's docs.
                    do {
                        n = dis.fillDocIdBuffer(docIdBuf);
                        for (int i = 0; i < n; ++i) {
                            sfw.nextDoc(docIdBuf[i]);
                        }
                    } while (n >= docIdBuf.length);
                }
            } finally {
                // The string iterator must be released just like the int iterator.
                iter.close();
            }
        } finally {
            sfw.close();
        }
    }
}