/*
* Copyright (C) 2014 Indeed Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the
* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.indeed.flamdex.lucene;
import com.indeed.flamdex.AbstractFlamdexReader;
import com.indeed.flamdex.api.DocIdStream;
import com.indeed.flamdex.api.IntTermIterator;
import com.indeed.flamdex.api.StringTermIterator;
import com.indeed.flamdex.fieldcache.UnsortedIntTermDocIterator;
import com.indeed.flamdex.utils.FlamdexUtils;
import org.apache.log4j.Logger;
import org.apache.lucene.index.IndexReader;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
public class LuceneFlamdexReader extends AbstractFlamdexReader {
private static final Logger log = Logger.getLogger(LuceneFlamdexReader.class);
protected final IndexReader reader;
protected final Collection<String> intFields;
protected final Collection<String> stringFields;
public LuceneFlamdexReader(IndexReader reader) {
this(reader, Collections.<String>emptyList(), getStringFieldsFromIndex(reader));
}
public LuceneFlamdexReader(IndexReader reader, String directory) {
this(reader, directory, Collections.<String>emptyList(), getStringFieldsFromIndex(reader));
}
public LuceneFlamdexReader(IndexReader reader, Collection<String> intFields, Collection<String> stringFields) {
this(reader, System.getProperty("java.io.tmpdir"), intFields, stringFields);
}
public LuceneFlamdexReader(IndexReader reader, String directory, Collection<String> intFields, Collection<String> stringFields) {
super(directory, reader.maxDoc());
this.reader = reader;
this.intFields = intFields;
this.stringFields = stringFields;
}
@Override
public Collection<String> getIntFields() {
return intFields;
}
@Override
public Collection<String> getStringFields() {
return stringFields;
}
private static Collection<String> getStringFieldsFromIndex(final IndexReader reader) {
final Collection<String> ret = new HashSet<String>();
// don't like having to use Object and downcast, but in Lucene versions prior to 3 getFieldNames() returns an un-genericized Collection instead of a Collection<String>
for (final Object o : reader.getFieldNames(IndexReader.FieldOption.INDEXED)) {
ret.add((String)o);
}
return ret;
}
@Override
public int getNumDocs() {
return reader.maxDoc();
}
@Override
public DocIdStream getDocIdStream() {
try {
return new LuceneDocIdStream(reader.termDocs());
} catch (IOException e) {
throw LuceneUtils.ioRuntimeException(e);
}
}
@Override
public IntTermIterator getIntTermIterator(final String field) {
return new LuceneIntTermIterator(reader, field);
}
@Override
public StringTermIterator getStringTermIterator(final String field) {
return new LuceneStringTermIterator(reader, field);
}
@Override
public long getIntTotalDocFreq(String field) {
return FlamdexUtils.getIntTotalDocFreq(this, field);
}
@Override
public long getStringTotalDocFreq(String field) {
return FlamdexUtils.getStringTotalDocFreq(this, field);
}
@Override
public Collection<String> getAvailableMetrics() {
return intFields;
}
@Override
protected UnsortedIntTermDocIterator createUnsortedIntTermDocIterator(String field) {
try {
return LuceneUnsortedIntTermDocIterator.create(reader, field);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
@Override
public void close() throws IOException {
reader.close();
}
}