/*
* Copyright (C) 2014 Indeed Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the
* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.indeed.flamdex.lucene;
import com.indeed.flamdex.api.IntTermIterator;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.PriorityQueue;
class LuceneIntTermIterator implements IntTermIterator, LuceneTermIterator {
// only handles positive numbers
private static final List<String> intPrefixes;
static {
final List<String> temp = new ArrayList<String>();
temp.add("0");
// TODO: support negative values
for (int i = 1; i <= 9; i++) {
for (long m = 1; m*i > 0; m *= 10) {
temp.add(""+(m*i));
}
}
// should already be sorted but re-sorting in case I screwed up -ahudson
Collections.sort(temp);
intPrefixes = Collections.unmodifiableList(temp);
}
static class Prefix implements Comparable<Prefix> {
final IndexReader reader;
final String field;
final String firstTerm;
final int length;
final char prefix;
TermEnum termEnum;
boolean endOfStream = false;
long val;
public Prefix(final IndexReader reader, final String field, final String firstTerm) {
this.reader = reader;
this.field = field;
this.prefix = firstTerm.charAt(0);
this.length = firstTerm.length();
this.firstTerm = firstTerm;
}
private void reset() {
closeTermEnum();
endOfStream = false;
}
private void closeTermEnum() {
if (termEnum != null) {
try {
termEnum.close();
} catch (IOException e) {
throw LuceneUtils.ioRuntimeException(e);
}
}
termEnum = null;
}
private void initialize() {
try {
termEnum = reader.terms(new Term(field, firstTerm));
} catch (IOException e) {
throw LuceneUtils.ioRuntimeException(e);
}
if (termEnum.term() == null || !field.equals(termEnum.term().field()) || !firstTerm.equals(termEnum.term().text())) {
throw new RuntimeException("Serious bug detected, term was "+termEnum.term()+", expected "+(new Term(field, firstTerm)));
}
val = Long.parseLong(firstTerm);
}
public boolean next() {
if (endOfStream) return false;
if (termEnum == null) {
initialize();
return true; // guaranteed to always work
}
String nextTargetString = Long.toString(val+1);
while (true) {
if (nextTargetString.length() != length || nextTargetString.charAt(0) != prefix) {
closeTermEnum();
endOfStream = true;
return false;
}
final boolean skipSuccess;
try {
skipSuccess = termEnum.skipTo(new Term(field, nextTargetString));
} catch (IOException e) {
throw LuceneUtils.ioRuntimeException(e);
}
if (!skipSuccess ||
termEnum.term() == null ||
!field.equals(termEnum.term().field()) ||
termEnum.term().text().charAt(0) != prefix) {
closeTermEnum();
endOfStream = true;
return false;
}
final String currentTerm = termEnum.term().text();
if (currentTerm.length() == length) {
val = Long.parseLong(termEnum.term().text());
// todo deal with potential parse int errors?
return true;
}
// length is either longer or shorter, either way find the next targetString w/ prefix and length
if (currentTerm.length() > length) {
nextTargetString = Long.toString(Long.parseLong(currentTerm.substring(0, length))+1);
} else {
final StringBuilder sb = new StringBuilder(length);
sb.append(currentTerm);
while (sb.length() != length) sb.append('0');
nextTargetString = sb.toString();
}
}
}
@Override
public int compareTo(final Prefix o) {
if (val < o.val) return -1;
if (val > o.val) return 1;
throw new RuntimeException("Impossible condition occurred");
}
}
private final IndexReader reader;
private final String field;
private PriorityQueue<Prefix> prefixQueue;
private List<Prefix> prefixes;
private long firstTerm = 0;
LuceneIntTermIterator(final IndexReader reader, final String field) {
this.reader = reader;
this.field = field;
}
private List<Prefix> determineAppropriatePrefixes() {
final String[][] firstTerm = new String[19][10];
try {
final TermEnum termEnum = reader.terms(new Term(field, "0"));
while (true) {
final Term term = termEnum.term();
if (term == null || !field.equals(term.field()) || term.text().charAt(0) > '9') break;
final String termText = term.text();
final int x = termText.length()-1;
if (x >= 0 && x < firstTerm.length) {
final int y = termText.charAt(0)-'0';
if (y >= 0 && y < firstTerm[x].length) {
if (firstTerm[x][y] == null) {
firstTerm[x][y] = termText;
}
}
}
if (!termEnum.next()) break;
}
termEnum.close();
} catch (IOException e) {
throw LuceneUtils.ioRuntimeException(e);
}
final List<Prefix> ret = new ArrayList<Prefix>();
for (final String intPrefix : intPrefixes) {
final int x = intPrefix.length()-1;
final int y = intPrefix.charAt(0)-'0';
if (firstTerm[x][y] != null) {
ret.add(new Prefix(reader, field, firstTerm[x][y]));
firstTerm[x][y] = null;
}
}
return ret;
}
private void initialize(final long term) {
if (prefixes == null) {
prefixes = determineAppropriatePrefixes();
} else {
for (final Prefix prefix : prefixes) {
prefix.reset();
}
}
if (prefixes.isEmpty()) {
prefixQueue = new PriorityQueue<Prefix>(1);
return;
}
prefixQueue = new PriorityQueue<Prefix>(prefixes.size());
for (final Prefix prefix : prefixes) {
if (prefix.next()) {
prefixQueue.add(prefix);
}
}
while (!prefixQueue.isEmpty()) {
if (prefixQueue.element().val >= term) break;
final Prefix prefix = prefixQueue.remove();
if (prefix.next()) {
prefixQueue.add(prefix);
}
}
}
@Override
public boolean next() {
if (prefixQueue == null) {
initialize(firstTerm);
} else if (!prefixQueue.isEmpty()) {
final Prefix prefix = prefixQueue.remove();
if (prefix.next()) prefixQueue.add(prefix);
}
return !prefixQueue.isEmpty();
}
@Override
public int docFreq() {
sanityCheck();
return prefixQueue.element().termEnum.docFreq();
}
@Override
public void close() {
// TODO ?
}
@Override
public void reset(long term) {
firstTerm = term;
prefixQueue = null;
}
@Override
public long term() {
sanityCheck();
return prefixQueue.element().val;
}
@Override
public TermEnum termEnum() {
sanityCheck();
return prefixQueue.element().termEnum;
}
private void sanityCheck() {
if (prefixQueue == null || prefixQueue.isEmpty()) {
throw new IllegalArgumentException("Invalid operation given iterators current state");
}
}
}