package org.apache.lucene.search.concordance.classic.impl;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.search.concordance.classic.AbstractConcordanceWindowCollector;
import org.apache.lucene.search.concordance.classic.ConcordanceWindow;
/**
* Like ConcordanceWindowCollector, but this collector
* doesn't store duplicate windows. Windows are defined as duplicates by
* {@link #buildEqualityKey(ConcordanceWindow, StringBuilder)}.
*/
public class DedupingConcordanceWindowCollector extends AbstractConcordanceWindowCollector {
private final Map<String, ConcordanceWindow> map = new HashMap<String, ConcordanceWindow>();
private final StringBuilder sb = new StringBuilder();
/**
* @param maxHits maximum number of windows to store. This could potentially
* visit lots more windows than maxHits.
*/
public DedupingConcordanceWindowCollector(int maxHits) {
super(maxHits);
}
@Override
public void collect(ConcordanceWindow w) {
if (getHitMax()) {
return;
}
buildEqualityKey(w, sb);
String key = sb.toString();
ConcordanceWindow oldWindow = map.get(key);
if (oldWindow == null) {
//we would have added a new window here
if (getMaxWindows() != AbstractConcordanceWindowCollector.COLLECT_ALL &&
map.size() >= getMaxWindows()) {
setHitMax();
return;
}
oldWindow = w;
} else {
//if the old window existed (i.e. new window is a duplicate)
//keep incrementing the count
oldWindow.incrementCount();
}
map.put(key, oldWindow);
addDocId(w.getUniqueDocID());
}
/**
* number of windows collected
*/
@Override
public int size() {
return map.size();
}
@Override
public List<ConcordanceWindow> getWindows() {
List<ConcordanceWindow> windows = new ArrayList<>();
windows.addAll(map.values());
return windows;
}
/**
* Public for easy overriding. Generate a key to be used to determine
* whether two windows are the same. Some implementations
* might want to lowercase, some might want genuine case folding,
* some might want to strip non-alphanumerics, etc.
* <p>
* If you are overriding this, make sure to call sb.setLength(0)!
*
* @param w ConcordanceWindow
* @param sb reuseable StringBuilder; sb.setLength(0) is called before use!
*/
private void buildEqualityKey(ConcordanceWindow w, StringBuilder sb) {
sb.setLength(0);
sb.append(w.getPre().toLowerCase());
sb.append(">>>");
sb.append(w.getTarget().toLowerCase());
sb.append("<<<");
sb.append(w.getPost().toLowerCase());
}
}