package com.brightgenerous.lucene;
import java.io.Serializable;
import java.lang.ref.SoftReference;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.WeakHashMap;
import java.util.concurrent.ConcurrentHashMap;
import com.brightgenerous.commons.EqualsUtils;
import com.brightgenerous.commons.HashCodeUtils;
import com.brightgenerous.commons.ToStringUtils;
import com.brightgenerous.lang.Args;
import com.brightgenerous.lucene.delegate.LuceneUtility;
import com.brightgenerous.lucene.delegate.StringDistanceDelegater;
@SuppressWarnings("deprecation")
public class LuceneUtils implements Serializable {
private static final long serialVersionUID = -5768707421292489384L;
public static boolean resolved() {
return LuceneUtility.RESOLVED;
}
static class InstanceKey implements Serializable {
private static final long serialVersionUID = -5571606798438371038L;
private final boolean levenstein;
private final boolean jaroWinkler;
public InstanceKey(boolean levenstein, boolean jaroWinkler) {
this.levenstein = levenstein;
this.jaroWinkler = jaroWinkler;
}
@Override
public int hashCode() {
final int multiplier = 37;
int result = 17;
result = (multiplier * result) + (levenstein ? 1 : 0);
result = (multiplier * result) + (jaroWinkler ? 1 : 0);
return result;
}
@Override
public boolean equals(Object obj) {
if (obj == null) {
return false;
}
if (!(obj instanceof InstanceKey)) {
return false;
}
InstanceKey other = (InstanceKey) obj;
if (levenstein != other.levenstein) {
return false;
}
if (jaroWinkler != other.jaroWinkler) {
return false;
}
return true;
}
}
private final boolean levenstein;
private final boolean jaroWinkler;
private LuceneUtils(boolean levenstein, boolean jaroWinkler) {
Args.isTrue(levenstein || jaroWinkler, "(levenstein || jaroWinkler) must be true.");
this.levenstein = levenstein;
this.jaroWinkler = jaroWinkler;
}
public static LuceneUtils get() {
return getInstance(true, true);
}
public static LuceneUtils getLevenstein() {
return getInstance(true, false);
}
public static LuceneUtils getJaroWinkler() {
return getInstance(false, true);
}
private static volatile Map<InstanceKey, SoftReference<LuceneUtils>> cache;
protected static LuceneUtils getInstance(boolean levenstein, boolean jaroWinkler) {
if (cache == null) {
synchronized (LuceneUtils.class) {
if (cache == null) {
cache = new ConcurrentHashMap<>();
}
}
}
InstanceKey ik = new InstanceKey(levenstein, jaroWinkler);
SoftReference<LuceneUtils> sr = cache.get(ik);
LuceneUtils ret;
if (sr != null) {
ret = sr.get();
if (ret != null) {
return ret;
}
Set<InstanceKey> dels = new HashSet<>();
for (Entry<InstanceKey, SoftReference<LuceneUtils>> entry : cache.entrySet()) {
if (entry.getValue().get() == null) {
dels.add(entry.getKey());
}
}
for (InstanceKey del : dels) {
cache.remove(del);
}
}
ret = new LuceneUtils(levenstein, jaroWinkler);
cache.put(ik, new SoftReference<>(ret));
return ret;
}
public String near(String value, Collection<String> objs) {
return near(value, objs, levenstein, jaroWinkler);
}
public String near(String value, String[] objs) {
if (value == null) {
return null;
}
if ((objs == null) || (objs.length < 1)) {
return null;
}
return near(value, Arrays.asList(objs), levenstein, jaroWinkler);
}
public <T> T near(String value, Extracter<T> extracter, Collection<T> objs) {
return near(value, extracter, objs, levenstein, jaroWinkler);
}
public <T> T near(String value, Extracter<T> extracter, T[] objs) {
if (value == null) {
return null;
}
if ((objs == null) || (objs.length < 1)) {
return null;
}
return near(value, extracter, Arrays.asList(objs), levenstein, jaroWinkler);
}
public String far(String value, Collection<String> objs) {
return far(value, objs, levenstein, jaroWinkler);
}
public String far(String value, String[] objs) {
if (value == null) {
return null;
}
if ((objs == null) || (objs.length < 1)) {
return null;
}
return far(value, Arrays.asList(objs), levenstein, jaroWinkler);
}
public <T> T far(String value, Extracter<T> extracter, Collection<T> objs) {
return far(value, extracter, objs, levenstein, jaroWinkler);
}
public <T> T far(String value, Extracter<T> extracter, T[] objs) {
if (value == null) {
return null;
}
if ((objs == null) || (objs.length < 1)) {
return null;
}
return far(value, extracter, Arrays.asList(objs), levenstein, jaroWinkler);
}
public static String nearLevenstein(String value, Collection<String> objs) {
return near(value, objs, true, false);
}
public static String nearLevenstein(String value, String[] objs) {
if (value == null) {
return null;
}
if ((objs == null) || (objs.length < 1)) {
return null;
}
return nearLevenstein(value, Arrays.asList(objs));
}
public static String nearJaroWinkler(String value, Collection<String> objs) {
return near(value, objs, false, true);
}
public static String nearJaroWinkler(String value, String[] objs) {
if (value == null) {
return null;
}
if ((objs == null) || (objs.length < 1)) {
return null;
}
return nearJaroWinkler(value, Arrays.asList(objs));
}
public static String nearBoth(String value, Collection<String> objs) {
return near(value, objs, true, true);
}
public static String nearBoth(String value, String[] objs) {
if (value == null) {
return null;
}
if ((objs == null) || (objs.length < 1)) {
return null;
}
return nearBoth(value, Arrays.asList(objs));
}
private static String near(String value, Collection<String> objs, boolean levenstein,
boolean jaroWinkler) {
if (value == null) {
return null;
}
if ((objs == null) || objs.isEmpty()) {
return null;
}
double dist = Double.MIN_VALUE;
String ret = null;
for (String obj : objs) {
double d = getDistance(obj, value, levenstein, jaroWinkler);
if (1.0d <= d) {
return obj;
}
if (dist < d) {
dist = d;
ret = obj;
}
}
return ret;
}
public static <T> T nearLevenstein(String value, Extracter<T> extracter, Collection<T> objs) {
return near(value, extracter, objs, true, false);
}
public static <T> T nearLevenstein(String value, Extracter<T> extracter, T[] objs) {
if (value == null) {
return null;
}
if ((objs == null) || (objs.length < 1)) {
return null;
}
return nearLevenstein(value, extracter, Arrays.asList(objs));
}
public static <T> T nearJaroWinkler(String value, Extracter<T> extracter, Collection<T> objs) {
return near(value, extracter, objs, false, true);
}
public static <T> T nearJaroWinkler(String value, Extracter<T> extracter, T[] objs) {
if (value == null) {
return null;
}
if ((objs == null) || (objs.length < 1)) {
return null;
}
return nearJaroWinkler(value, extracter, Arrays.asList(objs));
}
public static <T> T nearBoth(String value, Extracter<T> extracter, Collection<T> objs) {
return near(value, extracter, objs, true, true);
}
public static <T> T nearBoth(String value, Extracter<T> extracter, T[] objs) {
if (value == null) {
return null;
}
if ((objs == null) || (objs.length < 1)) {
return null;
}
return nearBoth(value, extracter, Arrays.asList(objs));
}
private static <T> T near(String value, Extracter<T> extracter, Collection<T> objs,
boolean levenstein, boolean jaroWinkler) {
if (value == null) {
return null;
}
if ((objs == null) || objs.isEmpty()) {
return null;
}
if (extracter == null) {
extracter = getDefaultExtracter();
}
double dist = Double.MIN_VALUE;
T ret = null;
for (T obj : objs) {
double d = getDistance(extracter.extract(obj), value, levenstein, jaroWinkler);
if (1.0d <= d) {
return obj;
}
if (dist < d) {
dist = d;
ret = obj;
}
}
return ret;
}
public static String farLevenstein(String value, Collection<String> objs) {
return far(value, objs, true, false);
}
public static String farLevenstein(String value, String[] objs) {
if (value == null) {
return null;
}
if ((objs == null) || (objs.length < 1)) {
return null;
}
return farLevenstein(value, Arrays.asList(objs));
}
public static String farJaroWinkler(String value, Collection<String> objs) {
return far(value, objs, false, true);
}
public static String farJaroWinkler(String value, String[] objs) {
if (value == null) {
return null;
}
if ((objs == null) || (objs.length < 1)) {
return null;
}
return farJaroWinkler(value, Arrays.asList(objs));
}
public static String farBoth(String value, Collection<String> objs) {
return far(value, objs, true, true);
}
public static String farBoth(String value, String[] objs) {
if (value == null) {
return null;
}
if ((objs == null) || (objs.length < 1)) {
return null;
}
return farBoth(value, Arrays.asList(objs));
}
private static String far(String value, Collection<String> objs, boolean levenstein,
boolean jaroWinkler) {
if (value == null) {
return null;
}
if ((objs == null) || objs.isEmpty()) {
return null;
}
double dist = Double.MAX_VALUE;
String ret = null;
for (String obj : objs) {
double d = getDistance(obj, value, levenstein, jaroWinkler);
if (d <= 0.0d) {
return obj;
}
if (d < dist) {
dist = d;
ret = obj;
}
}
return ret;
}
public static <T> T farLevenstein(String value, Extracter<T> extracter, Collection<T> objs) {
return far(value, extracter, objs, true, false);
}
public static <T> T farLevenstein(String value, Extracter<T> extracter, T[] objs) {
if (value == null) {
return null;
}
if ((objs == null) || (objs.length < 1)) {
return null;
}
return farLevenstein(value, extracter, Arrays.asList(objs));
}
public static <T> T farJaroWinkler(String value, Extracter<T> extracter, Collection<T> objs) {
return far(value, extracter, objs, false, true);
}
public static <T> T farJaroWinkler(String value, Extracter<T> extracter, T[] objs) {
if (value == null) {
return null;
}
if ((objs == null) || (objs.length < 1)) {
return null;
}
return farJaroWinkler(value, extracter, Arrays.asList(objs));
}
public static <T> T farBoth(String value, Extracter<T> extracter, Collection<T> objs) {
return far(value, extracter, objs, true, true);
}
public static <T> T farBoth(String value, Extracter<T> extracter, T[] objs) {
if (value == null) {
return null;
}
if ((objs == null) || (objs.length < 1)) {
return null;
}
return farBoth(value, extracter, Arrays.asList(objs));
}
private static <T> T far(String value, Extracter<T> extracter, Collection<T> objs,
boolean levenstein, boolean jaroWinkler) {
if (value == null) {
return null;
}
if ((objs == null) || objs.isEmpty()) {
return null;
}
if (extracter == null) {
extracter = getDefaultExtracter();
}
double dist = Double.MAX_VALUE;
T ret = null;
for (T obj : objs) {
double d = getDistance(extracter.extract(obj), value, levenstein, jaroWinkler);
if (d <= 0.0d) {
return obj;
}
if (d < dist) {
dist = d;
ret = obj;
}
}
return ret;
}
public Comparator<String> comparator(String value) {
return comparator(value, levenstein, jaroWinkler);
}
public static Comparator<String> comparatorLevenstein(String value) {
return comparator(value, true, false);
}
public static Comparator<String> comparatorJaroWinkler(String value) {
return comparator(value, false, true);
}
public static Comparator<String> comparatorBoth(String value) {
return comparator(value, true, true);
}
private static Comparator<String> comparator(String value, boolean levenstein,
boolean jaroWinkler) {
Args.notNull(value, "value");
Args.isTrue(levenstein || jaroWinkler, "(levenstein || jaroWinkler) must be true.");
return new StringComparator(value, levenstein, jaroWinkler);
}
public <T> Comparator<T> comparator(String value, Extracter<T> extracter) {
return comparator(value, extracter, levenstein, jaroWinkler);
}
public static <T> Comparator<T> comparatorLevenstein(String value, Extracter<T> extracter) {
return comparator(value, extracter, true, false);
}
public static <T> Comparator<T> comparatorJaroWinkler(String value, Extracter<T> extracter) {
return comparator(value, extracter, false, true);
}
public static <T> Comparator<T> comparatorBoth(String value, Extracter<T> extracter) {
return comparator(value, extracter, true, true);
}
private static <T> Comparator<T> comparator(String value, Extracter<T> extracter,
boolean levenstein, boolean jaroWinkler) {
Args.notNull(value, "value");
Args.isTrue(levenstein || jaroWinkler, "(levenstein || jaroWinkler) must be true.");
if (extracter == null) {
extracter = getDefaultExtracter();
}
return new ExtractComparator<>(value, extracter, levenstein, jaroWinkler);
}
private static <T> Extracter<T> getDefaultExtracter() {
return new Extracter<T>() {
@Override
public String extract(T obj) {
if (obj == null) {
return null;
}
if (obj instanceof String) {
return (String) obj;
}
return String.valueOf(obj);
}
};
}
private static volatile StringDistanceDelegater ld;
private static volatile StringDistanceDelegater jd;
private static double getDistance(String obj, String value, boolean levenstein,
boolean jaroWinkler) {
if ((levenstein && (ld == null)) || (jaroWinkler && (jd == null))) {
synchronized (LuceneUtils.class) {
if (levenstein && (ld == null)) {
ld = LuceneUtility.createLevensteinDistance();
}
if (jaroWinkler && (jd == null)) {
jd = LuceneUtility.createJaroWinklerDistance();
}
}
}
if (levenstein && jaroWinkler) {
return Math.pow(ld.getDistance(obj, value), 2)
+ Math.pow(jd.getDistance(obj, value), 2);
}
if (levenstein) {
return ld.getDistance(obj, value);
}
if (jaroWinkler) {
return jd.getDistance(obj, value);
}
throw new IllegalStateException();
}
static class StringComparator implements Comparator<String> {
private final String value;
private final boolean levenstein;
private final boolean jaroWinkler;
private final Map<String, Double> caches = Collections
.synchronizedMap(new WeakHashMap<String, Double>());
public StringComparator(String value, boolean levenstein, boolean jaroWinkler) {
this.value = value;
this.levenstein = levenstein;
this.jaroWinkler = jaroWinkler;
}
@Override
public int compare(String o1, String o2) {
double d1 = distance(o1);
double d2 = distance(o2);
// between 0 ... 1.0, correct closer to 1.0
if (d1 == d2) {
if (o1 == o2) {
return 0;
}
if (o1 == null) {
return -1;
}
if (o2 == null) {
return 1;
}
return o1.compareTo(o2);
}
if (d1 < d2) {
return 1;
}
return -1;
}
private double distance(String obj) {
Double ret = caches.get(obj);
if (ret == null) {
synchronized (caches) {
ret = caches.get(obj);
if (ret == null) {
ret = Double.valueOf(getDistance(obj, value, levenstein, jaroWinkler));
caches.put(obj, ret);
}
}
}
return ret.doubleValue();
}
}
static class ExtractComparator<T> implements Comparator<T> {
private final String value;
private final Extracter<T> extracter;
private final boolean levenstein;
private final boolean jaroWinkler;
private final Map<T, Holder> caches = Collections
.synchronizedMap(new WeakHashMap<T, Holder>());
public ExtractComparator(String value, Extracter<T> extracter, boolean levenstein,
boolean jaroWinkler) {
this.value = value;
this.extracter = extracter;
this.levenstein = levenstein;
this.jaroWinkler = jaroWinkler;
}
@Override
public int compare(T o1, T o2) {
Holder h1 = distance(o1);
Holder h2 = distance(o2);
// between 0 ... 1.0, correct closer to 1.0
if (h1.distance == h2.distance) {
if (h1.value == h2.value) {
return 0;
}
if (h1.value == null) {
return -1;
}
if (h2.value == null) {
return 1;
}
return h1.value.compareTo(h2.value);
}
if (h1.distance < h2.distance) {
return 1;
}
return -1;
}
private Holder distance(T obj) {
Holder ret = caches.get(obj);
if (ret == null) {
synchronized (caches) {
ret = caches.get(obj);
if (ret == null) {
ret = new Holder();
ret.value = extracter.extract(obj);
ret.distance = getDistance(ret.value, value, levenstein, jaroWinkler);
caches.put(obj, ret);
}
}
}
return ret;
}
static class Holder {
double distance;
String value;
}
}
@Override
public int hashCode() {
if (HashCodeUtils.resolved()) {
return HashCodeUtils.hashCodeAlt(null, this);
}
return super.hashCode();
}
@Override
public boolean equals(Object obj) {
if (EqualsUtils.resolved()) {
return EqualsUtils.equalsAlt(null, this, obj);
}
return super.equals(obj);
}
@Override
public String toString() {
if (ToStringUtils.resolved()) {
return ToStringUtils.toStringAlt(this);
}
return super.toString();
}
}