/*******************************************************************************
* Copyright (c) 1998, 2015 Oracle and/or its affiliates. All rights reserved.
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License v1.0 and Eclipse Distribution License v. 1.0
* which accompanies this distribution.
* The Eclipse Public License is available at http://www.eclipse.org/legal/epl-v10.html
* and the Eclipse Distribution License is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* Contributors:
* Oracle - initial API and implementation from Oracle TopLink
******************************************************************************/
package org.eclipse.persistence.tools.workbench.utility.string;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
/**
 * This partial string comparator engine will strip the common prefixes
 * or suffixes off of the sets of strings before passing them to another
 * engine for matching. The number of strings that must contain the
 * same affix before the affix is stripped can be configured by clients
 * on construction of the engine, either as a fraction of the number of
 * strings or as a fixed count.
 * <p>
 * Each of the two string sets passed to
 * {@link #match(StringHolder[], StringHolder[])} is analyzed and stripped
 * independently of the other.
 */
public class AffixStrippingPartialStringComparatorEngine
    implements PartialStringComparatorEngine
{
    /** the nested engine that matches the stripped strings */
    private final PartialStringComparatorEngine engine;

    /** calculates how many strings must share an affix before it is stripped */
    private final ThresholdCalculator thresholdCalculator;

    /** builds either prefix-stripping or suffix-stripping wrappers */
    private final StringHolderWrapperFactory wrapperFactory;


    // ********** static factory methods **********

    /**
     * Construct a stripping engine that requires every string to have the same
     * prefix before the prefix is stripped from all the strings.
     */
    public static PartialStringComparatorEngine forPrefixStripping(PartialStringComparatorEngine engine) {
        return new AffixStrippingPartialStringComparatorEngine(engine, StringHolderWrapperFactory.PREFIX);
    }

    /**
     * Construct a stripping engine that requires the specified percentage
     * of strings to have the same prefix before the prefix is stripped from
     * the strings.
     *
     * @param factor the required fraction of strings; valid range: 0.0 < factor <= 1.0
     * @throws IllegalArgumentException if the factor is outside the valid range
     */
    public static PartialStringComparatorEngine forPrefixStripping(PartialStringComparatorEngine engine, float factor) {
        return new AffixStrippingPartialStringComparatorEngine(engine, factor, StringHolderWrapperFactory.PREFIX);
    }

    /**
     * Construct a stripping engine that requires the specified number
     * of strings to have the same prefix before the prefix is stripped from
     * the strings.
     *
     * @param count the required number of strings; valid range: count > 1
     * @throws IllegalArgumentException if the count is outside the valid range
     */
    public static PartialStringComparatorEngine forPrefixStripping(PartialStringComparatorEngine engine, int count) {
        return new AffixStrippingPartialStringComparatorEngine(engine, count, StringHolderWrapperFactory.PREFIX);
    }

    /**
     * Construct a stripping engine that requires every string to have the same
     * suffix before the suffix is stripped from all the strings.
     */
    public static PartialStringComparatorEngine forSuffixStripping(PartialStringComparatorEngine engine) {
        return new AffixStrippingPartialStringComparatorEngine(engine, StringHolderWrapperFactory.SUFFIX);
    }

    /**
     * Construct a stripping engine that requires the specified percentage
     * of strings to have the same suffix before the suffix is stripped from
     * the strings.
     *
     * @param factor the required fraction of strings; valid range: 0.0 < factor <= 1.0
     * @throws IllegalArgumentException if the factor is outside the valid range
     */
    public static PartialStringComparatorEngine forSuffixStripping(PartialStringComparatorEngine engine, float factor) {
        return new AffixStrippingPartialStringComparatorEngine(engine, factor, StringHolderWrapperFactory.SUFFIX);
    }

    /**
     * Construct a stripping engine that requires the specified number
     * of strings to have the same suffix before the suffix is stripped from
     * the strings.
     *
     * @param count the required number of strings; valid range: count > 1
     * @throws IllegalArgumentException if the count is outside the valid range
     */
    public static PartialStringComparatorEngine forSuffixStripping(PartialStringComparatorEngine engine, int count) {
        return new AffixStrippingPartialStringComparatorEngine(engine, count, StringHolderWrapperFactory.SUFFIX);
    }


    // ********** constructors **********

    /**
     * Every string must share the affix (i.e. a factor of 1.0).
     */
    private AffixStrippingPartialStringComparatorEngine(PartialStringComparatorEngine engine, StringHolderWrapperFactory wrapperFactory) {
        this(engine, 1.0f, wrapperFactory);
    }

    /**
     * The threshold is a fraction of the number of strings in each set.
     */
    private AffixStrippingPartialStringComparatorEngine(PartialStringComparatorEngine engine, float factor, StringHolderWrapperFactory wrapperFactory) {
        this.engine = engine;
        this.thresholdCalculator = new VariableThresholdCalculator(factor);
        this.wrapperFactory = wrapperFactory;
    }

    /**
     * The threshold is a fixed number of strings, whatever the size of the set.
     */
    private AffixStrippingPartialStringComparatorEngine(PartialStringComparatorEngine engine, int count, StringHolderWrapperFactory wrapperFactory) {
        this.engine = engine;
        this.thresholdCalculator = new FixedThresholdCalculator(count);
        this.wrapperFactory = wrapperFactory;
    }


    // ********** PartialStringComparatorEngine implementation **********

    /**
     * Wrap both sets of string holders (stripping the common affixes within
     * each set), hand the wrappers to the nested engine, and convert the
     * resulting pairs back into pairs of the original string holders.
     * The scores calculated by the nested engine are passed through unchanged.
     */
    public StringHolderPair[] match(StringHolder[] stringHolders1, StringHolder[] stringHolders2) {
        return this.unwrap(this.engine.match(this.wrap(stringHolders1), this.wrap(stringHolders2)));
    }


    // ********** wrapping **********

    /**
     * Wrap each of the specified string holders and, if the set is large
     * enough, strip the common affixes from the wrappers.
     */
    private StringHolder[] wrap(StringHolder[] stringHolders) {
        int len = stringHolders.length;
        StringHolderWrapper[] wrappers = new StringHolderWrapper[len];
        for (int i = len; i-- > 0; ) {
            wrappers[i] = this.wrap(stringHolders[i]);
        }
        if (len > 1) {    // there can't be a common affix with only a single string
            int thresholdCount = this.thresholdCalculator.thresholdCountFor(wrappers);
            if (thresholdCount > 1) {    // there has to be more than a single string with the common affix
                this.stripAffixes(wrappers, thresholdCount);
            }
        }
        return wrappers;
    }

    private StringHolderWrapper wrap(StringHolder stringHolder) {
        return this.wrapperFactory.buildStringHolderWrapper(stringHolder);
    }

    /**
     * Group the specified wrappers by their current "outside" character
     * (the first character after the prefix stripped so far, or the last
     * character before the suffix stripped so far). Every group that holds
     * at least the threshold number of wrappers has its affix extended by
     * one character and is then examined again, recursively, for the next
     * character. The recursion is one level deep per stripped character.
     * <p>
     * The same threshold count, calculated from the size of the complete set,
     * is used at every level.
     */
    private void stripAffixes(StringHolderWrapper[] wrappers, int thresholdCount) {
        // group the wrappers by their first/last letters
        Map charGroups = new HashMap(100);
        for (int i = wrappers.length; i-- > 0; ) {
            StringHolderWrapper wrapper = wrappers[i];
            char c = wrapper.outsideChar();
            if (c == 0) {
                // the char will be 0 if we have exceeded the string's length,
                // so drop it from the entries to be examined;
                // note that a string with an actual NUL character at this
                // position is indistinguishable and is dropped likewise
                continue;    // skip to the next wrapper
            }
            Character bigC = new Character(c);
            ArrayList group = (ArrayList) charGroups.get(bigC);
            if (group == null) {
                group = new ArrayList();
                charGroups.put(bigC, group);
            }
            group.add(wrapper);
        }

        // continue processing any groups that reach the threshold;
        // the groups are disjoint, so the order they are processed in is irrelevant
        for (Iterator stream = charGroups.values().iterator(); stream.hasNext(); ) {
            ArrayList group = (ArrayList) stream.next();
            if (group.size() >= thresholdCount) {
                StringHolderWrapper[] groupArray = (StringHolderWrapper[]) group.toArray(new StringHolderWrapper[group.size()]);
                for (int i = groupArray.length; i-- > 0; ) {
                    groupArray[i].incrementAffixSize();
                }
                // recurse
                this.stripAffixes(groupArray, thresholdCount);
            }
        }
    }


    // ********** unwrapping **********

    /**
     * Build a new array of pairs that hold the original string holders in
     * place of the wrappers returned by the nested engine.
     */
    private StringHolderPair[] unwrap(StringHolderPair[] stringHolderPairs) {
        StringHolderPair[] result = new StringHolderPair[stringHolderPairs.length];
        for (int i = stringHolderPairs.length; i-- > 0; ) {
            StringHolderPair wrappedPair = stringHolderPairs[i];
            result[i] = new SimpleStringHolderPair(
                    this.unwrap(wrappedPair.getStringHolder1()),
                    this.unwrap(wrappedPair.getStringHolder2()),
                    wrappedPair.getScore()
            );
        }
        return result;
    }

    /**
     * Return the original string holder held by the specified wrapper.
     * A pair may hold a null string holder, which is passed through as null;
     * anything else is expected to be one of the wrappers built by this engine.
     */
    private StringHolder unwrap(StringHolder stringHolder) {
        if (stringHolder == null) {
            return null;
        }
        return ((StringHolderWrapper) stringHolder).getStringHolder();
    }

    public String toString() {
        StringBuffer sb = new StringBuffer();
        StringTools.buildSimpleToStringOn(this, sb);
        sb.append(" (");
        this.wrapperFactory.toString(sb);
        sb.append(" : ");
        this.thresholdCalculator.toString(sb);
        sb.append(')');
        return sb.toString();
    }


    // ********** member classes **********

    /**
     * This interface allows us to use a pluggable threshold calculation
     * (e.g. percentage-based, fixed count-based).
     */
    private interface ThresholdCalculator {

        /**
         * Calculate the threshold count for the specified set of string holders.
         */
        int thresholdCountFor(StringHolder[] stringHolders);

        /**
         * Append something useful to the specified string buffer.
         */
        void toString(StringBuffer sb);

    }


    /**
     * The threshold is a fraction of the number of string holders.
     */
    private static class VariableThresholdCalculator implements ThresholdCalculator {
        private final float factor;

        VariableThresholdCalculator(float factor) {
            // test for the valid range (instead of the invalid ranges) so that
            // NaN, which fails every comparison, is rejected too
            if ( ! ((factor > 0.0f) && (factor <= 1.0f))) {
                throw new IllegalArgumentException("valid range: 0.0 < factor <= 1.0");
            }
            this.factor = factor;
        }

        /**
         * The product is truncated (not rounded) to an int.
         */
        public int thresholdCountFor(StringHolder[] stringHolders) {
            return (int) (stringHolders.length * this.factor);
        }

        public void toString(StringBuffer sb) {
            sb.append(this.factor);
        }

    }


    /**
     * The threshold is a fixed count, independent of the number of string holders.
     */
    private static class FixedThresholdCalculator implements ThresholdCalculator {
        private final int count;

        FixedThresholdCalculator(int count) {
            if (count <= 1) {
                throw new IllegalArgumentException("valid range: count > 1");
            }
            this.count = count;
        }

        public int thresholdCountFor(StringHolder[] stringHolders) {
            return this.count;
        }

        public void toString(StringBuffer sb) {
            sb.append(this.count);
        }

    }


    /**
     * This interface allows us to plug in the type of wrapper
     * (prefix-stripping or suffix-stripping) built by the engine.
     */
    private interface StringHolderWrapperFactory {

        /**
         * Build a wrapper for the specified string holder.
         */
        StringHolderWrapper buildStringHolderWrapper(StringHolder stringHolder);

        /**
         * Append something useful to the specified string buffer.
         */
        void toString(StringBuffer sb);

        StringHolderWrapperFactory PREFIX = new StringHolderWrapperFactory() {
            public StringHolderWrapper buildStringHolderWrapper(StringHolder stringHolder) {
                return StringHolderWrapper.forPrefixStripping(stringHolder);
            }
            public void toString(StringBuffer sb) {
                sb.append("PREFIX");
            }
        };

        StringHolderWrapperFactory SUFFIX = new StringHolderWrapperFactory() {
            public StringHolderWrapper buildStringHolderWrapper(StringHolder stringHolder) {
                return StringHolderWrapper.forSuffixStripping(stringHolder);
            }
            public void toString(StringBuffer sb) {
                sb.append("SUFFIX");
            }
        };

    }


    /**
     * This wrapper holds an original string holder and the length of the
     * affix to be stripped from its string. The wrapper's own string is the
     * original string with the affix removed.
     */
    private static class StringHolderWrapper implements StringHolder {
        private final AffixPolicy affixPolicy;
        private final StringHolder stringHolder;
        /** the length of the original string, captured at construction */
        private final int stringHolderStringLength;
        /** the number of characters to strip; starts at zero */
        private int affixLength;
        /** the stripped string; this is lazy-initialized by the nested engine */
        private String string;

        static StringHolderWrapper forPrefixStripping(StringHolder stringHolder) {
            return new StringHolderWrapper(stringHolder, AffixPolicy.PREFIX);
        }

        static StringHolderWrapper forSuffixStripping(StringHolder stringHolder) {
            return new StringHolderWrapper(stringHolder, AffixPolicy.SUFFIX);
        }

        private StringHolderWrapper(StringHolder stringHolder, AffixPolicy affixPolicy) {
            this.stringHolder = stringHolder;
            this.stringHolderStringLength = stringHolder.getString().length();
            this.affixPolicy = affixPolicy;
        }

        /**
         * Return the stripped string. The string is built on the first call
         * and cached; later changes to the affix length are not reflected.
         */
        public String getString() {
            if (this.string == null) {
                this.string = this.buildString();
            }
            return this.string;
        }

        private String buildString() {
            return this.affixPolicy.buildString(this.stringHolder.getString(), this.affixLength);
        }

        /**
         * Return the original, wrapped, string holder.
         */
        StringHolder getStringHolder() {
            return this.stringHolder;
        }

        /**
         * Return the character of the original string that is adjacent to
         * the current affix, i.e. the next character that would be stripped.
         */
        char outsideChar() {
            // return the NULL character if we are past the end of the string
            if (this.affixLength == this.stringHolderStringLength) {
                return 0;
            }
            return this.affixPolicy.outsideChar(this.stringHolder.getString(), this.affixLength);
        }

        void incrementAffixSize() {
            // don't bump the affix length past the end of the string
            if (this.affixLength < this.stringHolderStringLength) {
                this.affixLength++;
            }
        }

        /**
         * The string is always rebuilt here (bypassing the cache), so the
         * current affix length is reflected.
         */
        public String toString() {
            return StringTools.buildToStringFor(this, this.buildString());
        }


        /**
         * This interface defines the differences between stripping
         * a prefix and stripping a suffix.
         */
        private interface AffixPolicy {

            /**
             * Return the specified string with an affix of the specified
             * length removed.
             */
            String buildString(String string, int affixLength);

            /**
             * Return the character of the specified string that is adjacent
             * to an affix of the specified length. The affix length must be
             * less than the length of the string.
             */
            char outsideChar(String string, int affixLength);

            AffixPolicy PREFIX = new AffixPolicy() {
                public String buildString(String string, int affixLength) {
                    return string.substring(affixLength);
                }
                public char outsideChar(String string, int affixLength) {
                    return string.charAt(affixLength);
                }
            };

            AffixPolicy SUFFIX = new AffixPolicy() {
                public String buildString(String string, int affixLength) {
                    return string.substring(0, string.length() - affixLength);
                }
                public char outsideChar(String string, int affixLength) {
                    return string.charAt(string.length() - affixLength - 1);
                }
            };

        }

    }

}