/*
* beymani: Outlier and anamoly detection
* Author: Pranab Ghosh
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package org.beymani.util;
import java.util.ArrayList;
import java.util.List;
/**
* Various sequence matching algorithms
* @author pranab
*
* @param <T>
*/
public class SequenceMatcher<T> {
private List<T> seqData = new ArrayList<T>();
private int maxSize;
private double sim;
private boolean normalize;
private boolean similarity;
private int matchSize;
public SequenceMatcher(boolean normalize, boolean similarity) {
this.normalize = normalize;
this.similarity = similarity;
}
public SequenceMatcher(int maxSize,boolean normalized, boolean similarity) {
this(normalized, similarity);
this.maxSize = maxSize;
}
public void add(T item) {
seqData.add(item);
if (maxSize > 0 && seqData.size() > maxSize) {
seqData.remove(0);
}
}
/**
* Simple positional matching
* @param other
* @return
*/
public double matchCount(SequenceMatcher<T> other) {
matchSize = seqData.size() < other.seqData.size() ? seqData.size() : other.seqData.size();
sim = 0;
for (int i = 0; i < matchSize; ++i) {
if (seqData.get(i).equals(other.seqData.get(i))) {
++sim;
}
}
prepeareResult(matchSize);
return sim;
}
/**
* Positional matching with higher reward for adjacent mactches
* @param other
* @return
*/
public double adjacencyRewardedMatchCount(SequenceMatcher<T> other) {
matchSize = seqData.size() < other.seqData.size() ? seqData.size() : other.seqData.size();
sim = 0;
int adjCount = 1;
for (int i = 0; i < matchSize; ++i) {
if (seqData.get(i).equals(other.seqData.get(i))) {
sim += adjCount;
++adjCount;
} else {
adjCount = 1;
}
}
prepeareResult(matchSize);
return sim;
}
/**
* Positional matching with higher reward for adjacent mactches
* @param other
* @return
*/
public double maxCommonSubSeqMatchCount(SequenceMatcher<T> other) {
int matchSize = seqData.size() < other.seqData.size() ? seqData.size() : other.seqData.size();
sim = 0;
int adjCount = 0;
for (int i = 0; i < matchSize; ++i) {
if (seqData.get(i).equals(other.seqData.get(i))) {
++adjCount;
} else {
if (adjCount > sim) {
sim = adjCount;
}
adjCount = 0;
}
}
prepeareResult(matchSize * (matchSize + 1) / 2);
return sim;
}
/**
* @param scale
*/
private void prepeareResult(int scale) {
if (normalize) {
sim /= scale;
if (!similarity) {
sim = 1.0 - sim;
}
} else {
if (!similarity) {
sim = scale - sim;
}
}
}
}