/**
* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
*/
package net.sourceforge.pmd.cpd;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
 * Accumulates duplicate-token matches found between pairs of marks.
 * <p>
 * {@link #collect(int, List)} compares marks pairwise and records each
 * sufficiently long, non-overlapping duplicate run as a {@code Match};
 * {@link #getMatches()} then merges matches that describe the same
 * duplicated run and returns the resulting list.
 */
public class MatchCollector {

    /** Supplies tokens relative to a mark via {@code tokenAt(offset, mark)}. */
    private final MatchAlgorithm ma;

    /** Accepted matches, keyed by {@code Match.getMatchCode()}. */
    private final Map startMap = new HashMap();

    /** Accepted matches bucketed per pair of token source IDs (key -> ArrayList of Match). */
    private final Map fileMap = new HashMap();

    /**
     * @param ma the algorithm object used to look up tokens around a mark
     */
    public MatchCollector(MatchAlgorithm ma) {
        this.ma = ma;
    }

    /**
     * Compares every pair of marks and records the pairs that form a
     * duplicate run of at least {@code minimumLength} tokens.
     *
     * @param minimumLength minimum number of duplicated tokens for a match
     * @param marks list of {@code TokenEntry} marks to compare pairwise;
     *              NOTE(review): the index arithmetic below presumably
     *              relies on the list being ordered by ascending index -
     *              confirm against the caller
     */
    public void collect(int minimumLength, List marks) {
        // first get a pairwise collection of all maximal matches
        for (int i = 0; i < marks.size() - 1; i++) {
            TokenEntry mark1 = (TokenEntry) marks.get(i);
            for (int j = i + 1; j < marks.size(); j++) {
                TokenEntry mark2 = (TokenEntry) marks.get(j);
                int diff = mark1.getIndex() - mark2.getIndex();
                // the two marks are closer together than the minimum match length
                if (-diff < minimumLength) {
                    continue;
                }
                // the tokens just before both marks also match, so this pair
                // is not the start of the duplicated run
                if (hasPreviousDupe(mark1, mark2)) {
                    continue;
                }
                int dupes = countDuplicateTokens(mark1, mark2);
                // false positive check
                if (dupes < minimumLength) {
                    continue;
                }
                // is it still too close together: the run starting at mark1
                // would extend past the start of mark2
                if (diff + dupes >= 1) {
                    continue;
                }
                determineMatch(mark1, mark2, dupes);
            }
        }
    }

    /**
     * Returns the collected matches after grouping.
     * <p>
     * A new list is built on every call, but grouping modifies the mark sets
     * of the stored {@code Match} objects themselves.
     *
     * @return a new list of {@code Match} objects, sorted and grouped
     */
    public List getMatches() {
        ArrayList matchList = new ArrayList(startMap.values());
        groupMatches(matchList);
        return matchList;
    }

    /**
     * A greedy algorithm for determining non-overlapping matches.
     * <p>
     * The candidate is compared against every match already accepted for the
     * same pair of token sources. If it overlaps an existing match that is at
     * least as long, the candidate is dropped; shorter overlapping matches
     * are evicted in favour of the candidate.
     *
     * @param mark1 first mark of the candidate match
     * @param mark2 second mark of the candidate match
     * @param dupes number of duplicated tokens starting at the two marks
     */
    private void determineMatch(TokenEntry mark1, TokenEntry mark2, int dupes) {
        Match match = new Match(dupes, mark1, mark2);
        // A NUL separator keeps distinct ID pairs such as ("ab", "c") and
        // ("a", "bc") from collapsing onto the same bucket key.
        String fileKey = mark1.getTokenSrcID() + "\0" + mark2.getTokenSrcID();
        ArrayList pairMatches = (ArrayList) fileMap.get(fileKey);
        if (pairMatches == null) {
            pairMatches = new ArrayList();
            fileMap.put(fileKey, pairMatches);
        }
        boolean add = true;
        for (int k = 0; k < pairMatches.size(); k++) {
            Match other = (Match) pairMatches.get(k);
            // does the existing match's first run reach past mark1?
            if (other.getFirstMark().getIndex() + other.getTokenCount() - mark1.getIndex()
                    > 0) {
                // ordered: the existing match's second mark precedes mark2
                boolean ordered = other.getSecondMark().getIndex() - mark2.getIndex() < 0;
                if ((ordered && (other.getEndIndex() - mark2.getIndex() > 0))
                        || (!ordered && (match.getEndIndex() - other.getSecondMark().getIndex()) > 0)) {
                    if (other.getTokenCount() >= match.getTokenCount()) {
                        add = false;
                        break;
                    } else {
                        pairMatches.remove(k);
                        // remove(k) shifts the following elements left; step
                        // back so the element now at k is examined as well
                        k--;
                        startMap.remove(other.getMatchCode());
                    }
                }
            }
        }
        if (add) {
            pairMatches.add(match);
            startMap.put(match.getMatchCode(), match);
        }
    }

    /**
     * Sorts the matches and merges neighbouring matches that have the same
     * token count and duplicate each other, then prunes the merged mark set.
     * The list is modified in place: merged-away matches are removed from it.
     *
     * @param matchList list of {@code Match} objects to sort and group
     */
    private void groupMatches(ArrayList matchList) {
        // NOTE(review): relies on Match's natural ordering placing matches
        // with equal token counts next to each other - confirm in Match
        Collections.sort(matchList);
        // match codes of all matches merged into the current match1
        HashSet matchSet = new HashSet();
        // reusable probe key for matchSet lookups
        Match.MatchCode matchCode = new Match.MatchCode();
        // walk from the end of the list towards the front
        for (int i = matchList.size(); i > 1; i--) {
            Match match1 = (Match) matchList.get(i - 1);
            TokenEntry mark1 = (TokenEntry) match1.getMarkSet().iterator().next();
            matchSet.clear();
            matchSet.add(match1.getMatchCode());
            // j always equals i - 1: every merge removes the element just
            // before match1 and decrements i together with j
            for (int j = i - 1; j > 0; j--) {
                Match match2 = (Match) matchList.get(j - 1);
                if (match1.getTokenCount() != match2.getTokenCount()) {
                    break;
                }
                // pick a mark of match2 that is not the very same object as mark1
                TokenEntry mark2 = null;
                for (Iterator iter = match2.getMarkSet().iterator(); iter.hasNext();) {
                    mark2 = (TokenEntry) iter.next();
                    if (mark2 != mark1) {
                        break;
                    }
                }
                // only merge when the two matches cover the same duplicated tokens
                int dupes = countDuplicateTokens(mark1, mark2);
                if (dupes < match1.getTokenCount()) {
                    break;
                }
                matchSet.add(match2.getMatchCode());
                match1.getMarkSet().addAll(match2.getMarkSet());
                matchList.remove(i - 2);
                i--;
            }
            // nothing was merged into match1
            if (matchSet.size() == 1) {
                continue;
            }
            // prune the mark set: drop the later mark of any pair whose match
            // code was not among the merged matches, never going below two marks
            Set pruned = match1.getMarkSet();
            boolean done = false;
            ArrayList a1 = new ArrayList(match1.getMarkSet());
            Collections.sort(a1);
            for (int outer = 0; outer < a1.size() - 1 && !done; outer++) {
                TokenEntry cmark1 = (TokenEntry) a1.get(outer);
                for (int inner = outer + 1; inner < a1.size() && !done; inner++) {
                    TokenEntry cmark2 = (TokenEntry) a1.get(inner);
                    matchCode.setFirst(cmark1.getIndex());
                    matchCode.setSecond(cmark2.getIndex());
                    if (!matchSet.contains(matchCode)) {
                        if (pruned.size() > 2) {
                            pruned.remove(cmark2);
                        }
                        if (pruned.size() == 2) {
                            done = true;
                        }
                    }
                }
            }
        }
    }

    /**
     * Tells whether the tokens immediately before the two marks also match.
     *
     * @param mark1 first mark
     * @param mark2 second mark
     * @return {@code false} when {@code mark1} has index 0; otherwise
     *         {@code true} if the tokens at offset -1 from both marks do not
     *         end a match
     */
    private boolean hasPreviousDupe(TokenEntry mark1, TokenEntry mark2) {
        if (mark1.getIndex() == 0) {
            return false;
        }
        return !matchEnded(ma.tokenAt(-1, mark1), ma.tokenAt(-1, mark2));
    }

    /**
     * Counts how many consecutive tokens, starting at the two marks, match.
     *
     * @param mark1 first mark
     * @param mark2 second mark
     * @return the first offset at which the tokens differ or an EOF token is hit
     */
    private int countDuplicateTokens(TokenEntry mark1, TokenEntry mark2) {
        int index = 0;
        while (!matchEnded(ma.tokenAt(index, mark1), ma.tokenAt(index, mark2))) {
            index++;
        }
        return index;
    }

    /**
     * @return {@code true} if the two tokens have different identifiers or
     *         either one is {@code TokenEntry.EOF}
     */
    private boolean matchEnded(TokenEntry token1, TokenEntry token2) {
        return token1.getIdentifier() != token2.getIdentifier() || token1 == TokenEntry.EOF || token2 == TokenEntry.EOF;
    }
}