package org.rubypeople.rdt.internal.core.pmd; import java.io.File; import java.io.IOException; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import org.eclipse.core.resources.IFile; public class CPD { private Map<String, SourceCode> source = new HashMap<String, SourceCode>(); private int minimumTileSize; private Language language = new RubyLanguage(); private MatchAlgorithm matchAlgorithm; private Tokens tokens = new Tokens(); private CPDListener listener = new CPDNullListener(); private Set<String> current = new HashSet<String>(); private CPD(int minimumTileSize) { this.minimumTileSize = minimumTileSize; } public static Iterator<Match> findMatches(List<IFile> files) throws IOException { int minimumTokens = 5; // TODO Make this configurable in a preference page CPD cpd = new CPD(minimumTokens); cpd.add(files); cpd.go(); return cpd.getMatches(); } private void go() { TokenEntry.clearImages(); matchAlgorithm = new MatchAlgorithm(source, tokens, minimumTileSize, listener); matchAlgorithm.findMatches(); } private Iterator<Match> getMatches() { return matchAlgorithm.matches(); } private void add(List<IFile> files) throws IOException { for (IFile file : files) { add(files.size(), file); } } private void add(int fileCount, IFile file) throws IOException { File realFile = file.getLocation().toFile(); // TODO refactor this thing into a separate class String signature = realFile.getName() + '_' + realFile.length(); if (current.contains(signature)) { // skip duplicates return; } current.add(signature); if (!realFile.getCanonicalPath().equals(realFile.getAbsolutePath())) { // skip symlinks return; } listener.addedFile(fileCount, realFile); // FIXME We need to compensate all our token offsets by the end-of-line characters! SourceCode sourceCode = new SourceCode(new SourceCode.FileCodeLoader(realFile)); language.getTokenizer().tokenize(sourceCode, tokens); source.put(sourceCode.getFileName(), sourceCode); } }