/*
* DuplicateHelper.java
*
* Copyright (C) 2009-12 by RStudio, Inc.
*
* Unless you have received this program directly from RStudio pursuant
* to the terms of a commercial license agreement with RStudio, then
* this program is licensed to you under the terms of version 3 of the
* GNU Affero General Public License. This program is distributed WITHOUT
* ANY EXPRESS OR IMPLIED WARRANTY, INCLUDING THOSE OF NON-INFRINGEMENT,
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Please refer to the
* AGPL (http://www.gnu.org/licenses/agpl-3.0.txt) for more details.
*
*/
package org.rstudio.core.client;
import org.rstudio.core.client.files.FileSystemItem;
import java.util.*;
public class DuplicateHelper
{
/**
* Provides information about duplicates in the list that was tested.
*/
public static class DuplicationInfo<T>
{
public DuplicationInfo(Comparator<T> comparator)
{
comparator_ = comparator;
}
/**
* For a given value, return how many times it appears in the original
* value list (or 0 if never).
*/
public int occurrences(T value)
{
for (Pair<T, Integer> count : valueCounts_)
if (0 == comparator_.compare(value, count.first))
return count.second;
return 0;
}
/**
* Returns a list, each element of which is a list of indices of elements
* in the original value list whose values are duplicates.
*
* For example:
*
* a = ["foo", "bar", "bar", "bar", "foo"]
* dupeInfo = detectDupes(a)
* dupeInfo.dupes() ==> [ [0,4], [1,2,3] ]
*/
public ArrayList<ArrayList<Integer>> dupes()
{
return dupes_;
}
void addDupeInfo(T value, ArrayList<Integer> indices)
{
valueCounts_.add(new Pair<T, Integer>(value, indices.size()));
if (indices.size() > 1)
dupes_.add(indices);
}
private final Comparator<T> comparator_;
private ArrayList<ArrayList<Integer>> dupes_ =
new ArrayList<ArrayList<Integer>>();
private ArrayList<Pair<T, Integer>> valueCounts_ =
new ArrayList<Pair<T, Integer>>();
}
private static class CaseInsensitiveStringComparator implements Comparator<String>
{
public int compare(String s1, String s2)
{
return s1.compareToIgnoreCase(s2);
}
}
public static <T> int dedupeSortedList(ArrayList<T> list)
{
int removedCount = 0;
for (int i = list.size() - 1; i > 0; i--)
{
T x = list.get(i-1);
T y = list.get(i);
if (((x == null) == (y == null)) &&
((x == null) || x.equals(y)))
{
list.remove(i);
removedCount++;
}
}
return removedCount;
}
/**
* Detect duplicates and calculate frequency information in the given
* list, according to the given comparator's definition of equality.
* The comparator must correctly support not only equality but also
* comparisons, since the duplicate detection algorithm relies on sorting.
*/
public static <T> DuplicationInfo<T> detectDupes(List<T> list,
final Comparator<T> comparator)
{
ArrayList<Pair<Integer, T>> sorted = new ArrayList<Pair<Integer, T>>();
for (int i = 0; i < list.size(); i++)
{
sorted.add(new Pair<Integer, T>(i, list.get(i)));
}
// Sort our copy of the list, so dupes are right next to each other
Collections.sort(sorted, new Comparator<Pair<Integer, T>>()
{
public int compare(Pair<Integer, T> left,
Pair<Integer, T> right)
{
return comparator.compare(left.second, right.second);
}
});
DuplicationInfo<T> dupeInfo = new DuplicationInfo<T>(comparator);
ArrayList<Integer> currentDupes = new ArrayList<Integer>();
T lastSeenValue = null;
for (Pair<Integer, T> value : sorted)
{
if (lastSeenValue == null ||
comparator.compare(lastSeenValue, value.second) != 0)
{
// This value isn't the same as the previous one. If we've got
// dupes in our list, then add them to the results. Then start
// a new list.
if (currentDupes.size() > 0)
dupeInfo.addDupeInfo(lastSeenValue, currentDupes);
currentDupes = new ArrayList<Integer>();
}
// Add ourselves to the current list
currentDupes.add(value.first);
lastSeenValue = value.second;
}
if (currentDupes.size() > 0)
dupeInfo.addDupeInfo(lastSeenValue, currentDupes);
return dupeInfo;
}
/**
* Use Mac OS X style prettifying of paths. Display the filename,
* and if there are multiple entries with the same filename, append
* a disambiguating folder to those filenames.
*/
public static ArrayList<String> getPathLabels(ArrayList<String> paths,
boolean includeExtension)
{
ArrayList<String> labels = new ArrayList<String>();
for (String entry : paths)
{
if (includeExtension)
labels.add(FileSystemItem.getNameFromPath(entry));
else
labels.add(FileSystemItem.createFile(entry).getStem());
}
DuplicationInfo<String> dupeInfo = DuplicateHelper.detectDupes(
labels, new CaseInsensitiveStringComparator());
for (ArrayList<Integer> dupeList : dupeInfo.dupes())
{
fixupDupes(paths, dupeList, labels);
}
dupeInfo = DuplicateHelper.detectDupes(
labels, new CaseInsensitiveStringComparator());
// There are edge cases where we may still end up with dupes at this
// point. In that case, just disambiguate using the full path.
// Example:
// ~/foo/tmp/README
// ~/bar/tmp/README
// ~/foo/README
// ~/bar/README
for (ArrayList<Integer> dupeList : dupeInfo.dupes())
{
for (Integer index : dupeList)
{
FileSystemItem fsi = FileSystemItem.createFile(
paths.get(index));
String name = includeExtension ? fsi.getName() : fsi.getStem();
labels.set(index, disambiguate(name,
fsi.getParentPathString()));
}
}
return labels;
}
private static void fixupDupes(ArrayList<String> fullPaths,
ArrayList<Integer> indices,
ArrayList<String> labels)
{
ArrayList<ArrayList<String>> pathElementListList =
new ArrayList<ArrayList<String>>();
for (Integer index : indices)
pathElementListList.add(toPathElements(fullPaths.get(index)));
while (indices.size() > 0)
{
ArrayList<String> lastPathElements = new ArrayList<String>();
for (int i = 0; i < pathElementListList.size(); i++)
{
ArrayList<String> pathElementList = pathElementListList.get(i);
if (pathElementList.size() == 0)
{
int trueIndex = indices.get(i);
String path = FileSystemItem.createFile(fullPaths.get(trueIndex))
.getParentPathString();
labels.set(trueIndex,
disambiguate(labels.get(trueIndex), path));
indices.remove(i);
pathElementListList.remove(i);
i--;
}
else
{
lastPathElements.add(
pathElementList.remove(pathElementList.size() - 1));
}
}
DuplicationInfo<String> dupeInfo = DuplicateHelper.detectDupes(
lastPathElements,
new CaseInsensitiveStringComparator());
for (int i = 0; i < lastPathElements.size(); i++)
{
if (1 == dupeInfo.occurrences(lastPathElements.get(i)))
{
int trueIndex = indices.get(i);
labels.set(trueIndex, disambiguate(labels.get(trueIndex),
lastPathElements.get(i)));
indices.remove(i);
pathElementListList.remove(i);
lastPathElements.remove(i);
i--;
}
}
assert indices.size() == pathElementListList.size();
}
}
private static String disambiguate(String filename, String disambiguatingPath)
{
return filename + " \u2014 " + disambiguatingPath;
}
private static ArrayList<String> toPathElements(String path)
{
FileSystemItem fsi = FileSystemItem.createFile(path);
return new ArrayList<String>(
Arrays.asList(fsi.getParentPathString().split("/")));
}
}