/**
* Copyright 2014, Emory University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.emory.clir.clearnlp.util;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Random;
import java.util.Set;
import java.util.regex.Pattern;
import edu.emory.clir.clearnlp.collection.list.FloatArrayList;
import edu.emory.clir.clearnlp.collection.pair.DoubleIntPair;
import edu.emory.clir.clearnlp.collection.pair.Pair;
/**
* @since 3.0.0
* @author Jinho D. Choi ({@code jinho.choi@emory.edu})
*/
public class DSUtils
{
	/** Static utility class; not meant to be instantiated. */
	private DSUtils() {}

	/**
	 * Splits a string and collects its distinct tokens.
	 * @param s the string to split.
	 * @param splitter the pattern whose matches separate the tokens.
	 * @return the set of trimmed tokens in {@code s}; tokens that are empty after trimming are dropped.
	 */
	static public Set<String> getBagOfWords(String s, Pattern splitter)
	{
		Set<String> set = new HashSet<>();

		for (String t : splitter.split(s))
		{
			t = t.trim();
			if (!t.isEmpty()) set.add(t);
		}

		return set;
	}

	/**
	 * Collects the distinct tokens of every line read from the input-stream,
	 * using {@link #getBagOfWords(String, Pattern)} for each line.
	 * The stream is read through {@code IOUtils.createBufferedReader(in)} and is not closed by this method.
	 * If an {@link IOException} occurs, its stack trace is printed and the tokens collected so far are returned.
	 * @param in the input-stream to read lines from.
	 * @param splitter the pattern whose matches separate the tokens.
	 */
	static public Set<String> getBagOfWords(InputStream in, Pattern splitter)
	{
		BufferedReader reader = IOUtils.createBufferedReader(in);
		Set<String> set = new HashSet<>();
		String line;

		try
		{
			while ((line = reader.readLine()) != null)
				set.addAll(getBagOfWords(line, splitter));
		}
		catch (IOException e) {e.printStackTrace();}

		return set;
	}

	/** Calls {@link #createStringHashSet(InputStream, boolean, boolean)}, where {@code trim = true} and {@code decap = false}. */
	static public Set<String> createStringHashSet(InputStream in)
	{
		return createStringHashSet(in, true, false);
	}

	/**
	 * Reads one entry per line into a set.
	 * The stream is not closed by this method.
	 * If an {@link IOException} occurs, its stack trace is printed and the entries read so far are returned.
	 * @param in internally wrapped by {@code new BufferedReader(new InputStreamReader(in))}.
	 * The file that the input-stream is created from consists of one entry per line.
	 * @param trim if {@code true}, each line is trimmed and lines that become empty are skipped.
	 * @param decap if {@code true}, each line is passed through {@code StringUtils.toLowerCase} before it is added.
	 */
	static public Set<String> createStringHashSet(InputStream in, boolean trim, boolean decap)
	{
		BufferedReader reader = new BufferedReader(new InputStreamReader(in));
		Set<String> set = new HashSet<>();
		String line;

		try
		{
			while ((line = reader.readLine()) != null)
			{
				if (trim)
				{
					line = line.trim();
					if (line.isEmpty()) continue;
				}

				if (decap)
					line = StringUtils.toLowerCase(line);

				set.add(line);
			}
		}
		catch (IOException e) {e.printStackTrace();}

		return set;
	}

	/** Calls {@link #createStringHashMap(InputStream, CharTokenizer, boolean)}, where {@code trim = true}. */
	static public Map<String,String> createStringHashMap(InputStream in, CharTokenizer tokenizer)
	{
		return createStringHashMap(in, tokenizer, true);
	}

	/**
	 * Reads one key-value pair per line into a map.
	 * The first token of a line is the key and the second token is the value;
	 * a later line with the same key overwrites the earlier value.
	 * The stream is not closed by this method.
	 * If an {@link IOException} occurs, its stack trace is printed and the pairs read so far are returned.
	 * NOTE(review): a line for which the tokenizer yields fewer than two tokens fails on the array access below
	 * (e.g., a blank line when {@code trim = false}, depending on the tokenizer) - confirm that inputs are well-formed.
	 * @param in internally wrapped by {@code new BufferedReader(new InputStreamReader(in))}.
	 * The file that the input-stream is created from consists of one entry per line ("key"<delim>"value").
	 * @param tokenizer splits each line into its key and value.
	 * @param trim if {@code true}, each line is trimmed and lines that become empty are skipped.
	 */
	static public Map<String,String> createStringHashMap(InputStream in, CharTokenizer tokenizer, boolean trim)
	{
		BufferedReader reader = new BufferedReader(new InputStreamReader(in));
		Map<String,String> map = new HashMap<>();
		String[] t;
		String line;

		try
		{
			while ((line = reader.readLine()) != null)
			{
				if (trim)
				{
					line = line.trim();
					if (line.isEmpty()) continue;
				}

				t = tokenizer.tokenize(line);
				map.put(t[0], t[1]);
			}
		}
		catch (IOException e) {e.printStackTrace();}

		return map;
	}

	/** Sorts the specific list in place, in the reverse of its natural order. */
	static public <T extends Comparable<? extends T>>void sortReverseOrder(List<T> list)
	{
		Collections.sort(list, Collections.reverseOrder());
	}

	/** Sorts the specific array in place, in the reverse of its natural order. */
	static public <T extends Comparable<? extends T>>void sortReverseOrder(T[] array)
	{
		Arrays.sort(array, Collections.reverseOrder());
	}

	/** @return {@code true} if the two collections share at least one item. */
	static public <T>boolean hasIntersection(Collection<T> col1, Collection<T> col2)
	{
		Collection<T> smaller = col1, larger = col2;

		// Iterate over the smaller collection and probe the larger one.
		if (col2.size() < col1.size())
		{
			smaller = col2;
			larger  = col1;
		}

		for (T item : smaller)
		{
			if (larger.contains(item))
				return true;
		}

		return false;
	}

	/**
	 * @return a set containing the string values ({@code toString()}) of all public static fields of this class.
	 * Instance fields are skipped because there is no instance to read them from,
	 * and static fields whose value is {@code null} are skipped as well.
	 * If a field cannot be read, the stack trace is printed and the remaining fields are still processed.
	 */
	static public Set<String> getFieldSet(Class<?> cs)
	{
		Set<String> set = new HashSet<>();
		Object value;

		for (Field f : cs.getFields())
		{
			if (!java.lang.reflect.Modifier.isStatic(f.getModifiers()))
				continue;

			try
			{
				// The target object is ignored for static fields.
				value = f.get(null);
				if (value != null) set.add(value.toString());
			}
			catch (IllegalArgumentException e) {e.printStackTrace();}
			catch (IllegalAccessException e)   {e.printStackTrace();}
		}

		return set;
	}

	/** @return the index'th item if exists; otherwise, {@code null}. */
	static public <T>T get(List<T> list, int index)
	{
		return isRange(list, index) ? list.get(index) : null;
	}

	/** @return the index'th item if exists; otherwise, {@code null}. */
	static public <T>T get(T[] array, int index)
	{
		return isRange(array, index) ? array[index] : null;
	}

	/** @return the last item in the list if exists; otherwise, {@code null}. */
	static public <T>T getLast(List<T> list)
	{
		return list.isEmpty() ? null : list.get(list.size()-1);
	}

	/** @return {@code true} if {@code 0 <= index < list.size()}. */
	static public <T>boolean isRange(List<T> list, int index)
	{
		return 0 <= index && index < list.size();
	}

	/** @return {@code true} if {@code 0 <= index < array.length}. */
	static public <T>boolean isRange(T[] array, int index)
	{
		return 0 <= index && index < array.length;
	}

	/**
	 * Generates {@code beginIndex, beginIndex+gap, beginIndex+2*gap, ...} up to (not including) {@code endIndex}.
	 * @param beginIndex inclusive
	 * @param endIndex exclusive
	 * @param gap the step between consecutive values; negative to count down.
	 * @return the generated values, or an empty array if {@code beginIndex == endIndex}
	 * or if {@code gap} points away from {@code endIndex}.
	 * @throws IllegalArgumentException if {@code gap == 0} while {@code beginIndex != endIndex},
	 * or if the range would contain more than {@link Integer#MAX_VALUE} values.
	 */
	static public int[] range(int beginIndex, int endIndex, int gap)
	{
		// Use long arithmetic so that extreme indices cannot overflow.
		long span = (long)endIndex - beginIndex;

		if (span == 0)
			return new int[0];

		if (gap == 0)
			throw new IllegalArgumentException("gap must not be 0 for a non-empty range: beginIndex="+beginIndex+", endIndex="+endIndex);

		// The gap moves away from endIndex, so no value is ever in range.
		if ((span < 0) != (gap < 0))
			return new int[0];

		long width  = Math.abs(span);
		long stride = Math.abs((long)gap);
		long size   = (width + stride - 1) / stride;	// ceil(width / stride)

		if (size > Integer.MAX_VALUE)
			throw new IllegalArgumentException("range is too large: beginIndex="+beginIndex+", endIndex="+endIndex+", gap="+gap);

		int[] array = new int[(int)size];

		// Every value lies between beginIndex and endIndex, so the cast back to int is safe.
		for (int j=0; j<array.length; j++)
			array[j] = (int)(beginIndex + j * (long)gap);

		return array;
	}

	/** Calls {@link #range(int, int, int)}, where {@code beginIndex = 0}, {@code endIndex = size}, and {@code gap = 1}. */
	static public int[] range(int size)
	{
		return range(0, size, 1);
	}

	/** Exchanges the items at the two indices of the specific array. */
	static public void swap(int[] array, int index0, int index1)
	{
		int tmp = array[index0];
		array[index0] = array[index1];
		array[index1] = tmp;
	}

	/** Exchanges the items at the two indices of the specific list. */
	static public <T>void swap(List<T> list, int index0, int index1)
	{
		T tmp = list.get(index0);
		list.set(index0, list.get(index1));
		list.set(index1, tmp);
	}

	/** Calls {@link #shuffle(int[], Random, int)}, where {@code lastIndex = array.length}. */
	static public void shuffle(int[] array, Random rand)
	{
		shuffle(array, rand, array.length);
	}

	/** Calls {@link #shuffle(List, Random, int)}, where {@code lastIndex = list.size()}. */
	static public <T>void shuffle(List<T> list, Random rand)
	{
		shuffle(list, rand, list.size());
	}

	/**
	 * A slightly modified version of Durstenfeld's shuffle algorithm.
	 * Each position {@code i} is swapped with a randomly chosen later position in {@code (i, lastIndex)}.
	 * NOTE(review): position {@code i} itself is never a candidate, so an item is never swapped with itself
	 * and not every permutation can be produced (e.g., two items are always swapped) - confirm this is intended.
	 * @param lastIndex shuffle up to this index (exclusive, cannot be greater than the length of the array).
	 */
	static public void shuffle(int[] array, Random rand, int lastIndex)
	{
		int i, j, size = lastIndex - 1;

		for (i=0; i<size; i++)
		{
			j = rand.nextInt(size - i) + i + 1;
			swap(array, i, j);
		}
	}

	/**
	 * A slightly modified version of Durstenfeld's shuffle algorithm.
	 * Each position {@code i} is swapped with a randomly chosen later position in {@code (i, lastIndex)}.
	 * NOTE(review): position {@code i} itself is never a candidate, so an item is never swapped with itself
	 * and not every permutation can be produced (e.g., two items are always swapped) - confirm this is intended.
	 * @param lastIndex shuffle up to this index (exclusive, cannot be greater than the size of the list).
	 */
	static public <T>void shuffle(List<T> list, Random rand, int lastIndex)
	{
		int i, j, size = lastIndex - 1;

		for (i=0; i<size; i++)
		{
			j = rand.nextInt(size - i) + i + 1;
			swap(list, i, j);
		}
	}

	/** Adds all items in the specific array to the specific list. */
	static public void addAll(List<String> list, String[] array)
	{
		Collections.addAll(list, array);
	}

	/** Adds the specific value to the end of the specific list {@code n} times. */
	static public void append(FloatArrayList list, float value, int n)
	{
		int i;

		for (i=0; i<n; i++)
			list.add(value);
	}

	/** Removes the last item in the list if exists; otherwise, does nothing. */
	static public <T>void removeLast(List<T> list)
	{
		if (!list.isEmpty()) list.remove(list.size()-1);
	}

	/**
	 * @return the index of the maximum value in the array; the first such index if the maximum occurs more than once.
	 * @throws ArrayIndexOutOfBoundsException if the array is empty.
	 */
	static public int maxIndex(double[] array)
	{
		int i, size = array.length, maxIndex = 0;
		double maxValue = array[maxIndex];

		for (i=1; i<size; i++)
		{
			if (maxValue < array[i])
			{
				maxIndex = i;
				maxValue = array[maxIndex];
			}
		}

		return maxIndex;
	}

	/**
	 * @param indices the positions in {@code array} to consider.
	 * @return the position (taken from {@code indices}) that holds the maximum value among the considered positions;
	 * the one listed first if the maximum occurs more than once.
	 * @throws ArrayIndexOutOfBoundsException if {@code indices} is empty.
	 */
	static public int maxIndex(double[] array, int[] indices)
	{
		int i, j, size = indices.length, maxIndex = indices[0];
		double maxValue = array[maxIndex];

		for (j=1; j<size; j++)
		{
			i = indices[j];

			if (maxValue < array[i])
			{
				maxIndex = i;
				maxValue = array[i];
			}
		}

		return maxIndex;
	}

	/**
	 * @return a pair of (value, index) pairs holding the largest and the second largest values in the array, in that order;
	 * on ties, the value encountered first is ranked higher.
	 * @throws ArrayIndexOutOfBoundsException if the array contains fewer than two values.
	 */
	static public Pair<DoubleIntPair,DoubleIntPair> top2(double[] array)
	{
		int i, size = array.length;
		DoubleIntPair fst, snd;

		if (array[0] < array[1])
		{
			fst = toDoubleIntPair(array, 1);
			snd = toDoubleIntPair(array, 0);
		}
		else
		{
			fst = toDoubleIntPair(array, 0);
			snd = toDoubleIntPair(array, 1);
		}

		for (i=2; i<size; i++)
			updateTop2(fst, snd, array[i], i);

		return new Pair<DoubleIntPair,DoubleIntPair>(fst, snd);
	}

	/**
	 * @param include the positions in {@code array} to consider.
	 * @return a pair of (value, index) pairs holding the largest and the second largest values among the considered positions,
	 * in that order; on ties, the value encountered first is ranked higher.
	 * @throws ArrayIndexOutOfBoundsException if {@code include} contains fewer than two positions.
	 */
	static public Pair<DoubleIntPair,DoubleIntPair> top2(double[] array, int[] include)
	{
		int i, j, size = include.length;
		DoubleIntPair fst, snd;

		if (array[include[0]] < array[include[1]])
		{
			fst = toDoubleIntPair(array, include[1]);
			snd = toDoubleIntPair(array, include[0]);
		}
		else
		{
			fst = toDoubleIntPair(array, include[0]);
			snd = toDoubleIntPair(array, include[1]);
		}

		for (j=2; j<size; j++)
		{
			i = include[j];
			updateTop2(fst, snd, array[i], i);
		}

		return new Pair<DoubleIntPair,DoubleIntPair>(fst, snd);
	}

	/** Updates the current best (fst) and second best (snd) pairs with a new candidate value and its index. */
	static private void updateTop2(DoubleIntPair fst, DoubleIntPair snd, double value, int index)
	{
		if (fst.d < value)
		{
			// The previous best is demoted to second best.
			snd.set(fst.d, fst.i);
			fst.set(value, index);
		}
		else if (snd.d < value)
			snd.set(value, index);
	}

	/** @return the pair of (the index'th value in the array, the index). */
	static public DoubleIntPair toDoubleIntPair(double[] array, int index)
	{
		return new DoubleIntPair(array[index], index);
	}

	/** @return an array of the specific size, where each slot holds a new empty {@link ArrayList}. */
	static public <T>List<?>[] createEmptyListArray(int size)
	{
		List<?>[] array = new ArrayList<?>[size];

		for (int i=0; i<size; i++)
			array[i] = new ArrayList<T>();

		return array;
	}

	/**
	 * @param ascending if {@code true}, each queue uses the natural order; otherwise, the reverse of the natural order.
	 * @return an array of the specific size, where each slot holds a new empty {@link PriorityQueue}.
	 */
	static public <T>PriorityQueue<?>[] createEmptyPriorityQueueArray(int size, boolean ascending)
	{
		PriorityQueue<?>[] queue = new PriorityQueue<?>[size];

		for (int i=0; i<size; i++)
			queue[i] = ascending ? new PriorityQueue<>() : new PriorityQueue<>(Collections.reverseOrder());

		return queue;
	}

	/** @return a new mutable {@link ArrayList} containing the specific items in the given order. */
	@SafeVarargs
	static public <T>List<T> toArrayList(T... items)
	{
		// The varargs array is only read, never stored or exposed.
		List<T> list = new ArrayList<>(items.length);
		Collections.addAll(list, items);
		return list;
	}

	/** @return a new mutable {@link HashSet} containing the specific items. */
	@SafeVarargs
	static public <T>Set<T> toHashSet(T... items)
	{
		// The varargs array is only read, never stored or exposed.
		Set<T> set = new HashSet<>(items.length);
		Collections.addAll(set, items);
		return set;
	}

	/** @return a new set containing all items in the specific sets. */
	static public <T>Set<T> merge(List<Set<T>> sets)
	{
		Set<T> merge = new HashSet<>();
		for (Set<T> set : sets) merge.addAll(set);
		return merge;
	}

	/** @return a new array containing all strings in the specific collection, in its iteration order. */
	static public String[] toArray(Collection<String> col)
	{
		return col.toArray(new String[col.size()]);
	}

	/** @return a new list containing the items in {@code source} that are not in {@code remove}; neither argument is modified. */
	static public <T>List<T> removeAll(Collection<T> source, Collection<T> remove)
	{
		List<T> list = new ArrayList<>(source);
		list.removeAll(remove);
		return list;
	}

	/** @return true if s2 is a subset of s1. */
	static public <T>boolean isSubset(Collection<T> s1, Collection<T> s2)
	{
		return s1.containsAll(s2);
	}
}