/**
* Copyright 2014, Emory University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.emory.clir.clearnlp.util;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import edu.emory.clir.clearnlp.util.constant.PatternConst;
/**
 * Static helpers for splitting strings on a single delimiter character, on a
 * regular expression, or at explicit character offsets.
 * <p>
 * The single-character helpers delegate to shared {@link CharTokenizer}
 * instances; how those tokenizers treat empty tokens is defined by
 * {@link CharTokenizer}, not by this class.
 *
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class Splitter implements PatternConst
{
	/** Shared tokenizer constructed with the underscore character ({@code '_'}). */
	static public CharTokenizer T_UNDERSCORE = new CharTokenizer('_');
	/** Shared tokenizer constructed with the hyphen character ({@code '-'}). */
	static public CharTokenizer T_HYPHEN     = new CharTokenizer('-');
	/** Shared tokenizer constructed with the space character ({@code ' '}). */
	static public CharTokenizer T_SPACE      = new CharTokenizer(' ');
	/** Shared tokenizer constructed with the comma character ({@code ','}). */
	static public CharTokenizer T_COMMA      = new CharTokenizer(',');
	/** Shared tokenizer constructed with the colon character ({@code ':'}). */
	static public CharTokenizer T_COLON      = new CharTokenizer(':');
	/** Shared tokenizer constructed with the tab character ({@code '\t'}). */
	static public CharTokenizer T_TAB        = new CharTokenizer('\t');

	/**
	 * Splits a string around matches of a pre-compiled pattern.
	 *
	 * @param s the string to split.
	 * @param p the compiled pattern whose matches act as delimiters.
	 * @return the result of {@link Pattern#split(CharSequence)} applied to {@code s}.
	 */
	static public String[] split(String s, Pattern p)
	{
		return p.split(s);
	}

	/**
	 * Tokenizes a string with {@link #T_SPACE}.
	 *
	 * @param s the string to split.
	 * @return the tokens produced by {@link #T_SPACE}.
	 */
	static public String[] splitSpace(String s)
	{
		return T_SPACE.tokenize(s);
	}

	/**
	 * Tokenizes a string with {@link #T_TAB}, returning a list instead of an array.
	 *
	 * @param s the string to split.
	 * @return the tokens produced by {@link #T_TAB} as a list.
	 */
	static public List<String> splitTabsToList(String s)
	{
		return T_TAB.tokenizeToList(s);
	}

	/**
	 * Tokenizes a string with {@link #T_TAB}.
	 *
	 * @param s the string to split.
	 * @return the tokens produced by {@link #T_TAB}.
	 */
	static public String[] splitTabs(String s)
	{
		return T_TAB.tokenize(s);
	}

	/**
	 * Tokenizes a string with {@link #T_UNDERSCORE}.
	 *
	 * @param s the string to split.
	 * @return the tokens produced by {@link #T_UNDERSCORE}.
	 */
	static public String[] splitUnderscore(String s)
	{
		return T_UNDERSCORE.tokenize(s);
	}

	/**
	 * Tokenizes a string with {@link #T_HYPHEN}.
	 *
	 * @param s the string to split.
	 * @return the tokens produced by {@link #T_HYPHEN}.
	 */
	static public String[] splitHyphens(String s)
	{
		return T_HYPHEN.tokenize(s);
	}

	/**
	 * Tokenizes a string with {@link #T_COMMA}.
	 *
	 * @param s the string to split.
	 * @return the tokens produced by {@link #T_COMMA}.
	 */
	static public String[] splitCommas(String s)
	{
		return T_COMMA.tokenize(s);
	}

	/**
	 * Tokenizes a string with {@link #T_COMMA}, forwarding an extra flag to the tokenizer.
	 *
	 * @param s the string to split.
	 * @param includeEmpty passed through unchanged to {@code CharTokenizer.tokenize};
	 *        presumably controls whether empty tokens are kept (confirm against {@link CharTokenizer}).
	 * @return the tokens produced by {@link #T_COMMA}.
	 */
	static public String[] splitCommas(String s, boolean includeEmpty)
	{
		return T_COMMA.tokenize(s, includeEmpty);
	}

	/**
	 * Tokenizes a string with {@link #T_COLON}.
	 *
	 * @param s the string to split.
	 * @return the tokens produced by {@link #T_COLON}.
	 */
	static public String[] splitColons(String s)
	{
		return T_COLON.tokenize(s);
	}

	/**
	 * Splits a string around the matches of a pattern and keeps the matched
	 * text as separate elements, in order of appearance.
	 * <p>
	 * Concatenating the returned elements reproduces {@code s}. No element is
	 * ever the empty string: empty unmatched segments are omitted, and
	 * zero-length matches (possible with patterns such as {@code "a*"}) are ignored.
	 *
	 * @param p the compiled pattern to search for.
	 * @param s the string to split.
	 * @return the non-empty unmatched segments and non-empty matches of {@code s}, in order.
	 */
	static public List<String> splitIncludingMatches(Pattern p, String s)
	{
		ArrayList<String> pieces = new ArrayList<>();
		Matcher m = p.matcher(s);
		int last = 0;

		while (m.find())
		{
			int begin = m.start();
			int end   = m.end();

			// A zero-length match carries no text; recording it would only inject "" into the result.
			// 'last' is left untouched so the surrounding unmatched text stays in one piece.
			if (begin == end)
				continue;

			// Unmatched text between the previous match and this one.
			if (last < begin)
				pieces.add(s.substring(last, begin));

			pieces.add(m.group());
			last = end;
		}

		// Unmatched tail after the final match (or the whole string when nothing matched).
		if (last < s.length())
			pieces.add(s.substring(last));

		pieces.trimToSize();
		return pieces;
	}

	/**
	 * Cuts a string at the given character offsets.
	 * <p>
	 * Piece {@code i} spans from the previous offset ({@code 0} for the first
	 * piece) up to, but excluding, {@code splitIndices[i]}; the final piece runs
	 * from the last offset to the end of {@code s}. With no offsets the result
	 * is a single-element array containing {@code s}.
	 *
	 * @param s the string to cut.
	 * @param splitIndices the cut offsets; they must be non-decreasing and within {@code [0, s.length()]}.
	 * @return an array of {@code splitIndices.length + 1} pieces.
	 * @throws IndexOutOfBoundsException if an offset is out of range or smaller than the one
	 *         before it (raised by {@link String#substring(int, int)}).
	 */
	static public String[] split(String s, int... splitIndices)
	{
		int len = splitIndices.length;
		String[] t = new String[len+1];
		int beginIndex = 0;

		for (int i=0; i<len; i++)
		{
			t[i] = s.substring(beginIndex, splitIndices[i]);
			beginIndex = splitIndices[i];
		}

		// Whatever remains after the last cut offset.
		t[len] = s.substring(beginIndex);
		return t;
	}
}