/**
* Copyright 2014, Emory University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.emory.clir.clearnlp.dictionary.english;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Set;
import edu.emory.clir.clearnlp.dictionary.PathTokenizer;
import edu.emory.clir.clearnlp.util.CharUtils;
import edu.emory.clir.clearnlp.util.DSUtils;
import edu.emory.clir.clearnlp.util.IOUtils;
/**
 * Holds a set of hyphen prefixes and a set of hyphen suffixes and uses them
 * to decide whether a hyphen found inside a character sequence should be
 * preserved (see {@link #preserveHyphen(char[], int)}).
 *
 * @since 3.0.0
 * @author Jinho D. Choi ({@code jinho.choi@emory.edu})
 */
public class DTHyphen
{
	/** Entries read from the prefix stream; looked up by {@link #isPrefix(String)}. */
	private Set<String> s_prefix;
	/** Entries read from the suffix stream; looked up by {@link #isSuffix(String)}. */
	private Set<String> s_suffix;

	/**
	 * Loads the prefix and suffix sets from the classpath resources named by
	 * {@code PathTokenizer.EN_HYPHEN_PREFIX} and {@code PathTokenizer.EN_HYPHEN_SUFFIX}.
	 * The streams are opened here, so they are also closed here once loading is done.
	 */
	public DTHyphen()
	{
		InputStream prefix = null;
		InputStream suffix = null;

		try
		{
			prefix = IOUtils.getInputStreamsFromClasspath(PathTokenizer.EN_HYPHEN_PREFIX);
			suffix = IOUtils.getInputStreamsFromClasspath(PathTokenizer.EN_HYPHEN_SUFFIX);
			init(prefix, suffix);
		}
		finally
		{
			// Closing twice is harmless per the Closeable contract, so this is safe
			// even if the loader has already closed the streams.
			closeQuietly(prefix);
			closeQuietly(suffix);
		}
	}

	/**
	 * Loads the prefix and suffix sets from the given streams.
	 * This class does not close the streams itself; they remain the caller's responsibility.
	 *
	 * @param prefix the stream providing the prefix entries.
	 * @param suffix the stream providing the suffix entries.
	 */
	public DTHyphen(InputStream prefix, InputStream suffix)
	{
		init(prefix, suffix);
	}

	/**
	 * Replaces both sets with the entries read from the given streams.
	 * NOTE(review): the two {@code true} flags passed to {@code DSUtils.createStringHashSet}
	 * presumably request trimming and lower-casing of each entry - confirm against DSUtils.
	 *
	 * @param prefix the stream providing the prefix entries.
	 * @param suffix the stream providing the suffix entries.
	 */
	public void init(InputStream prefix, InputStream suffix)
	{
		s_prefix = DSUtils.createStringHashSet(prefix, true, true);
		s_suffix = DSUtils.createStringHashSet(suffix, true, true);
	}

	/**
	 * @param lower the string to look up; {@link #preserveHyphen(char[], int)} passes it lower-cased.
	 * @return {@code true} if the prefix set contains the given string.
	 */
	public boolean isPrefix(String lower)
	{
		return s_prefix.contains(lower);
	}

	/**
	 * @param lower the string to look up; {@link #preserveHyphen(char[], int)} passes it lower-cased.
	 * @return {@code true} if the suffix set contains the given string.
	 */
	public boolean isSuffix(String lower)
	{
		return s_suffix.contains(lower);
	}

	/**
	 * Decides whether the hyphen at {@code cs[index]} should be preserved.
	 * The character must be a hyphen that is either the last character or is
	 * followed by an alphabetic character. Given that, the hyphen is preserved if
	 * any of the following holds:
	 * <ul>
	 * <li>the lower-cased text before the hyphen is a known prefix;</li>
	 * <li>the lower-cased text after the hyphen is a known suffix;</li>
	 * <li>the hyphen is followed by a single vowel and another hyphen;</li>
	 * <li>the hyphen is preceded by a single vowel and another hyphen.</li>
	 * </ul>
	 * The given array is never modified.
	 *
	 * @param cs the characters to inspect.
	 * @param index the position of the candidate hyphen in {@code cs}.
	 * @return {@code true} if the hyphen should be preserved; {@code false} otherwise,
	 * including when {@code cs} is {@code null} or {@code index} is out of range.
	 */
	public boolean preserveHyphen(char[] cs, int index)
	{
		// Nothing to preserve when there is no character at the requested position.
		if (cs == null || index < 0 || index >= cs.length)
			return false;

		final int len  = cs.length;
		final int next = index + 1;

		if (!CharUtils.isHyphen(cs[index]))
			return false;

		// A hyphen followed by a non-alphabetic character is never preserved;
		// a trailing hyphen (next == len) is still a candidate.
		if (next < len && !CharUtils.isAlphabet(cs[next]))
			return false;

		if (index > 0 && isPrefix(toLowerCaseString(cs, 0, index)))
			return true;

		if (next < len && isSuffix(toLowerCaseString(cs, next, len)))
			return true;

		// Pattern "-V-" starting at this hyphen.
		if (index + 2 < len && CharUtils.isVowel(cs[next]) && CharUtils.isHyphen(cs[index + 2]))
			return true;

		// Pattern "-V-" ending at this hyphen.
		return index >= 2 && CharUtils.isVowel(cs[index - 1]) && CharUtils.isHyphen(cs[index - 2]);
	}

	/** Returns the lower-cased string of {@code cs[beginIndex, endIndex)}, working on a copy so {@code cs} stays untouched. */
	private static String toLowerCaseString(char[] cs, int beginIndex, int endIndex)
	{
		char[] tmp = Arrays.copyOfRange(cs, beginIndex, endIndex);
		CharUtils.toLowerCase(tmp);
		return new String(tmp);
	}

	/** Closes the stream if it is not {@code null}, ignoring any failure raised by the close itself. */
	private static void closeQuietly(InputStream in)
	{
		if (in == null)
			return;

		try
		{
			in.close();
		}
		catch (IOException ignored)
		{
			// The sets are already loaded at this point; a failure to release a
			// read-only stream must not make construction fail.
		}
	}
}