/**
* Copyright 2007-2014
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util;
import java.util.Collection;
import java.util.List;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.process.PTBEscapingProcessor;
public class CoreNlpUtils
{
public static CoreLabel tokenToWord(Token aToken)
{
CoreLabel t = new CoreLabel();
t.setOriginalText(aToken.getCoveredText());
t.setWord(aToken.getCoveredText());
t.setBeginPosition(aToken.getBegin());
t.setEndPosition(aToken.getEnd());
if (aToken.getLemma() != null) {
t.setLemma(aToken.getLemma().getValue());
}
else {
t.setLemma(aToken.getText());
}
if (aToken.getPos() != null) {
t.setTag(aToken.getPos().getPosValue());
}
return t;
}
@SuppressWarnings("unchecked")
public static <T extends HasWord> List<T> applyPtbEscaping(List<T> words,
Collection<String> quoteBegin, Collection<String> quoteEnd)
{
PTBEscapingProcessor<T, String, Word> escaper = new PTBEscapingProcessor<T, String, Word>();
// Apply escaper to the whole sentence, not to each token individually. The
// escaper takes context into account, e.g. when transforming regular double
// quotes into PTB opening and closing quotes (`` and '').
words = (List<T>) escaper.apply(words);
for (HasWord w : words) {
if (quoteBegin != null && quoteBegin.contains(w.word())) {
w.setWord("``");
}
else if (quoteEnd != null && quoteEnd.contains(w.word())) {
w.setWord("\'\'");
}
}
return words;
}
}