/** * Copyright (C) 2012 cogroo <cogroo@cogroo.org> * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package cogroo; import java.util.ArrayList; import java.util.List; import opennlp.tools.util.Span; import br.usp.pcs.lta.cogroo.entity.Sentence; import br.usp.pcs.lta.cogroo.entity.Token; import br.usp.pcs.lta.cogroo.entity.impl.runtime.TokenCogroo; public class ExpandedSentence { private static final long serialVersionUID = 1L; private String expandedSentence; private Sentence theSentence; private int[] offsets; private List<Token> tokens; public ExpandedSentence(Sentence sentence) { theSentence = sentence; if (expandedSentence == null) { synchronized (this) { if (expandedSentence == null) { int offset = 0; int lastStart = -1; offsets = new int[theSentence.getTokens().size()]; StringBuilder expSent = new StringBuilder(theSentence.getSentence() .length()); tokens = new ArrayList<Token>(); for (int i = 0; i < theSentence.getTokens().size(); i++) { Token t = theSentence.getTokens().get(i); String lexeme = t.getLexeme(); if (lexeme.length() < t.getSpan().length() && t.getSpan().getStart() != lastStart) { lexeme = t.getSpan().getCoveredText(theSentence.getSentence()) .replace(" ", "_"); } if (t.getSpan().getStart() == lastStart) { // oops! a contraction! // we configure the offset and append the string // compute the skip int skip = theSentence.getTokens().get(i - 1).getSpan().getEnd() + 1; append(offset + skip, lexeme, expSent); offset += lexeme.length() + 1; } else { append(offset + t.getSpan().getStart(), lexeme, expSent); } offsets[i] = offset; lastStart = t.getSpan().getStart(); Token newToken = new TokenCogroo(t.getLexeme(), offset + t.getSpan().getStart()); tokens.add(newToken); } expandedSentence = expSent.toString(); } } } } public String getExtendedSentence() { return expandedSentence; } public Sentence getSent() { return theSentence; } private void append(int i, String string, StringBuilder expSent) { while (expSent.length() < i) { expSent.append(" "); } expSent.append(string); } public Span getTokenSpan(int i) { Span s = theSentence.getTokens().get(i).getSpan(); return new Span(s.getStart() + offsets[i], s.getEnd() + offsets[i]); } }