/**
* Copyright (C) 2012 cogroo <cogroo@cogroo.org>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.cogroo.tools.featurizer;
import java.util.Objects;
import opennlp.tools.chunker.ChunkSample;
/**
 * Immutable triple of a word, its POS tag and an optional chunk tag.
 *
 * <p>The chunk tag may be {@code null}. In that case
 * {@link #extract(WordTag[], String[], String[], String[])} treats the POS tag
 * as a combined {@code "tag|chunk"} value and splits it at the first
 * {@code '|'}.
 */
public class WordTag {

  /** Separator between the tag part and the chunk part of a combined POS tag. */
  private static final char COMBINED_TAG_SEPARATOR = '|';

  private final String word;
  private final String postag;
  private final String chunktag;

  /**
   * Creates an instance without a chunk tag ({@link #getChunktag()} returns
   * {@code null}).
   *
   * @param word the word
   * @param postag the POS tag
   */
  public WordTag(String word, String postag) {
    this(word, postag, null);
  }

  /**
   * Creates an instance holding all three values.
   *
   * @param word the word
   * @param postag the POS tag
   * @param chunktag the chunk tag, may be {@code null}
   */
  public WordTag(String word, String postag, String chunktag) {
    this.word = word;
    this.postag = postag;
    this.chunktag = chunktag;
  }

  /** @return the word given at construction */
  public String getWord() {
    return word;
  }

  /** @return the POS tag given at construction */
  public String getPostag() {
    return postag;
  }

  /** @return the chunk tag given at construction, or {@code null} if none was given */
  public String getChunktag() {
    return chunktag;
  }

  /**
   * Builds one {@code WordTag} (without chunk tag) per entry of {@code word}.
   *
   * @param word the words; its length determines the length of the result
   * @param postag the POS tags, read at the same indexes as {@code word}
   * @return a new array with {@code word.length} elements
   */
  public static WordTag[] create(String[] word, String[] postag) {
    WordTag[] arr = new WordTag[word.length];
    for (int i = 0; i < word.length; i++) {
      arr[i] = new WordTag(word[i], postag[i]);
    }
    return arr;
  }

  /**
   * Builds one {@code WordTag} per entry of {@code word}, including chunk tags.
   *
   * @param word the words; its length determines the length of the result
   * @param postag the POS tags, read at the same indexes as {@code word}
   * @param chunktag the chunk tags, read at the same indexes as {@code word}
   * @return a new array with {@code word.length} elements
   */
  public static WordTag[] create(String[] word, String[] postag, String[] chunktag) {
    WordTag[] arr = new WordTag[word.length];
    for (int i = 0; i < word.length; i++) {
      arr[i] = new WordTag(word[i], postag[i], chunktag[i]);
    }
    return arr;
  }

  /**
   * Copies the word and POS tag of every element of {@code wt} into the given
   * output arrays, index by index.
   *
   * @param wt the source elements
   * @param word output array receiving the words; needs at least {@code wt.length} slots
   * @param tag output array receiving the POS tags; needs at least {@code wt.length} slots
   */
  public static void extract(WordTag[] wt, String[] word, String[] tag) {
    for (int i = 0; i < wt.length; i++) {
      word[i] = wt[i].getWord();
      tag[i] = wt[i].getPostag();
    }
  }

  /**
   * Copies word, POS tag and chunk tag of every element of {@code wt} into the
   * given output arrays, index by index.
   *
   * <p>For an element whose chunk tag is {@code null}, the POS tag is split at
   * its first {@code '|'}: the text before it goes to {@code tag}, the text
   * after it goes to {@code chunks}. If such a POS tag is {@code null} or has
   * no {@code '|'}, it is copied to {@code tag} unchanged and the
   * {@code chunks} slot is set to {@code null}.
   *
   * @param wt the source elements
   * @param word output array receiving the words; needs at least {@code wt.length} slots
   * @param tag output array receiving the POS tags; needs at least {@code wt.length} slots
   * @param chunks output array receiving the chunk tags; needs at least {@code wt.length} slots
   */
  public static void extract(WordTag[] wt, String[] word, String[] tag, String[] chunks) {
    for (int i = 0; i < wt.length; i++) {
      WordTag current = wt[i];
      word[i] = current.getWord();

      if (current.getChunktag() != null) {
        tag[i] = current.getPostag();
        chunks[i] = current.getChunktag();
        continue;
      }

      String combined = current.getPostag();
      int bar = combined == null ? -1 : combined.indexOf(COMBINED_TAG_SEPARATOR);
      if (bar < 0) {
        // No separator to split on: substring(0, -1) would throw, so pass the
        // tag through as-is and report that no chunk tag is available.
        tag[i] = combined;
        chunks[i] = null;
      } else {
        tag[i] = combined.substring(0, bar);
        chunks[i] = combined.substring(bar + 1);
      }
    }
  }

  /**
   * Two instances are equal when word, POS tag and chunk tag are all equal
   * (null-safe comparison).
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (!(o instanceof WordTag)) {
      return false;
    }
    WordTag other = (WordTag) o;
    return Objects.equals(this.word, other.word)
        && Objects.equals(this.postag, other.postag)
        && Objects.equals(this.chunktag, other.chunktag);
  }

  /** Hash over the same three fields that {@link #equals(Object)} compares. */
  @Override
  public int hashCode() {
    return Objects.hash(word, postag, chunktag);
  }

  /**
   * @return {@code word_postag} when there is no chunk tag, otherwise
   *         {@code word_postag_chunktag}
   */
  @Override
  public String toString() {
    if (getChunktag() == null) {
      return getWord() + "_" + getPostag();
    }
    return getWord() + "_" + getPostag() + "_" + getChunktag();
  }

  /**
   * Builds one {@code WordTag} per token of the sample, taking the word from
   * {@code getSentence()}, the POS tag from {@code getTags()} and the chunk tag
   * from {@code getPreds()}.
   *
   * @param cs the sample to convert
   * @return a new array with one element per entry of {@code cs.getSentence()}
   */
  public static WordTag[] create(ChunkSample cs) {
    String[] sentence = cs.getSentence();
    String[] pos = cs.getTags();
    String[] chunks = cs.getPreds();
    WordTag[] wt = new WordTag[sentence.length];
    for (int i = 0; i < wt.length; i++) {
      wt[i] = new WordTag(sentence[i], pos[i], chunks[i]);
    }
    return wt;
  }
}