/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.cogroo.cmdline.chunker2;
import java.util.Arrays;
import java.util.Random;
import org.cogroo.tools.chunker2.ChunkSample;
import org.cogroo.tools.chunker2.ChunkerEvaluationMonitor;
import org.cogroo.tools.chunker2.TokenTag;
import opennlp.tools.util.Span;
/**
 * {@link ChunkerEvaluationMonitor} built on top of
 * {@link DetailedFMeasureForSizeListener}. Every {@link ChunkSample} is
 * converted into spans that cover the sample's chunks and whose type is the
 * position of the head token inside the chunk, written as a decimal string.
 */
public class ChunkerDetailedFMeasureSizeListener extends
    DetailedFMeasureForSizeListener<ChunkSample> implements ChunkerEvaluationMonitor {

  /**
   * Source of the placeholder head value used for multi-token chunks in which
   * no head token was found.
   */
  Random randomGenerator = new Random();

  /**
   * Converts a sample into head-annotated chunk spans.
   *
   * <p>Lexemes, POS tags and head tags are extracted from the sample through
   * {@link TokenTag}. The chunk tag of each token is the part of its POS tag
   * that follows the first {@code '|'}; when the POS tag has no {@code '|'}
   * the whole tag is used.
   *
   * @param sample the sample to convert
   * @return one span per chunk, typed with the head position inside the chunk
   */
  @Override
  protected Span[] asSpanArray(ChunkSample sample) {
    int tokenCount = sample.getSentence().length;

    String[] headTags = new String[tokenCount];
    String[] chunkTags = new String[tokenCount];
    String[] posTags = new String[tokenCount];
    String[] lexemes = new String[tokenCount];

    TokenTag.extract(TokenTag.create(sample.getSentence(), sample.getTags()),
        lexemes, posTags, headTags);

    for (int t = 0; t < tokenCount; t++) {
      String pos = posTags[t];
      // indexOf yields -1 when '|' is absent, so the substring starts at 0.
      int separator = pos.indexOf('|');
      chunkTags[t] = pos.substring(separator + 1);
    }

    return asHeadSpan(ChunkSample.phrasesAsSpanList(lexemes, posTags, chunkTags), headTags);
  }

  /**
   * Debugging aid: writes every span to standard output as
   * {@code type: covered text}.
   *
   * @param out the spans to print
   * @param lexemes the tokens the spans refer to
   */
  private void print(Span[] out, String[] lexemes) {
    String[] rendered = Span.spansToStrings(out, lexemes);
    int k = 0;
    while (k < rendered.length) {
      String covered = rendered[k];
      rendered[k] = out[k].getType() + ": " + covered;
      k++;
    }
    System.out.println(Arrays.toString(rendered));
  }

  /**
   * Builds, for every chunk, a span with the same boundaries whose type is the
   * head position relative to the chunk start.
   *
   * <p>When no head is found, a single-token chunk gets position {@code 0};
   * any longer chunk gets a pseudo-random value between 8 and 100007.
   * NOTE(review): the random value presumably keeps a headless chunk from
   * matching another span during evaluation - confirm; it also makes the
   * output differ from run to run.
   *
   * @param chunks the chunk spans
   * @param headTags the head tag of every token in the sentence
   * @return the head-typed spans, in the same order as {@code chunks}
   */
  private Span[] asHeadSpan(Span[] chunks, String[] headTags) {
    Span[] headSpans = new Span[chunks.length];
    int slot = 0;
    for (Span chunk : chunks) {
      int relativeHead = lastHeadOffset(chunk, headTags);
      if (relativeHead < 0) {
        relativeHead = (chunk.length() == 1) ? 0 : randomGenerator.nextInt(100000) + 8;
      }
      headSpans[slot++] =
          new Span(chunk.getStart(), chunk.getEnd(), Integer.toString(relativeHead));
    }
    return headSpans;
  }

  /**
   * Scans the tokens covered by a chunk and returns the offset, relative to the
   * chunk start, of the last token tagged as head, or {@code -1} if none is.
   */
  private int lastHeadOffset(Span chunk, String[] headTags) {
    final int start = chunk.getStart();
    final int end = chunk.getEnd();
    int offset = -1;
    // Forward scan without early exit: a later head overrides an earlier one.
    for (int pos = start; pos < end; pos++) {
      if (isHead(headTags[pos])) {
        offset = pos - start;
      }
    }
    return offset;
  }

  /**
   * Tells whether a head tag marks a head token, i.e. is anything but
   * {@code "O"}.
   */
  private boolean isHead(String outcome) {
    boolean outside = outcome.equals("O");
    return !outside;
  }
}