/** * Copyright (C) 2012 cogroo <cogroo@cogroo.org> * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.cogroo.entities.tree; import java.util.List; import opennlp.tools.util.Span; import org.cogroo.entities.SyntacticChunk; import org.cogroo.entities.impl.MorphologicalTag; import org.cogroo.tools.checker.rules.model.TagMask; import org.cogroo.tools.checker.rules.model.TagMask.SyntacticFunction; public class TextModel { private final TagMask VERB; private final TagMask NONE; private final TagMask SUBJ; private Node root; // private final Text text; public final static String SPAN = "Span: "; public final static String SYNTACTIC_FUNCTION = "Synt.: "; public final static String CHUNK_FUNCTION = "Chunk.: "; public final static String MORPH_FUNCTION = "Morph.: "; public TextModel(org.cogroo.entities.Sentence sentence) { this.VERB = new TagMask(); this.VERB.setSyntacticFunction(SyntacticFunction.VERB); this.SUBJ = new TagMask(); this.SUBJ.setSyntacticFunction(SyntacticFunction.SUBJECT); this.NONE = new TagMask(); this.NONE.setSyntacticFunction(SyntacticFunction.NONE); root = toRoot(sentence); } public Node getRoot() { return root; } private Span getSpan(List<org.cogroo.entities.Token> tokens) { if (tokens != null && tokens.size() > 0) { return new Span(tokens.get(0).getSpan().getStart(), tokens .get(tokens.size() - 1).getSpan().getEnd()); } return null; } private Sentence toRoot(org.cogroo.entities.Sentence sentence) { Sentence sent = new Sentence(); sent.setLevel(0); // Span span = getSpan(sentence.getTokens()); List<SyntacticChunk> chunks = sentence.getSyntacticChunks(); for (SyntacticChunk syntacticChunk : chunks) { addChild(syntacticChunk, sent); } return sent; } private void addChild(SyntacticChunk syntacticChunk, Node parent) { if (syntacticChunk.getSyntacticTag() != null && !syntacticChunk.getSyntacticTag().match(this.NONE)) { Chunk c = new Chunk(); c.setLevel(parent.getLevel() + 1); c.setSyntacticTag(syntacticChunk.getSyntacticTag().toVerboseString()); for (org.cogroo.entities.Chunk child : syntacticChunk.getChildChunks()) { addChild(child, c); } parent.addElement(c); } else { for (org.cogroo.entities.Chunk chunk : syntacticChunk.getChildChunks()) { addChild(chunk, parent); } } } private void addChild(org.cogroo.entities.Chunk chunk, Node parent) { if(chunk.getType() == null) { addChild(chunk.getTokens(), parent); } else { Chunk c = new Chunk(); c.setLevel(parent.getLevel() + 1); c.setSyntacticTag(chunk.getType()); addChild(chunk.getTokens(), c); parent.addElement(c); } } private void addChild(List<org.cogroo.entities.Token> tokenList, Node parent) { for (int i = 0; i < tokenList.size(); i++) { addChild(tokenList.get(i), parent, isHead(i, tokenList.get(i))); } } private boolean isHead(int i, org.cogroo.entities.Token token) { int relativeIndex = i; //- token.getChunk().getFirstToken(); if(relativeIndex == token.getChunk().getRelativeHeadIndex()) return true; return false; } private void addChild(org.cogroo.entities.Token token, Node parent, boolean isHead) { Token t = new Token(); t.setLevel(parent.getLevel() + 1); t.setLexeme(token.getLexeme()); t.setIsChunkHead(isHead); t.setMorphologicalTag(token.getMorphologicalTag().getClazzE().name()); MorphologicalTag mt = token.getMorphologicalTag().clone(); mt.setClazz(null); t.setFeatures(mt.toString()); t.setLemma(token.getPrimitive()); // Span span = token.getSpan(); // t.details.add(MORPH_FUNCTION + // token.getMorphologicalTag().toVerboseString()); // t.details.add(SPAN + span); // t.parent = parent; parent.addElement(t); } // public class Node // { // protected Node parent; // protected String text; // protected Span span; // protected List<String> details = new ArrayList<String>(); // protected List<Node> children; // // public List<Edge> getAllEdges() // { // List<Edge> edges = new ArrayList<Edge>(); // if(children != null) // { // for (Node node : children) { // edges.addAll(node.getAllEdges()); // edges.add(new Edge(this, node)); // } // } // // return edges; // } // // public String getText() // { // if(this.details.size() > 0) // { // StringBuilder sb = new StringBuilder(text); // for (String d : details) { // sb.append("\n" + d); // } // // return sb.toString(); // } // return text; // } // } // public class Edge // { // private Node parent; // private Node child; // public Edge(Node parent, Node child) { // super(); // this.parent = parent; // this.child = child; // } // public Node getParent() { // return parent; // } // public Node getChild() { // return child; // } // } public class Text extends Node { } public class Sentence extends Node { @Override public String getSyntacticTag() { return "S"; } } public class Chunk extends Node { } public class Token extends Leaf { } }