/*******************************************************************************
 * Copyright 2015-2016 - CNRS (Centre National de Recherche Scientifique)
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *******************************************************************************/
package eu.project.ttc.resources;

import java.io.IOException;
import java.io.InputStream;
import java.util.LinkedList;
import java.util.Map;
import java.util.Queue;
import java.util.Scanner;

import org.apache.commons.io.IOUtils;
import org.apache.uima.resource.DataResource;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.SharedResourceObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

import eu.project.ttc.utils.TermSuiteConstants;

/**
 * A character trie of known prefixes, loaded from a line-oriented text
 * resource, that answers "what is the longest known prefix this word starts
 * with?".
 */
public class PrefixTree implements SharedResourceObject {
	private static final Logger LOGGER = LoggerFactory.getLogger(PrefixTree.class);

	public static final String PREFIX_TREE = "PrefixTree";

	private Node rootNode = new Node(Node.ROOT_CHAR, null);

	/**
	 * One trie node. The path of characters from the root down to a node
	 * spells a string; {@link #validPrefix} marks the nodes whose string was
	 * explicitly indexed.
	 */
	private static class Node {
		/** Placeholder character stored in the root; it carries no meaning. */
		public static final char ROOT_CHAR = '^';

		private boolean validPrefix;
		private final char character;
		private final Node parent;
		private final Map<Character, Node> children = Maps.newHashMap();

		public Node(char character, Node parent) {
			super();
			this.character = character;
			this.parent = parent;
		}

		/**
		 * Indexes the given characters as a valid prefix below this node,
		 * creating intermediate nodes as needed. The queue is consumed.
		 *
		 * @param charSequence
		 * 			the characters of the prefix, in reading order
		 * @throws IllegalArgumentException
		 * 			if called on the root with an empty queue
		 */
		public void indexString(Queue<Character> charSequence) {
			Preconditions.checkArgument(
					!(isRoot() && charSequence.isEmpty()),
					"Empty string is not a valid prefix");
			Node current = this;
			while (!charSequence.isEmpty()) {
				Character c = charSequence.poll();
				Node child = current.children.get(c);
				if (child == null) {
					child = new Node(c, current);
					current.children.put(c, child);
				}
				current = child;
			}
			current.validPrefix = true;
		}

		/**
		 * Walks down the trie along the given characters and returns the
		 * string of the deepest node on that path that is marked as a valid
		 * prefix. The queue is consumed up to the point where the walk stops.
		 *
		 * @param charSequence
		 * 			the characters of the word to look up, in reading order
		 * @return the longest indexed prefix found on the path, or
		 * 			<code>null</code> if none was found
		 */
		public String getPrefix(Queue<Character> charSequence) {
			Node current = this;
			Node deepestValid = null;
			while (true) {
				if (current.validPrefix) {
					deepestValid = current;
				}
				if (charSequence.isEmpty()) {
					break;
				}
				Node child = current.children.get(charSequence.poll());
				if (child == null) {
					break;
				}
				current = child;
			}
			return deepestValid == null ? null : deepestValid.toPrefixString();
		}

		/**
		 * Rebuilds the string spelled by the path from the root to this node.
		 */
		private String toPrefixString() {
			int length = 0;
			for (Node n = this; !n.isRoot(); n = n.parent) {
				length++;
			}
			// Fill from the end instead of appending and calling reverse():
			// reverse() re-swaps adjacent low/high surrogates and would
			// scramble two consecutive supplementary characters.
			char[] chars = new char[length];
			Node n = this;
			for (int i = length - 1; i >= 0; i--) {
				chars[i] = n.character;
				n = n.parent;
			}
			return new String(chars);
		}

		/**
		 * The root is the only node without a parent. Testing the parent
		 * (rather than comparing the character to {@link #ROOT_CHAR}) keeps
		 * prefixes that contain a literal '^' working.
		 */
		private boolean isRoot() {
			return this.parent == null;
		}
	}

	/**
	 * Reads the resource as UTF-8 text, one entry per token delimited by
	 * {@link TermSuiteConstants#LINE_BREAK}, and indexes each entry in the
	 * tree. For each token, only the part before the first match of
	 * {@link TermSuiteConstants#DIESE} is kept (presumably the '#' comment
	 * marker -- confirm against TermSuiteConstants) and trimmed; entries that
	 * are empty, start with '#', or consist of a single '-' are skipped, and
	 * one trailing '-' is removed from the others.
	 *
	 * @param data
	 * 			the resource to read the prefixes from
	 * @throws ResourceInitializationException
	 * 			if the resource cannot be opened, read or parsed
	 */
	@Override
	public void load(DataResource data) throws ResourceInitializationException {
		InputStream inputStream = null;
		try {
			inputStream = data.getInputStream();
			Scanner scanner = null;
			try {
				scanner = new Scanner(inputStream, "UTF-8");
				scanner.useDelimiter(TermSuiteConstants.LINE_BREAK);
				while (scanner.hasNext()) {
					// Limit 2 guarantees a non-empty array: without a limit,
					// a token made only of delimiter matches splits to an
					// empty array and [0] would throw.
					String line = scanner.next().split(TermSuiteConstants.DIESE, 2)[0].trim();
					if (line.startsWith("#") || line.isEmpty()) {
						continue;
					}
					if (line.endsWith("-")) {
						if (line.length() == 1) {
							continue;
						}
						line = line.substring(0, line.length() - 1);
					}
					rootNode.indexString(toCharQueue(line));
				}
				// Scanner swallows read errors and just stops iterating;
				// surface them so a truncated resource is not loaded silently.
				IOException readError = scanner.ioException();
				if (readError != null) {
					throw readError;
				}
			} catch (Exception e) {
				LOGGER.error("Could not parse prefix resource {}", data.getUrl(), e);
				throw new ResourceInitializationException(e);
			} finally {
				IOUtils.closeQuietly(scanner);
			}
		} catch (IOException e) {
			LOGGER.error("Could not load file {}", data.getUrl(), e);
			throw new ResourceInitializationException(e);
		} finally {
			IOUtils.closeQuietly(inputStream);
		}
	}

	/**
	 * Finds the longest indexed prefix that the given word starts with.
	 *
	 * @param word
	 * 			the word to analyze, must not be <code>null</code>
	 * @return the longest indexed prefix of <code>word</code>, or
	 * 			<code>null</code> if the word starts with no indexed prefix
	 */
	public String getPrefix(String word) {
		return rootNode.getPrefix(toCharQueue(word));
	}

	/**
	 * Copies the UTF-16 code units of the string into a FIFO queue.
	 */
	private Queue<Character> toCharQueue(String line) {
		LinkedList<Character> characters = Lists.newLinkedList();
		for (int i = 0; i < line.length(); i++) {
			characters.add(line.charAt(i));
		}
		return characters;
	}
}