/*
 * Copyright 2010 NCHOVY
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.krakenapps.ahocorasick;

import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;

/**
 * Multi-keyword byte matcher backed by a trie of {@link TrieNode}s.
 *
 * <p>
 * Keywords are registered with {@link #addKeyword(Pattern)}, failure links are
 * assigned by {@link #compile()}, and buffers are scanned with one of the
 * {@code search} overloads. Every node is also stored in an id-indexed list so
 * that a {@link SearchContext} can remember the node a previous scan stopped at
 * and a later scan can resume from it.
 */
public class AhoCorasickSearch {
    /** Root of the trie; it has id 0 and is its own failure node. */
    private final TrieNode root;

    /** All trie nodes; a node's id is its index in this list. */
    private final List<TrieNode> nodes;

    /**
     * Creates an empty matcher that contains only the root node.
     */
    public AhoCorasickSearch() {
        this.root = new TrieNode(0, null, null);
        this.root.setFailure(root);
        this.nodes = new ArrayList<TrieNode>();
        this.nodes.add(root);
    }

    /**
     * Inserts the keyword bytes of {@code pattern} into the trie, creating
     * missing nodes along the way, and attaches the pattern to the node that
     * ends the keyword.
     *
     * @param pattern
     *            the pattern whose {@code getKeyword()} bytes are inserted
     * @return this instance, to allow call chaining
     * @throws IllegalStateException
     *             if a trie node cannot be created; the underlying
     *             {@link IOException} is kept as the cause
     */
    public AhoCorasickSearch addKeyword(Pattern pattern) {
        byte[] keyword = pattern.getKeyword();
        TrieNode currentNode = root;

        for (int i = 0; i < keyword.length; i++) {
            byte b = keyword[i];
            if (!currentNode.hasNext(b)) {
                try {
                    // the new node's id is its index in the node list
                    TrieNode node = currentNode.addNext(nodes.size(), b);
                    nodes.add(node);
                } catch (IOException e) {
                    // Do not continue silently: the trie and the node list
                    // would no longer agree with each other.
                    throw new IllegalStateException("cannot add trie node for keyword byte at index " + i, e);
                }
            }
            currentNode = currentNode.getNext(b);
        }

        currentNode.addPattern(pattern);
        return this;
    }

    /**
     * Assigns a failure node to every node of the trie.
     *
     * <p>
     * Nodes are visited breadth-first starting at the root, so a node's parent
     * always receives its failure node before the node itself is processed.
     */
    public void compile() {
        Queue<TrieNode> queue = new LinkedList<TrieNode>();
        queue.add(root);

        while (!queue.isEmpty()) {
            TrieNode current = queue.poll();
            current.setFailure(findFailureNode(current));
            queue.addAll(current.getAllNext());
        }
    }

    /**
     * Determines the failure node for {@code node}.
     *
     * <p>
     * The root and its direct children fail to the root. For any other node the
     * failure chain of its parent is followed until a node with a transition
     * for this node's byte is found; if the chain reaches the root without such
     * a transition, the root is returned.
     *
     * @param node
     *            the node whose failure node is wanted
     * @return the failure node, never {@code null} as long as the parent chain
     *         already has failure nodes assigned
     */
    private TrieNode findFailureNode(TrieNode node) {
        TrieNode current = node.getPrevious();
        Byte body = node.getBody();

        if (current == root || current == null) {
            return root;
        }

        do {
            current = current.getFailure();
            if (current.hasNext(body)) {
                return current.getNext(body);
            }
        } while (current != root);

        return root;
    }

    /**
     * Scans the whole buffer with a fresh {@link SearchContext}.
     *
     * @param buf
     *            the bytes to scan
     * @return the matches found, in scan order
     */
    public List<Pair> search(byte[] buf) {
        return search(buf, 0, buf.length);
    }

    /**
     * Scans part of the buffer with a fresh {@link SearchContext}.
     *
     * @param buf
     *            the bytes to scan
     * @param offset
     *            index of the first byte to scan
     * @param limit
     *            maximum number of bytes to scan
     * @return the matches found, in scan order
     */
    public List<Pair> search(byte[] buf, int offset, int limit) {
        return search(buf, offset, limit, new SearchContext());
    }

    /**
     * Scans the whole buffer, resuming from and updating the given context.
     *
     * @param buf
     *            the bytes to scan
     * @param ctx
     *            the context carrying state between calls
     * @return the matches found, in scan order
     */
    public List<Pair> search(byte[] buf, SearchContext ctx) {
        return search(buf, 0, buf.length, ctx);
    }

    /**
     * Scans up to {@code limit} bytes of {@code buf} starting at
     * {@code offset}, resuming from the node stored in {@code ctx}.
     *
     * <p>
     * Each match is reported as a {@link Pair} of a position and the matched
     * pattern. The position is
     * {@code ctx.getLength() - offset + i - keywordLength + 1}, where {@code i}
     * is the buffer index of the last matched byte.
     *
     * <p>
     * A negative {@code offset} is treated as 0, a negative {@code limit} as 0,
     * and {@code limit} is clamped to the bytes available after {@code offset}.
     * If {@code ctx.getNeedResultCount()} is 0, or {@code offset} lies beyond
     * the buffer, an empty list is returned and the context is left untouched.
     * Scanning stops as soon as the requested number of results has been
     * collected; the context is then still advanced by the full (clamped)
     * {@code limit}.
     *
     * @param buf
     *            the bytes to scan
     * @param offset
     *            index of the first byte to scan
     * @param limit
     *            maximum number of bytes to scan
     * @param ctx
     *            the context supplying the start node, accumulated length,
     *            result limit and failure-pattern flag; it is updated with the
     *            last node id, the scanned length and the result count
     * @return the matches found, in scan order
     */
    public List<Pair> search(byte[] buf, int offset, int limit, SearchContext ctx) {
        List<Pair> result = new ArrayList<Pair>();
        TrieNode node = nodes.get(ctx.getLastNodeId());
        int length = ctx.getLength();
        int needResultCount = ctx.getNeedResultCount();
        boolean includeFailureSet = ctx.isIncludeFailurePatterns();

        if (needResultCount == 0) {
            return result;
        }

        if (offset < 0) {
            offset = 0;
        }

        if (buf.length < offset) {
            return result;
        }

        // A negative limit must not shrink the length accumulated in ctx.
        if (limit < 0) {
            limit = 0;
        }

        // Compare against the remaining byte count instead of offset + limit,
        // which overflows for very large limits.
        if (limit > buf.length - offset) {
            limit = buf.length - offset;
        }

        int searchLimit = offset + limit;

        scan:
        for (int i = offset; i < searchLimit; i++) {
            TrieNode nextNode = node.getNext(buf[i]);
            if (nextNode == null) {
                // NOTE(review): a miss restarts at the root without walking
                // failure links here; presumably TrieNode.getNext resolves
                // failure transitions itself - confirm against TrieNode.
                node = root;
                continue;
            }

            node = nextNode;
            for (Pattern p : node.getPatterns(includeFailureSet)) {
                int pos = length - offset + i - p.getKeyword().length + 1;
                result.add(new Pair(pos, p));

                needResultCount--;
                if (needResultCount == 0) {
                    // Leave the whole scan, not just this pattern loop;
                    // otherwise the counter goes negative and the result
                    // limit is never enforced again.
                    break scan;
                }
            }
        }

        ctx.setLastNodeId(node.getId());
        ctx.addLength(limit);
        ctx.addResultCount(result.size());
        return result;
    }
}