/* * Copyright (c) 2010, IETR/INSA of Rennes * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * Neither the name of the IETR/INSA of Rennes nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ package net.sf.orcc.tools.sequitur; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; /** * This class defines the Sequitur algorithm. * * @author Matthieu Wipliez * */ public class Sequitur { public static void main(String[] args) { Sequitur seq = new Sequitur(); List<Character> terminals = new ArrayList<Character>(); // final String pattern = // "abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc"; final String pattern = "abbbbbbbbbbccccccccbccccccccbccccccccbccccccccbccccccccbccccccccbccccccccbd"; // final String pattern = // "aaaaaaaaaaaaaaaabcccccccccccccccccccccccccccccccc"; for (int i = 0; i < 1; i++) { for (char c : pattern.toCharArray()) { terminals.add(c); } } seq.getSRule(terminals); } private Map<Digram, Symbol> digrams; private int ruleIndex; private Rule s; /** * Creates a new Sequitur algorithm. */ public Sequitur() { digrams = new HashMap<Digram, Symbol>(); ruleIndex = 1; } /** * Creates a new rule to represent the given digram. The given symbol and * its successor are replaced by a reference to the newly-created rule. The * penultimate and last symbols are also replaced by a reference to the * newly-created rule. * * @param symbol * symbol where the digram occurs * @param digram * a digram */ private void createNewRule(Symbol symbol, Digram digram) { Rule newRule = new Rule(String.valueOf(ruleIndex), digram); ruleIndex++; // removes digram located at symbol, and references rule NonTerminalSymbol s1 = new NonTerminalSymbol(newRule); replaceDigram(symbol, s1); // append reference to rule NonTerminalSymbol s2 = new NonTerminalSymbol(newRule); replaceDigram(digram.getS1(), s2); } /** * Enforces the digram utility constraint. * * @param digram * a digram whose symbols are newly-linked * @param s1 * first symbol that references the digram */ private void enforceDigramUtility(Digram digram, Symbol s1) { if (digram.getS1().isGuard() || digram.getS2().isGuard()) { return; } Symbol symbol = digrams.get(digram); if (symbol == null) { // digram is not repeated elsewhere digrams.put(digram, s1); } else if (symbol.getNext() != digram.getS1()) { // the new digram is repeated elsewhere and the repetitions do not // overlap Symbol g1 = symbol.getPrevious(); Symbol g2 = symbol.getNext().getNext(); if (g1 == g2) { // there is a rule that contains only this digram Rule rule = ((GuardSymbol) g1).getRule(); replaceDigram(s1, new NonTerminalSymbol(rule)); } else { // create a new rule createNewRule(symbol, digram); } } } /** * Enforces the rule utility constraint when the digram located at the given * symbol <code>s1</code> is replaced by the given non-terminal symbol. * * @param s1 * first symbol of the digram replaced * @param ntSymbol * a non-terminal symbol */ private void enforceRuleUtility(Symbol s1, NonTerminalSymbol ntSymbol) { Symbol s2 = s1.getNext(); if (s1.isNonTerminal()) { trySubstitute((NonTerminalSymbol) s1, ntSymbol); } else if (s2.isNonTerminal()) { trySubstitute((NonTerminalSymbol) s2, ntSymbol); } } public Rule getSRule(List<?> terminals) { s = new Rule("s"); Iterator<?> it = terminals.iterator(); if (it.hasNext()) { Symbol penultimate = new TerminalSymbol(it.next()); s.append(penultimate); while (it.hasNext()) { Symbol last = new TerminalSymbol(it.next()); s.append(last); // a link is made between penultimate and last enforceDigramUtility(new Digram(penultimate, last), penultimate); penultimate = s.getLast(); } } System.out.println(this); return s; } /** * Replaces the digram located at the given symbol <code>s1</code> by the * given new symbol. * * @param s1 * first symbol of the digram to replace * @param ntSymbol * a non-terminal symbol */ private void replaceDigram(Symbol s1, NonTerminalSymbol ntSymbol) { Symbol before = s1.getPrevious(); Symbol s2 = s1.getNext(); Symbol after = s2.getNext(); ntSymbol.insertBetween(before, after); // if either symbol is a non-terminal, decrements the reference count of // the rule it references if (s1.isNonTerminal()) { ((NonTerminalSymbol) s1).getRule().decrementReferenceCount(); } if (s2.isNonTerminal()) { ((NonTerminalSymbol) s2).getRule().decrementReferenceCount(); } // removes old digrams digrams.remove(new Digram(before, s1)); digrams.remove(new Digram(s2, after)); // adds new digrams around the new symbol enforceDigramUtility(new Digram(before, ntSymbol), before); enforceDigramUtility(new Digram(ntSymbol, after), ntSymbol); // replaces the digram by a reference to the new symbol // does not enforce digram utility, because we just did that Symbol reference = ntSymbol.getRule().getFirst(); digrams.put(new Digram(s1, s2), reference); // because a digram is replaced by a non-terminal symbol enforceRuleUtility(s1, ntSymbol); } @Override public String toString() { String res = ""; for (Entry<String, Rule> entry : Rule.rules.entrySet()) { res += entry.getValue() + "\n"; } return res; } private void trySubstitute(NonTerminalSymbol ntSymbol, NonTerminalSymbol other) { Rule rule = ntSymbol.getRule(); if (rule.isReferencedOnce()) { Symbol symbol = other.getRule().getFirst(); while (!symbol.equals(ntSymbol)) { symbol = symbol.getNext(); } Symbol before = symbol.getPrevious(); Symbol after = symbol.getNext(); Symbol ruleStart = rule.getFirst(); Symbol ruleEnd = rule.getLast().getNext(); symbol = ruleStart; while (symbol != ruleEnd) { Symbol next = symbol.getNext(); symbol.insertBetween(before, after); before = symbol; symbol = next; } rule.delete(); toString(); } } }