/** * Packer version 3.0 (final) * Copyright 2004-2007, Dean Edwards * Web: {@link http://dean.edwards.name/} * * This software is licensed under the MIT license * Web: {@link http://www.opensource.org/licenses/mit-license} * * Ported to Java by Pablo Santiago based on C# version by Jesse Hansen, <twindagger2k @ msn.com> * Web: {@link http://jpacker.googlecode.com/} * Email: <pablo.santiago @ gmail.com> */ package com.jpacker; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * Wrapper class for a {@link JPackerWord} list built based on script's keywords (later * wrapped into a JPackerWord list) * * @author Pablo Santiago <pablo.santiago @ gmail.com> */ public final class JPackerWords { private JPackerEncoding encoding; private static final Pattern WORDS = Pattern.compile("\\w+"); private List<JPackerWord> words = new ArrayList<JPackerWord>(); /** * Constructor * * @param script * The input script to look up for keywords * @param encoding * The encoding level to use */ public JPackerWords(String script, JPackerEncoding encoding) { this.encoding = encoding; Matcher matcher = WORDS.matcher(script); while (matcher.find()) { add(new JPackerWord(matcher.group())); } encode(); } private void add(JPackerWord word) { if (!words.contains(word)) { words.add(word); } JPackerWord w = find(word); w.setCount(w.getCount() + 1); } private void encode() { // sort by frequency Collections.sort(words, new Comparator<JPackerWord>() { @Override public int compare(JPackerWord x, JPackerWord y) { return y.getCount() - x.getCount(); } }); // a dictionary of encoding base -> base10 Map<String, Integer> encoded = new HashMap<String, Integer>(); for (int i = 0; i < words.size(); i++) { encoded.put(encoding.getEncoder().encode(i), i); } int index = 0; for (JPackerWord word : words) { if (encoded.containsKey(word.getWord())) { word.setIndex(encoded.get(word.getWord())); word.setReplacement(""); } else { while (words.contains(new JPackerWord(encoding.getEncoder().encode(index)))) { index++; } word.setIndex(index++); word.setReplacement(word.getWord()); } word.setEncoded(encoding.getEncoder().encode(word.getIndex())); } // sort by encoding Collections.sort(words, new Comparator<JPackerWord>() { @Override public int compare(JPackerWord x, JPackerWord y) { return x.getIndex() - y.getIndex(); } }); } /** * Find a word in the JPackerWord list * * @param word * The JPackerWord object to find in the list * @return The JPackerWord object if found, null otherwise */ public JPackerWord find(JPackerWord word) { Iterator<JPackerWord> it = words.iterator(); while (it.hasNext() == true) { JPackerWord pw = it.next(); if (pw.equals(word)) { return pw; } } return null; } /** * Gets the list of JPackerWord objects * * @return The list of JPackerWord objects */ public List<JPackerWord> getWords() { return words; } /** * This method has been overridden to return the list of JPackerWord objects * as a single String object separated by the '|' character * * @return A List of JPackerWord objects as a single String object separated * by the '|' character */ @Override public String toString() { StringBuilder sb = new StringBuilder(); for (JPackerWord word : words) { sb.append(word.getReplacement()).append('|'); } sb.deleteCharAt(sb.length() - 1); return sb.toString(); } }