/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package automenta.netention.app;
import automenta.netention.demo.Demo;
import automenta.netention.swing.util.SwingWindow;
import com.syncleus.dann.math.statistics.SimpleMarkovChain;
import com.syncleus.dann.math.statistics.SimpleMarkovChainEvidence;
import java.awt.*;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.util.List;
import java.util.*;
import javax.swing.*;
/**
 * Swing demo panel that produces a "cut-up" of the text typed into its upper
 * text area.
 *
 * <p>On "Run" the input is tokenized, the most frequent tokens (up to a
 * user-supplied limit) are kept as the known vocabulary, the resulting
 * sequence is fed to a {@link SimpleMarkovChainEvidence} of the selected
 * order, and a fixed number of symbols generated from the resulting chain is
 * written to the lower text area.
 *
 * @author seh
 */
public class RunTextCutup extends JPanel implements Demo {

    /** Delimiter characters for tokenizing; each delimiter is also emitted as a token. */
    private static final String DELIMITERS = " ,.!?;-()[]{}/:@\n";

    /** Number of symbols generated from the Markov chain on every run. */
    private static final int GENERATED_SYMBOLS = 700;

    private final JTextArea inputArea;
    private final JTextArea outputArea;

    /** Occurrence count of every token seen in the most recent run; empty before the first run. */
    private Map<String, Integer> knownCount = new HashMap<String, Integer>();

    /** Vocabulary selected by the most recent run; empty before the first run. */
    private Set<String> known = new HashSet<String>();

    /** Initial content of the "maximum known symbols" text field. */
    final int initialMaxKnown = 1024;

    /** Font shared by the input and output text areas. */
    Font textFont = new Font("Arial", Font.PLAIN, 24);

    /**
     * Builds the panel: input area on top, output area below, and a control
     * strip (symbol limit, chain order, Run button) along the bottom edge.
     */
    public RunTextCutup() {
        super(new BorderLayout(4, 4));

        inputArea = new JTextArea();
        inputArea.setLineWrap(true);
        inputArea.setWrapStyleWord(true);
        inputArea.setFont(textFont);

        outputArea = new JTextArea();
        outputArea.setLineWrap(true);
        outputArea.setWrapStyleWord(true);
        outputArea.setFont(textFont);

        JPanel menuPanel = new JPanel(new GridBagLayout());
        {
            GridBagConstraints gc = new GridBagConstraints();
            gc.gridx = 1;
            gc.gridy = 1;
            gc.fill = GridBagConstraints.VERTICAL;
            gc.weightx = 0;
            gc.weighty = 1.0;
            gc.insets = new Insets(0, 4, 0, 4);

            final JTextField knownField = new JTextField(Integer.toString(initialMaxKnown));
            knownField.setColumns(6);
            menuPanel.add(knownField, gc);

            gc.gridx++;

            // Kept as a raw JComboBox so the file still compiles on pre-Java-7 toolchains.
            final JComboBox orderField = new JComboBox();
            orderField.addItem("1st-Order*");
            orderField.addItem("2nd-Order");
            orderField.addItem("3rd-Order");
            orderField.addItem("4th-Order");
            menuPanel.add(orderField, gc);

            gc.gridx++;

            JButton run = new JButton("Run");
            run.addActionListener(new ActionListener() {
                @Override public void actionPerformed(ActionEvent e) {
                    // Combo index 0 corresponds to a 1st-order chain.
                    final int order = orderField.getSelectedIndex() + 1;

                    final int maxSymbols;
                    try {
                        // decode() is retained so decimal, hex and octal input keep working.
                        maxSymbols = Integer.decode(knownField.getText().trim());
                    } catch (NumberFormatException ex) {
                        reportInvalidSymbolLimit(knownField.getText());
                        return;
                    }
                    if (maxSymbols < 0) {
                        reportInvalidSymbolLimit(knownField.getText());
                        return;
                    }

                    // Deferred so the click finishes being dispatched first. The work
                    // still executes on the event dispatch thread, so a very large
                    // input will block the UI while it is processed.
                    SwingUtilities.invokeLater(new Runnable() {
                        @Override public void run() {
                            RunTextCutup.this.run(maxSymbols, order);
                        }
                    });
                }
            });
            menuPanel.add(run, gc);
        }

        JPanel center = new JPanel(new GridLayout(2, 1));
        center.add(new JScrollPane(inputArea));
        center.add(new JScrollPane(outputArea));

        add(center, BorderLayout.CENTER);
        add(menuPanel, BorderLayout.SOUTH);
    }

    /** Tells the user that the symbol-limit field does not hold a usable number. */
    private void reportInvalidSymbolLimit(String text) {
        JOptionPane.showMessageDialog(
                this,
                "The maximum number of symbols must be a non-negative integer, but was: \"" + text + "\"",
                getName(),
                JOptionPane.ERROR_MESSAGE);
    }

    /**
     * Splits the current content of the input area into lower-case tokens.
     *
     * <p>Delimiter characters are returned as tokens of their own. Every token
     * except a newline is trimmed, and tokens that are empty after trimming
     * (i.e. spaces) are dropped; a newline is kept as the token {@code "\n"}.
     *
     * @return the tokens in input order; never {@code null}
     */
    public List<String> getTokens() {
        List<String> tokens = new ArrayList<String>();
        StringTokenizer st = new StringTokenizer(inputArea.getText(), DELIMITERS, true);
        while (st.hasMoreTokens()) {
            String w = st.nextToken();
            if (!w.equals("\n")) {
                w = w.trim();
            }
            if (w.length() > 0) {
                // Locale.ROOT keeps the result independent of the user's default locale.
                tokens.add(w.toLowerCase(Locale.ROOT));
            }
        }
        return tokens;
    }

    /**
     * Filters a token list down to the tokens that belong to the known vocabulary.
     *
     * @param tokens tokens to filter, typically the result of {@link #getTokens()}
     * @return the known tokens in their original order; empty if no run has
     *         selected a vocabulary yet
     */
    public List<String> getSequence(List<String> tokens) {
        List<String> sequence = new ArrayList<String>();
        for (String w : tokens) {
            if (isKnown(w)) {
                sequence.add(w);
            }
        }
        return sequence;
    }

    /**
     * @param symbol token to look up
     * @return {@code true} if the symbol is part of the vocabulary selected by
     *         the most recent run
     */
    public boolean isKnown(String symbol) {
        return known.contains(symbol);
    }

    /**
     * @param w string to inspect
     * @return {@code true} if {@code w} contains no letter or digit (this
     *         includes the empty string)
     */
    public static boolean isEntirelyPunctuation(String w) {
        for (char c : w.toCharArray()) {
            if (Character.isLetterOrDigit(c)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Runs one full cut-up cycle: tokenize the input, pick the vocabulary,
     * train the Markov chain evidence and write generated text to the output area.
     *
     * @param maxSymbols upper bound on the vocabulary size; negative values are treated as 0
     * @param order      order passed to the Markov chain evidence
     */
    protected void run(int maxSymbols, int order) {
        // NOTE(review): the meaning of the boolean flag is defined by the dANN
        // library and is not visible here - confirm against its documentation.
        SimpleMarkovChainEvidence<String> sme = new SimpleMarkovChainEvidence<String>(true, order);

        List<String> tokens = getTokens();
        knownCount = countOccurrences(tokens);
        known = selectStrongest(maxSymbols);

        long learnStart = System.nanoTime();
        List<String> seq = getSequence(tokens);
        for (String s : seq) {
            sme.learnStep(s);
            //if (s.equals(".") || (s.equals("?")) || (s.equals("!")))
            //sme.newChain();
        }
        double learnTime = (System.nanoTime() - learnStart) / 1e9;
        System.out.println("Learning: " + learnTime + " #: " + seq.size());

        outputArea.setText(generate(sme));
    }

    /** Counts how often each token occurs in the given list. */
    private static Map<String, Integer> countOccurrences(List<String> tokens) {
        Map<String, Integer> counts = new HashMap<String, Integer>();
        for (String s : tokens) {
            Integer previous = counts.get(s);
            counts.put(s, previous == null ? 1 : previous + 1);
        }
        return counts;
    }

    /** Returns the (at most) {@code maxSymbols} counted tokens with the highest strength. */
    private Set<String> selectStrongest(int maxSymbols) {
        List<String> sortedKnown = new ArrayList<String>(knownCount.keySet());
        Collections.sort(sortedKnown, new Comparator<String>() {
            @Override public int compare(String o1, String o2) {
                // Strongest first; ties are broken alphabetically so that the
                // cut-off does not depend on HashMap iteration order.
                int byStrength = Double.compare(getStrength(o2), getStrength(o1));
                return byStrength != 0 ? byStrength : o1.compareTo(o2);
            }
        });
        // Clamp to [0, size] so a negative limit cannot make subList throw.
        int n = Math.max(0, Math.min(sortedKnown.size(), maxSymbols));
        return new HashSet<String>(sortedKnown.subList(0, n));
    }

    /**
     * Builds the Markov chain from the evidence and generates the output text.
     * Any exception raised while doing so is appended to the returned text
     * instead of being propagated.
     */
    private String generate(SimpleMarkovChainEvidence<String> sme) {
        StringBuilder output = new StringBuilder();
        try {
            long markovStart = System.nanoTime();
            //TODO the cast will be unnecessary when MarkovChain supports weighted transition generation
            SimpleMarkovChain<String> mc = (SimpleMarkovChain) sme.getMarkovChain();
            double markovChainGenerationTime = (System.nanoTime() - markovStart) / 1e9;
            System.out.println("Markov Chain: " + markovChainGenerationTime);
            System.out.println("#" + mc.getStates().size());

            for (int i = 0; i < GENERATED_SYMBOLS; i++) {
                String nextSymbol = mc.generateTransition();
                if (nextSymbol.equals(" ")) {
                    output.append(" ");
                } else {
                    output.append(nextSymbol).append(" ");
                }
            }
        } catch (Exception e) {
            // Boundary catch: show the failure to the user next to whatever
            // text was generated so far, and keep the stack trace on stderr.
            output.append("\n").append(e.toString());
            e.printStackTrace();
        }
        return output.toString();
    }

    /**
     * Ranks a symbol for vocabulary selection: its occurrence count, doubled
     * when the symbol is longer than four characters.
     *
     * @param symbol token to rank
     * @return the strength, or 0 if the symbol was not counted in the most recent run
     */
    public double getStrength(String symbol) {
        Integer count = knownCount.get(symbol);
        if (count == null) {
            return 0.0;
        }
        double sizeBoost = symbol.length() > 4 ? 2.0 : 1.0;
        return count * sizeBoost;
    }

    /** @return this panel itself */
    @Override
    public JPanel newPanel() {
        return this;
    }

    @Override
    public String getName() {
        return "Text Cut-Up";
    }

    @Override
    public String getDescription() {
        return "..";
    }

    /**
     * Stand-alone entry point: applies the system look and feel and hands a
     * new panel to a {@link SwingWindow}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        SwingUtilities.invokeLater(new Runnable() {
            @Override
            public void run() {
                try {
                    UIManager.setLookAndFeel(UIManager.getSystemLookAndFeelClassName());
                } catch (Exception ex) {
                    // Best effort: fall back to the default look and feel.
                    System.err.println(ex);
                }
                // The window reference itself is not needed afterwards.
                new SwingWindow(new RunTextCutup().newPanel(), 900, 800, true);
            }
        });
    }
}