/*
* Copyright 2004-2010 Information & Software Engineering Group (188/1)
* Institute of Software Technology and Interactive Systems
* Vienna University of Technology, Austria
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.ifs.tuwien.ac.at/dm/somtoolbox/license.html
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package at.tuwien.ifs.somtoolbox.apps.helper;
import java.io.IOException;
import java.util.ArrayList;
import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.TableColumn;
import org.htmlparser.tags.TableRow;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import com.martiansoftware.jsap.JSAPResult;
import at.tuwien.ifs.somtoolbox.apps.config.OptionFactory;
import at.tuwien.ifs.somtoolbox.input.SOMLibFileFormatException;
import at.tuwien.ifs.somtoolbox.layers.Layer.GridLayout;
import at.tuwien.ifs.somtoolbox.layers.Layer.GridTopology;
import at.tuwien.ifs.somtoolbox.output.SOMLibMapOutputter;
import at.tuwien.ifs.somtoolbox.properties.PropertiesException;
/**
* Reads an HTML representation of a SOM, and writes SOMLib unit, weight and map description files for it. Tested with
* http://www.ifs.tuwien.ac.at/~andi/somlib/data/time60/times_000_2.html; it might not work for newer HTML representations.
*
* @author Rudolf Mayer
* @version $Id: HTMLMapReader.java 3589 2010-05-21 10:42:01Z mayer $
*/
public class HTMLMapReader {

    /**
     * Command line entry point. Parses the first {@code <table>} of the given HTML file, treats every table row as a
     * row of map units and every {@code <td>} cell as one unit, and uses the link texts found in a cell (wrapped in
     * the optional prefix and suffix) as the labels of that unit. The result is handed to {@link SOMLibMapOutputter}
     * to write the unit description, weight vector and map description files.
     * <p>
     * Rows that contain no {@code <td>} cell at all (e.g. header rows) are skipped. The map width is the width of the
     * widest row; rows with fewer cells are padded with units that carry no labels.
     * 
     * @param args the command line arguments: input file name, output file name, and optionally a file name prefix
     *            and suffix that are put around every label
     * @throws ParserException propagated from the HTML parser
     * @throws IOException if the input contains no table or the table contains no data cells; also propagated from
     *             the called output routines
     * @throws SOMLibFileFormatException propagated from the called SOMToolbox routines
     * @throws PropertiesException propagated from the called SOMToolbox routines
     */
    public static void main(String[] args) throws ParserException, IOException, SOMLibFileFormatException,
            PropertiesException {
        // register and parse all options
        JSAPResult config = OptionFactory.parseResults(args, OptionFactory.getOptInputFileName(true),
                OptionFactory.getOptOutputFileName(true), OptionFactory.getOptFileNamePrefix(false),
                OptionFactory.getOptFileNameSuffix(false));
        String inputFile = config.getString("inputFile");
        String outputFileName = config.getString("output");
        String prefix = config.getString("fileNamePrefix", "");
        String suffix = config.getString("fileNameSuffix", "");

        Parser parser = new Parser();
        parser.setResource(inputFile);
        final NodeList tables = parser.parse(new TagNameFilter("table"));
        if (tables.size() == 0) {
            // fail with a clear message instead of a NullPointerException further down
            throw new IOException("No <table> element found in '" + inputFile + "'");
        }

        // only the first table of the document is considered
        final ArrayList<ArrayList<ArrayList<String>>> rows = readRows(tables.elementAt(0), prefix, suffix);
        if (rows.isEmpty()) {
            throw new IOException("The first table in '" + inputFile + "' contains no data cells");
        }

        // the widest row defines the map width, so a ragged table neither crashes nor loses cells
        final int ySize = rows.size();
        int xSize = 0;
        for (ArrayList<ArrayList<String>> row : rows) {
            xSize = Math.max(xSize, row.size());
        }

        // labels and weights are indexed [column][row]
        String[][][] labels = new String[xSize][ySize][];
        System.out.println("Map size: " + labels.length + "x" + labels[0].length);
        double[][][][] weights = new double[xSize][ySize][1][];
        int numVectors = 0;
        for (int y = 0; y < ySize; y++) {
            final ArrayList<ArrayList<String>> row = rows.get(y);
            for (int x = 0; x < xSize; x++) {
                if (x < row.size()) {
                    final ArrayList<String> cell = row.get(x);
                    labels[x][y] = cell.toArray(new String[cell.size()]);
                } else {
                    labels[x][y] = new String[0]; // row is shorter than the widest one: unit without labels
                }
                // count exactly the labels that end up in the unit file
                numVectors += labels[x][y].length;
                weights[x][y][0] = new double[] { 1, 2 }; // some dummy weightvector...
            }
        }

        String fDir = "."; // presumably the output directory, i.e. the current working directory - TODO confirm
        SOMLibMapOutputter.writeUnitDescriptionFile(labels, GridLayout.rectangular, GridTopology.planar, fDir,
                outputFileName, false);
        SOMLibMapOutputter.writeWeightVectorFile(weights, GridLayout.rectangular, GridTopology.planar, fDir,
                outputFileName, false);
        SOMLibMapOutputter.writeMapDescriptionFile(labels.length, labels[0].length, 1, weights[0][0][0].length,
                numVectors, fDir, outputFileName);
    }

    /**
     * Collects the cells of all {@link TableRow} children of the given table node.
     * 
     * @param table the table node to read
     * @param prefix text put in front of every label
     * @param suffix text appended to every label
     * @return one entry per table row that has at least one {@code <td>} cell; empty if there is none
     */
    private static ArrayList<ArrayList<ArrayList<String>>> readRows(Node table, String prefix, String suffix) {
        final ArrayList<ArrayList<ArrayList<String>>> rows = new ArrayList<ArrayList<ArrayList<String>>>();
        final NodeList children = table.getChildren(); // null if the table has no children
        for (int i = 0; children != null && i < children.size(); i++) {
            final Node child = children.elementAt(i);
            if (child instanceof TableRow) {
                final ArrayList<ArrayList<String>> row = readCells(child, prefix, suffix);
                if (!row.isEmpty()) {
                    rows.add(row);
                }
            }
        }
        return rows;
    }

    /**
     * Collects the labels of all {@link TableColumn} children of the given table row.
     * 
     * @param tableRow the row node to read
     * @param prefix text put in front of every label
     * @param suffix text appended to every label
     * @return one label list per {@code <td>} cell, in document order; empty if the row has no such cell
     */
    private static ArrayList<ArrayList<String>> readCells(Node tableRow, String prefix, String suffix) {
        final ArrayList<ArrayList<String>> cells = new ArrayList<ArrayList<String>>();
        final NodeList children = tableRow.getChildren();
        for (int i = 0; children != null && i < children.size(); i++) {
            final Node child = children.elementAt(i);
            if (child instanceof TableColumn) {
                cells.add(readLabels(child, prefix, suffix));
            }
        }
        return cells;
    }

    /**
     * Collects the link texts of all {@link LinkTag} children of the given table cell.
     * 
     * @param tableCell the cell node to read
     * @param prefix text put in front of every label
     * @param suffix text appended to every label
     * @return the labels of the cell, each built as prefix + link text + suffix; empty if the cell has no links
     */
    private static ArrayList<String> readLabels(Node tableCell, String prefix, String suffix) {
        final ArrayList<String> cellLabels = new ArrayList<String>();
        final NodeList children = tableCell.getChildren();
        for (int i = 0; children != null && i < children.size(); i++) {
            final Node child = children.elementAt(i);
            if (child instanceof LinkTag) {
                cellLabels.add(prefix + ((LinkTag) child).getLinkText() + suffix);
            }
        }
        return cellLabels;
    }
}