package ini.trakem2.tree;
import ini.trakem2.persistence.FSLoader;
import ini.trakem2.utils.IJError;
import ini.trakem2.utils.Utils;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.zip.GZIPInputStream;
/** Reads either a DOCTYPE chunk from an xml file or a .dtd file directly. */
public class DTDParser {
private DTDParser() {}
/**
 * Extracts the template by reading the ELEMENT and ATTLIST tags from a .dtd file
 * or from the DOCTYPE of an .xml (optionally gzipped, .xml.gz) file.
 *
 * @param path file path or URL; the parser is chosen from its extension.
 * @return whatever the chosen parser returns, or null when the path is null or
 *         does not end in .xml, .xml.gz or .dtd.
 * @throws Exception propagated from the chosen parser.
 */
static public TemplateThing[] extractTemplate(String path) throws Exception {
	if (null == path) return null;
	if (path.endsWith(".xml") || path.endsWith(".xml.gz")) return parseXMLFile(path);
	// endsWith, not "length() - 4 == lastIndexOf(".dtd")": for a 3-character path
	// lacking ".dtd" both sides of that comparison are -1 and it wrongly matches.
	if (path.endsWith(".dtd")) return parseDTDFile(path);
	return null;
}
/**
 * Parses the tags of a .dtd file. Returns the TemplateThing roots.
 *
 * The file is read line by line and the lines are concatenated without any
 * separator before being handed to parseDTD.
 * NOTE(review): a declaration broken across lines right after a keyword would
 * therefore be glued together; confirm whether a separator should be inserted.
 *
 * @param dtd_path a URL (as judged by FSLoader.isURL) or a local file path.
 * @return the result of parseDTD, or null when the local file does not exist.
 * @throws Exception propagated from parseDTD. Read errors are not rethrown:
 *         they are reported with IJError.print and whatever was read so far is still parsed.
 */
static public TemplateThing[] parseDTDFile(String dtd_path) throws Exception {
	InputStream i_stream = null;
	BufferedReader dis = null;
	final StringBuilder data = new StringBuilder();
	try {
		if (FSLoader.isURL(dtd_path)) {
			i_stream = new java.net.URL(dtd_path).openStream();
		} else {
			File f = new File(dtd_path);
			if (!f.exists()) return null;
			i_stream = new FileInputStream(dtd_path);
		}
		dis = new BufferedReader(new InputStreamReader(i_stream));
		String tmp;
		while (null != (tmp = dis.readLine())) {
			data.append(tmp);
		}
	} catch (Exception e) {
		IJError.print(e);
	} finally {
		try {
			// The reader is null when the file is missing or opening failed:
			// closing it unconditionally would only log a spurious NullPointerException.
			if (null != dis) dis.close();
			else if (null != i_stream) i_stream.close(); // reader never created: release the raw stream
		} catch (Exception e) { IJError.print(e); }
	}
	return parseDTD(data);
}
/**
 * Parses a !DOCTYPE chunk from an .xml file, if any. Returns the TemplateThing roots.
 * Assumes there is only one continuous DOCTYPE clause and the root template thing,
 * the layer_set and the display are part of the project tag.
 *
 * Only the text between the first '[' found at or after the DOCTYPE line and the
 * next ']' is collected; lines are concatenated without separators.
 *
 * @param xml_path a URL (as judged by FSLoader.isURL) or a local file path; a path
 *                 ending in ".xml.gz" is read through a GZIPInputStream.
 * @return the result of parseDTD on the collected text, or null when the local file
 *         does not exist or no DOCTYPE content was collected.
 * @throws Exception propagated from parseDTD. Read errors are reported with
 *         IJError.print and whatever was collected so far is still parsed.
 */
static public TemplateThing[] parseXMLFile(String xml_path) throws Exception {
	InputStream i_stream = null;
	BufferedReader dis = null;
	final StringBuilder data = new StringBuilder();
	try {
		if (FSLoader.isURL(xml_path)) {
			i_stream = new java.net.URL(xml_path).openStream();
		} else {
			File f = new File(xml_path);
			if (!f.exists()) return null;
			i_stream = new FileInputStream(xml_path);
		}
		if (xml_path.endsWith(".xml.gz")) i_stream = new GZIPInputStream(i_stream);
		dis = new BufferedReader(new InputStreamReader(i_stream));
		String tmp;
		while (null != (tmp = dis.readLine())) {
			if (-1 == tmp.indexOf("<!DOCTYPE ")) continue;
			// start listening: advance lines until finding an opening bracket
			int i_open = -1;
			while (null != tmp && -1 == (i_open = tmp.indexOf('['))) {
				tmp = dis.readLine();
			}
			if (null == tmp) break; // oops: end of file without an opening bracket
			// found. Collect everything between both brackets
			String st = tmp.substring(i_open +1);
			int i_close = st.indexOf(']');
			if (-1 != i_close) {
				// both brackets are on the same line
				st = st.substring(0, i_close).trim();
				if (st.length() > 0) data.append(st);
				break;
			}
			st = st.trim();
			if (st.length() > 0) data.append(st);
			while (null != (tmp = dis.readLine()) && -1 == (i_close = tmp.indexOf(']'))) {
				data.append(tmp);
			}
			if (null == tmp) {
				// end of file before the closing bracket: keep what was collected
				Utils.log("WARNING: reached the end of the file before the closing ']' of the DOCTYPE.");
			} else {
				// get in last line
				st = tmp.substring(0, i_close).trim();
				if (st.length() > 0) data.append(st);
			}
			// done!
			break;
		}
	} catch (Exception e) {
		IJError.print(e);
	} finally {
		try {
			// The reader is null when the file is missing or opening failed:
			// closing it unconditionally would only log a spurious NullPointerException.
			if (null != dis) dis.close();
			else if (null != i_stream) i_stream.close(); // reader never created: release the raw stream
		} catch (Exception e) { IJError.print(e); }
	}
	if (0 == data.length()) return null;
	return parseDTD(data);
}
/**
 * One ATTLIST declaration, reduced to its first two words.
 * Two instances are equal when their names are equal, regardless of type.
 */
static private class Attribute {
	/** First word of the declaration (used by parseDTD as the owning element type) and second word (the attribute name). */
	String type, name;
	/**
	 * Parses itself out of the text that follows "!ATTLIST " in a declaration.
	 *
	 * @param chunk the declaration text; it is passed through Utils.cleanString and split on single spaces.
	 * @throws IllegalArgumentException when the cleaned text has fewer than two words.
	 */
	Attribute(String chunk) {
		chunk = Utils.cleanString(chunk);
		String[] words = chunk.split(" ");
		if (words.length < 2) {
			// fail with a readable message rather than an ArrayIndexOutOfBoundsException
			throw new IllegalArgumentException("Malformed ATTLIST declaration in the DTD: '" + chunk + "'");
		}
		this.type = words[0];
		this.name = words[1];
		// words 3 and 4 are deliberately not stored; anything further is reported
		if (words.length > 4) Utils.log("WARNING: ignoring past the 4th word in the DTD: " + words[4] + " ... ");
	}
	@Override
	public boolean equals(Object ob) {
		return ob instanceof Attribute && ((Attribute)ob).name.equals(this.name);
	}
	/** Consistent with equals: derived from the name only. */
	@Override
	public int hashCode() {
		return name.hashCode();
	}
}
/** One ELEMENT declaration: the element name plus the names and occurrence suffixes of its declared children. */
static private class Type {
	/** Element name, lowercased. */
	String name;
	/** Child names as listed between the parentheses, without their occurrence suffix; null when there are no parentheses. */
	String[] children = null;
	/** For each child, its occurrence suffix ("?", "*" or "+"), or null when it has none. Parallel to children. */
	String[] limits = null;
	/**
	 * Parses itself out of a chunk of text between '<' and '>', with the leading "!ELEMENT " already removed.
	 *
	 * @param chunk the declaration text; it is passed through Utils.cleanString first.
	 * @throws IllegalArgumentException when an opening parenthesis has no matching closing one.
	 */
	Type(String chunk) {
		chunk = Utils.cleanString(chunk);
		// first word is the type
		int i = chunk.indexOf(' ');
		if (-1 == i) {
			// name only, nothing after it: a type without children
			this.name = chunk.toLowerCase(java.util.Locale.ENGLISH);
			return;
		}
		// types are ALWAYS lowercase. Fixed locale, so that e.g. 'I' does not become a dotless i under a Turkish default locale.
		this.name = chunk.substring(0, i).toLowerCase(java.util.Locale.ENGLISH);
		chunk = chunk.substring(i+1);
		i = chunk.indexOf('(');
		if (-1 == i) return; // contains an EMPTY
		int i_end = chunk.lastIndexOf(')');
		if (i_end < i) {
			throw new IllegalArgumentException("Unbalanced parentheses in the DTD declaration of '" + this.name + "'");
		}
		chunk = chunk.substring(i+1, i_end); // capturing contents of parenthesis
		chunk = chunk.replaceAll(" ", ""); // no spaces allowed inside the parenthesis
		this.children = chunk.split(",");
		this.limits = new String[children.length];
		for (i=0; i<children.length; i++) {
			final String child = children[i];
			// an empty token, as in "()" or "a,,b", has no last character to inspect
			if (0 == child.length()) continue;
			final char c = child.charAt(child.length() - 1);
			switch (c) {
				case '?': // optional
				case '*': // zero or more
				case '+': // one or more
					limits[i] = Character.toString(c);
					children[i] = child.substring(0, child.length() -1);
					break;
				default:
					limits[i] = null;
					break;
			}
		}
	}
	/**
	 * Recursively adds a TemplateThing child to parent for every declared child whose
	 * Type is present in ht_types. Nothing is added when parent.isNested() returns true
	 * or when this type declares no children.
	 *
	 * @param parent   the TemplateThing that receives the children.
	 * @param ht_types type names as keys and their Type as values; child names absent
	 *                 from it are logged and skipped.
	 */
	void createChildren(final TemplateThing parent, final Map<String,DTDParser.Type> ht_types) {
		// create children for it, unless nested
		if (parent.isNested() || null == children) return;
		for (int k=0; k<children.length; k++) {
			final Type ty = ht_types.get(children[k]);
			if (null == ty) {
				Utils.log2("DTDParser: ignoring " + children[k]);
				continue;
			}
			// remove prepended tag if any
			String tyn = ty.name;
			if (tyn.startsWith("t2_")) {
				tyn = tyn.substring(3);
			}
			final TemplateThing child = new TemplateThing(tyn);
			parent.addChild(child);
			ty.createChildren(child, ht_types);
		}
	}
}
/** A named node that knows its parent and holds the set of nodes created as its children. */
static private class TypeNode {
	/** Null until this node is created through addChild of another node. */
	private TypeNode parent;
	private Set<TypeNode> children = new HashSet<TypeNode>();
	private String name;
	TypeNode(final String name) {
		this.name = name;
	}
	/** Creates a new node named cn, registers it as a child of this node and returns it. */
	TypeNode addChild(String cn) {
		final TypeNode kid = new TypeNode(cn);
		kid.parent = this;
		this.children.add(kid);
		return kid;
	}
}
/**
 * Tells whether a type may be part of a template.
 *
 * @param type the type name to test.
 * @return false when the name starts with "t2_" or "ict_transform", or is exactly
 *         "trakem2" or "project"; true otherwise.
 */
static private boolean isAllowed(String type) {
	final boolean internal = type.startsWith("t2_")
		|| "trakem2".equals(type)
		|| "project".equals(type)
		|| type.startsWith("ict_transform");
	return !internal;
}
/**
 * Parses a chunk of text into a hierarchy of TemplateThing instances, the roots of which are in the returned array.
 *
 * Every span between a '<' and the following '>' is examined: "!ELEMENT" spans become
 * Type instances, "!ATTLIST" spans become Attribute instances, anything else is ignored.
 * The root is the single child named in the parentheses of the "project" element; when
 * there is no "project" element (e.g. a plain .dtd file) it is inferred from the
 * declaration order instead.
 *
 * @param data the DTD text.
 * @return a one-element array holding the root, or null when a '>' precedes its '<'
 *         (a message is shown in that case).
 * @throws Exception when the project element is declared twice, lacks a parenthesised
 *         child, lacks the closing parenthesis or lists more than one child, or when
 *         no root element can be determined.
 */
static public TemplateThing[] parseDTD(final StringBuilder data) throws Exception {
	// extract all tags into a hashtable of type names
	final HashMap<String,DTDParser.Type> ht_types = new HashMap<String,DTDParser.Type>();
	final List<DTDParser.Type> types = new ArrayList<DTDParser.Type>(); // sequential, as found in the DTD file
	// attributes are only collected to report duplicates; they are not read afterwards
	final HashMap<String,Map<String,Attribute>> ht_attributes = new HashMap<String,Map<String,Attribute>>();
	final String text = data.toString();
	int i_first = text.indexOf('<');
	int i_last = text.indexOf('>');
	String root_type_name = null;
	while (-1 != i_first && -1 != i_last) {
		// sanity check:
		if (i_last < i_first) {
			Utils.showMessage("Unbalanced '<' and '>' in the DTD document.");
			return null;
		}
		final String chunk = text.substring(i_first +1, i_last);
		final int i_space = chunk.indexOf(' ');
		if (chunk.startsWith("!ELEMENT")) {
			final DTDParser.Type type = new DTDParser.Type(chunk.substring(i_space +1));
			if (isAllowed(type.name)) {
				ht_types.put(type.name, type);
				types.add(type);
			} else if (type.name.equals("project")) {
				if (null != root_type_name) {
					throw new Exception("ERROR in XML file: more than one project template element defined:\n At least: " + root_type_name + " and " + type.name);
				}
				// the root is what the project has in parentheses, which must only be one element
				// (given that the TemplateTree has a single root)
				final int openp = chunk.indexOf('(');
				if (-1 == openp) {
					throw new Exception("ERROR in XML file: project template doesn't have a child element!");
				}
				final int closep = chunk.indexOf(')', openp +1);
				if (-1 == closep) {
					// without this check substring(openp+1, -1) fails with an opaque index error
					throw new Exception("ERROR in XML file: project template is missing a closing parenthesis!");
				}
				root_type_name = chunk.substring(openp+1, closep).trim();
				if (-1 != root_type_name.indexOf(',')) {
					throw new Exception("ERROR in XML file: project template has more than one child element!");
				}
			}
		} else if (chunk.startsWith("!ATTLIST")) {
			final DTDParser.Attribute attr = new DTDParser.Attribute(chunk.substring(i_space +1));
			if (isAllowed(attr.type)) {
				Map<String,Attribute> oht = ht_attributes.get(attr.type);
				if (null == oht) {
					oht = new HashMap<String,Attribute>();
					ht_attributes.put(attr.type, oht);
				}
				if (oht.containsKey(attr.name)) {
					Utils.log("Parsing DTD: already have attribute " + attr.name + " for type " + attr.type);
				} else {
					oht.put(attr.name, attr);
				}
			}
		} // else ignore
		i_first = text.indexOf('<', i_last +1);
		i_last = text.indexOf('>', i_last +1);
	}
	// Now traverse the hash tables and reconstruct the hierarchy of TemplateThing.
	if (null == root_type_name) {
		// Can happen when reading a .dtd file instead of extracting the dtd from an XML file
		root_type_name = findRootInDeclarationOrder(types);
	}
	if (null == root_type_name) {
		throw new Exception("ERROR in XML file: could not find the root element!");
	}
	// find root_type as a Type instance
	final DTDParser.Type root_type = ht_types.get(root_type_name);
	if (null == root_type) {
		throw new Exception("ERROR in XML file: could not find the root element DTDParser.Type instance!");
	}
	// The root is the one and only element of the project node
	final TemplateThing root = new TemplateThing(root_type_name);
	root_type.createChildren(root, ht_types); // avoids nested
	return new TemplateThing[]{root};
}
/**
 * Infers the root type name from the order in which the types were declared: the first
 * type that had not yet been listed as a child of an earlier type when it was declared.
 * Any further such type is logged as a second root and otherwise ignored.
 * NOTE(review): a DTD that declares children before their parents would therefore
 * yield a leaf as the root; confirm that declarations are always top-down.
 *
 * @param types the allowed types, in declaration order.
 * @return the inferred root type name, or null when types is empty.
 */
static private String findRootInDeclarationOrder(final List<DTDParser.Type> types) {
	// a Map of the last created node with that name (there could be more than one, so NOT all TypeNode instances will be contained in the Map).
	final Map<String,TypeNode> nodes = new HashMap<String,TypeNode>();
	final List<TypeNode> seqnodes = new ArrayList<TypeNode>(); // sequential, as found in dtd file
	for (final DTDParser.Type type : types) {
		TypeNode tn = nodes.get(type.name);
		if (null == tn) {
			// Create a new node with, for now, a null parent
			tn = new TypeNode(type.name);
			nodes.put(type.name, tn);
			// Add it as an ELEMENT declaration
			seqnodes.add(tn);
		}
		if (tn.children.isEmpty() && null != type.children) {
			for (final String child : type.children) {
				nodes.put(child, tn.addChild(child));
			}
		}
	}
	String root_type_name = null;
	for (final TypeNode node : seqnodes) {
		if (null != node.parent) continue;
		if (null != root_type_name) {
			Utils.log("WARNING found second DTD root: " + node.name);
		} else {
			Utils.log2("Found DTD root: " + node.name);
			root_type_name = node.name;
		}
	}
	return root_type_name;
}
/**
 * Command-line entry point: parses the file given as the first argument and discards
 * the result. Paths ending in .xml or .xml.gz go through parseXMLFile, anything else
 * through parseDTDFile. Exceptions are reported with IJError.print.
 *
 * @param args the first element is the path or URL to parse; the rest are ignored.
 */
static public void main(String[] args) {
	if (null == args || 0 == args.length) {
		System.err.println("Usage: DTDParser <path to an .xml, .xml.gz or .dtd file>");
		return;
	}
	try {
		final String path = args[0];
		// endsWith, not "length() - 4 == indexOf(".xml")": indexOf finds the first
		// occurrence, which is not the extension when ".xml" also appears earlier in the path.
		if (path.endsWith(".xml") || path.endsWith(".xml.gz")) {
			DTDParser.parseXMLFile(path);
		} else {
			DTDParser.parseDTDFile(path);
		}
	} catch (Exception e) { IJError.print(e); }
}
}