DTDParser.java example

Explorer
TrakEM2-master

package ini.trakem2.tree;

import ini.trakem2.persistence.FSLoader;
import ini.trakem2.utils.IJError;
import ini.trakem2.utils.Utils;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.zip.GZIPInputStream;

/** Reads either a DOCTYPE chunk from an xml file or a .dtd file directly. */
public class DTDParser {

	/** Not instantiable: every method and nested class visible in this class is static. */
	private DTDParser() {}

	/**
	 * Extracts the template by reading the ELEMENT and ATTLIST tags from a .dtd file
	 * or from the DOCTYPE of an .xml (optionally gzipped, .xml.gz) file.
	 * The choice of parser depends only on the suffix of the path.
	 *
	 * @param path file path or URL to read from.
	 * @return the result of {@link #parseXMLFile(String)} or {@link #parseDTDFile(String)},
	 *         or null when the path is null or has none of the recognized suffixes.
	 * @throws Exception whatever the chosen parsing method throws.
	 */
	static public TemplateThing[] extractTemplate(String path) throws Exception {
		if (null == path) return null;
		if (path.endsWith(".xml") || path.endsWith(".xml.gz")) return parseXMLFile(path);
		// endsWith rather than comparing length()-4 with lastIndexOf(".dtd"):
		// that comparison was also true (-1 == -1) for any 3-character path lacking ".dtd".
		if (path.endsWith(".dtd")) return parseDTDFile(path);
		return null;
	}

	/**
	 * Parses the tags of a .dtd file. Returns the TemplateThing roots.
	 * Read errors are reported through IJError.print and whatever text was read up to
	 * that point is still handed to {@link #parseDTD(StringBuilder)}.
	 *
	 * @param dtd_path local file path or URL (as decided by FSLoader.isURL) of the .dtd file.
	 * @return the roots returned by parseDTD, or null when the path is a local file that does not exist.
	 * @throws Exception whatever parseDTD throws.
	 */
	static public TemplateThing[] parseDTDFile(String dtd_path) throws Exception {
		InputStream i_stream = null;
		BufferedReader dis = null;
		final StringBuilder data = new StringBuilder();
		try {
			if (FSLoader.isURL(dtd_path)) {
				i_stream = new java.net.URL(dtd_path).openStream();
			} else {
				if (!new File(dtd_path).exists()) return null;
				i_stream = new FileInputStream(dtd_path);
			}
			dis = new BufferedReader(new InputStreamReader(i_stream));
			String line;
			while (null != (line = dis.readLine())) {
				// readLine strips the line terminator: put a space back so that a declaration
				// split across lines does not get its last and first words fused together.
				data.append(line).append(' ');
			}
		} catch (Exception e) {
			IJError.print(e);
		} finally {
			try {
				// dis is still null when the file was missing or the stream failed to open;
				// in the latter case close the raw stream, if any, so it does not leak.
				if (null != dis) dis.close();
				else if (null != i_stream) i_stream.close();
			} catch (Exception e) { IJError.print(e); }
		}

		return parseDTD(data);
	}

	/**
	 * Parses a !DOCTYPE chunk from an .xml file, if any. Returns the TemplateThing roots.
	 * Assumes there is only one continuous DOCTYPE clause and the root template thing,
	 * the layer_set and the display are part of the project tag.
	 * Only the text between the first '[' found at or after the DOCTYPE line and the next ']'
	 * is collected; files whose path ends in ".xml.gz" are read through a GZIPInputStream.
	 * Read errors are reported through IJError.print and whatever text was collected up to
	 * that point is still parsed.
	 *
	 * @param xml_path local file path or URL (as decided by FSLoader.isURL) of the XML file.
	 * @return the roots returned by {@link #parseDTD(StringBuilder)}, or null when the path is a
	 *         local file that does not exist or when no DOCTYPE content could be collected.
	 * @throws Exception whatever parseDTD throws.
	 */
	static public TemplateThing[] parseXMLFile(String xml_path) throws Exception {
		InputStream i_stream = null;
		BufferedReader dis = null;
		final StringBuilder data = new StringBuilder();
		try {
			if (FSLoader.isURL(xml_path)) {
				i_stream = new java.net.URL(xml_path).openStream();
			} else {
				if (!new File(xml_path).exists()) return null;
				i_stream = new FileInputStream(xml_path);
			}
			// If the GZIP wrapper fails to construct, i_stream still points to the raw stream,
			// which then gets closed in the finally clause.
			if (xml_path.endsWith(".xml.gz")) i_stream = new GZIPInputStream(i_stream);
			dis = new BufferedReader(new InputStreamReader(i_stream));
			String tmp;
			while (null != (tmp = dis.readLine())) {
				if (-1 == tmp.indexOf("<!DOCTYPE ")) continue;
				// start listening: advance lines until finding an opening bracket
				int i_open = tmp.indexOf('[');
				while (-1 == i_open && null != (tmp = dis.readLine())) {
					i_open = tmp.indexOf('[');
				}
				if (null == tmp || -1 == i_open) break; // reached the end without any '['
				// found. Collect everything between both brackets,
				// which may well sit on the very same line.
				String piece = tmp.substring(i_open +1);
				int i_close;
				while (null != piece && -1 == (i_close = piece.indexOf(']'))) {
					final String st = piece.trim();
					// The trailing space keeps a declaration that spans several lines
					// from having its words fused, since readLine drops the line terminator.
					if (st.length() > 0) data.append(st).append(' ');
					piece = dis.readLine();
				}
				if (null == piece) {
					// End of file before the closing bracket: keep what was collected so far.
					Utils.log("WARNING: reached the end of " + xml_path + " before finding the closing ']' of the DOCTYPE.");
				} else {
					// get in last line, up to the closing bracket
					final String st = piece.substring(0, piece.indexOf(']')).trim();
					if (st.length() > 0) data.append(st);
				}
				// done!
				break;
			}
		} catch (Exception e) {
			IJError.print(e);
		} finally {
			try {
				// dis is still null when the file was missing or a stream failed to open.
				if (null != dis) dis.close();
				else if (null != i_stream) i_stream.close();
			} catch (Exception e) { IJError.print(e); }
		}

		if (0 == data.length()) return null;

		return parseDTD(data);
	}

	/**
	 * The first two words of an !ATTLIST declaration: the first is stored as {@link #type}
	 * (used by parseDTD as the key under which attributes are grouped) and the second as {@link #name}.
	 * Two instances are equal when their names are equal, regardless of the type.
	 */
	static private class Attribute {
		String type, name;

		/**
		 * @param chunk the text of the declaration after the "!ATTLIST" keyword.
		 * @throws IllegalArgumentException when the chunk has fewer than two space-separated words.
		 */
		Attribute(String chunk) {
			chunk = Utils.cleanString(chunk);
			String[] words = chunk.split(" ");
			if (words.length < 2) {
				// Report what was wrong rather than failing with a bare array index error
				throw new IllegalArgumentException("Malformed ATTLIST declaration in the DTD: '" + chunk + "'");
			}
			this.type = words[0];
			this.name = words[1];
			if (words.length > 4) Utils.log("WARNING: ignoring past the 4th word in the DTD: " + words[4] + " ... ");
		}

		/** Equality is by attribute name only. */
		@Override
		public boolean equals(Object ob) {
			return ob instanceof Attribute && ((Attribute)ob).name.equals(this.name);
		}

		/** Consistent with {@link #equals(Object)}: derived from the name only. */
		@Override
		public int hashCode() {
			return name.hashCode();
		}
	}

	/**
	 * An !ELEMENT declaration: the lowercased element name, the names of the children listed
	 * within its parentheses, and the occurrence indicator ('?', '*' or '+') of each child, if any.
	 */
	static private class Type {
		String name;
		/** Child names without occurrence indicator; null when the declaration has no parentheses or they are empty. */
		String[] children = null;
		/** Parallel to {@link #children}: "?", "*", "+" or null for each child. */
		String[] limits = null;

		/**
		 * Parses itself out of a chunk of text between '<' and '>'.
		 *
		 * @param chunk the text of the declaration after the "!ELEMENT" keyword.
		 */
		Type(String chunk) {
			chunk = Utils.cleanString(chunk);
			// first word is the type
			int i = chunk.indexOf(' ');
			if (-1 == i) {
				// Nothing follows the name: a type without children, rather than a substring failure
				this.name = chunk.toLowerCase();
				return;
			}
			this.name = chunk.substring(0, i).toLowerCase(); // types are ALWAYS lowercase. I need no more headaches.
			// keep only what follows the name
			chunk = chunk.substring(i+1);
			i = chunk.indexOf('(');
			if (-1 == i) return; // contains an EMPTY
			int i_end = chunk.lastIndexOf(')');
			if (i_end < i) i_end = chunk.length(); // unclosed parenthesis: take all that follows it
			// capturing contents of parenthesis; no spaces allowed inside
			chunk = chunk.substring(i+1, i_end).replaceAll(" ", "");
			if (0 == chunk.length()) return; // "()" lists no children
			this.children = chunk.split(",");
			this.limits = new String[children.length];
			for (i=0; i<children.length; i++) {
				final String child = children[i];
				// An empty name, as in "a,,b", has no last character to look at: its limit stays null
				if (0 == child.length()) continue;
				final char c = child.charAt(child.length() - 1);
				if ('?' == c || '*' == c || '+' == c) {
					// optional, zero or more, one or more
					limits[i] = Character.toString(c);
					children[i] = child.substring(0, child.length() -1);
				}
			}
		}

		/**
		 * Recursively adds to the parent a TemplateThing for each child type found in ht_types,
		 * except when parent.isNested() returns true or this type has no children.
		 * Child names absent from ht_types are logged and skipped, and a leading "t2_"
		 * is removed from the name given to the child TemplateThing.
		 *
		 * @param parent the TemplateThing that represents this type.
		 * @param ht_types the table of type names versus Type instances.
		 */
		void createChildren(final TemplateThing parent, final Map<String,DTDParser.Type> ht_types) {
			if (parent.isNested() || null == children) return;

			for (int k=0; k<children.length; k++) {
				final Type ty = ht_types.get(children[k]);
				if (null == ty) {
					Utils.log2("DTDParser: ignoring " + children[k]);
					continue;
				}
				// remove prepended tag if any
				final String tyn = ty.name.startsWith("t2_") ? ty.name.substring(3) : ty.name;
				final TemplateThing child = new TemplateThing(tyn);
				parent.addChild(child);
				ty.createChildren(child, ht_types);
			}
		}
	}

	/** A named node that knows its parent node, if any, and holds the set of its child nodes. */
	static private class TypeNode {
		private String name;
		private TypeNode parent;
		private Set<TypeNode> children = new HashSet<TypeNode>();

		/** Creates a node with the given name, no parent and no children. */
		TypeNode(final String name) {
			this.name = name;
		}

		/** Creates a new node named cn, sets this node as its parent, stores it among the children and returns it. */
		TypeNode addChild(String cn) {
			final TypeNode created = new TypeNode(cn);
			this.children.add(created);
			created.parent = this;
			return created;
		}
	}

	/**
	 * Tells whether a type may take part in a template: returns false for the types
	 * internal to TrakEM2, which are those whose name starts with "t2_" or with
	 * "ict_transform", and those named exactly "trakem2" or "project"; true otherwise.
	 */
	static private boolean isAllowed(String type) {
		final boolean internal =
			   type.startsWith("t2_")
			|| type.startsWith("ict_transform")
			|| "trakem2".equals(type)
			|| "project".equals(type);
		return !internal;
	}

	/**
	 * Parses a chunk of text into a hierarchy of TemplateThing instances, the roots of which are in the returned array.
	 * <p>
	 * Every chunk of text between a '<' and the next '>' is examined: !ELEMENT declarations become
	 * Type instances (those rejected by isAllowed are dropped, except that the "project" declaration
	 * names the root element within its parentheses); !ATTLIST declarations are collected only to
	 * report duplicated attribute names, and do not influence the returned tree; anything else is ignored.
	 * When no "project" declaration is present, the root is the first allowed element that no
	 * earlier-processed declaration lists as a child.
	 *
	 * @param data the DTD text.
	 * @return an array with the single root TemplateThing, or null when a '>' is found ahead of its '<'.
	 * @throws Exception when there is more than one project declaration, when the project declaration
	 *         lacks its parentheses or lists more than one child, or when the root element cannot be found.
	 */
	static public TemplateThing[] parseDTD(final StringBuilder data) throws Exception {
		// extract all tags into a hashtable of type names
		final HashMap<String,DTDParser.Type> ht_types = new HashMap<String,DTDParser.Type>();
		final List<DTDParser.Type> types = new ArrayList<DTDParser.Type>(); // sequential, as found in the DTD file
		final HashMap<String,Map<String,Attribute>> ht_attributes = new HashMap<String,Map<String,Attribute>>();
		final String text = data.toString();
		int i_first = text.indexOf('<');
		int i_last = text.indexOf('>');
		int i_space;
		String root_type_name = null;

		while (-1 != i_first && -1 != i_last) {
			// sanity check:
			if (i_last < i_first) {
				Utils.showMessage("Unbalanced '<' and '>' in the DTD document.");
				return null;
			}
			String chunk = text.substring(i_first +1, i_last);
			i_space = chunk.indexOf(' ');
			if (chunk.startsWith("!ELEMENT")) {
				DTDParser.Type type = new DTDParser.Type(chunk.substring(i_space +1));
				if (isAllowed(type.name)) {
					ht_types.put(type.name, type);
					types.add(type);
				} else if (type.name.equals("project")) {
					if (null != root_type_name) {
						throw new Exception("ERROR in XML file: more than one project template element defined:\n   At least: " + root_type_name + " and " + type.name);
					}
					// the root is what the project has in parentheses, which must only be one element
					// (given that the TemplateTree has a single root)
					int openp = chunk.indexOf('(');
					if (-1 == openp) {
						throw new Exception("ERROR in XML file: project template doesn't have a child element!");
					}
					int closep = chunk.indexOf(')', openp +1);
					if (-1 == closep) {
						// Without this check the substring below fails with an index error that says nothing about the cause
						throw new Exception("ERROR in XML file: project template is missing the closing parenthesis!");
					}
					root_type_name = chunk.substring(openp+1, closep).trim();
					if (-1 != root_type_name.indexOf(',')) {
						throw new Exception("ERROR in XML file: project template has more than one child element!");
					}
				}
			} else if (chunk.startsWith("!ATTLIST")) {
				DTDParser.Attribute attr = new DTDParser.Attribute(chunk.substring(i_space +1));
				if (isAllowed(attr.type)) {
					Map<String,Attribute> oht = ht_attributes.get(attr.type);
					if (null == oht) {
						oht = new HashMap<String,Attribute>();
						ht_attributes.put(attr.type, oht);
					}
					if (oht.containsKey(attr.name)) {
						Utils.log("Parsing DTD: already have attribute " + attr.name + " for type " + attr.type);
					} else {
						oht.put(attr.name, attr);
					}
				}
			} // else ignore
			// Both searches resume right after the '>' that closed the current chunk
			i_first = text.indexOf('<', i_last +1);
			i_last = text.indexOf('>', i_last +1);
		}
		// Now traverse the hash tables and reconstruct the hierarchy of TemplateThing.

		if (null == root_type_name) {
			// Can happen when reading a .dtd file instead of extracting the dtd from an XML file
			// Reconstruct the tree, as is sequentially specified in the DTD:
			final Map<String,TypeNode> nodes = new HashMap<String,TypeNode>(); // a Map of the last created node with that name (there could be more than one, so NOT all TypeNode instances will be contained in the Map).
			final List<TypeNode> seqnodes = new ArrayList<TypeNode>(); // sequential, as found in dtd file

			for (final DTDParser.Type type : types) {
				TypeNode tn = nodes.get(type.name);
				if (null == tn) {
					// Create a new node with, for now, a null parent
					tn = new TypeNode(type.name);
					nodes.put(type.name, tn);
					// Add it as an ELEMENT declaration
					seqnodes.add(tn);
				}
				if (tn.children.isEmpty() && null != type.children) {
					for (final String child : type.children) {
						nodes.put(child, tn.addChild(child));
					}
				}
			}
			// Only nodes created from a declaration not yet listed as somebody's child lack a parent:
			// the first of them is taken as the root, any other gets reported.
			for (final TypeNode node : seqnodes) {
				if (null == node.parent) {
					if (null != root_type_name) {
						Utils.log("WARNING found second DTD root: " + node.name);
					} else {
						Utils.log2("Found DTD root: " + node.name);
						root_type_name = node.name;
					}
				}
			}
		}

		if (null == root_type_name) {
			throw new Exception("ERROR in XML file: could not find the root element!");
		}

		// find root_type as a Type instance
		DTDParser.Type root_type = ht_types.get(root_type_name);
		if (null == root_type) {
			throw new Exception("ERROR in XML file: could not find the root element DTDParser.Type instance!");
		}

		// The root is the one and only element of the project node
		TemplateThing root = new TemplateThing(root_type_name);
		root_type.createChildren(root, ht_types); // avoids nested

		return new TemplateThing[]{root};
	}

	/**
	 * Command line entry point: parses the file given as first argument, as an XML file when its
	 * path ends in ".xml" or ".xml.gz" and as a DTD file otherwise. The parsed result is discarded;
	 * any exception is reported through IJError.print.
	 *
	 * @param args the path of the file to parse is expected as the first element.
	 */
	static public void main(String[] args) {
		if (null == args || 0 == args.length) {
			System.err.println("Usage: DTDParser <path to an .xml, .xml.gz or .dtd file>");
			return;
		}
		try {
			final String path = args[0];
			// endsWith rather than comparing length()-4 with indexOf(".xml"): that comparison sent
			// ".xml.gz" files to the DTD parser and took any 3-character argument for an XML file.
			if (path.endsWith(".xml") || path.endsWith(".xml.gz")) {
				DTDParser.parseXMLFile(path);
			} else {
				DTDParser.parseDTDFile(path);
			}
		} catch (Exception e) { IJError.print(e); }
	}
}