package folioxml.export; import folioxml.config.InfobaseSet; import folioxml.core.InvalidMarkupException; import folioxml.core.TokenUtils; import folioxml.text.TextLinesBuilder; import folioxml.xml.Node; import folioxml.xml.NodeFilter; import folioxml.xml.NodeList; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import java.util.HashMap; import java.util.HashSet; import java.util.Map; public class InventoryNodes implements NodeListProcessor { //HashSet<String> bookmarkNames = new HashSet<String>(); LogStreamProvider logs; public InventoryNodes(LogStreamProvider logs) { this.logs = logs; } HashMap<String, Integer> stats = new HashMap<String, Integer>(30); HashMap<String, HashSet<String>> uniques = new HashMap<String, HashSet<String>>(30); private void increment(String statName) { incrementBy(statName, 1); } public void PrintStats(Appendable a) throws IOException { for (Map.Entry<String, Integer> e : stats.entrySet()) { a.append(e.getKey() + ": " + e.getValue().toString() + "\n"); } } public void PrintExternalInfobases(InfobaseSet internalInfobases, Appendable a) throws IOException { HashSet<String> dests = uniques.get("destination infobases"); if (dests == null) return; a.append("External infobases: \n"); for (String val : dests) { if (internalInfobases.byName(val) == null) { a.append(val); a.append("\n"); } } a.append("\n"); } public void PrintUniques(Appendable a) throws IOException { for (Map.Entry<String, HashSet<String>> e : uniques.entrySet()) { a.append("\n"); for (int i = 0; i < 60; i++) a.append('-'); a.append("\nUnique " + e.getKey() + " (" + +e.getValue().size() + "): \n"); for (String val : e.getValue()) { a.append(val); a.append("\n"); } } } private void incrementBy(String statName, int offset) { Integer i = stats.get(statName); if (i == null) i = 0; i += offset; stats.put(statName, i); } public NodeList process(NodeList nodes) throws InvalidMarkupException { //Deep copy so we don't affect anything. nodes = nodes.deepCopy(); for (Node n : nodes.search(new NodeFilter("bookmark")).list()) increment("bookmark definitions"); //Report unique groups and levels for (Node n : nodes.search(new NodeFilter("record")).list()) { if (n.get("groups") != null) { for (String group : n.get("groups").split(",")) { logUnique(group, "groups"); } } if (n.get("level") != null) { logUnique(n.get("level"), "levels"); increment("level records"); } //TODO report unique combination of levels } for (Node every : nodes.flattenRecursive().list()) { if (every.isTag()) { logUnique(every.getTagName(), "XML element names"); } } //Report on use of tabs inside paragraphs for (Node para : nodes.search(new NodeFilter("p")).list()) { //I guess recurse the tree and reset each time a breaking element opens or closes. TextLinesBuilder.TabUsage tabs = new TextLinesBuilder().analyzeTabUsage(new NodeList(para)); boolean canPre = para.children == null || para.children.phrasingContentOnly(); if (tabs != TextLinesBuilder.TabUsage.None) { //System.out.println("Tab-aligned paragraph:"); increment("tab-aligned paragraphs"); //System.out.println(para.toXmlString(true)); if (!canPre) { logAndPullNode(para, "tab-aligned paragraphs that contain block elements", "Tab-aligned paragraph with block elements:"); } } } //TODO: report on use of underlining for non-links //TODO: export hidden text. (better if we use SLX?) NodeList images = nodes.filterByTagName("img|object|link|a", true); for (Node n : images.list()) { if ("true".equalsIgnoreCase(n.get("resolved"))) { continue; //It's resolved. } if (n.get("href") != null && validUrl(n.get("href"))) { continue; //It's a valid URI } if (n.get("id") != null && n.get("href") == null && "a".equalsIgnoreCase(n.getTagName())) { //It's an anchor, skip continue; } if (!"true".equalsIgnoreCase(n.get("resolved")) && !"popup".equalsIgnoreCase(n.get("type"))) { logUnique(n.toXmlString(false), "unresolved references"); } } for (Node link : nodes.search(new NodeFilter("link", "infobase", null)).list()) logUnique(link.get("infobase"), "destination infobases"); NodeList objects = nodes.filterByTagName("object", true); for (Node t : objects.list()) { String handler = t.get("handler"); if (TokenUtils.fastMatches("bitmap|metafile|picture", handler)) { //Convert these three types to "img" tags immediately. increment("images"); } else { logAndPullNode(t, "unsupported objects", "Unsupported object:"); } } logAndPullNodes(nodes.filterByTagName("note", true), "notes", "Note: "); return processLinks(nodes); } private void logAndPullNodes(NodeList list, String counter, String intro) throws InvalidMarkupException { for (Node n : list.list()) { logAndPullNode(n, counter, intro); } } private void logAndPullNode(Node n, String counter, String intro) throws InvalidMarkupException { //Unless type = popup, skip the internals for distinction. String data = null; if (n.matches("link")) { if (n.get("type") == null || !n.get("type").equalsIgnoreCase("popup")) { data = n.toTokenString(); } } if (data == null) data = n.toXmlString(true); logUnique(data, counter); increment(counter); n.pull(); } private void logUnique(String data, String counter) throws InvalidMarkupException { HashSet<String> set = uniques.get(counter); if (set == null) { set = new HashSet<String>(); uniques.put(counter, set); } if (!set.contains(data)) { set.add(data); increment("unique " + counter); } } private boolean validUrl(String s) { try { URL u = new URL(s); return true; } catch (MalformedURLException e) { return false; } } public NodeList processLinks(NodeList nodes) throws InvalidMarkupException { /* if (nodes.filterByTagName("a", true).count() > 0){ throw new InvalidMarkupException("Only raw XML can be inventoried."); }*/ //Program, menu, data links are always local logAndPullNodes(nodes.search(new NodeFilter("link|a", "program", null)), "program links", "Program link:"); logAndPullNodes(nodes.search(new NodeFilter("link|a", "dataLink", null)), "data links", "Data link:"); logAndPullNodes(nodes.search(new NodeFilter("link|a", "menu", null)), "menu links", "Menu link:"); //Add number of href URL links. NodeList urlLinks = nodes.search(new NodeFilter("link|a", "href", null)); incrementBy("URL links", urlLinks.count()); for (Node n : urlLinks.list()) { if (validUrl(n.get("href"))) { logUnique(n.get("href"), "URL links"); } } for (Node n : urlLinks.list()) { String url = n.get("href"); if (!"true".equalsIgnoreCase(n.get("resolved")) && !validUrl(url)) { logUnique(url, "invalid URL links"); } } urlLinks.pull(); //Pull so we don't run into them later //jump and cross-infobase jump links NodeList jumpLinks = nodes.search(new NodeFilter("link|a", "jumpDestination", null)); for (Node n : jumpLinks.list()) { if (n.get("infobase") != null) { //logAndPullNode(n, "cross-infobase bookmark links", "Cross-infobase jump link:"); n.pull(); } else { increment("bookmark links"); n.pull(); } } //object and cross-infobase object links NodeList objectLinks = nodes.search(new NodeFilter("link|a", "objectName", null)); for (Node n : objectLinks.list()) { if (n.get("infobase") != null) { logAndPullNode(n, "cross-infobase object links", "Cross-infobase object link:"); } else { logAndPullNode(n, "object links", "Object link:"); } } //Inline popups (not originally links) logAndPullNodes(nodes.search(new NodeFilter("link|a", "type", "popup")), "inline popups", "Inline popup link:"); //Named popup links logAndPullNodes(nodes.search(new NodeFilter("link|a", "popupTitle", null)), "named popup links", "Link to named popup:"); NodeList queryLinks = nodes.search(new NodeFilter("link|a", "query", null)); for (Node n : queryLinks.list()) { if (n.get("infobase") != null) { logAndPullNode(n, "cross-infobase query links", "Cross-infobase query link:"); } else if (n.get("title") != null) { logAndPullNode(n, "query popup links", "Query popup link:"); } else { logAndPullNode(n, "query links", "Query link:"); } } logAndPullNodes(nodes.search(new NodeFilter("link")), "unrecognized links", "Unrecognized link:"); return nodes; } }