package folioxml.css; import folioxml.core.InvalidMarkupException; import folioxml.core.Pair; import folioxml.core.TokenBase; import folioxml.slx.SlxRecord; import folioxml.slx.SlxToken; import folioxml.xml.NodeFilter; import folioxml.xml.NodeList; import java.util.*; import java.util.regex.Pattern; /** * Don't use this at the same time as SlxTranslator, since this will process tokens before SlxTranslator has finished adding the class names... (They're subsequent tags) * * @author nathanael */ public class CssClassCleaner { public CssClassCleaner() { } /** * Map of namespace -> Map of 'originalName'.lowerCaseCultureEnglish -> (newName, originalName) */ public Map<String, Map<String, Pair<String, String>>> dict = new HashMap<String, Map<String, Pair<String, String>>>(); /** * Map of namespace -> Map of 'newName'.lowerCaseCultureEnglish -> (originalName) */ public Map<String, Map<String, String>> reverseDict = new HashMap<String, Map<String, String>>(); /** * Map of namespace -> Collection of newName.lowerCaseCultureEnglish for conflict checking. */ public Map<String, HashSet<String>> valueDict = new HashMap<String, HashSet<String>>(); /** * Valid CSS names (and now, XML IDs) (A subset of the specification, since some browsers don't support all the spec) */ protected static Pattern pName = Pattern.compile("^[_a-zA-Z][_a-zA-Z0-9-]*$"); //Removed leading dash. It wasn't allowed in XML IDs, and we need this multi-purpose. Was ^(-)?[_a-zA-Z][_a-zA-Z0-9-]*$ /* XML 1.0 rev 5 spec for IDs * [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040] */ /** * Provides a CSS-compliant version of the specified identifier. * It is suggested that you process the infobase header <style-def/> tags first, since the * additional information they contain can assist in making more intelligent naming * choices in the case of conflicts. (Such as Normal-highlighter). * Appending a random 4-digit hex value such as "-f92a" is a last resort. * <p> * It is important to use the same CssClassCleaner instance across the entire infobase to maintain consistency (or save and restore the name table). * The name table should be preserved so translation back to FFF can be performed. * Storage in the root infobase record is a good idea... * * @param name The class name to sanitize * @param parent The token this class name originated from. Helps choose names in the case of conflicts. * @return * @throws InvalidMarkupException */ public String cleanId(String name, String namespace, boolean throwExceptionIfDuplicate) throws InvalidMarkupException { //Namespaces are separated Map<String, Pair<String, String>> mappings = dict.get(namespace); if (mappings == null) { mappings = new HashMap<String, Pair<String, String>>(); dict.put(namespace, mappings); } Map<String, String> reverseMappings = reverseDict.get(namespace); if (reverseMappings == null) { reverseMappings = new HashMap<String, String>(); reverseDict.put(namespace, reverseMappings); } HashSet<String> values = valueDict.get(namespace); if (values == null) { values = new HashSet<String>(); valueDict.put(namespace, values); } /*CSS identifiers -?[_a-z]|{nonascii}|{escape}([_a-z0-9-]|{nonascii}|{escape})* Simplified: -?[_a-zA-z][_a-zA-Z0-9-]* Folio names are case-insensitive, but preserve case. Do browsers limit length? Test this! We have names that are probably longer than 255. */ String lowerName = name.toLowerCase(Locale.ENGLISH); //TODO: What if there is both a character style named "Normal" and a link style named "Normal"? This needs the intelligence to know which Normal variant to use when the <span> or <link> tag is reached. Style-def isn't enough //What if character-style, field, and highlighter have overlapping names? //The map doesn't add new conflicts, but what if they already exist between style types? //First check if 'name' exists in the mappings. Pair<String, String> result = mappings.get(lowerName); //If so, return precomputed result. if (result != null) { if (throwExceptionIfDuplicate) throw new InvalidMarkupException("Duplicate mapping for (" + lowerName + ") encountered: " + result.getFirst() + " -> " + result.getSecond() + ". Please rename character styles, highlighters, and fields to use unique names; they cannot overlap in CSS."); return result.getFirst(); } //If 'name' is valid anyways, cache to the Map /* Optmization causes problem when run twice on the same css class * if (pName.matcher(name).matches()) { mappings.put(lowerName, new Pair<String,String>(name,name)); values.add(lowerName); return name; //Nothing to do - already a valid name. }else{*/ String sanitizedName = sanitizeString(name); //Sanitize String newName = sanitizedName; String lowerNewName = newName.toLowerCase(Locale.ENGLISH); //Check for conflicts. Attempt style-def naming /*if (values.contains(lowerNewName) && parent != null && parent.matches("style-def")){ //Append -type if present on parent and valid. String type = parent.get("type"); if (type != null && pName.matcher(type).matches()){ newName = sanitizedName + "-" + type; lowerNewName = newName.toLowerCase(Locale.ENGLISH); } }*/ boolean foundConflict = false; //Check for conflicts. Generate 4-digit hex suffix. while (values.contains(lowerNewName)) { newName = sanitizedName + "-" + Integer.toHexString(new Random().nextInt(256 * (256 - 16)) + (256 * 16)); lowerNewName = newName.toLowerCase(Locale.ENGLISH); foundConflict = true; } if (foundConflict) { for (Pair<String, String> pair : mappings.values()) { if (pair.getFirst().equalsIgnoreCase(sanitizedName)) { System.out.println("Renaming \"" + name + "\" to \"" + newName + "\" to avoid conflicting with a similar Folio Style " + pair.getSecond()); break; } } } //Add mapping mappings.put(lowerName, new Pair<String, String>(newName, name)); values.add(lowerNewName); reverseMappings.put(lowerNewName, name); //Return new name return newName; //} } private String sanitizeString(String name) { //Remove all characters that don't match the regex. //When removing spaces, and the next character is lowercase, uppercase it. StringBuilder sb = new StringBuilder(name.length()); boolean lastCharDelimiter = false; for (int i = 0; i < name.length(); i++) { char c = name.charAt(i); //Allow [_a-zA-Z] as the first character // Aug 12. Removed hyphen allowance for XML ID compat //And the remainder allow [_a-zA-Z0-9-] boolean valid = isCharValid(c, i > 0, true, true, i > 0); if (valid) { //Uppercase lowercase letters following a space. if (lastCharDelimiter && sb.length() > 0 && sb.charAt(sb.length() - 1) != '_') { sb.append('_'); } //Keep the character sb.append(c); } lastCharDelimiter = (c == ' ' || c == '.' || c == ',' || c == '+' || c == '/' || c == '\\' || c == '#' || c == '%' || c == '(' || c == ':'); } return sb.toString(); } private boolean isCharValid(char c, boolean allowHyphen, boolean allowUnderline, boolean allowAZ, boolean allowNumbers) { if (c == '-' && allowHyphen) return true; if (c == '_' && allowUnderline) return true; if (((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) && allowAZ) return true; if (allowNumbers && (c >= '0' && c <= '9')) return true; return false; } public void process(SlxToken t) throws InvalidMarkupException { process(t, getNamespace(t), getPrefix(t), false); } public void process(TokenBase t, String namespace, String prefix, boolean throwExceptionIfDuplicate) throws InvalidMarkupException { if (!t.isTag()) return; //Only tags have attributes //Requires SLX valid. //For paragraphs, spans, links... String s = t.get("class"); if (s != null) { String ns = cleanId(prefix + s, namespace, throwExceptionIfDuplicate); if (!s.equals(ns)) t.set("class", ns); } //For <bookmarks name="" and <link jumpdestination="" //For objects and bookmarks, use hash instead - don't CSS clean. } /** * Must be called before .process(). TODO: Resolve replaceDefiniton=true first * * @param r * @throws InvalidMarkupException */ public void processRootRecord(SlxRecord r) throws InvalidMarkupException { //Index style-def tags only. Then repeat and get the rest. for (TokenBase t : r.getTokens()) { if (t.isTag() && t.matches("style-def")) { process(t, getNamespace(t), getPrefix(t), true); //disabled at one point because of Neil's style class name changed quick fix was to disable } } } /** * objects, bookmarks, and popups are not processed. They are hashed. * * @param t * @return * @throws InvalidMarkupException */ private String getNamespace(TokenBase t) throws InvalidMarkupException { /* Namespaces * Character styles Link Styles Paragraph Styles Level Styles Highlighter Styles Field Styles */ String type = t.get("type"); if (t.matches("p|paragraph-attribute") || (t.matches("style-def") && "paragraph".equalsIgnoreCase(type))) return "paragraph"; if (t.matches("record|record-attribute") || (t.matches("style-def") && "level".equalsIgnoreCase(type))) return "level"; if (t.matches("link|popupLink|a") || (t.matches("style-def") && "link".equalsIgnoreCase(type))) return "link"; //Correct for the 'class' attribute, but not for 'objectName'. //if (t.matches("span|style-def") && "character-style".equalsIgnoreCase(type)) return "character-style"; //if (t.matches("span|style-def") && "highlighter".equalsIgnoreCase(type)) return "highlighter"; //All other span tags are fields. //if (t.matches("span") || (t.matches("style-def") && TokenUtils.fastMatches("text|date|time|integer|decimal", type))) return "field"; return "span"; } private String getPrefix(TokenBase t) throws InvalidMarkupException { /* Namespaces * Character styles Link Styles Paragraph Styles Level Styles Highlighter Styles Field Styles */ String type = t.get("type"); if (t.matches("span|style-def") && "character-style".equalsIgnoreCase(type)) return "cs_"; if (t.matches("span|style-def") && "highlighter".equalsIgnoreCase(type)) return "hl_"; //All other span tags are fields. //if (t.matches("span") || (t.matches("style-def") && TokenUtils.fastMatches("text|date|time|integer|decimal", type))) return ""; return ""; } /** * Returns the original name for the specified token based on the class attribute and tag name * * @param t * @param cssClass * @return * @throws InvalidMarkupException */ public String findOriginalName(SlxToken t) throws InvalidMarkupException { return findOriginalName(getNamespace(t), t.get("class")); } /** * Returns the original name for the specified cssClass use the token specified to determine the namespace. * * @param t * @param cssClass * @return * @throws InvalidMarkupException */ public String findOriginalName(SlxToken t, String cssClass) throws InvalidMarkupException { return findOriginalName(getNamespace(t), cssClass); } /** * Returns the original name for the specified cssClass * * @param t * @param cssClass * @return * @throws InvalidMarkupException */ public String findOriginalName(String namespace, String cssClass) throws InvalidMarkupException { if (cssClass == null) return null; Map<String, String> mappings = reverseDict.get(namespace); if (mappings != null) { String s = mappings.get(cssClass.toLowerCase(Locale.ENGLISH)); //if (pair == null) return cssClass; //WARNING: TERRIBLE THING TO DO.... WILL BReAK EVERYTHING if (s == null) throw new InvalidMarkupException("Cannot find original css name for " + cssClass + " (in " + namespace + " namespace)"); return s;//return pair.getSecond(); } return null; } /** * NOT IMPLEMENTED Saves the mappings to the specified record. Verify that root.level = "root". * Builds a map of String->SlxToken (<style-def> tags). * If a <style-def /> doesn't exist for the mapping, insert it after the last style-def. * originalName = "" attribute is where the original names are stored. * * @param root * @throws InvalidMarkupException */ public void saveTo(SlxRecord root) throws InvalidMarkupException { // TODO: We need this for later - For converting back, and possible for reference for (String namespace : dict.keySet()) { for (String key : dict.get(namespace).keySet()) { Pair<String, String> pair = dict.get(namespace).get(key); SlxToken t = new SlxToken("<mapping from=\"" + pair.getSecond() + "\" to=\"" + pair.getFirst() + "\" namespace=\"" + namespace + "\" />"); root.write(t); //System.out.println(t.toTokenString()); } } } public void loadFrom(NodeList nodes) throws InvalidMarkupException { //Index style-def tags only. Then repeat and get the rest. ///System.out.print(nodes.toXmlString(true)); nodes = nodes.searchOuter(new NodeFilter("mapping")); for (TokenBase t : nodes.list()) { String namespace = t.get("namespace"); //Namespaces are separated Map<String, Pair<String, String>> mappings = dict.get(namespace); if (mappings == null) { mappings = new HashMap<String, Pair<String, String>>(); dict.put(namespace, mappings); } Map<String, String> reverseMappings = reverseDict.get(namespace); if (reverseMappings == null) { reverseMappings = new HashMap<String, String>(); reverseDict.put(namespace, reverseMappings); } HashSet<String> values = valueDict.get(namespace); if (values == null) { values = new HashSet<String>(); valueDict.put(namespace, values); } String lowerName = t.get("from").toLowerCase(Locale.ENGLISH); mappings.put(lowerName, new Pair<String, String>(t.get("from"), t.get("to"))); reverseMappings.put(t.get("to").toLowerCase(Locale.ENGLISH), t.get("from")); values.add(lowerName); } assert (nodes.count() > 1); } }