package folioxml.slx; import folioxml.core.InvalidMarkupException; import folioxml.core.TokenUtils; import folioxml.css.CssUtils; import folioxml.folio.FolioToken; import folioxml.translation.FolioCssUtils; import folioxml.translation.FolioSlxTranslator; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; public class SlxTransformer implements ISlxTokenWriter { private SlxContextStack stack = new SlxContextStack(false, true); private ISlxTokenWriter reciever = null; private SlxValidator validator = new SlxValidator(stack); /** * Creats a new Slx transfomer with the specified record as the root context. You must call .endRecord() at the end, since no closing record tag will be arriving. * * @param record * @throws InvalidMarkupException */ public SlxTransformer(SlxRecord record) throws InvalidMarkupException { this(record, record); } /** * Allows you to specify an alterate token receiever instead of the record. Can be used to add a post-proccessing filter. * * @param r * @param record * @throws InvalidMarkupException */ public SlxTransformer(ISlxTokenWriter r, SlxRecord record) throws InvalidMarkupException { this.reciever = r; record.startsNewContext = true; stack.add(record); } /** * Creats a new SlxTransformer with the specfied reciever. If you don't pass in a root record, opening and closing record tokens will be expected. * * @param r * @param record */ public SlxTransformer(ISlxTokenWriter r) { this.reciever = r; } /** * In practice this shouldn't need to be called. Usually a SlxTransfomer is initialized pointing to the correct underlying instance, and it doesn't have to change. * Remember that a token stack is being maintained. */ public void setUnderlyingWriter(ISlxTokenWriter underlyingReceiver) { this.reciever = underlyingReceiver; } /** * If record tags are being filtered out before the transformer, you can call this to cause opening tags to flush. */ public void endRecord(boolean writeClosingTag) throws InvalidMarkupException { disableOutput = !writeClosingTag; write(newToken("</record>")); disableOutput = false; } /* * Call this to make sure that the stack is empty once you have finished using SlxTransformer. */ public void verifyDone() throws InvalidMarkupException { //Throw an exception if we have leftovers. if (stack.topItem() != null) throw new InvalidMarkupException("Token stream is not complete - there are orphaned tags", stack.topItem()); } protected boolean disableOutput = false; protected void out(SlxToken t) throws InvalidMarkupException { if (!disableOutput) reciever.write(t); } public boolean silent = false; /* Slx compatibility tag set infobase-meta, style-def/>, record, record-attribute/>, span, link, popupLink, end-popup-contents/>, note, namedPopup, parabreak />, object/>, table, tr, td * paragraph-attribute/>, pagebreak />, br/>, bookmark/>, pp/>, se/> */ /* Transformed tag set * new: <p>, <popup>, <link type="popup"> * infobase-meta, style-def/>, record>, span, link, popup, note, namedPopup, object/>, table, tr, td * br/>, bookmark/> */ /* removed by transform: record-attribute, paragraph-attribute, pp, se, pagebreak, popupLink, end-popup-contents, parabreak*/ /* * context tags: record, infobase-meta, popupLink, note, namedPopup, popup * standard: p, table, tr, td, object/>, br/>, bookmark/>, style-def/>, record-attribute/>, paragraph-attribute/> * ghost: span, link * * auto-repairs: insert <p> tags, close p tags * auto-close tr, td, p * auto-close record tags. * auto-close ghost tags (span,link) before context end. */ /** * The token receieved by the write() command. */ protected SlxToken input = null; private static Pattern pEntity = Pattern.compile("&[^;&<]++;", Pattern.CASE_INSENSITIVE); /** * Splits text tokens that contain entities apart into alternating text/entity tokens * * @param t * @throws folioxml.core.InvalidMarkupException */ public void writeText(SlxToken t) throws InvalidMarkupException { Matcher m = pEntity.matcher(t.markup); int lastEnd = 0; while (m.find(lastEnd)) { //Text if (m.start() > lastEnd) { outValidate(newToken(t.markup.substring(lastEnd, m.start()))); } //Entity outValidate(newToken(m.group())); //Increment lastEnd = m.end(); } if (lastEnd > 0) { //Last bit if (t.markup.length() > lastEnd) { outValidate(newToken(t.markup.substring(lastEnd, t.markup.length()))); } } else { //No entities, I guess. outValidate(t); } } public void write(SlxToken t) throws InvalidMarkupException { input = t; //Folio compatibility: auto-open paragraph before the text or entities. Needed inside table cells... boolean isContent = t.isContent(); if (!(stack.has("p")) && isContent) writeTag(newToken("<p>")); //Pass tags to writeTag(), pass others on to the receiver. if (t.isTag()) { writeTag(t); //Pass text to writeText() for entity splitting } else if (t.type == SlxToken.TokenType.Text) { writeText(t); //Pass comments and entities through } else { outValidate(t); } if (isContent) { //Mark containing paragraph that it has content... //Value can be inverted on closing p tag... SlxToken p = stack.get("p"); if (p != null && p.get("hasContent") == null) { p.set("hasContent", "true"); } } } public void outValidate(SlxToken t) throws InvalidMarkupException { validator.preValidate(t); validator.validate(t); out(t); } public void writeTag(SlxToken t) throws InvalidMarkupException { if (t.tagType == SlxToken.TagType.None) throw new InvalidMarkupException("Tags must be opening, closing, or self closing; TagType.None is not a valid value.", t); /** Classify tags **/ //Mark context tags if (t.matches("infobase-meta|record|note|popup|namedPopup")) t.startsNewContext = true; //Not |td|tr|table? //Mark ghost tags - they aren't hierarchical. They get put in the stack, but top() and pop() ignore them, and they aren't checked against the hierarchy if (t.matches("span|link")) t.isGhost = true; /************************************ /** (Additive only) Folio compatibility - these all call writeTag() recursively, so we can ignore the order. These only go one level deep though.. Nested paragraphs would cause a problem **/ //Use !t.isClosing instead of t.isOpening - otherwise a self-closing tag won't cause the previous tag to be auto-closed //Auto close paragraphs before opening a new paragraph or table if (t.matches("p|table") && !t.isClosing() && stack.has("p")) writeTag(makeClosingTag(stack.get("p"))); //Auto close paragraphs before the end of a table cell if (t.matches("td|th") && t.isClosing() && stack.has("p")) writeTag(makeClosingTag(stack.get("p"))); //There should never be a open paragraph tag if (t.matches("td|th") && t.isClosing() && stack.has("p")) throw new InvalidMarkupException("Nested paragraphs!"); //Auto close paragraphs before closing a context scope if (t.matches("infobase-meta|record|note|popup|namedPopup") && t.isClosing() && stack.has("p")) writeTag(makeClosingTag(stack.get("p"))); //Auto close ghosts before closing a context scope if (t.startsNewContext && t.isClosing()) closeGhosts(); //Auto close cells before opening a new cell, or opening or closing a row. (T if (stack.has("td|th") && ((t.matches("td|th") && !t.isClosing()) || t.matches("tr"))) writeTag(makeClosingTag(stack.get("td|th"))); //Auto close table rows before the end of the table or the start of a new row. if (stack.has("tr") && ((t.matches("table") && t.isClosing()) || (t.matches("tr") && !t.isClosing()))) writeTag(makeClosingTag(stack.get("tr"))); //Auto-close open records before opening another. Records can't overlap if (t.matches("record") && !t.isClosing() && stack.has("record", true)) writeTag(newToken("</record>")); //Start a new paragraph before any of these tags - if it's not already open. //Paragraph attributes are specified inside <td> tags also, so we MUST start <p> inside <td> quickly. if (!t.isClosing() && !(stack.has("p")) && t.matches("span|link|object|note|paragraph-attribute|pagebreak|br")) writeTag(newToken("<p>")); //Certain types of folio tags don't have closing tags - such as character attributes. These need to be auto-closed when another of the same type is encountered. //TODO - in XML mode, this isn't wanted. But in folio mode, this is wanted for all types - to prevent overlapping //Added Jul 27-09 if (t.matches("span") && t.isOpening()) { String type = t.get("type"); if (type != null) { if (TokenUtils.fastMatches("bold|italic|hidden|strikeout|underline|condensed|outline|shadow|font-family|font-size|background-color|foreground-color|subsuperscript", type)) { //it's one of the character attributes. SlxToken opener = stack.find(t.getTagName(), type, false); if (opener != null) { //Ok, there's already an open ghost tag of this type in the context. Close it, since we're overriding that now. writeTag(makeClosingTag(opener)); } } } } /** * Table support. * * Many attributes must be copied from the table style="" tag to each cell. * * -folio-horizontal-gap:unit;-folio-horizontal-gap:unit; * padding-horizontal:unit;padding-vertical:unit; * border-horizontal, border-vertical; * * Cells may already have padding and border, since it can be individually specified. In that case, cell border wins. * * Padding = max(0,gap - borderResult) + (padding-local + padding-horizontal/vertical) * * Also... * * cellWidths attr must be divided among the table cells.. aggregate widths for cells using colSpan. Folio also uses model where padding subtracts from width. */ if (t.matches("tr|table") && t.isClosing()) { SlxToken ta = stack.get("table"); assert (ta != null); ta.removeAttr("currentColumn"); } /* table, row, and cell tags take 3x as long to process... Could be optimized to take 50% of the time... But would require * in-memory collections attached to SlxTokens... */ if (t.matches("td") && t.isOpening()) { String sCols = t.get("colspan") != null ? t.get("colspan") : "1"; //1-based (1 is default) int cols = Integer.parseInt(sCols); //May throw an exception, but only if FolioSlxTranslator didn't do the translation. SlxToken ta = stack.get("table"); //Parent table assert (ta != null); int columnIndex = ta.get("currentColumn") == null ? 0 : Integer.parseInt(ta.get("currentColumn")); //We must store the currentColumn index on the table, in case we ever wish to support nested tables. SlxToken tr = stack.get("tr"); //Parent row assert (tr != null); boolean isTh = ("true".equalsIgnoreCase(tr.get("rowIsHeader"))); //Add this column index to the list of header columns if columnIsHeader=true if ("true".equalsIgnoreCase(t.get("columnIsHeader"))) { ta.appendToAttributeSmart("headerCols", Integer.toString(columnIndex)); } //Is this in a header column? String[] headerCols = ta.get("headerCols") == null ? new String[]{} : ta.get("headerCols").split(","); for (String h : headerCols) { if (h.equals(Integer.toString(columnIndex))) isTh = true; } if (isTh) t.set("th", "true"); if (ta.get("colWidths") != null) { //Handle column widths String[] widths = ta.get("colWidths").split(","); //TEMP: TODOD!!! Changed Jul1 for helptaulojistas //OLD: if (columnIndex >= widths.length) throw new InvalidMarkupException("More columns in table than specified in the column widths collection (" + (columnIndex + 1) + ").",ta); if (columnIndex < widths.length) t.appendToAttributeSmart("style", "width:" + widths[columnIndex] + ";"); } else { //t.set("nowidth", "true"); } //Parsing all this css every time is very slow... adding ~15% execution time to infobases that are 100% tables. Map<String, String> cellCss = CssUtils.parseCss(t.get("style"), true); Map<String, String> tableCss = CssUtils.parseCss(ta.get("style"), true); //Now, time to calculate padding and copy border settings //Padding = max(0,gap - borderResult) + (padding-local + padding-horizontal/vertical) for (String side : new String[]{"left", "top", "right", "bottom"}) { //Docs are wrong!!! They are clear, but wrong. vertical maps to bottom and top, not right and left. String orientation = (side.equalsIgnoreCase("left") || side.equalsIgnoreCase("right")) ? "horizontal" : "vertical"; //Copy the table border settings to the cell if they do not already exist. if (tableCss.containsKey("border-" + orientation) && !cellCss.containsKey("border-" + side)) { cellCss.put("border-" + side, tableCss.get("border-" + orientation)); } String globalPadding = tableCss.get("padding-" + orientation); String globalGap = tableCss.get("-folio-" + orientation + "-gap"); String localPadding = cellCss.get("padding-" + side); String localBorder = cellCss.get("border-" + side); //The first element is usually the units (if FolioCssUtils generated). if (localBorder != null) { for (String token : localBorder.split("\\s+")) { if (FolioCssUtils.isCssUnit(token)) { //The first unit token. localBorder = token; break; } } } //We need to convert them all to the same unit... if (globalPadding == null) globalPadding = "0in"; if (localPadding == null) localPadding = "0in"; if (localBorder == null) localBorder = "0in"; if (globalGap == null) globalGap = "0in"; //And calculate. //ToInches calls add 8% overhead to entire conversion proccess. double padding = Math.max(0, FolioCssUtils.toInches(globalGap) - FolioCssUtils.toInches(localBorder)) + FolioCssUtils.toInches(localPadding) + FolioCssUtils.toInches(globalPadding); //And store cellCss.put("padding-" + side, padding + "in"); } CssUtils.coalesce(cellCss); //Re-simplify //Simplify and write back to style attr. t.set("style", CssUtils.writeCss(cellCss)); columnIndex += cols; ta.set("currentColumn", Integer.toString(columnIndex)); } /** (Destructive) Folio compatibility */ if (t.matches("p") && t.isClosing()) { SlxToken opener = stack.get("p"); if (opener.get("hasContent") == null) { //Append class '_empty' //opener.set("class", (opener.get("class") != null ? opener.get("class") + " ": "") + "_empty"); opener.appendToAttributeSmart("style", "padding-top:1em;"); //Better than changing the class. Multiple CSS names make things harder to parse. } else opener.removeAttr("hasContent"); } /* transform <td tr="true"></td> pairs to <th> </th>. Must be after any additive code. */ if (t.matches("td") && t.isClosing()) { SlxToken opener = stack.get("td"); assert (opener != null); if ("true".equalsIgnoreCase(opener.get("th"))) { t.setTagName("th"); opener.setTagName("th"); opener.removeAttr("th"); //opener.set("th", "found"); } } //Transform <popupLink> into <link><popup>, <end-popup-contents/> into </popup>, and </popupLink> into </link> if (t.matches("popupLink")) { if (t.isOpening()) { SlxToken extraTag = null; //link tag this.writeTag(newToken("<link type=\"popup\">")); //Handle <PW:Popup,5.47917,1.22917,"Various Pictures",FD:"non indexed field"> //Put in extraTags attribute: FD,"non indexed field" if (t.get("extraTags") != null) { extraTag = FolioSlxTranslator.translate(new FolioToken("<" + t.get("extraTags") + ">")); t.removeAttr("extraTags"); } SlxToken popup = newToken("<popup>"); t.addAttributesTo(popup); this.writeTag(popup); //Write extra tag. if (extraTag != null) { this.warn("Siamese tag encountered in PW tag. Placing the following token inside <popup>: " + extraTag, t); this.writeTag(extraTag); } return; } else if (t.isClosing()) { this.writeTag(newToken("</link type=\"popup\">")); return; } else { return; } //just delete self-closing <popupLinks/> } else if (t.matches("end-popup-contents")) { assert (t.isSelfClosing()); this.writeTag(newToken("</popup>")); return; } //Start a new paragraph when we hit a <parabreak/> or <parabreak> tag. Do nothing if it is a closing </parabreak> tag. Always discards the parabreak tag. if (t.matches("parabreak")) { if (!t.isClosing()) writeTag(newToken("<p>")); return; } //Delete these - for now if (t.matches("pp|se|pagebreak")) return; //TODO: document //Pre validate before we perform any modifications to ancestors (where the source tag is deleted) validator.preValidate(t); /** Record the order of the infobase level definitions on the record */ if (t.matches("style-def") && "level".equalsIgnoreCase(t.get("type"))) stack.get("record").appendToAttributeSmart("levelDefOrder", t.get("class")); /** Copy the <LN:> list to the containing record (should be the root)*/ if (t.matches("infobase-meta") && "levels".equalsIgnoreCase(t.get("type"))) stack.get("record").set("levels", t.get("content")); //Put these attributes on the parent paragraph, and eat the tags if (t.matches("paragraph-attribute")) { if (!t.isClosing()) t.addAttributesTo(stack.get("p")); return; //What about the 'few paragraph attributes that apply to the entire table?' } //Put these attributes on the parent record, and eat the tags if (t.matches("record-attribute")) { SlxToken rec = stack.get("record", true); if (rec == null) throw new InvalidMarkupException("record-attribute can only exist inside record", t); if (!t.isClosing()) t.addAttributesTo(rec); //bypass context boundaries for this one return; } //Character attributes can have default tags (closing tags) without having opening tags. We don't need these - they're pointless, but allowed in folio. Remove them if (t.matches("span") && t.isClosing()) { String type = t.get("type"); if (type != null) { if (TokenUtils.fastMatches("bold|italic|hidden|strikeout|underline|condensed|outline|shadow|font-family|font-size|background-color|foreground-color|subsuperscript", type)) { //it's one of the character attributes. SlxToken opener = stack.find(t.getTagName(), type, false); if (opener == null) { warn("No opening tag found for this character attribute tag. Removing.", t); return; } } } } if (t.isClosing() && t.isGhost) { //The other, less normal orphaned closing ghost tags, like link and non-char attrib uses of span //jul 27 09 if (!stack.matchingOpeningTagExists(t)) { //Should throw an InvalidMarkupException, but for now we can just drop these. //TODO warn("Dropping orphaned closing ghost tag", t); return; } } //Check for CSS combinations. Must be after any additive code. if (!t.isOpening()) { SlxToken opener = t.isClosing() ? stack.getOpeningTag(t) : t; //t is its own opener if it is self closing. //No additives should remain. Check css for bad combos and fix String css = opener.get("style"); if (css != null) { String newCss = FolioCssUtils.fixCss(css, silent); if (newCss != css) opener.set("style", newCss); } } //Validate tag before modifying the top of the stack validator.validate(t); /************************ * Adding to/subtracting from the stack * All tags that aren't eaten (like pp, se, pagebreak, paragraph-attribute, record-attribute) go through here eventually. */ //Should throw an exception if there are any orphaned or mismatched tag pairs. stack.process(t); //Strict and tag pairs //Write tag out(t); } /** * Creats a new token from the specified string, and attaches the original parsing token. * * @param s * @return * @throws folioxml.folio.InvalidMarkupException */ public SlxToken newToken(String s) throws InvalidMarkupException { SlxToken t = new SlxToken(s); //add reference to current if (input != null) t.sourceToken = input.sourceToken; return t; } /** * Creates a matching closing tag for the specified opening tag. Attches the original parsing token. * * @param t * @return */ public SlxToken makeClosingTag(SlxToken t) throws InvalidMarkupException { SlxToken s = new SlxToken(); s.setTagName(t.getTagName()); s.type = t.type; s.tagType = SlxToken.TagType.Closing; s.startsNewContext = t.startsNewContext; s.isGhost = t.isGhost; s.sourceToken = input.sourceToken; String type = t.get("type"); if (type != null) s.set("type", type); return s; } /** * Closes any ghost tags floating at the top of the stack. Uses writeTag() * * @throws folioxml.folio.InvalidMarkupException */ public void closeGhosts() throws InvalidMarkupException { SlxToken g; //Close ghost tags while ((g = stack.topGhost()) != null) { boolean isCurrentRecord = input != null && input.matches("record") && input.isClosing(); if (!compatMode || (!isCurrentRecord && !"characterstyle".equalsIgnoreCase(g.get("type")))) warn("Closing tag not found. Inserting closing tag automatically", g); writeTag(makeClosingTag(g)); //Writing the closing tag will remove it from the stack } } public boolean compatMode = true; public void warn(String message) { warn(message, input); } public void warn(String message, SlxToken t) { if (silent) return; System.out.println(message); printToken(t); if (t != input) { System.out.print("Triggered by: "); printToken(input); } } public void printToken(SlxToken t) { System.out.print("{ " + t + " : "); if (t.sourceToken != null && t.sourceToken.info != null) { if (t.sourceToken.info.text != null) System.out.print(t.sourceToken.info.text); System.out.println(); System.out.print(" " + t.sourceToken.info.toString()); } System.out.println(" }"); } }