package org.emile.cirilo.business; import org.emile.cirilo.*; import org.emile.cirilo.business.HashEntry; import java.io.*; import java.util.*; import java.util.regex.*; import java.net.*; import org.apache.log4j.Logger; import org.jdom.Element; import org.jdom.Document; import org.jdom.input.SAXBuilder; import org.jdom.output.Format; import org.jdom.output.XMLOutputter; import org.jdom.xpath.XPath; public class CantusConverter { private static Logger log = Logger.getLogger(CantusConverter.class); public static enum Types { EOT, TYPUS, TIMETERM_1, TIMETERM_2, TEXT, CANTO, NEUME, PAGE, WHITESPACE, DOT, COMMA, PERSON, PLACE, FUNCTION, RASUR, XASUR, RUSUR, MASUR, EOA, EOI, CHOICE, DEL, INS, MARGINAL, COMMENT}; EnumSet<Types> Typus = EnumSet.of(Types.CANTO, Types.NEUME, Types.TYPUS, Types.WHITESPACE); EnumSet<Types> Emendations = EnumSet.of(Types.RASUR, Types.RUSUR, Types.MASUR, Types.XASUR); EnumSet<Types> Entities = EnumSet.of(Types.PERSON, Types.PLACE, Types.FUNCTION, Types.DEL); EnumSet<Types> Description = EnumSet.of(Types.TEXT, Types.DOT, Types.COMMA, Types.WHITESPACE, Types.PAGE); EnumSet<Types> Text = EnumSet.of(Types.PERSON, Types.PLACE, Types.FUNCTION, Types.TEXT, Types.WHITESPACE, Types.TYPUS, Types.DOT); EnumSet<Types> All = EnumSet.of(Types.PERSON, Types.PLACE, Types.FUNCTION, Types.DEL,Types.TEXT, Types.DOT, Types.COMMA, Types.WHITESPACE, Types.PAGE, Types.COMMA, Types.DOT, Types.CANTO, Types.NEUME, Types.TYPUS, Types.CHOICE); private ArrayList<String> TYPES; private org.jdom.Document target; private SAXBuilder builder; private XMLOutputter op; private String lastdiv; private Element currSegment_1; private Element currSegment_2; private Element ins; private String VARIANT = ""; private boolean urtext; public CantusConverter(String VARIANT) { this.VARIANT = VARIANT; this.urtext = this.VARIANT.contains(":"); if (this.urtext) { String v[] = this.VARIANT.split(":"); this.VARIANT = v[1]; } this.readConfig(); } public org.jdom.Document transform(org.jdom.Document tei) { try { target = tei; builder = new SAXBuilder(); target.getRootElement().addNamespaceDeclaration(Common.xmlns_cantus); Format format = Format.getRawFormat(); format.setEncoding("UTF-8"); op = new XMLOutputter(format); XPath xPath = XPath.newInstance("//t:editionStmt/t:edition"); xPath.addNamespace(Common.xmlns_tei_p5); if (!VARIANT.isEmpty()) { Element edition = (Element) xPath.selectSingleNode(target); edition.removeChildren("date",Common.xmlns_tei_p5); Element witDetail = new Element("witDetail",Common.xmlns_tei_p5); witDetail.setAttribute("wit",VARIANT); edition.addContent(witDetail); } xPath = XPath.newInstance("//t:titleStmt"); xPath.addNamespace(Common.xmlns_tei_p5); Element ts = (Element) xPath.selectSingleNode(target); ts.removeChildren("author",Common.xmlns_tei_p5); xPath = XPath.newInstance("//t:body/t:div"); xPath.addNamespace(Common.xmlns_tei_p5); List <Element> divs = xPath.selectNodes(tei); XPath qPath = XPath.newInstance("//t:body"); qPath.addNamespace(Common.xmlns_tei_p5); Element body = (Element) qPath.selectSingleNode(target); body.removeChildren("div", Common.xmlns_tei_p5); body.setAttribute("space","preserve",Common.xmlns_xml); for (Element div: divs) { List<Element> ps = div.getChildren("p", Common.xmlns_tei_p5); String shead = op.outputString( div.getChild("head", Common.xmlns_tei_p5)) .replaceAll("%D::(.*?)%", "ß+$1+ß") .replaceAll("[?]","+") .replaceAll("/!", "/µ") .replaceAll("#", "~") .replaceAll("\\$", "¥") .replaceAll("[|]","§") .replaceAll("<.*?>","") .replaceAll("[\n\r]","") .replaceAll("\\[","{") .replaceAll("\\]","}") .replaceAll(" "," ") .replaceAll(" "," ") .replaceAll(" "," ") .replaceAll(" "," ") .replaceAll(" "," ") .trim(); Element feast = new Element("div",Common.xmlns_ntei_p5); Element head = new Element("head",Common.xmlns_ntei_p5); shead = parseVariant(shead); // head.setText(shead); parseEmendations(head, shead); feast.addContent(head); body.addContent(feast); ArrayList<Element> segments_1 = new ArrayList<Element>(); segments_1.add(new Element("div",Common.xmlns_ntei_p5)); currSegment_1 = segments_1.get(segments_1.size()-1); ArrayList<Element> segments_2 = new ArrayList<Element>(); currSegment_2 = null; ins = null; for (Element p: ps) { String buf = op.outputString(p) .replaceAll("[|]G[|]","Γ") .replaceAll("/!", "/µ") .replaceAll("[?]","+") .replaceAll("[|]","§") .replaceAll("[\n\r]","") .replaceAll("[{]","!") .replaceAll("[}]","|") .replaceAll("%D::(.*?)%", "ß+$1+ß") .replaceAll("#", "~") .replaceAll("<hi rend=\"Person.*?>","Ö") .replaceAll("<hi rend=\"Ort.*?>","Ü") .replaceAll("<hi rend=\"Funktion.*?>","Ä") .replaceAll("<hi rend=\"Neume.*?>","#") .replaceAll("<hi rend=\"Incipit.*?>","{") .replaceAll("<hi rend=\"Time:1\">","Δ") .replaceAll("<hi rend=\"Time:2\">","Θ") .replaceAll(" </hi>","</hi> ") .replaceAll("<hi.*?>","") .replaceAll("</hi>","}") .replaceAll("<.*?>","") .replaceAll("(\\w)\\{","$1 {") .replaceAll("\\{ ","{") .replaceAll(" ([.,])","$1") .replaceAll(" \\}","}") .replaceAll("\\]\\{","] {") .replaceAll("\\[[ ]*\\]","") .replaceAll("[.][ ]*[.]",".") .replaceAll("[.]\\}","}.") // .replaceAll("\\}(\\W)","} $1") .replaceAll("\\} ([.,§$])","}$1") .replaceAll("[.](\\w)",". $1") .replaceAll(" "," ") .replaceAll(" "," ") .replaceAll(" "," ") .replaceAll(" "," ") .replaceAll(" "," ") .replaceAll("§<", "§ <") .replaceAll("\\$", "¥") .replaceAll("\\} #", "\\}#") .replaceAll("\\*", "") .replaceAll("\\[KOMMENTAR\\]", "KOMMENTAR"); log.debug(buf); if (!VARIANT.isEmpty()) { Pattern p0 = Pattern.compile("(//)(.*?)(//)"); Matcher m0 = p0.matcher(buf); StringBuffer sb = new StringBuffer(); while (m0.find()) { String variant = ""; String s = m0.group(); s = s.substring(2,s.length()-1); log.info(s); boolean test = this.urtext && s.contains("µ"); Pattern p1 = Pattern.compile("("+(test ? "µ" : "")+".*?)~(.*?)/"); Matcher m1 = p1.matcher(s); while (m1.find()) { String[] a = m1.group().substring(0, m1.group().length()-1).split("~"); if (test || a[0].contains(VARIANT)) { variant = a[1]; break; } } try { m0.appendReplacement(sb," φ "+ variant+" ƕ "); } catch (Exception qe) { qe.printStackTrace(); } } m0.appendTail(sb); buf = sb.toString(); } ArrayList<String> segs = new ArrayList<String>(); String line = ""; boolean inline = false; for (int i=0;i<buf.length();i++) { if (buf.charAt(i) == '!') { if (!line.trim().isEmpty()) { segs.add(line); line=""; } line = line + buf.charAt(i); inline = true; } else if (buf.charAt(i) == '|') { if (!line.trim().isEmpty()) { segs.add(line); line=""; } inline = false; /* } else if (buf.charAt(i) == '.' && !inline) { line = line + buf.charAt(i); if (!line.trim().isEmpty()) { segs.add(line); line=""; } */ } else { line = line + buf.charAt(i); } } if (!line.trim().isEmpty()) { segs.add(line); } /* Element stream = new Element("quote",Common.xmlns_ntei_p5); for (String s: segs) { Element l = new Element("l",Common.xmlns_ntei_p5); l.setText(s); stream.addContent(l); } currSegment_1.addContent(stream); */ boolean mode = false; Parser parser = new Parser(); lastdiv = null; for (String s: segs) { if (s.contains("!")) { s = s.replaceAll("!",""); } parser.set(s); Types q; while ((q = parser.next()) != parser.types.EOT) { if(q == parser.types.TIMETERM_1) { segments_1.add(new Element("div",Common.xmlns_ntei_p5)); currSegment_1 = segments_1.get(segments_1.size()-1); currSegment_1.setAttribute("type","time:1"); if (parser.getInsertMode()) currSegment_1.setAttribute("subtype","addition"); if (parser.getRasurMode()) currSegment_1.setAttribute("subtype","rasur"); if (parser.getMarginalMode()) currSegment_1.setAttribute("subtype","marginal"); Element ab = new Element("ab",Common.xmlns_ntei_p5); ab.setAttribute("ana","#head"); String bp =parser.getEntity().trim(); if (bp.contains("€")) { String[] pre = bp.split("[€]"); Element seg = new Element("seg",Common.xmlns_ntei_p5); seg.setText(pre[0]); ab.addContent(seg);; Element label = addPage("label",pre[1]); ab.addContent(label); } else { Element label = addPage("label",bp); ab.addContent(label); } lastdiv = ab.getText(); currSegment_1.addContent(ab); segments_2 = new ArrayList<Element>(); currSegment_2 = null; mode = false; continue; } if (q == parser.types.TIMETERM_2) { segments_2.add(new Element("ab",Common.xmlns_ntei_p5)); currSegment_2 = segments_2.get(segments_2.size()-1); currSegment_2.setAttribute("type","time:2"); if (parser.getInsertMode()) currSegment_2.setAttribute("subtype","addition"); if (parser.getRasurMode()) currSegment_2.setAttribute("subtype","rasur"); if (parser.getMarginalMode()) currSegment_2.setAttribute("subtype","marginal"); Element hd = addPage("label",parser.getEntity().trim()); currSegment_2.addContent(hd); currSegment_1.addContent(currSegment_2); mode = true; continue; } if (!mode) { segments_2.add(new Element("ab",Common.xmlns_ntei_p5)); currSegment_2 = segments_2.get(segments_2.size()-1); if (parser.getInsertMode()) currSegment_2.setAttribute("subtype","addition"); if (parser.getRasurMode()) currSegment_2.setAttribute("subtype","rasur"); if (parser.getMarginalMode()) currSegment_2.setAttribute("subtype","marginal"); currSegment_1.addContent(currSegment_2); mode = true; } if (q == parser.types.INS) { ins = new Element("add",Common.xmlns_ntei_p5); if (currSegment_2 == null) currSegment_1.addContent(ins); else currSegment_2.addContent(ins); continue; } if (q == parser.types.EOI) { ins = null; continue; } if (q == parser.types.RUSUR) { Element del = new Element("del",Common.xmlns_ntei_p5); del.setAttribute("type","rasur"); if (ins != null) { ins.addContent(del); } else { if (currSegment_2 == null) currSegment_1.addContent(del); else currSegment_2.addContent(del); } continue; } if (q == parser.types.RASUR || q == parser.types.MASUR | q == parser.types.XASUR) { Element del = new Element("del",Common.xmlns_ntei_p5); del.setAttribute("type","rasur"); if (q == parser.types.RASUR) del.setAttribute("subtype","signingover"); if (q == parser.types.XASUR) del.setAttribute("subtype","deciphered"); if (q == parser.types.MASUR) del.setAttribute("subtype","marginal"); Element add = new Element("add",Common.xmlns_ntei_p5); del.addContent(add); while (All.contains(parser.foresee())) { q= parser.next(); if (Entities.contains(q)) { add = Entities (q, parser, add); } else if (Description.contains(q)) { add = Description(q, parser, add); } else if (q == parser.types.TYPUS) { add = Cantus(q, parser, add); } else if (q == parser.types.CANTO || q == Types.NEUME) { add = Neumes(q, parser, add); } else if (q == parser.types.CHOICE) { add = Choice(q, parser, add); } else if (q == parser.types.PAGE) { add = Page(q, parser, add); } } if (ins != null) { ins.addContent(del); } else { if (currSegment_2 == null) currSegment_1.addContent(del); else currSegment_2.addContent(del); } continue; } if (q == parser.types.MARGINAL ) { Element add = new Element("add",Common.xmlns_ntei_p5); add.setAttribute("type","marginal"); while (All.contains(parser.foresee())) { q= parser.next(); if (Entities.contains(q)) { add = Entities (q, parser, add); } else if (Description.contains(q)) { add = Description(q, parser, add); } else if (q == parser.types.TYPUS) { add = Cantus(q, parser, add); } else if (q == parser.types.CANTO || q == Types.NEUME) { add = Neumes(q, parser, add); } else if (q == parser.types.CHOICE) { add = Choice(q, parser, add); } } if (ins != null) { ins.addContent(add); } else { if (currSegment_2 == null) currSegment_1.addContent(add); else currSegment_2.addContent(add); } continue; } if (q == parser.types.COMMENT ) { Element note = new Element("note",Common.xmlns_ntei_p5); note = Description(q, parser, note); if (ins != null) { ins.addContent(note); } else { if (currSegment_2 == null) currSegment_1.addContent(note); else currSegment_2.addContent(note); } continue; } if (q == parser.types.TYPUS) { ins = Cantus(q, parser, ins); continue; } if (q == parser.types.CANTO || q == Types.NEUME) { ins = Neumes(q, parser, ins); continue; } if (Entities.contains(q)) { ins = Entities(q, parser, ins); continue; } if (Description.contains(q)) { ins = Description(q, parser, ins); continue; } if (q == parser.types.CHOICE) { ins = Choice(q, parser, ins); continue; } if (q == parser.types.PAGE) { ins = Page(q, parser, ins); continue; } } } } for (Element e: segments_1) { if (e.getChildren().size() > 0) feast.addContent(e); } } StringBuffer sb = new StringBuffer(); Pattern p0 = Pattern.compile("φ(.*?)ƕ"); Matcher m0 = p0.matcher(op.outputString(target).replaceAll(" φ ", "φ").replaceAll(" ƕ ", "ƕ")); Document snippet = new Document(); int i = 0; while (m0.find()) { i++; String s = m0.group().replaceAll("φ", "<metamark>").replaceAll("ƕ", "</metamark>"); try { if (m0.group().equals("φƕ")) { s=m0.group().replaceAll("φ", "<metamark function=\"variant\" xml:id=\"V\\."+new Integer(i).toString()+"\" />").replaceAll("ƕ",""); } else { String p =m0.group().replaceAll("φ", "<metamark function=\"variant\" xml:id=\"V\\."+new Integer(i).toString()+"\">").replaceAll("ƕ", "</metamark>").replaceAll("l:",""); snippet = builder.build(new StringReader(p)); s =m0.group().replaceAll("φ", "<metamark function=\"variant\" xml:id=\"V\\."+new Integer(i).toString()+"\">").replaceAll("ƕ", "</metamark>"); } } catch (Exception q) { s=m0.group().replaceAll("φ", "<metamark function=\"variant\" xml:id=\"V\\."+new Integer(i).toString()+"\" />").replaceAll("ƕ", "<metamark corresp=\"#V\\."+new Integer(i).toString()+"\" />"); }; m0.appendReplacement(sb,s); } m0.appendTail(sb); String xml = sb.toString() .replaceAll("¬", "").replaceAll("</ab><ab","</ab> <ab") .replaceAll("ö\\+","<note type=\"supplied\">") .replaceAll("\\+ö","</note>") .replaceAll("ä\\+","<note type=\"supplied\">") .replaceAll("\\+ä","</note>") .replaceAll("<metamark function=\"variant\" xml:id=\"(V.[0-9]+)\"> </metamark>", "<metamark function=\"variant\" xml:id=\"$1\"/>") .replaceAll("<metamark function=\"variant\" xml:id=\"(V.[0-9]+)\"></metamark>", "<metamark function=\"variant\" xml:id=\"$1\"/>") .replaceAll(" <metamark","<metamark") .replaceAll("\\s+"," ") .replaceAll("°",".") .replaceAll("ß\\+(.*?)\\+ß", "<del>$1</del>") .replaceAll("<ab> </ab>","") .replaceAll("Γ","<gap/>") .replaceAll("§(.*?)::(.*?)§","<choice><sic>$1</sic><corr>$2</corr></choice>") .replaceAll(" ,",",") .replaceAll(" \\.","\\.") .replaceAll("<del></del>","") .replaceAll("<del/>","") .replaceAll(" </metamark><metamark function=\"variant\" xml.id=\"(V.[0-9]+)\">(\\.|,)", "</metamark><metamark function=\"variant\" xml:id=\"$1\">$2") .replaceAll(" </metamark><metamark function=\"variant\" xml.id=\"(V.[0-9]+)\"[ ]*/>(\\.|,)", "</metamark><metamark function=\"variant\" xml:id=\"$1\"/>$2") .replaceAll(" </metamark><metamark function=\"variant\" xml.id=\"(V.[0-9]+)\"[ ]*/><metamark function=\"variant\" xml.id=\"(V.[0-9]+)\"[ ]*/>(\\.|,)", "</metamark><metamark function=\"variant\" xml:id=\"$1\"/><metamark function=\"variant\" xml:id=\"$2\"/>$3") .replaceAll(" </metamark><metamark function=\"variant\" xml.id=\"(V.[0-9]+)\"[ ]*/><metamark function=\"variant\" xml.id=\"(V.[0-9]+)\">(\\.|,)", "</metamark><metamark function=\"variant\" xml:id=\"$1\"/><metamark function=\"variant\" xml:id=\"$2\">$3") .replaceAll(" </metamark></l:([A-Z]{2,6})>(\\.|,)","</metamark></l:$1>$2") .replaceAll(" </metamark></l:([A-Z]{2,6})><metamark function=\"variant\" xml.id=\"(V.[0-9]+)\"[ ]*/>(\\.|,)","</metamark></l:$1><metamark function=\"variant\" xml:id=\"$2\"/>$3") .replaceAll(" </metamark></l:([A-Z]{2,6})><metamark function=\"variant\" xml.id=\"(V.[0-9]+)\">(\\.|,)","</metamark></l:$1><metamark function=\"variant\" xml:id=\"$2\">$3") .replaceAll(" </metamark></l:([A-Z]{2,6})><metamark function=\"variant\" xml.id=\"(V.[0-9]+)\"[ ]*/><metamark function=\"variant\" xml.id=\"(V.[0-9]+)\"[ ]*/>(\\.|,)","</metamark></l:$1><metamark function=\"variant\" xml:id=\"$2\"/><metamark function=\"variant\" xml:id=\"$3\"/>$4") .replaceAll(" <note>(.*?)</note> ", "<note>$1</note>") .replaceAll("\\.<", ". <") .replaceAll("(\\.|,)<","$1 <" ) .replaceAll("@", " ") .replaceAll(" </metamark>(\\.|,)","</metamark>$1") .replaceAll("Ä\\+","<note type=\"supplied\">").replaceAll("\\+Ä","</note>") .replaceAll("[+]","<unclear />"); try { target = builder.build(new org.xml.sax.InputSource(new ByteArrayInputStream(xml.getBytes("utf-8")))); } catch (Exception x) { log.info(xml); } xPath = XPath.newInstance("//t:div[preceding-sibling::t:div[1][t:ab/t:add[@type='marginal' and .='']]]"); xPath.addNamespace(Common.xmlns_tei_p5); List<Element> list = xPath.selectNodes(target); for (Element m: list) { m.setAttribute("subtype", "marginal"); } xPath = XPath.newInstance("//t:div[preceding-sibling::t:div[1][t:ab/t:del[@type='rasur' and t:add ='']]]"); xPath.addNamespace(Common.xmlns_tei_p5); list = xPath.selectNodes(target); for (Element m: list) { m.setAttribute("subtype", "rasur"); } xPath = XPath.newInstance("//t:ab/t:add[@type='marginal' and .='']"); xPath.addNamespace(Common.xmlns_tei_p5); list = xPath.selectNodes(target); for (Element m: list) { m.getParent().removeContent(m); } xPath = XPath.newInstance("//t:del[@type='rasur' and t:add ='']"); xPath.addNamespace(Common.xmlns_tei_p5); list = xPath.selectNodes(target); for (Element m: list) { m.getParent().removeContent(m); } xPath = XPath.newInstance("//t:add[@type='marginal' and .='']"); xPath.addNamespace(Common.xmlns_tei_p5); list = xPath.selectNodes(target); for (Element m: list) { m.getParent().removeContent(m); } } catch (Exception e) { log.error(e.getLocalizedMessage(),e); } return target; } private Element Choice(Types q, Parser parser, Element elem) { String sp = parser.getEntity(); int ipos = sp.indexOf("::"); Element choice = new Element("choice",Common.xmlns_ntei_p5); Element orig = new Element("sic",Common.xmlns_ntei_p5); Element corr = new Element("corr",Common.xmlns_ntei_p5); orig.setText(sp.substring(0,ipos)); corr.setText(sp.substring(ipos+2 )); choice.addContent(orig); choice.addContent(corr); if (elem != null) { elem.addContent(choice); } else { if (currSegment_2 == null) currSegment_1.addContent(choice); else currSegment_2.addContent(choice); } return elem; } private Element Cantus(Types q, Parser parser, Element elem) { boolean conj = parser.getConjecture(); Element seg = new Element(parser.getEntity(),Common.xmlns_cantus); if (conj) seg.setAttribute("subtype","supplied"); while(true) { Types x = parser.foresee(); if (Types.WHITESPACE == x || Types.PAGE == x) parser.next(); else break; } while (Typus.contains(parser.foresee())) { q= parser.next(); if (q == Types.NEUME) { Element phr = new Element("phr",Common.xmlns_ntei_p5); phr.setAttribute("type","neume"); parseEmendations(phr, parser.getEntity().trim()); // seg.addContent(phr); } else { parseEmendations(seg, parser.getEntity().trim()); } } if (elem != null) { elem.addContent(seg); } else { if (currSegment_2 == null) currSegment_1.addContent(seg); else currSegment_2.addContent(seg); } return elem; } private Element Entities (Types q, Parser parser, Element elem) { Element seg = addPage("seg",parser.getEntity()); if (q == Types.PERSON) seg.setAttribute("ana", "#person"); else if (q == Types.PLACE) seg.setAttribute("ana", "#place"); else if (q == Types.FUNCTION) seg.setAttribute("ana", "#function"); else if (q == Types.DEL) seg.setAttribute("ana", "#strikethrough"); if (elem != null) { elem.addContent(seg); } else { if (currSegment_2 == null) currSegment_1.addContent(seg); else currSegment_2.addContent(seg); } return elem; } private Element addPage (String name, String content) { Element elem = new Element(name,Common.xmlns_ntei_p5); if (content.contains("(")) { String a[] = content.split("[()]"); elem.addContent(a[0]); Element pb = new Element("pb",Common.xmlns_ntei_p5); pb.setAttribute("n", a[1]); elem.addContent(pb); elem.addContent(a[2]); } else { elem.setText(content); } return elem; } private Element Neumes (Types q, Parser parser, Element elem) { String typ = "NO"; if (lastdiv != null && lastdiv.equals("Officium")) { typ = "IN"; lastdiv = null; } Element seg = new Element(typ,Common.xmlns_cantus); if (parser.getConjecture()) seg.setAttribute("subtype","supplied"); if (q == Types.NEUME) { Element phr = new Element("phr",Common.xmlns_ntei_p5); phr.setAttribute("type","neume"); parseEmendations(phr, parser.getEntity().trim()); seg.addContent(phr); } else { parseEmendations(seg, parser.getEntity().trim()); } if (elem != null) { elem.addContent(seg); } else { if (currSegment_2 == null) currSegment_1.addContent(seg); else currSegment_2.addContent(seg); } return elem; } private Element Page(Types q, Parser parser, Element elem) { Element pb = new Element("pb",Common.xmlns_ntei_p5); pb.setAttribute("n", parser.getEntity()); if (elem != null) { elem.addContent(pb); } else { if (currSegment_2 == null) currSegment_1.addContent(pb); else currSegment_2.addContent(pb); } return elem; } private Element Description (Types q, Parser parser, Element elem) { while (true) { if (q == Types.PAGE) { Element pb = new Element("pb",Common.xmlns_ntei_p5); pb.setAttribute("n",parser.getEntity()); if (elem != null) { elem.addContent(pb); } else { if (currSegment_2 == null) currSegment_1.addContent(pb); else currSegment_2.addContent(pb); } if (parser.foresee() == Types.WHITESPACE) parser.next(); } else { if (elem != null) { elem.addContent(parser.getEntity()); } else { if (currSegment_2 == null) currSegment_1.addContent(parser.getEntity()); else currSegment_2.addContent(parser.getEntity()); } } if (!Description.contains(parser.foresee())) break; q= parser.next(); } return elem; } private void parseEmendations(Element seg, String s) { int bp = 0; String ch; String buf =""; while (bp < s.length()) { ch = String.valueOf(s.charAt(bp++)); if (ch.equals("§")) { if (!buf.isEmpty()) { seg.addContent(buf); buf = ""; } String sp = ""; if (s.substring(bp).startsWith("R§")) { bp+=2; Element del = new Element("del",Common.xmlns_ntei_p5); del.setAttribute("type","rasur"); seg.addContent(del); } else if (s.substring(bp).startsWith("R::")) { bp+=3; while (true) { ch = String.valueOf(s.charAt(bp++)); if (ch.equals("§") || bp > s.length()-1) { break; }; sp+=ch; } if (!sp.isEmpty()) { Element del = new Element("del",Common.xmlns_ntei_p5); Element add = new Element("add",Common.xmlns_ntei_p5); del.setAttribute("type","rasur"); del.setAttribute("subtype","signingover"); add.setText(sp.trim()); del.addContent(add); seg.addContent(del); } } else if (s.substring(bp).startsWith("RM::")) { bp+=4; while (true) { ch = String.valueOf(s.charAt(bp++)); if (ch.equals("§") || bp > s.length()-1) { break; }; sp+=ch; } if (!sp.isEmpty()) { Element del = new Element("del",Common.xmlns_ntei_p5); Element add = new Element("add",Common.xmlns_ntei_p5); del.setAttribute("type","rasur"); add.setAttribute("subtype","marginal"); add.setText(sp.trim()); del.addContent(add); seg.addContent(del); } } else if (s.substring(bp).startsWith("RE::")) { bp+=4; while (true) { ch = String.valueOf(s.charAt(bp++)); if (ch.equals("§") || bp > s.length()-1) { break; }; sp+=ch; } if (!sp.isEmpty()) { Element del = new Element("del",Common.xmlns_ntei_p5); Element add = new Element("add",Common.xmlns_ntei_p5); del.setAttribute("type","rasur"); add.setAttribute("subtype","deciphered"); add.setText(sp.trim()); del.addContent(add); seg.addContent(del); } } else if (s.substring(bp).startsWith("M::")) { bp+=3; while (true) { ch = String.valueOf(s.charAt(bp++)); if (ch.equals("§") || bp > s.length()-1) { break; }; sp+=ch; } if (!sp.isEmpty()) { Element add = new Element("add",Common.xmlns_ntei_p5); add.setAttribute("type","marginal"); add.setText(sp.trim()); seg.addContent(add); } } else if (s.substring(bp).startsWith("I::")) { bp+=3; while (true) { ch = String.valueOf(s.charAt(bp++)); if (ch.equals("§") || bp > s.length()-1) { break; }; sp+=ch; } if (!sp.isEmpty()) { Element note = new Element("note",Common.xmlns_ntei_p5); note.setText(sp.trim()); seg.addContent(note); } } else { while (true) { ch = String.valueOf(s.charAt(bp++)); if (ch.equals("§") || bp > s.length()-1) { break; }; sp+=ch; } if (sp.contains("::")) { int ipos = sp.indexOf("::"); Element choice = new Element("choice",Common.xmlns_ntei_p5); Element orig = new Element("sic",Common.xmlns_ntei_p5); Element corr = new Element("corr",Common.xmlns_ntei_p5); orig.setText(sp.substring(0,ipos)); corr.setText(sp.substring(ipos+2 )); choice.addContent(orig); choice.addContent(corr); seg.addContent(choice); } } } else if (ch.equals("(")) { if (!buf.isEmpty()) { seg.addContent(buf); buf = ""; } String sp = ""; while (true) { ch = String.valueOf(s.charAt(bp++)); if (ch.equals(")") || bp > s.length()-1) break; sp+=ch; } Element pb = new Element("pb",Common.xmlns_ntei_p5); pb.setAttribute("n",sp); seg.addContent(pb); } else if (ch.equals("{")) { if (!buf.isEmpty()) { seg.addContent(buf); buf = ""; } String sp = ""; while (true) { ch = String.valueOf(s.charAt(bp++)); if (ch.equals("}") || bp > s.length()-1) break; sp+=ch; } Element note = new Element("note",Common.xmlns_ntei_p5); note.setText(sp); note.setAttribute("type","supplied"); seg.addContent(note); } else if (ch.equals("[")) { if (!buf.isEmpty()) { seg.addContent(buf); buf = ""; } String sp = ""; while (true) { ch = String.valueOf(s.charAt(bp++)); if (ch.equals("]") || bp > s.length()-1) break; sp+=ch; } Element ins = new Element("ins",Common.xmlns_ntei_p5); ins.setText(sp); seg.addContent(ins); } else if (ch.equals("¥")) { if (!buf.isEmpty()) { seg.addContent(buf); buf = ""; } String sp = ""; bp+=3; while (true) { ch = String.valueOf(s.charAt(bp++)); if (ch.equals("¥") || bp > s.length()-1) break; sp+=ch; } Element add = new Element("add",Common.xmlns_ntei_p5); add.setText(sp); seg.addContent(add); } else { buf+=ch; } } if (!buf.isEmpty()) { seg.addContent(buf); buf = ""; } } private void readConfig() { boolean times_1 = false, times_2 = false, types = false; TYPES = new ArrayList<String>(); try { String homeDir = new File(System.getProperty("user.home")).getAbsolutePath(); File config = new File(homeDir+System.getProperty("file.separator")+"cantus.conf"); InputStream is = null; if (config.exists()) { is = new FileInputStream(config); } else { is = Cirilo.class.getResourceAsStream("cantus.conf"); } BufferedReader in = new BufferedReader(new InputStreamReader(is)); String line; while ((line = in.readLine()) != null) { if (line.equals("%Types")) {times_1=false; times_2=false; types=true; continue;} if (types) { TYPES.add(line.trim()); TYPES.add("["+line.trim()+"]"); TYPES.add("_"+line.trim()); TYPES.add("[_"+line.trim()+"]"); } } in.close(); } catch (Exception e) { log.error(e.getLocalizedMessage(),e); } } private String parseVariant(String buf) { if (!VARIANT.isEmpty()) { Pattern p0 = Pattern.compile("(//)(.*?)(//)"); Matcher m0 = p0.matcher(buf); StringBuffer sb = new StringBuffer(); while (m0.find()) { String variant = ""; String s = m0.group(); s = s.substring(2,s.length()-1); log.info(s); boolean test = this.urtext && s.contains("µ"); Pattern p1 = Pattern.compile("("+(test ? "µ" : "")+".*?)~(.*?)/"); Matcher m1 = p1.matcher(s); while (m1.find()) { String[] a = m1.group().substring(0, m1.group().length()-1).split("~"); if (test || a[0].contains(VARIANT)) { variant = a[1]; break; } } try { m0.appendReplacement(sb," φ "+ variant+ " ƕ "); } catch (Exception e) { e.printStackTrace(); } } m0.appendTail(sb); buf = sb.toString(); } return buf; } private String getCanto(String s) { String id = ""; /* id = "|cao:nul"; try { if (s.contains(" ")) s+="*"; String canto = CANTOS.get(s); if (canto == null) { URL url = new URL("http://glyph.uni-graz.at/cocoon/cantus/get?incipit="+s); InputStream in = getInputStream(url); Document cao = builder.build(in); XPath xpath = XPath.newInstance("//c:cantus/c:id"); xpath.addNamespace( Common.xmlns_cantus ); List ids = (List) xpath.selectNodes(cao); if (ids.size() > 0) { id ="|cao:"; for (Iterator iter = ids.iterator(); iter.hasNext();) { try { Element oid = (Element) iter.next(); id+=oid.getText()+";"; } catch (Exception q) {} } CANTOS.put(s,id); } } else { id = canto; } } catch (Exception e) { log.error(e.getLocalizedMessage(),e); } */ return id; } private static InputStream getInputStream(URL url) { InputStream in = null; try { HttpURLConnection con = (HttpURLConnection) url.openConnection(); con.setRequestMethod("GET"); con.connect(); in = con.getInputStream(); } catch (IOException e) { log.error(e.getLocalizedMessage(),e); } return in; } private String trunc(Types t) { String s = t.toString(); return "|"+s.substring(0,2); } private class Parser { public Types types; public static final String SEPARATOR = " ,.$¥§(%~¦¬"; private String buf; private String entity; private int bp; private boolean conjecture; private boolean addition; private boolean rasur; private boolean marginal; public Parser() { } public void set( String s) { addition = false; rasur = false; marginal = false; if (s.startsWith("¥E¥")) { addition = true; s = s.substring(3); } if (s.startsWith("§M::") && s.trim().endsWith("§")) { marginal = true; s = s.substring(4,s.length()-1); } if (s.startsWith("§R::") && s.trim().endsWith("§")) { rasur = true; s = s.substring(4,s.length()-1); } buf = s; bp = 0; } public Types next() { String ch; entity = ""; if (bp > buf.length()-1) return types.EOT; ch = String.valueOf(buf.charAt(bp++)); conjecture = false; if (ch.equals(".") || ch.equals("¬")) { entity = ch; return log(types.DOT); } else if (ch.equals(",")) { entity = ","; return log(types.COMMA); } else if (ch.equals(" ")) { entity = " "; return log(types.WHITESPACE); } else if (ch.equals("{")) { while (true) { ch = String.valueOf(buf.charAt(bp++)); if (ch.equals("}") || bp > buf.length()-1) break; entity+=ch; } return log(types.CANTO); } else if (ch.equals("¥")) { if (buf.substring(bp).startsWith("E::")) { bp+=3; return log(Types.INS); } return log(Types.EOI); } else if (ch.equals("§")) { if (buf.substring(bp).startsWith("R§")) { bp+=2; return log(types.RUSUR); } else if (buf.substring(bp).startsWith("R::")) { bp+=3; return log(types.RASUR); } else if (buf.substring(bp).startsWith("RM::")) { bp+=4; return log(types.MASUR); } else if (buf.substring(bp).startsWith("RE::")) { bp+=4; return log(types.XASUR); } else if (buf.substring(bp).startsWith("M::")) { bp+=3; return log(types.MARGINAL); } else if (buf.substring(bp).startsWith("I::")) { bp+=3; return log(types.COMMENT); } else { int cp = bp; while (true) { if (bp > buf.length()-1) break; ch = String.valueOf(buf.charAt(bp++)); if (ch.equals("§")) break; if (ch.equals("¥")) { bp = cp; // +1; return log(types.EOA); } entity+=ch; } if (entity.contains("::")) { return log(types.CHOICE); } else { bp = cp; // +1; return log(types.EOA); } } } else if (ch.equals("(")) { while (true) { ch = String.valueOf(buf.charAt(bp++)); if (ch.equals(")") || bp > buf.length()-1) break; entity+=ch; } return log(types.PAGE); } else if (ch.equals("#")) { while (true) { ch = String.valueOf(buf.charAt(bp++)); if (ch.equals("}") || bp > buf.length()-1) break; entity+=ch; } return log(types.NEUME); } else if (ch.equals("ü")) { while (true) { ch = String.valueOf(buf.charAt(bp++)); if (ch.equals("}") || bp > buf.length()-1) break; entity+=ch; } return log(types.DEL); } else if (ch.equals("Ö")) { while (true) { ch = String.valueOf(buf.charAt(bp++)); if (ch.equals("}") || bp > buf.length()-1) break; entity+=ch; } return log(types.PERSON); } else if (ch.equals("Ä")) { while (true) { ch = String.valueOf(buf.charAt(bp++)); if (ch.equals("}") || bp > buf.length()) break; entity+=ch; } return log(types.FUNCTION); } else if (ch.equals("Ü")) { while (true) { ch = String.valueOf(buf.charAt(bp++)); if (ch.equals("}") || bp > buf.length()-1) break; entity+=ch; } return log(types.PLACE); } else if (ch.equals("Ü")) { while (true) { ch = String.valueOf(buf.charAt(bp++)); if (ch.equals("}") || bp > buf.length()-1) break; entity+=ch; } return log(types.PLACE); } else if (ch.equals("Δ")) { while (true) { ch = String.valueOf(buf.charAt(bp++)); if (ch.equals("[")) conjecture = true; if (ch.equals("}") || bp > buf.length()-1) break; entity+=ch; } entity = entity.replaceAll("\\[","ä+").replaceAll("\\]","+ä"); return log(types.TIMETERM_1); } else if (ch.equals("Θ")) { while (true) { ch = String.valueOf(buf.charAt(bp++)); if (ch.equals("}") || bp > buf.length()-1) break; entity+=ch; } entity = entity.replaceAll("\\[","ä+").replaceAll("\\]","+ä"); return log(types.TIMETERM_2); } else { int cp = bp; while (true) { if (SEPARATOR.contains(ch) || bp > buf.length()-1) { break; }; if (ch.equals("[")) conjecture = true; entity+=ch; ch = String.valueOf(buf.charAt(bp++)); } if( (SEPARATOR.contains(ch) && bp <= buf.length()) || bp <= buf.length()-1 ) { bp--;} else {entity+=ch;} if (TYPES.contains(entity)) { return log(types.TYPUS); } entity = entity.replaceAll("\\[","ö+").replaceAll("\\]","+ö"); return log(types.TEXT); } } private Types log (Types q) { // System.out.println(q+"!"+entity); return q; } public Types foresee() { int tbp = bp; Types result = next(); bp = tbp; return result; } public boolean getInsertMode() { boolean mode = addition; addition = false; return mode; // } // public boolean getRasurMode() { boolean mode = rasur; return mode; // } // public boolean getMarginalMode() { boolean mode = marginal; return mode; // } // public boolean getConjecture() { return conjecture; } public String getEntity() { return entity.replaceAll("\\[","").replaceAll("\\]",""); } } }