package com.cognitionis.timeml_basickit;

import com.cognitionis.timeml_basickit.comparators.AscStringTimexMapComparator;
import java.io.*;
import java.util.*;
import java.util.regex.*;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import com.cognitionis.utils_basickit.FileUtils;
import java.text.SimpleDateFormat;

/**
 * Static helpers to read a TimeML file into a {@code TimeML} Java object and to
 * convert TimeML files between several on-disk variants (inline ISO-TimeML,
 * TE3 input, entity-only, tref-tref enriched).
 *
 * <p>Every public method reports failures on {@code System.err} (with a stack
 * trace when the system property {@code DEBUG} is {@code true}) and returns
 * {@code null} instead of throwing.</p>
 *
 * @author Héctor Llorens
 * @since 2011
 */
public class TML_file_utils {

    /** Encoding used to read and write TimeML (XML) files. */
    private static final String FILE_ENCODING = "UTF-8";

    /** MAKEINSTANCE attributes that are moved to/from inline EVENT tags, in output order. */
    private static final String[] INSTANCE_ATTRIBUTES = {"pos", "tense", "aspect", "polarity", "modality"};

    /**
     * Reads a TimeML file and returns its content as a {@code TimeML} object.
     *
     * <p>The DCT timex, the TIMEX3 and EVENT elements found inside TEXT, and all
     * MAKEINSTANCE and TLINK elements of the document are loaded. TLINK relation
     * types DURING, DURING_INV and IDENTITY are mapped to SIMULTANEOUS, and
     * event-event / timex-timex links are normalized so that the entity with the
     * lower numeric id comes first. When the {@code DEBUG} system property is
     * {@code true}, statistics are printed to standard output while reading.</p>
     *
     * @param tmlfile path of the TimeML file to read
     * @return the loaded object, or {@code null} if the file could not be read or parsed
     */
    public static TimeML ReadTml2Object(String tmlfile) {
        TimeML tml_object = null;
        Timex dctTimex = null;
        HashMap<String, Timex> timexes = new HashMap<String, Timex>();
        HashMap<String, Event> events = new HashMap<String, Event>();
        HashMap<String, Event> makeinstances = new HashMap<String, Event>();
        ArrayList<Link> links = new ArrayList<Link>();
        // EVENT DOM elements by eid, so that MAKEINSTANCEs can recover the event text and class
        HashMap<String, Element> eventElements = new HashMap<String, Element>();

        try {
            final boolean debug = isDebugEnabled();
            Document doc = parseXmlFile(tmlfile);
            if (debug) {
                System.out.println("Root element " + doc.getDocumentElement().getNodeName());
            }

            Element dct = findDctTimex(doc, tmlfile);
            if (dct != null) {
                dctTimex = new Timex(dct.getAttribute("tid"), dct.getTextContent(), dct.getAttribute("type"), dct.getAttribute("value"), tmlfile, -1, -1, true);
                if (debug) {
                    System.out.println("DCT: " + dct.getAttribute("tid") + " " + dct.getAttribute("value"));
                }
            }

            NodeList text = doc.getElementsByTagName("TEXT");
            if (text.getLength() > 1) {
                throw new Exception("More than one TEXT tag found.");
            }
            if (text.getLength() == 0) {
                // previously surfaced as a bare NullPointerException
                throw new Exception("No TEXT tag found.");
            }
            Element TextElmnt = (Element) text.item(0);

            // TIMEX3 (only those inside TEXT)
            NodeList current_node = TextElmnt.getElementsByTagName("TIMEX3");
            if (debug) {
                System.out.println("There's " + current_node.getLength() + " " + "TIMEX3" + ".");
            }
            for (int s = 0; s < current_node.getLength(); s++) {
                Element element = (Element) current_node.item(s);
                String tid = element.getAttribute("tid");
                if (debug) {
                    System.out.println(element.getNodeName() + "(" + tid + ", " + element.getAttribute("type") + ", " + element.getAttribute("value") + "): " + element.getTextContent());
                }
                timexes.put(tid, new Timex(tid, element.getTextContent(), element.getAttribute("type"), element.getAttribute("value"), tmlfile, -1, -1));
            }

            // EVENT (only those inside TEXT)
            current_node = TextElmnt.getElementsByTagName("EVENT");
            if (debug) {
                System.out.println("There's " + current_node.getLength() + " " + "EVENT" + ".");
            }
            for (int s = 0; s < current_node.getLength(); s++) {
                Element element = (Element) current_node.item(s);
                String eid = element.getAttribute("eid");
                if (debug) {
                    System.out.println(element.getNodeName() + "(" + eid + ", " + element.getAttribute("class") + "): " + element.getTextContent());
                }
                // The event is keyed by its own eid and built from its own element
                // (not from the DCT timex element).
                events.put(eid, new Event(eid, element.getTextContent(), element.getAttribute("class"), tmlfile, -1, -1));
                eventElements.put(eid, element);
            }

            // MAKEINSTANCE (anywhere in the document)
            current_node = doc.getElementsByTagName("MAKEINSTANCE");
            if (debug) {
                System.out.println("There's " + current_node.getLength() + " " + "MAKEINSTANCE" + ".");
            }
            for (int s = 0; s < current_node.getLength(); s++) {
                Element element = (Element) current_node.item(s);
                String eiid = element.getAttribute("eiid");
                String eventId = element.getAttribute("eventID");
                if (debug) {
                    System.out.println(element.getNodeName() + "(" + eiid + ", " + eventId + ")");
                }
                // Text and class come from the instantiated EVENT when it exists in TEXT;
                // otherwise fall back to whatever the MAKEINSTANCE element itself carries.
                Element source = eventElements.get(eventId);
                if (source == null) {
                    source = element;
                }
                Event auxe = new Event(eventId, source.getTextContent(), source.getAttribute("class"), tmlfile, -1, -1);
                auxe.set_eiid(eiid);
                makeinstances.put(eiid, auxe);
            }

            // TLINK (anywhere in the document)
            current_node = doc.getElementsByTagName("TLINK");
            if (debug) {
                System.out.println("There's " + current_node.getLength() + " " + "TLINK" + ".");
            }
            for (int s = 0; s < current_node.getLength(); s++) {
                Element element = (Element) current_node.item(s);
                String relType = element.getAttribute("relType");
                if (relType.matches("(DURING|DURING_INV|IDENTITY)")) {
                    relType = "SIMULTANEOUS";
                }
                String[] resolved = resolveTlinkEntities(element, relType);
                if (debug) {
                    System.out.println(element.getNodeName() + "(" + element.getAttribute("lid") + ", " + resolved[0] + ", " + resolved[2] + ", " + resolved[3] + ", " + resolved[1] + ")");
                }
                links.add(new Link(element.getAttribute("lid"), resolved[0], resolved[1], resolved[2], resolved[3], tmlfile));
            }

            // The fourth argument is the MAKEINSTANCE map (the events map was passed twice before).
            tml_object = new TimeML(dctTimex, timexes, events, makeinstances, links);
        } catch (Exception e) {
            reportError(e);
            return null;
        }
        return tml_object;
    }

    /**
     * Converts a TimeML 1.2 file into an inline ISO-TimeML file.
     *
     * <p>The first MAKEINSTANCE of each event is merged into its EVENT tag (eiid,
     * pos, tense, aspect, polarity, modality) and removed; further MAKEINSTANCEs
     * of the same event are rewritten as empty EVENT tags carrying the class of
     * the original event. The result is written to {@code tmlfile + ".isotml"}.</p>
     *
     * @param tmlfile path of the TimeML file to convert
     * @return the path of the written file, or {@code null} on error
     */
    public static String TML2ISOTML(String tmlfile) {
        String outputfile = null;
        try {
            Document doc = parseXmlFile(tmlfile);
            String filecontents = FileUtils.readFileAsString(tmlfile, "UTF-8");
            HashMap<String, String> instancedevents = new HashMap<String, String>();

            NodeList makeinstances = doc.getElementsByTagName("MAKEINSTANCE");
            System.out.println("There's " + makeinstances.getLength() + " " + "MAKEINSTANCE" + ".");
            for (int s = 0; s < makeinstances.getLength(); s++) {
                Element element = (Element) makeinstances.item(s);
                String eventId = element.getAttribute("eventID");
                String eiid = element.getAttribute("eiid");
                // ids are quoted so that regex metacharacters in them are matched literally
                String eventTagRegex = "<EVENT [^>]*eid=\"" + Pattern.quote(eventId) + "\"[^>]*";
                String instanceTagRegex = "<MAKEINSTANCE ([^/]*eiid=\"" + Pattern.quote(eiid) + "\"[^/]*)";

                if (!instancedevents.containsKey(eventId)) {
                    System.out.println("Removing: " + element.getNodeName() + "(" + eiid + ", " + eventId + ")");
                    instancedevents.put(eventId, "ok");
                    String addition = " eiid=\"" + eiid + "\"" + instanceAttributes(element);
                    filecontents = filecontents.replaceFirst("(" + eventTagRegex + ")>", "$1" + Matcher.quoteReplacement(addition) + ">");
                    // \r? lets the removal also work on files with CRLF line endings
                    filecontents = filecontents.replaceFirst(instanceTagRegex + "[/][^>]*>[ \t]*\r?\n", "");
                } else {
                    System.out.println("Replacing: " + element.getNodeName() + "(" + eiid + ", " + eventId + ")");
                    // add class from event
                    Matcher m = Pattern.compile(eventTagRegex + ">").matcher(filecontents);
                    if (!m.find()) {
                        throw new Exception("No EVENT with eid=\"" + eventId + "\" found for MAKEINSTANCE " + eiid + " in " + tmlfile);
                    }
                    String eclass = m.group(0).replaceFirst(".*class=\"([^\"]+)\".*", "$1");
                    filecontents = filecontents.replaceFirst(instanceTagRegex + "/[^>]*>", "<EVENT $1 class=\"" + Matcher.quoteReplacement(eclass) + "\" />");
                }
            }
            filecontents = filecontents.replaceAll("eventID=\"", "eid=\"");
            if (filecontents.contains("MAKEINSTANCE")) {
                throw new Exception("Some makeinstances have not been replaced..." + tmlfile);
            }

            outputfile = tmlfile + ".isotml";
            writeUtf8File(outputfile, filecontents);
        } catch (Exception e) {
            reportError(e);
            return null;
        }
        return outputfile;
    }

    /**
     * Converts an inline ISO-TimeML file back into a TimeML 1.2 file.
     *
     * <p>Every EVENT inside TEXT is reduced to its {@code eid} and {@code class}
     * attributes and a MAKEINSTANCE carrying the remaining instance attributes is
     * appended right after the closing TEXT tag; empty EVENT tags are turned back
     * into MAKEINSTANCE tags. The result is written to {@code tmlfile + ".tml"}.</p>
     *
     * @param tmlfile path of the ISO-TimeML file to convert
     * @return the path of the written file, or {@code null} on error
     */
    public static String ISOTML2TML(String tmlfile) {
        String outputfile = null;
        try {
            Document doc = parseXmlFile(tmlfile);
            Element TextElmnt = (Element) doc.getElementsByTagName("TEXT").item(0);
            if (TextElmnt == null) {
                // previously surfaced as a bare NullPointerException
                throw new Exception("No TEXT tag found: " + tmlfile);
            }
            String filecontents = FileUtils.readFileAsString(tmlfile, "UTF-8");
            StringBuilder makeinstances2add = new StringBuilder();

            NodeList textevents = TextElmnt.getElementsByTagName("EVENT");
            System.out.println("There's " + textevents.getLength() + " " + "EVENT" + ".");
            for (int s = 0; s < textevents.getLength(); s++) {
                Element element = (Element) textevents.item(s);
                String eid = element.getAttribute("eid");
                String eiid = element.getAttribute("eiid");
                makeinstances2add.append("\n<MAKEINSTANCE eventID=\"").append(eid)
                        .append("\" eiid=\"").append(eiid).append("\"")
                        .append(instanceAttributes(element))
                        .append(" />");
                String plainEventTag = "<EVENT eid=\"" + eid + "\" class=\"" + element.getAttribute("class") + "\">";
                filecontents = filecontents.replaceFirst("(<EVENT [^>]*eiid=\"" + Pattern.quote(eiid) + "\"[^>]*)>", Matcher.quoteReplacement(plainEventTag));
            }

            filecontents = filecontents.replaceAll("<EVENT ([^/]* /)", "<MAKEINSTANCE $1");
            filecontents = filecontents.replaceAll("<MAKEINSTANCE ([^/]*)eid=([^/]*/)", "<MAKEINSTANCE $1eventID=$2");
            filecontents = filecontents.replaceFirst("</TEXT>", Matcher.quoteReplacement("</TEXT>\n" + makeinstances2add));

            outputfile = tmlfile + ".tml";
            writeUtf8File(outputfile, filecontents);
        } catch (Exception e) {
            reportError(e);
            return null;
        }
        return outputfile;
    }

    /**
     * Converts a TimeML 1.2 file into a non-tagged TE3 TimeML input: all tags are
     * removed from the lines between the TEXT tags, and links, MAKEINSTANCEs and
     * comments are removed from the footer. The result is written to
     * {@code tmlfile + ".TE3input"}.
     *
     * @param tmlfile path of the TimeML file to convert
     * @return the path of the written file, or {@code null} on error
     */
    public static String TML2TE3(String tmlfile) {
        return writeTe3Input(tmlfile, true);
    }

    /**
     * Converts a TimeML 1.2 file into a TimeML file without links (only
     * entities): the lines between the TEXT tags are kept as they are, and links,
     * MAKEINSTANCEs and comments are removed from the footer. The result is
     * written to {@code tmlfile + ".TE3input"} (same extension as
     * {@link #TML2TE3(String)}, but the text keeps its tags).
     *
     * @param tmlfile path of the TimeML file to convert
     * @return the path of the written file, or {@code null} on error
     */
    public static String TML2onlyEntities(String tmlfile) {
        return writeTe3Input(tmlfile, false);
    }

    /**
     * Given a TimeML file, adds the implicit tref-tref links.
     *
     * <p>It collects the DCT and the explicit DATE/TIME timexes whose value starts
     * with digits (weeks, seasons, quarters, halves and trimesters are omitted),
     * sorts them with {@code AscStringTimexMapComparator} and chains them with
     * timex-timex links (BEFORE, SIMULTANEOUS, INCLUDES, IS_INCLUDED). Original
     * timex-timex links that involve one of those reference timexes are dropped;
     * all other original TLINKs are re-emitted with new ids. Every link line of
     * the file is replaced by the new list, placed just above the closing TimeML
     * tag. The result is written to {@code tmlfile + ".tref-links"}.</p>
     *
     * <p>No timegraph is used, so the consistency of the resulting links is not
     * checked.</p>
     *
     * @param tmlfile path of the TimeML file to enrich
     * @return the path of the written file, or {@code null} on error
     */
    public static String TML_add_tref_tref_links(String tmlfile) {
        String outputfile = null;
        ArrayList<Link> tref_tref_links = new ArrayList<Link>();
        ArrayList<Link> original_links = new ArrayList<Link>();
        HashMap<String, Timex> refs = new HashMap<String, Timex>();
        try {
            Document doc = parseXmlFile(tmlfile);
            String shortName = new File(tmlfile).getName();

            Element dct = findDctTimex(doc, tmlfile);
            if (dct == null) {
                throw new Exception("NO DCT FOUND: " + tmlfile);
            }
            Timex dctTimex = new Timex(dct.getAttribute("tid"), dct.getTextContent(), dct.getAttribute("type"), dct.getAttribute("value"), tmlfile, -1, -1, true);
            refs.put(dctTimex.get_id(), dctTimex);

            Element TextElmnt = (Element) doc.getElementsByTagName("TEXT").item(0);
            if (TextElmnt == null) {
                // previously surfaced as a bare NullPointerException
                throw new Exception("No TEXT tag found: " + tmlfile);
            }
            String filecontents = FileUtils.readFileAsString(tmlfile, "UTF-8");

            // Get reference timexes
            NodeList texttimexes = TextElmnt.getElementsByTagName("TIMEX3");
            for (int s = 0; s < texttimexes.getLength(); s++) {
                Element element = (Element) texttimexes.item(s);
                if (!element.getAttribute("type").matches("(?:DATE|TIME)")) {
                    continue;
                }
                String value = element.getAttribute("value");
                // omit weeks, seasons, quarters, halves, trimesters...
                if (value.matches("[0-9]+.*") && !value.matches(".*-(W|FA|WI|SP|SU|Q|T|H).*")) {
                    Timex reftimex = new Timex(element.getAttribute("tid"), element.getTextContent(), element.getAttribute("type"), value, shortName, -1, -1);
                    if (reftimex.get_date() == null) {
                        System.err.println("Omitted ref timex: " + reftimex.get_value());
                    } else {
                        refs.put(element.getAttribute("tid"), reftimex);
                    }
                }
            }

            // Original links: their ids start after the range left for the tref-tref chain
            // (a deliberately generous bound of twice the number of references).
            NodeList current_node = doc.getElementsByTagName("TLINK");
            int count = (refs.size() * 2) - 1;
            for (int s = 0; s < current_node.getLength(); s++) {
                count++;
                Element element = (Element) current_node.item(s);
                // omit timex-timex links that will already be introduced in the tref-tref chain
                if (element.hasAttribute("timeID") && element.hasAttribute("relatedToTime")
                        && (refs.containsKey(element.getAttribute("timeID")) || refs.containsKey(element.getAttribute("relatedToTime")))) {
                    continue;
                }
                String[] resolved = resolveTlinkEntities(element, element.getAttribute("relType"));
                original_links.add(new Link("l" + count, resolved[0], resolved[1], resolved[2], resolved[3], tmlfile));
            }

            // Raw TreeMap construction is kept because the generic signature of the
            // project comparator is not visible from this file.
            @SuppressWarnings("unchecked")
            TreeMap<String, Timex> sorted_refs = new TreeMap(new AscStringTimexMapComparator(refs));
            sorted_refs.putAll(refs);

            // Build the chain over the sorted references. last_timex is the previous
            // element of the chain; last_includer_timex is a coarser timex that still
            // includes the current region of the chain (if any).
            Timex last_timex = null;
            Timex last_includer_timex = null;
            count = 0;
            for (Timex t : sorted_refs.values()) {
                // relation with the current includer
                if (last_includer_timex != null) {
                    count++;
                    String reltype = relationBetween(last_includer_timex, t);
                    tref_tref_links.add(new Link("l" + count, "tlink-timex-timex", reltype, last_includer_timex.get_id(), t.get_id(), tmlfile));
                    if ("IS_INCLUDED".equals(reltype)) {
                        last_includer_timex = t;
                    } else if ("BEFORE".equals(reltype)) {
                        // t is outside the includer: the includer is closed
                        last_timex = t;
                        last_includer_timex = null;
                    }
                }
                // relation with the last timex (if not considered before)
                if (last_timex == null || last_timex.get_id().equals(t.get_id())) {
                    last_timex = t;
                } else {
                    count++;
                    String reltype = relationBetween(last_timex, t);
                    tref_tref_links.add(new Link("l" + count, "tlink-timex-timex", reltype, last_timex.get_id(), t.get_id(), tmlfile));
                    if ("IS_INCLUDED".equals(reltype)) {
                        last_includer_timex = t;
                    } else if ("INCLUDES".equals(reltype)) {
                        last_includer_timex = last_timex;
                        last_timex = t;
                    } else if ("BEFORE".equals(reltype)) {
                        last_timex = t;
                    }
                }
            }

            // remove all links
            filecontents = filecontents.replaceAll("<[TSAR]LINK[^>]*>[^\\n]*\\n", "");

            // New link list: 1. tref-tref chain + 2. original links.
            // TODO: everything is added; a timegraph should be used to add only consistent relations.
            tref_tref_links.addAll(original_links);
            StringBuilder improved_links = new StringBuilder();
            for (int i = 0; i < tref_tref_links.size(); i++) {
                Link l = tref_tref_links.get(i);
                String prefix = "<TLINK lid=\"" + l.get_id() + "\" relType=\"" + l.get_category().toUpperCase() + "\" ";
                if (l.get_type().startsWith("tlink-event-timex")) {
                    improved_links.append(prefix).append("eventInstanceID=\"").append(l.get_id1()).append("\" relatedToTime=\"").append(l.get_id2()).append("\" />\n");
                }
                if (l.get_type().startsWith("tlink-event-event")) {
                    improved_links.append(prefix).append("eventInstanceID=\"").append(l.get_id1()).append("\" relatedToEventInstance=\"").append(l.get_id2()).append("\" />\n");
                }
                if (l.get_type().startsWith("tlink-timex-timex")) {
                    improved_links.append(prefix).append("timeID=\"").append(l.get_id1()).append("\" relatedToTime=\"").append(l.get_id2()).append("\" />\n");
                }
            }

            // put all links just above the TimeML closing tag
            filecontents = filecontents.replaceFirst("</TimeML>", Matcher.quoteReplacement("\n" + improved_links + "\n</TimeML>\n"));

            outputfile = tmlfile + ".tref-links";
            writeUtf8File(outputfile, filecontents);
        } catch (Exception e) {
            reportError(e);
            return null;
        }
        return outputfile;
    }

    /**
     * Converts a plain text file into a TE3-input file, using the current date
     * as document creation time. The result is written (as UTF-8) to
     * {@code plainfile + ".TE3input"}.
     *
     * <p>The plain file is read with the platform default charset and its text is
     * copied verbatim (it is not XML-escaped).</p>
     *
     * @param plainfile path of the plain text file
     * @return the path of the written file, or {@code null} on error
     */
    public static String Plain2TE3(String plainfile) {
        String outputfile = null;
        try {
            // Read the whole input before creating the output, so that a missing or
            // unreadable input does not leave an empty output file behind.
            StringBuilder text = new StringBuilder();
            BufferedReader inputReader = null;
            try {
                inputReader = new BufferedReader(new FileReader(new File(plainfile)));
                String line;
                while ((line = inputReader.readLine()) != null) {
                    text.append(line).append("\n");
                }
            } finally {
                if (inputReader != null) {
                    inputReader.close();
                }
            }

            String dctvalue = new SimpleDateFormat("yyyy-MM-dd").format(new Date());
            StringBuilder out = new StringBuilder();
            out.append("<?xml version=\"1.0\" ?>");
            out.append("\n<TimeML xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:noNamespaceSchemaLocation=\"http://timeml.org/timeMLdocs/TimeML_1.2.1.xsd\">\n");
            out.append("\n<DOCID>" + (new File(plainfile)).getName() + "</DOCID>\n");
            out.append("\n<DCT><TIMEX3 tid=\"t0\" type=\"DATE\" value=\"" + dctvalue + "\" temporalFunction=\"false\" functionInDocument=\"CREATION_TIME\">" + dctvalue + "</TIMEX3></DCT>\n");
            out.append("\n<TEXT>\n").append(text).append("</TEXT>\n");
            out.append("</TimeML>\n");

            outputfile = plainfile + ".TE3input";
            writeUtf8File(outputfile, out.toString());
        } catch (Exception e) {
            reportError(e);
            return null;
        }
        return outputfile;
    }

    /** Tells whether the {@code DEBUG} system property is set to {@code true} (case-insensitive). */
    private static boolean isDebugEnabled() {
        return "true".equalsIgnoreCase(System.getProperty("DEBUG"));
    }

    /** Prints the error on {@code System.err}, with its stack trace when debugging is enabled. */
    private static void reportError(Exception e) {
        System.err.println("Errors found (TML_file_utils):\n\t" + e.toString() + "\n");
        if (isDebugEnabled()) {
            e.printStackTrace(System.err);
        }
    }

    /** Parses the given XML file and returns its normalized DOM document. */
    private static Document parseXmlFile(String path) throws Exception {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        DocumentBuilder db = dbf.newDocumentBuilder();
        Document doc = db.parse(new File(path));
        doc.getDocumentElement().normalize();
        return doc;
    }

    /**
     * Returns the first TIMEX3 element of the first DCT element, or {@code null}
     * if the DCT contains no TIMEX3.
     *
     * @throws Exception if the document has no DCT element at all
     */
    private static Element findDctTimex(Document doc, String tmlfile) throws Exception {
        Element dctElement = (Element) doc.getElementsByTagName("DCT").item(0);
        if (dctElement == null) {
            throw new Exception("NO DCT FOUND: " + tmlfile);
        }
        return (Element) dctElement.getElementsByTagName("TIMEX3").item(0);
    }

    /** Writes the given contents to the given path as UTF-8, replacing any existing file. */
    private static void writeUtf8File(String path, String contents) throws IOException {
        BufferedWriter outfile = null;
        try {
            outfile = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(path), FILE_ENCODING));
            outfile.write(contents);
        } finally {
            if (outfile != null) {
                outfile.close();
            }
        }
    }

    /** Serializes the non-empty instance attributes of the element as {@code  name="value"} pairs. */
    private static String instanceAttributes(Element element) {
        StringBuilder attributes = new StringBuilder();
        for (String name : INSTANCE_ATTRIBUTES) {
            String value = element.getAttribute(name);
            if (!value.equals("")) {
                attributes.append(" ").append(name).append("=\"").append(value).append("\"");
            }
        }
        return attributes.toString();
    }

    /**
     * Determines the link type and the normalized entities of a TLINK element.
     *
     * <p>Event-event and timex-timex links are ordered by the numeric part of
     * their ids (reversing the relation when the entities are swapped);
     * timex-event links are turned into event-timex links with the reversed
     * relation. The checks are deliberately sequential (not exclusive), so a
     * later matching attribute pair overrides an earlier one.</p>
     *
     * @param element the TLINK element
     * @param relType the relation type to start from
     * @return an array {@code {linkType, relType, entity1, entity2}}; entities are
     *         {@code null} and the type is {@code "unkonwn"} when no known
     *         attribute pair is present
     */
    private static String[] resolveTlinkEntities(Element element, String relType) {
        String linkType = "unkonwn";
        String entity1 = null;
        String entity2 = null;

        // event-event
        if (element.hasAttribute("eventInstanceID") && element.hasAttribute("relatedToEventInstance")) {
            linkType = "tlink-event-event";
            entity1 = element.getAttribute("eventInstanceID");
            entity2 = element.getAttribute("relatedToEventInstance");
            // Order by id (for normalization); substring(2) skips the two-character id prefix
            if (Integer.parseInt(entity1.substring(2)) > Integer.parseInt(entity2.substring(2))) {
                entity1 = entity2;
                entity2 = element.getAttribute("eventInstanceID");
                relType = Link.reverseRelationCategory(relType);
            }
        }
        // event-time
        if (element.hasAttribute("eventInstanceID") && element.hasAttribute("relatedToTime")) {
            linkType = "tlink-event-timex";
            entity1 = element.getAttribute("eventInstanceID");
            entity2 = element.getAttribute("relatedToTime");
        }
        // time-event (stored as event-time with the reversed relation)
        if (element.hasAttribute("timeID") && element.hasAttribute("relatedToEventInstance")) {
            linkType = "tlink-event-timex";
            entity1 = element.getAttribute("relatedToEventInstance");
            entity2 = element.getAttribute("timeID");
            relType = Link.reverseRelationCategory(relType);
        }
        // time-time
        if (element.hasAttribute("timeID") && element.hasAttribute("relatedToTime")) {
            linkType = "tlink-timex-timex";
            entity1 = element.getAttribute("timeID");
            entity2 = element.getAttribute("relatedToTime");
            // Order by id (for normalization); substring(1) skips the one-character id prefix
            if (Integer.parseInt(entity1.substring(1)) > Integer.parseInt(entity2.substring(1))) {
                entity1 = entity2;
                entity2 = element.getAttribute("timeID");
                relType = Link.reverseRelationCategory(relType);
            }
        }
        return new String[]{linkType, relType, entity1, entity2};
    }

    /**
     * Computes the relation of {@code previous} with respect to {@code current},
     * two reference timexes taken in sorted order.
     *
     * <p>When the dates differ and the current value does not start with the
     * previous value, the relation is BEFORE. Otherwise equal values are
     * SIMULTANEOUS and different values are compared by length: a longer
     * previous value gives IS_INCLUDED, a longer current value gives INCLUDES,
     * and equal lengths are reported and treated as SIMULTANEOUS.</p>
     */
    private static String relationBetween(Timex previous, Timex current) {
        boolean overlapping = previous.get_date().equals(current.get_date())
                || current.get_value().startsWith(previous.get_value());
        if (!overlapping) {
            return "BEFORE";
        }
        if (previous.get_value().equals(current.get_value())) {
            return "SIMULTANEOUS";
        }
        int previousLength = previous.get_value().length();
        int currentLength = current.get_value().length();
        if (previousLength > currentLength) {
            return "IS_INCLUDED";
        }
        if (currentLength > previousLength) {
            return "INCLUDES";
        }
        System.err.println("Special values made SIMULATNEOUS: " + previous.get_value() + " " + current.get_value());
        return "SIMULTANEOUS";
    }

    /**
     * Shared implementation of {@link #TML2TE3(String)} and
     * {@link #TML2onlyEntities(String)}.
     *
     * <p>The file is split line by line into header (before the line containing
     * the opening TEXT tag), text (up to the line containing the closing TEXT
     * tag) and footer; the lines holding the TEXT tags themselves are dropped and
     * the tags are re-emitted on their own lines. The input is read completely
     * before the output file is created, so a failure does not leave an empty
     * output file behind.</p>
     *
     * @param tmlfile path of the TimeML file to convert
     * @param stripTextTags whether all tags are removed from the text lines
     * @return the path of the written file, or {@code null} on error
     */
    private static String writeTe3Input(String tmlfile, boolean stripTextTags) {
        String outputfile = null;
        try {
            StringBuilder header = new StringBuilder();
            StringBuilder text = new StringBuilder();
            StringBuilder footer = new StringBuilder();
            BufferedReader TE3inputReader = null;
            try {
                TE3inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(tmlfile), FILE_ENCODING));
                String line;
                boolean textfound = false;

                // read out header
                while ((line = TE3inputReader.readLine()) != null) {
                    if (line.contains("<TEXT>")) {
                        textfound = true;
                        break;
                    }
                    header.append(line).append("\n");
                }
                if (!textfound) {
                    throw new Exception("Premature end of file (" + tmlfile + ")");
                }

                // read out text
                while ((line = TE3inputReader.readLine()) != null) {
                    if (line.contains("</TEXT>")) {
                        textfound = false;
                        break;
                    }
                    text.append(stripTextTags ? line.replaceAll("<[^>]*>", "") : line).append("\n");
                }
                if (textfound) {
                    throw new Exception("Premature end of file (" + tmlfile + ")");
                }

                // read out footer (without comments, links and makeinstances)
                while ((line = TE3inputReader.readLine()) != null) {
                    line = line.replaceAll("<(!--|[TSA]LINK|MAKEINSTANCE)[^>]*>", "").trim();
                    if (line.length() > 0) {
                        footer.append(line).append("\n");
                    }
                }
            } finally {
                if (TE3inputReader != null) {
                    TE3inputReader.close();
                }
            }

            outputfile = tmlfile + ".TE3input";
            writeUtf8File(outputfile, header + "\n" + "\n<TEXT>\n" + text + "</TEXT>\n" + footer + "\n");
            System.err.println("Processing file: " + tmlfile);
        } catch (Exception e) {
            reportError(e);
            return null;
        }
        return outputfile;
    }
}