package hr.fer.zemris.takelab.uima.annotator.hunpos; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import de.unihd.dbs.uima.annotator.heideltime.utilities.Logger; public class HunPosAnnotionTranslator { private List<HunPosAnnotationMapping> mappings; public HunPosAnnotionTranslator() { mappings = new ArrayList<HunPosAnnotationMapping>(); loadTranslations(); } private void loadTranslations() { BufferedReader reader = null; InputStream is = null; try { File jarFile = new File(this.getClass().getProtectionDomain().getCodeSource().getLocation().getPath()); if(jarFile.isFile()) { is = getClass().getClassLoader().getResourceAsStream("croatian/TagTranslation.conf"); } else { File tagFile = new File(new File(jarFile, ".."), "resources/croatian/TagTranslation.conf"); is = new FileInputStream(tagFile); } reader = new BufferedReader(new InputStreamReader(is)); Pattern reRule = Pattern.compile("^\\s*\"([^\"]+)\"\\s*=\\s*\"([^\"]+)\"\\s*$"); String line; while((line = reader.readLine()) != null) { if(line.trim().isEmpty()) continue; Matcher m = reRule.matcher(line); if(!m.matches()) { Logger.printError("Error matching HunPos annotation translation rule : " + line); continue; } try { mappings.add(new HunPosAnnotationMapping(m.group(1), m.group(2))); } catch (Exception e) { Logger.printError("Invalid regex in HunPos annotation matching rule " + m.group(1)); continue; } } } catch (FileNotFoundException e) { Logger.printError("Cannot find the HunPos annotation translation rules file."); } catch (IOException e) { Logger.printError("Error reading HunPos annotation translation rules file."); } finally { try { if(reader != null) { reader.close(); } } catch (IOException e) { Logger.printError("An error occured while closing the file."); } } } public String translate(String annotation) { for(HunPosAnnotationMapping mapping : this.mappings) { if(mapping.match(annotation)) { return mapping.getTranslation(); } } //Welp, we failed, return it unchanged return annotation; } }