package de.unihd.dbs.uima.annotator.heideltime.resources; import java.io.File; import java.io.FilenameFilter; import java.io.IOException; import java.net.URISyntaxException; import java.util.Enumeration; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.jar.JarEntry; import java.util.jar.JarFile; import java.util.regex.Matcher; import java.util.regex.Pattern; import de.unihd.dbs.uima.annotator.heideltime.utilities.Logger; public class ResourceScanner { private static ResourceScanner INSTANCE = null; /** * singleton producer. * @return singleton instance of ResourceScanner */ public static ResourceScanner getInstance() { if(INSTANCE == null) { synchronized(ResourceScanner.class) { if(INSTANCE == null) { INSTANCE = new ResourceScanner(); } } } return INSTANCE; } private final String path = "resources"; private Set<String> languages = new HashSet<String>(); private Map<String, ResourceMap> repatterns = new HashMap<String, ResourceMap>(); private Map<String, ResourceMap> normalizations = new HashMap<String, ResourceMap>(); private Map<String, ResourceMap> rules = new HashMap<String, ResourceMap>(); private ResourceScanner() { String jarFilePath = null; try { jarFilePath = this.getClass().getProtectionDomain().getCodeSource().getLocation().toURI().getPath(); } catch (URISyntaxException e) { e.printStackTrace(); } if (jarFilePath != null) { File jarFile = new File(jarFilePath); if (jarFile.isFile()) { // scan the interior of a jar file JarFile jar = null; try { jar = new JarFile(jarFile); } catch (IOException e1) { e1.printStackTrace(); } final Enumeration<JarEntry> entries = jar.entries(); HashMap<String, JarEntry> jarContents = new HashMap<String, JarEntry>(); while (entries.hasMoreElements()) { JarEntry je = entries.nextElement(); String name = je.getName(); jarContents.put(name, je); } try { jar.close(); } catch (IOException e) { e.printStackTrace(); } this.scanValidInsideResourcesFolder(jarContents); // scan the "resources" folder outside of a jar file File outFolder = jarFile.getParentFile(); this.scanValidOutsideResourcesFolder(outFolder); } else { // scan the immediate folders of the local classpath this.scanValidOutsideResourcesFolder(jarFile); // scan the folder "../resources" if it exists File outFolder = new File(jarFile.getParentFile(), path); if (outFolder.exists()) { this.scanValidOutsideResourcesFolder(outFolder); } } } // populate languages list languages.addAll(repatterns.keySet()); } public static void main(String[] args) { @SuppressWarnings("unused") ResourceScanner rs = null; try { rs = new ResourceScanner(); } catch (Exception e) { e.printStackTrace(); } } private void scanValidInsideResourcesFolder(HashMap<String, JarEntry> jarContents) { HashMap<String, JarEntry> topLevelEntries = new HashMap<String, JarEntry>(); for(Entry<String, JarEntry> entry : jarContents.entrySet()) { if(entry.getValue().isDirectory() && entry.getKey().matches("^[^\\/]+/$")) { topLevelEntries.put(entry.getKey(), entry.getValue()); } } for (Entry<String, JarEntry> entry : topLevelEntries.entrySet()) { String language = entry.getKey().substring(0, entry.getKey().length() - 1); Pattern repatternPattern = Pattern.compile(language + "/repattern/resources_repattern_(.+)\\.txt$"); Pattern normalizationPattern = Pattern.compile(language + "/normalization/resources_normalization_(.+)\\.txt$"); Pattern rulePattern = Pattern.compile(language + "/rules/resources_rules_(.+)\\.txt$"); if (entry.getValue().isDirectory()) { Logger.printDetail(ResourceScanner.class, "Testing " + entry.getKey()); /* * our conditions for something being a resources folder: the resource * folder must contain at least the following folders: * * + <language name> * |- repattern * |- normalization * |- rules */ Boolean repatternExists = false; Boolean normalizationExists = false; Boolean ruleExists = false; for(String entryName : jarContents.keySet()) { if(!repatternExists && repatternPattern.matcher(entryName).matches()) { repatternExists = true; } if(!normalizationExists && normalizationPattern.matcher(entryName).matches()) { normalizationExists = true; } if(!ruleExists && rulePattern.matcher(entryName).matches()) { ruleExists = true; } } if(!repatternExists || !normalizationExists || !ruleExists) { Logger.printDetail(ResourceScanner.class, "We need at least one readable resource file of each type to run."); continue; } Logger.printDetail(ResourceScanner.class, "Valid resource folder."); // at this point, the folder is obviously a language resource folder => collect streams this.repatterns.put(language, new ResourceMap()); this.normalizations.put(language, new ResourceMap()); this.rules.put(language, new ResourceMap()); for(Entry<String, JarEntry> je : jarContents.entrySet()) { Matcher m1 = repatternPattern.matcher(je.getKey()); Matcher m2 = normalizationPattern.matcher(je.getKey()); Matcher m3 = rulePattern.matcher(je.getKey()); if(m1.matches()) { this.repatterns.get(language).putInnerFile(m1.group(1), je.getKey()); } if(m2.matches()) { this.normalizations.get(language).putInnerFile(m2.group(1), je.getKey()); } if(m3.matches()) { this.rules.get(language).putInnerFile(m3.group(1), je.getKey()); } } } } } private void scanValidOutsideResourcesFolder(File resourcePath) { Pattern repatternPattern = Pattern.compile("resources_repattern_(.+)\\.txt$"); Pattern normalizationPattern = Pattern.compile("resources_normalization_(.+)\\.txt$"); Pattern rulePattern = Pattern.compile("resources_rules_(.+)\\.txt$"); File[] pathContents = resourcePath.listFiles(); for (File supposedLanguagePath : pathContents) { String language = supposedLanguagePath.getName(); if (supposedLanguagePath.isDirectory()) { Logger.printDetail(ResourceScanner.class, "Testing " + supposedLanguagePath.getAbsolutePath()); if (!supposedLanguagePath.exists()) { Logger.printDetail(ResourceScanner.class, "This path doesn't exist."); continue; } /* * our conditions for something being a resources folder: the resource * folder must contain at least the following folders: * * + <language name> * |- repattern * |- normalization * |- rules */ File repatternFolder = new File(supposedLanguagePath, "repattern"); File normalizationFolder = new File(supposedLanguagePath, "normalization"); File ruleFolder = new File(supposedLanguagePath, "rules"); if (!repatternFolder.exists() || !repatternFolder.canRead() || !repatternFolder.isDirectory() || !normalizationFolder.exists() || !normalizationFolder.canRead() || !normalizationFolder.isDirectory() || !ruleFolder.exists() || !ruleFolder.canRead() || !ruleFolder.isDirectory()) { Logger.printDetail(ResourceScanner.class, "We need at least the folders repattern, normalization and rules in this folder."); continue; } /* * furthermore, we require at least one repattern file, one * normalization file and one rule file named in this pattern: * * - resources_repattern_re<name of pattern>.txt * - resources_normalization_norm<name of normalization>.txt * - resources_rules_<date|time|duration|set>rules.txt */ FilenameFilter txtFilter = new FilenameFilter() { @Override public boolean accept(File arg0, String arg1) { return arg1.endsWith(".txt"); } }; File[] repatternFiles = repatternFolder.listFiles(txtFilter); File[] normalizationFiles = normalizationFolder.listFiles(txtFilter); File[] ruleFiles = ruleFolder.listFiles(txtFilter); if (repatternFiles.length == 0 || normalizationFiles.length == 0 || ruleFiles.length == 0 || !repatternFiles[0].exists() || !repatternFiles[0].canRead() || !repatternFiles[0].isFile() || !normalizationFiles[0].exists() || !normalizationFiles[0].canRead() || !normalizationFiles[0].isFile() || !ruleFiles[0].exists() || !ruleFiles[0].canRead() || !ruleFiles[0].isFile()) { Logger.printDetail(ResourceScanner.class, "We need at least one readable resource file of each type to run."); continue; } Logger.printDetail(ResourceScanner.class, "Valid resource folder."); // at this point, the folder is obviously a language resource folder => collect streams this.repatterns.put(language, new ResourceMap()); for(File f : repatternFiles) { Matcher m = repatternPattern.matcher(f.getName()); if(m.matches()) { this.repatterns.get(language).putOuterFile(m.group(1), f); } } this.normalizations.put(language, new ResourceMap()); for(File f : normalizationFiles) { Matcher m = normalizationPattern.matcher(f.getName()); if(m.matches()) { this.normalizations.get(language).putOuterFile(m.group(1), f); } } this.rules.put(language, new ResourceMap()); for(File f : ruleFiles) { Matcher m = rulePattern.matcher(f.getName()); if(m.matches()) { this.rules.get(language).putOuterFile(m.group(1), f); } } } } } public ResourceMap getRepatterns(String language) { return repatterns.get(language); } public ResourceMap getNormalizations(String language) { return normalizations.get(language); } public ResourceMap getRules(String language) { return rules.get(language); } public final Set<String> getDetectedResourceFolders() { return languages; } }