package edu.harvard.wcfia.yoshikoder.document.tokenizer; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.net.MalformedURLException; import java.net.URL; import java.net.URLClassLoader; import java.util.HashMap; import java.util.Locale; import java.util.Map; import java.util.Properties; import java.util.Set; import java.util.jar.JarEntry; import java.util.jar.JarFile; import java.util.logging.Level; import java.util.logging.Logger; import edu.harvard.wcfia.yoshikoder.util.FileUtil; public class TokenizerSource { private static Logger log = Logger.getLogger("edu.harvard.wcfia.yoshikoder.document.tokenizer.TokenizerSource"); public static final String propertiesFilename = "tokenizer.properties"; public static final String UNLOADED_TOKENIZER = "Unloaded Tokenizer"; protected Map localeToTM; protected Map tMToTokenizer; protected File pluginsDirectory; public TokenizerSource(File pluginsDir){ log.info("In TokenizerSource contructor"); pluginsDirectory = pluginsDir; if (!pluginsDirectory.exists()) pluginsDirectory.mkdirs(); tMToTokenizer = new HashMap(); localeToTM = new HashMap(); log.info("initializing with the contents of " + pluginsDir); File[] contents = pluginsDir.listFiles(); // fill up the maps for (int ii=0; ii<contents.length; ii++){ try { log.info("Getting metadata for existing plugin: " + contents[ii]); TM tm = getPluginMetadata(contents[ii]); tm.location = contents[ii]; tMToTokenizer.put(tm, UNLOADED_TOKENIZER); for (int jj=0; jj<tm.supportedLocales.length; jj++) localeToTM.put(tm.supportedLocales[jj], tm); } catch (PluginException pe){ log.log(Level.WARNING, "Error importing existing plugin metadata", pe); } } } public Set getAvailableTokenizerPlugins(){ log.info("Returning the keySet from TM->Tokenizer map"); return tMToTokenizer.keySet(); } public Tokenizer getTokenizerPlugin(Locale loc) throws PluginException { TM tm = (TM)localeToTM.get(loc); if (tm == null) return null; Object tok = tMToTokenizer.get(tm); if (!tok.equals(TokenizerSource.UNLOADED_TOKENIZER)) return (Tokenizer)tok; else { Tokenizer t = loadTokenizer(tm); return t; } } public void removeTokenizerPlugin(TM tm) { log.info("Deleting the tokenizer"); tMToTokenizer.remove(tm); for (int ii=0; ii<tm.supportedLocales.length; ii++) localeToTM.remove(tm.supportedLocales[ii]); boolean del = tm.location.delete(); log.info("Did we successfully delete from " + tm.location + "? " + del); } /** * Removes the existing plugin, then adds the plugin in jarFile. * @param existingPlugin * @param jarFile * @return tokenizer plugin metadata * @throws PluginException */ public TM replaceTokenizerPlugin(TM existingPlugin, File jarFile) throws PluginException { removeTokenizerPlugin(existingPlugin); addTokenizerPlugin(jarFile); return existingPlugin; } protected TM getPluginMetadata(File jarFile) throws PluginException { log.info("Examining "+ jarFile.getAbsolutePath()); Properties props = null; try { JarFile jf = new JarFile(jarFile); JarEntry e = jf.getJarEntry(propertiesFilename); InputStream is = jf.getInputStream(e); props = new Properties(); props.load(is); } catch (IOException ioe){ throw new PluginException("Couldn't read the jar file properties", ioe); } String cname = props.getProperty("classname"); String name = props.getProperty("name"); String description = props.getProperty("description", ""); String locales = props.getProperty("locales"); if (cname==null || name==null || locales==null) throw new PluginException("Missing property in " + propertiesFilename); String[] line = locales.split("[ ]+"); Locale[] locs = new Locale[line.length]; for (int ii=0; ii<locs.length; ii++) locs[ii] = FileUtil.parseLocale(line[ii]); TM tm = new TM(name, description, null, cname, locs); // equals ignores location return tm; } public TM addTokenizerPlugin(File jarFile) throws PluginException{ log.info("Adding a tokenizer from jar file: " + jarFile); TM tm = getPluginMetadata(jarFile); boolean contained = tMToTokenizer.containsKey(tm); log.info("Checking whether we have this tokenizer already"); if (contained) throw new DuplicatePluginException("Tokenizer already exists"); File unique = makeUniqueName(jarFile); log.info("Made unique name: " + unique); log.info("Assigning unique name in metadata"); tm.location = unique; log.info("Copying in the filesystem"); try { FileUtil.copyInputStream( new FileInputStream(jarFile), new FileOutputStream(tm.location)); } catch (IOException ioe){ throw new PluginException("Could not transfer plugin to the plugin directory", ioe); } log.info("Inserting tokenizer metadata into TM->Tokenizer map"); tMToTokenizer.put(tm, UNLOADED_TOKENIZER); log.info("Inserting tokenizer metadata into Locale->TM map"); for (int ii=0; ii<tm.supportedLocales.length; ii++) localeToTM.put(tm.supportedLocales[ii], tm); return tm; } protected boolean clashes(File f, File directory){ File[] files = directory.listFiles(); for (int ii=0; ii<files.length; ii++){ if (files[ii].getName().equals(f.getName())) return true; } return false; } protected File makeUniqueName(File jarFile){ File newname = new File(pluginsDirectory, jarFile.getName()); int ii=1; while (clashes(newname, pluginsDirectory)){ newname = new File(pluginsDirectory, ii + "-" + jarFile.getName()); ii++; } return newname; } protected Tokenizer loadTokenizer(TM tm) throws PluginException { try { URL url = new URL("file://" + tm.location.getAbsolutePath()); URLClassLoader ucl = new URLClassLoader(new URL[]{url}); Class klass = ucl.loadClass(tm.classname); Object obj = klass.newInstance(); Tokenizer tokenizer = (Tokenizer)obj; tMToTokenizer.put(tm, tokenizer); return tokenizer; } catch (MalformedURLException ex){ throw new PluginException(ex); } catch (ClassNotFoundException cnf){ throw new PluginException(cnf); } catch (InstantiationException ie){ throw new PluginException(ie); } catch (IllegalAccessException iae){ throw new PluginException(iae); } } public String toString(){ StringBuffer sb = new StringBuffer(); sb.append("Plugins directory " + pluginsDirectory.getAbsolutePath() + "\n"); sb.append("Locale -> Metadata: " + localeToTM + "\n"); sb.append("Metadata -> Tokenizer: " + tMToTokenizer); return sb.toString(); } }