package org.maltparser.core.config; import java.io.*; import java.net.MalformedURLException; import java.net.URL; import java.util.*; import java.util.jar.JarEntry; import java.util.jar.JarFile; import java.util.jar.JarInputStream; import java.util.jar.JarOutputStream; import org.maltparser.core.config.version.Versioning; import org.maltparser.core.exception.MaltChainedException; import org.maltparser.core.helper.HashSet; import org.maltparser.core.helper.SystemInfo; import org.maltparser.core.helper.SystemLogger; import org.maltparser.core.helper.URLFinder; import org.maltparser.core.io.dataformat.DataFormatInstance; import org.maltparser.core.io.dataformat.DataFormatManager; import org.maltparser.core.io.dataformat.DataFormatSpecification.DataStructure; import org.maltparser.core.io.dataformat.DataFormatSpecification.Dependency; import org.maltparser.core.options.OptionManager; import org.maltparser.core.symbol.SymbolTableHandler; import org.maltparser.core.symbol.trie.TrieSymbolTableHandler; /** * This class contains methods for handle the configuration directory. * * @author Johan Hall */ public class ConfigurationDir { protected static final int BUFFER = 4096; protected File configDirectory; protected String name; protected String type; protected File workingDirectory; protected URL url = null; protected int containerIndex; protected BufferedWriter infoFile = null; protected String createdByMaltParserVersion; private SymbolTableHandler symbolTables; private DataFormatManager dataFormatManager; private HashMap<String, DataFormatInstance> dataFormatInstances; private URL inputFormatURL; private URL outputFormatURL; /** * Creates a configuration directory from a mco-file specified by an URL. * * @param url an URL to a mco-file * @throws MaltChainedException */ public ConfigurationDir(URL url) throws MaltChainedException { initWorkingDirectory(); setUrl(url); initNameNTypeFromInfoFile(url); // initData(); } /** * Creates a new configuration directory or a configuration directory from a * mco-file * * @param name the name of the configuration * @param type the type of configuration * @param containerIndex the container index * @throws MaltChainedException */ public ConfigurationDir(String name, String type, int containerIndex) throws MaltChainedException { setContainerIndex(containerIndex); initWorkingDirectory(); if (name != null && name.length() > 0 && type != null && type.length() > 0) { setName(name); setType(type); } else { throw new ConfigurationException("The configuration name is not specified. "); } setConfigDirectory(new File(workingDirectory.getPath() + File.separator + getName())); String mode = OptionManager.instance().getOptionValue(containerIndex, "config", "flowchart").toString().trim(); if (mode.equals("parse")) { // During parsing also search for the MaltParser configuration file in the class path File mcoPath = new File(workingDirectory.getPath() + File.separator + getName() + ".mco"); if (!mcoPath.exists()) { String classpath = System.getProperty("java.class.path"); String[] items = classpath.split(System.getProperty("path.separator")); boolean found = false; for (String item : items) { File candidateDir = new File(item); if (candidateDir.exists() && candidateDir.isDirectory()) { File candidateConfigFile = new File(candidateDir.getPath() + File.separator + getName() + ".mco"); if (candidateConfigFile.exists()) { initWorkingDirectory(candidateDir.getPath()); setConfigDirectory(new File(workingDirectory.getPath() + File.separator + getName())); found = true; break; } } } if (found == false) { throw new ConfigurationException("Couldn't find the MaltParser configuration file: " + getName() + ".mco"); } } } } public void initDataFormat() throws MaltChainedException { String inputFormatName = OptionManager.instance().getOptionValue(containerIndex, "input", "format").toString().trim(); String outputFormatName = OptionManager.instance().getOptionValue(containerIndex, "output", "format").toString().trim(); final URLFinder f = new URLFinder(); if (configDirectory != null && configDirectory.exists()) { if (outputFormatName.length() == 0 || inputFormatName.equals(outputFormatName)) { URL inputFormatURL = f.findURLinJars(inputFormatName); if (inputFormatURL != null) { outputFormatName = inputFormatName = this.copyToConfig(inputFormatURL); } else { outputFormatName = inputFormatName = this.copyToConfig(inputFormatName); } } else { URL inputFormatURL = f.findURLinJars(inputFormatName); if (inputFormatURL != null) { inputFormatName = this.copyToConfig(inputFormatURL); } else { inputFormatName = this.copyToConfig(inputFormatName); } URL outputFormatURL = f.findURLinJars(outputFormatName); if (inputFormatURL != null) { outputFormatName = this.copyToConfig(outputFormatURL); } else { outputFormatName = this.copyToConfig(outputFormatName); } } OptionManager.instance().overloadOptionValue(containerIndex, "input", "format", inputFormatName); } else { if (outputFormatName.length() == 0) { outputFormatName = inputFormatName; } } dataFormatInstances = new HashMap<String, DataFormatInstance>(3); inputFormatURL = findURL(inputFormatName); outputFormatURL = findURL(outputFormatName); if (outputFormatURL != null) { try { InputStream is = outputFormatURL.openStream(); } catch (FileNotFoundException e) { outputFormatURL = f.findURL(outputFormatName); } catch (IOException e) { outputFormatURL = f.findURL(outputFormatName); } } else { outputFormatURL = f.findURL(outputFormatName); } dataFormatManager = new DataFormatManager(inputFormatURL, outputFormatURL); String mode = OptionManager.instance().getOptionValue(containerIndex, "config", "flowchart").toString().trim(); if (mode.equals("parse")) { symbolTables = new TrieSymbolTableHandler(TrieSymbolTableHandler.ADD_NEW_TO_TMP_STORAGE); // symbolTables = new TrieSymbolTableHandler(TrieSymbolTableHandler.ADD_NEW_TO_TRIE); } else { symbolTables = new TrieSymbolTableHandler(TrieSymbolTableHandler.ADD_NEW_TO_TRIE); } if (dataFormatManager.getInputDataFormatSpec().getDataStructure() == DataStructure.PHRASE) { if (mode.equals("learn")) { Set<Dependency> deps = dataFormatManager.getInputDataFormatSpec().getDependencies(); for (Dependency dep : deps) { URL depFormatURL = f.findURLinJars(dep.getUrlString()); if (depFormatURL != null) { this.copyToConfig(depFormatURL); } else { this.copyToConfig(dep.getUrlString()); } } } else if (mode.equals("parse")) { Set<Dependency> deps = dataFormatManager.getInputDataFormatSpec().getDependencies(); String nullValueStategy = OptionManager.instance().getOptionValue(containerIndex, "singlemalt", "null_value").toString(); for (Dependency dep : deps) { // URL depFormatURL = f.findURLinJars(dep.getUrlString()); DataFormatInstance dataFormatInstance = dataFormatManager.getDataFormatSpec(dep.getDependentOn()).createDataFormatInstance(symbolTables, nullValueStategy); addDataFormatInstance(dataFormatManager.getDataFormatSpec(dep.getDependentOn()).getDataFormatName(), dataFormatInstance); dataFormatManager.setInputDataFormatSpec(dataFormatManager.getDataFormatSpec(dep.getDependentOn())); // dataFormatManager.setOutputDataFormatSpec(dataFormatManager.getDataFormatSpec(dep.getDependentOn())); } } } } private URL findURL(String specModelFileName) throws MaltChainedException { URL url = null; File specFile = this.getFile(specModelFileName); if (specFile.exists()) { try { url = new URL("file:///" + specFile.getAbsolutePath()); } catch (MalformedURLException e) { throw new MaltChainedException("Malformed URL: " + specFile, e); } } else { url = this.getConfigFileEntryURL(specModelFileName); } return url; } /** * Creates an output stream writer, where the corresponding file will be * included in the configuration directory * * @param fileName a file name * @param charSet a char set * @return an output stream writer for writing to a file within the * configuration directory * @throws MaltChainedException */ public OutputStreamWriter getOutputStreamWriter(String fileName, String charSet) throws MaltChainedException { try { return new OutputStreamWriter(new FileOutputStream(configDirectory.getPath() + File.separator + fileName), charSet); } catch (FileNotFoundException e) { throw new ConfigurationException("The file '" + fileName + "' cannot be created. ", e); } catch (UnsupportedEncodingException e) { throw new ConfigurationException("The char set '" + charSet + "' is not supported. ", e); } } /** * Creates an output stream writer, where the corresponding file will be * included in the configuration directory. Uses UTF-8 for character * encoding. * * @param fileName a file name * @return an output stream writer for writing to a file within the * configuration directory * @throws MaltChainedException */ public OutputStreamWriter getOutputStreamWriter(String fileName) throws MaltChainedException { try { return new OutputStreamWriter(new FileOutputStream(configDirectory.getPath() + File.separator + fileName, true), "UTF-8"); } catch (FileNotFoundException e) { throw new ConfigurationException("The file '" + fileName + "' cannot be created. ", e); } catch (UnsupportedEncodingException e) { throw new ConfigurationException("The char set 'UTF-8' is not supported. ", e); } } /** * This method acts the same as getOutputStreamWriter with the difference * that the writer append in the file if it already exists instead of * deleting the previous content before starting to write. * * @param fileName a file name * @return an output stream writer for writing to a file within the * configuration directory * @throws MaltChainedException */ public OutputStreamWriter getAppendOutputStreamWriter(String fileName) throws MaltChainedException { try { return new OutputStreamWriter(new FileOutputStream(configDirectory.getPath() + File.separator + fileName, true), "UTF-8"); } catch (FileNotFoundException e) { throw new ConfigurationException("The file '" + fileName + "' cannot be created. ", e); } catch (UnsupportedEncodingException e) { throw new ConfigurationException("The char set 'UTF-8' is not supported. ", e); } } /** * Creates an input stream reader for reading a file within the * configuration directory * * @param fileName a file name * @param charSet a char set * @return an input stream reader for reading a file within the * configuration directory * @throws MaltChainedException */ public InputStreamReader getInputStreamReader(String fileName, String charSet) throws MaltChainedException { try { return new InputStreamReader(new FileInputStream(configDirectory.getPath() + File.separator + fileName), charSet); } catch (FileNotFoundException e) { throw new ConfigurationException("The file '" + fileName + "' cannot be found. ", e); } catch (UnsupportedEncodingException e) { throw new ConfigurationException("The char set '" + charSet + "' is not supported. ", e); } } /** * Creates an input stream reader for reading a file within the * configuration directory. Uses UTF-8 for character encoding. * * @param fileName a file name * @return an input stream reader for reading a file within the * configuration directory * @throws MaltChainedException */ public InputStreamReader getInputStreamReader(String fileName) throws MaltChainedException { return getInputStreamReader(fileName, "UTF-8"); } public JarEntry getConfigFileEntry(String fileName) throws MaltChainedException { File mcoPath = new File(workingDirectory.getPath() + File.separator + getName() + ".mco"); try { JarFile mcoFile = new JarFile(mcoPath.getAbsolutePath()); JarEntry entry = mcoFile.getJarEntry(getName() + '/' + fileName); if (entry == null) { entry = mcoFile.getJarEntry(getName() + '\\' + fileName); } return entry; } catch (FileNotFoundException e) { throw new ConfigurationException("The file entry '" + fileName + "' in mco-file '" + mcoPath + "' cannot be found. ", e); } catch (IOException e) { throw new ConfigurationException("The file entry '" + fileName + "' in mco-file '" + mcoPath + "' cannot be found. ", e); } } public InputStream getInputStreamFromConfigFileEntry(String fileName) throws MaltChainedException { File mcoPath = new File(workingDirectory.getPath() + File.separator + getName() + ".mco"); try { JarFile mcoFile = new JarFile(mcoPath.getAbsolutePath()); JarEntry entry = mcoFile.getJarEntry(getName() + '/' + fileName); if (entry == null) { entry = mcoFile.getJarEntry(getName() + '\\' + fileName); } if (entry == null) { throw new FileNotFoundException(); } return mcoFile.getInputStream(entry); } catch (FileNotFoundException e) { throw new ConfigurationException("The file entry '" + fileName + "' in the mco file '" + mcoPath + "' cannot be found. ", e); } catch (IOException e) { throw new ConfigurationException("The file entry '" + fileName + "' in the mco file '" + mcoPath + "' cannot be loaded. ", e); } } public InputStreamReader getInputStreamReaderFromConfigFileEntry(String fileName, String charSet) throws MaltChainedException { File mcoPath = new File(workingDirectory.getPath() + File.separator + getName() + ".mco"); try { JarFile mcoFile = new JarFile(mcoPath.getAbsolutePath()); JarEntry entry = mcoFile.getJarEntry(getName() + '/' + fileName); if (entry == null) { entry = mcoFile.getJarEntry(getName() + '\\' + fileName); } if (entry == null) { throw new FileNotFoundException(); } return new InputStreamReader(mcoFile.getInputStream(entry), charSet); } catch (FileNotFoundException e) { throw new ConfigurationException("The file entry '" + fileName + "' in the mco file '" + mcoPath + "' cannot be found. ", e); } catch (UnsupportedEncodingException e) { throw new ConfigurationException("The char set '" + charSet + "' is not supported. ", e); } catch (IOException e) { throw new ConfigurationException("The file entry '" + fileName + "' in the mco file '" + mcoPath + "' cannot be loaded. ", e); } } public InputStreamReader getInputStreamReaderFromConfigFile(String fileName) throws MaltChainedException { return getInputStreamReaderFromConfigFileEntry(fileName, "UTF-8"); } /** * Returns a file handler object of a file within the configuration * directory * * @param fileName a file name * @return a file handler object of a file within the configuration * directory * @throws MaltChainedException */ public File getFile(String fileName) throws MaltChainedException { return new File(configDirectory.getPath() + File.separator + fileName); } public URL getConfigFileEntryURL(String fileName) throws MaltChainedException { File mcoPath = new File(workingDirectory.getPath() + File.separator + getName() + ".mco"); try { if (!mcoPath.exists()) { throw new ConfigurationException("Couldn't find mco-file '" + mcoPath.getAbsolutePath() + "'"); } // new URL("file", null, mcoPath.getAbsolutePath()); URL url = new URL("jar:" + new URL("file", null, mcoPath.getAbsolutePath()) + "!/" + getName() + '/' + fileName + "\n"); try { InputStream is = url.openStream(); is.close(); } catch (IOException e) { url = new URL("jar:" + new URL("file", null, mcoPath.getAbsolutePath()) + "!/" + getName() + '\\' + fileName + "\n"); } return url; } catch (MalformedURLException e) { throw new ConfigurationException("Couldn't find the URL '" + "jar:" + mcoPath.getAbsolutePath() + "!/" + getName() + '/' + fileName + "'", e); } } /** * Copies a file into the configuration directory. * * @param source a path to file * @throws MaltChainedException */ public String copyToConfig(File source) throws MaltChainedException { byte[] readBuffer = new byte[BUFFER]; String destination = configDirectory.getPath() + File.separator + source.getName(); try { BufferedInputStream bis = new BufferedInputStream(new FileInputStream(source)); BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destination), BUFFER); int n; while ((n = bis.read(readBuffer, 0, BUFFER)) != -1) { bos.write(readBuffer, 0, n); } bos.flush(); bos.close(); bis.close(); } catch (FileNotFoundException e) { throw new ConfigurationException("The source file '" + source + "' cannot be found or the destination file '" + destination + "' cannot be created when coping the file. ", e); } catch (IOException e) { throw new ConfigurationException("The source file '" + source + "' cannot be copied to destination '" + destination + "'. ", e); } return source.getName(); } public String copyToConfig(String fileUrl) throws MaltChainedException { final URLFinder f = new URLFinder(); URL url = f.findURL(fileUrl); if (url == null) { throw new ConfigurationException("The file or URL '" + fileUrl + "' could not be found. "); } return copyToConfig(url); } public String copyToConfig(URL url) throws MaltChainedException { if (url == null) { throw new ConfigurationException("URL could not be found. "); } byte[] readBuffer = new byte[BUFFER]; String destFileName = url.getPath(); int indexSlash = destFileName.lastIndexOf('/'); if (indexSlash == -1) { indexSlash = destFileName.lastIndexOf('\\'); } if (indexSlash != -1) { destFileName = destFileName.substring(indexSlash + 1); } String destination = configDirectory.getPath() + File.separator + destFileName; try { BufferedInputStream bis = new BufferedInputStream(url.openStream()); BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destination), BUFFER); int n; while ((n = bis.read(readBuffer, 0, BUFFER)) != -1) { bos.write(readBuffer, 0, n); } bos.flush(); bos.close(); bis.close(); } catch (FileNotFoundException e) { throw new ConfigurationException("The destination file '" + destination + "' cannot be created when coping the file. ", e); } catch (IOException e) { throw new ConfigurationException("The URL '" + url + "' cannot be copied to destination '" + destination + "'. ", e); } return destFileName; } /** * Removes the configuration directory, if it exists and it contains a .info * file. * * @throws MaltChainedException */ public void deleteConfigDirectory() throws MaltChainedException { if (!configDirectory.exists()) { return; } File infoFile = new File(configDirectory.getPath() + File.separator + getName() + "_" + getType() + ".info"); if (infoFile.exists()) { deleteConfigDirectory(configDirectory); } else { throw new ConfigurationException("There exists a directory that is not a MaltParser configuration directory. "); } } private void deleteConfigDirectory(File directory) throws MaltChainedException { if (directory.exists()) { File[] files = directory.listFiles(); for (int i = 0; i < files.length; i++) { if (files[i].isDirectory()) { deleteConfigDirectory(files[i]); } else { files[i].delete(); } } } else { throw new ConfigurationException("The directory '" + directory.getPath() + "' cannot be found. "); } directory.delete(); } /** * Returns a file handler object for the configuration directory * * @return a file handler object for the configuration directory */ public File getConfigDirectory() { return configDirectory; } protected void setConfigDirectory(File dir) { this.configDirectory = dir; } /** * Creates the configuration directory * * @throws MaltChainedException */ public void createConfigDirectory() throws MaltChainedException { checkConfigDirectory(); configDirectory.mkdir(); createInfoFile(); } protected void checkConfigDirectory() throws MaltChainedException { if (configDirectory.exists() && !configDirectory.isDirectory()) { throw new ConfigurationException("The configuration directory name already exists and is not a directory. "); } if (configDirectory.exists()) { deleteConfigDirectory(); } } protected void createInfoFile() throws MaltChainedException { infoFile = new BufferedWriter(getOutputStreamWriter(getName() + "_" + getType() + ".info")); try { infoFile.write("CONFIGURATION\n"); infoFile.write("Configuration name: " + getName() + "\n"); infoFile.write("Configuration type: " + getType() + "\n"); infoFile.write("Created: " + new Date(System.currentTimeMillis()) + "\n"); infoFile.write("\nSYSTEM\n"); infoFile.write("Operating system architecture: " + System.getProperty("os.arch") + "\n"); infoFile.write("Operating system name: " + System.getProperty("os.name") + "\n"); infoFile.write("JRE vendor name: " + System.getProperty("java.vendor") + "\n"); infoFile.write("JRE version number: " + System.getProperty("java.version") + "\n"); infoFile.write("\nMALTPARSER\n"); infoFile.write("Version: " + SystemInfo.getVersion() + "\n"); infoFile.write("Build date: " + SystemInfo.getBuildDate() + "\n"); Set<String> excludeGroups = new HashSet<String>(); excludeGroups.add("system"); infoFile.write("\nSETTINGS\n"); infoFile.write(OptionManager.instance().toStringPrettyValues(containerIndex, excludeGroups)); infoFile.flush(); } catch (IOException e) { throw new ConfigurationException("Could not create the maltparser info file. "); } } /** * Returns a writer to the configuration information file * * @return a writer to the configuration information file * @throws MaltChainedException */ public BufferedWriter getInfoFileWriter() throws MaltChainedException { return infoFile; } /** * Creates the malt configuration file (.mco). This file is compressed. * * @throws MaltChainedException */ public void createConfigFile() throws MaltChainedException { try { JarOutputStream jos = new JarOutputStream(new FileOutputStream(workingDirectory.getPath() + File.separator + getName() + ".mco")); // configLogger.info("Creates configuration file '"+workingDirectory.getPath()+File.separator+getName()+".mco' ...\n"); createConfigFile(configDirectory.getPath(), jos); jos.close(); } catch (FileNotFoundException e) { throw new ConfigurationException("The maltparser configurtation file '" + workingDirectory.getPath() + File.separator + getName() + ".mco" + "' cannot be found. ", e); } catch (IOException e) { throw new ConfigurationException("The maltparser configurtation file '" + workingDirectory.getPath() + File.separator + getName() + ".mco" + "' cannot be created. ", e); } } private void createConfigFile(String directory, JarOutputStream jos) throws MaltChainedException { byte[] readBuffer = new byte[BUFFER]; try { File zipDir = new File(directory); String[] dirList = zipDir.list(); int bytesIn; for (int i = 0; i < dirList.length; i++) { File f = new File(zipDir, dirList[i]); if (f.isDirectory()) { String filePath = f.getPath(); createConfigFile(filePath, jos); continue; } FileInputStream fis = new FileInputStream(f); String entryPath = f.getPath().substring(workingDirectory.getPath().length() + 1); entryPath = entryPath.replace('\\', '/'); JarEntry entry = new JarEntry(entryPath); jos.putNextEntry(entry); while ((bytesIn = fis.read(readBuffer)) != -1) { jos.write(readBuffer, 0, bytesIn); } fis.close(); } } catch (FileNotFoundException e) { throw new ConfigurationException("The directory '" + directory + "' cannot be found. ", e); } catch (IOException e) { throw new ConfigurationException("The directory '" + directory + "' cannot be compressed into a mco file. ", e); } } public void copyConfigFile(File in, File out, Versioning versioning) throws MaltChainedException { try { JarFile jar = new JarFile(in); JarOutputStream tempJar = new JarOutputStream(new FileOutputStream(out)); byte[] buffer = new byte[BUFFER]; int bytesRead; final StringBuilder sb = new StringBuilder(); final URLFinder f = new URLFinder(); for (Enumeration<JarEntry> entries = jar.entries(); entries.hasMoreElements();) { JarEntry inEntry = (JarEntry) entries.nextElement(); InputStream entryStream = jar.getInputStream(inEntry); JarEntry outEntry = versioning.getJarEntry(inEntry); if (!versioning.hasChanges(inEntry, outEntry)) { tempJar.putNextEntry(outEntry); while ((bytesRead = entryStream.read(buffer)) != -1) { tempJar.write(buffer, 0, bytesRead); } } else { tempJar.putNextEntry(outEntry); BufferedReader br = new BufferedReader(new InputStreamReader(entryStream)); String line; sb.setLength(0); while ((line = br.readLine()) != null) { sb.append(line); sb.append('\n'); } String outString = versioning.modifyJarEntry(inEntry, outEntry, sb); tempJar.write(outString.getBytes()); } } if (versioning.getFeatureModelXML() != null && versioning.getFeatureModelXML().startsWith("/appdata")) { int index = versioning.getFeatureModelXML().lastIndexOf('/'); BufferedInputStream bis = new BufferedInputStream(f.findURLinJars(versioning.getFeatureModelXML()).openStream()); tempJar.putNextEntry(new JarEntry(versioning.getNewConfigName() + "/" + versioning.getFeatureModelXML().substring(index + 1))); int n; while ((n = bis.read(buffer, 0, BUFFER)) != -1) { tempJar.write(buffer, 0, n); } bis.close(); } if (versioning.getInputFormatXML() != null && versioning.getInputFormatXML().startsWith("/appdata")) { int index = versioning.getInputFormatXML().lastIndexOf('/'); BufferedInputStream bis = new BufferedInputStream(f.findURLinJars(versioning.getInputFormatXML()).openStream()); tempJar.putNextEntry(new JarEntry(versioning.getNewConfigName() + "/" + versioning.getInputFormatXML().substring(index + 1))); int n; while ((n = bis.read(buffer, 0, BUFFER)) != -1) { tempJar.write(buffer, 0, n); } bis.close(); } tempJar.flush(); tempJar.close(); jar.close(); } catch (IOException e) { throw new ConfigurationException("", e); } } protected void initNameNTypeFromInfoFile(URL url) throws MaltChainedException { if (url == null) { throw new ConfigurationException("The URL cannot be found. "); } try { JarEntry je; JarInputStream jis = new JarInputStream(url.openConnection().getInputStream()); while ((je = jis.getNextJarEntry()) != null) { String entryName = je.getName(); if (entryName.endsWith(".info")) { int indexUnderScore = entryName.lastIndexOf('_'); int indexSeparator = entryName.lastIndexOf(File.separator); if (indexSeparator == -1) { indexSeparator = entryName.lastIndexOf('/'); } if (indexSeparator == -1) { indexSeparator = entryName.lastIndexOf('\\'); } int indexDot = entryName.lastIndexOf('.'); if (indexUnderScore == -1 || indexDot == -1) { throw new ConfigurationException("Could not find the configuration name and type from the URL '" + url.toString() + "'. "); } setName(entryName.substring(indexSeparator + 1, indexUnderScore)); setType(entryName.substring(indexUnderScore + 1, indexDot)); setConfigDirectory(new File(workingDirectory.getPath() + File.separator + getName())); jis.close(); return; } } } catch (IOException e) { throw new ConfigurationException("Could not find the configuration name and type from the URL '" + url.toString() + "'. ", e); } } /** * Prints the content of the configuration information file to the system * logger * * @throws MaltChainedException */ public void echoInfoFile() throws MaltChainedException { checkConfigDirectory(); JarInputStream jis; try { if (url == null) { jis = new JarInputStream(new FileInputStream(workingDirectory.getPath() + File.separator + getName() + ".mco")); } else { jis = new JarInputStream(url.openConnection().getInputStream()); } JarEntry je; while ((je = jis.getNextJarEntry()) != null) { String entryName = je.getName(); if (entryName.endsWith(getName() + "_" + getType() + ".info")) { int c; while ((c = jis.read()) != -1) { SystemLogger.logger().info((char) c); } } } jis.close(); } catch (FileNotFoundException e) { throw new ConfigurationException("Could not print configuration information file. The configuration file '" + workingDirectory.getPath() + File.separator + getName() + ".mco" + "' cannot be found. ", e); } catch (IOException e) { throw new ConfigurationException("Could not print configuration information file. ", e); } } /** * Unpacks the malt configuration file (.mco). * * @throws MaltChainedException */ public void unpackConfigFile() throws MaltChainedException { checkConfigDirectory(); JarInputStream jis; try { if (url == null) { jis = new JarInputStream(new FileInputStream(workingDirectory.getPath() + File.separator + getName() + ".mco")); } else { jis = new JarInputStream(url.openConnection().getInputStream()); } unpackConfigFile(jis); jis.close(); } catch (FileNotFoundException e) { throw new ConfigurationException("Could not unpack configuration. The configuration file '" + workingDirectory.getPath() + File.separator + getName() + ".mco" + "' cannot be found. ", e); } catch (IOException e) { if (configDirectory.exists()) { deleteConfigDirectory(); } throw new ConfigurationException("Could not unpack configuration. ", e); } initCreatedByMaltParserVersionFromInfoFile(); } protected void unpackConfigFile(JarInputStream jis) throws MaltChainedException { try { JarEntry je; byte[] readBuffer = new byte[BUFFER]; SortedSet<String> directoryCache = new TreeSet<String>(); while ((je = jis.getNextJarEntry()) != null) { String entryName = je.getName(); if (entryName.startsWith("/")) { entryName = entryName.substring(1); } if (entryName.endsWith(File.separator) || entryName.endsWith("/")) { return; } int index = -1; if (File.separator.equals("\\")) { entryName = entryName.replace('/', '\\'); index = entryName.lastIndexOf("\\"); } else if (File.separator.equals("/")) { entryName = entryName.replace('\\', '/'); index = entryName.lastIndexOf("/"); } if (index > 0) { String dirName = entryName.substring(0, index); if (!directoryCache.contains(dirName)) { File directory = new File(workingDirectory.getPath() + File.separator + dirName); if (!(directory.exists() && directory.isDirectory())) { if (!directory.mkdirs()) { throw new ConfigurationException("Unable to make directory '" + dirName + "'. "); } directoryCache.add(dirName); } } } if (new File(workingDirectory.getPath() + File.separator + entryName).isDirectory() && new File(workingDirectory.getPath() + File.separator + entryName).exists()) { continue; } BufferedOutputStream bos; try { bos = new BufferedOutputStream(new FileOutputStream(workingDirectory.getPath() + File.separator + entryName), BUFFER); } catch (FileNotFoundException e) { throw new ConfigurationException("Could not unpack configuration. The file '" + workingDirectory.getPath() + File.separator + entryName + "' cannot be unpacked. ", e); } int n; while ((n = jis.read(readBuffer, 0, BUFFER)) != -1) { bos.write(readBuffer, 0, n); } bos.flush(); bos.close(); } } catch (IOException e) { throw new ConfigurationException("Could not unpack configuration. ", e); } } /** * Returns the name of the configuration directory * * @return the name of the configuration directory */ public String getName() { return name; } protected void setName(String name) { this.name = name; } /** * Returns the type of the configuration directory * * @return the type of the configuration directory */ public String getType() { return type; } protected void setType(String type) { this.type = type; } /** * Returns a file handler object for the working directory * * @return a file handler object for the working directory */ public File getWorkingDirectory() { return workingDirectory; } /** * Initialize the working directory * * @throws MaltChainedException */ public void initWorkingDirectory() throws MaltChainedException { try { initWorkingDirectory(OptionManager.instance().getOptionValue(containerIndex, "config", "workingdir").toString()); } catch (NullPointerException e) { throw new ConfigurationException("The configuration cannot be found.", e); } } /** * Initialize the working directory according to the path. If the path is * equals to "user.dir" or current directory, then the current directory * will be the working directory. * * @param pathPrefixString the path to the working directory * @throws MaltChainedException */ public void initWorkingDirectory(String pathPrefixString) throws MaltChainedException { if (pathPrefixString == null || pathPrefixString.equalsIgnoreCase("user.dir") || pathPrefixString.equalsIgnoreCase(".")) { workingDirectory = new File(System.getProperty("user.dir")); } else { workingDirectory = new File(pathPrefixString); } if (workingDirectory == null || !workingDirectory.isDirectory()) { new ConfigurationException("The specified working directory '" + pathPrefixString + "' is not a directory. "); } } /** * Returns the URL to the malt configuration file (.mco) * * @return the URL to the malt configuration file (.mco) */ public URL getUrl() { return url; } protected void setUrl(URL url) { this.url = url; } /** * Returns the option container index * * @return the option container index */ public int getContainerIndex() { return containerIndex; } /** * Sets the option container index * * @param containerIndex a option container index */ public void setContainerIndex(int containerIndex) { this.containerIndex = containerIndex; } /** * Returns the version number of MaltParser which created the malt * configuration file (.mco) * * @return the version number of MaltParser which created the malt * configuration file (.mco) */ public String getCreatedByMaltParserVersion() { return createdByMaltParserVersion; } /** * Sets the version number of MaltParser which created the malt * configuration file (.mco) * * @param createdByMaltParserVersion a version number of MaltParser */ public void setCreatedByMaltParserVersion(String createdByMaltParserVersion) { this.createdByMaltParserVersion = createdByMaltParserVersion; } public void initCreatedByMaltParserVersionFromInfoFile() throws MaltChainedException { try { BufferedReader br = new BufferedReader(getInputStreamReaderFromConfigFileEntry(getName() + "_" + getType() + ".info", "UTF-8")); String line; while ((line = br.readLine()) != null) { if (line.startsWith("Version: ")) { setCreatedByMaltParserVersion(line.substring(31)); break; } } br.close(); } catch (FileNotFoundException e) { throw new ConfigurationException("Could not retrieve the version number of the MaltParser configuration.", e); } catch (IOException e) { throw new ConfigurationException("Could not retrieve the version number of the MaltParser configuration.", e); } } public void versioning() throws MaltChainedException { initCreatedByMaltParserVersionFromInfoFile(); SystemLogger.logger().info("\nCurrent version : " + SystemInfo.getVersion() + "\n"); SystemLogger.logger().info("Parser model version : " + createdByMaltParserVersion + "\n"); if (SystemInfo.getVersion() == null) { throw new ConfigurationException("Couln't determine the version of MaltParser"); } else if (createdByMaltParserVersion == null) { throw new ConfigurationException("Couln't determine the version of the parser model"); } else if (SystemInfo.getVersion().equals(createdByMaltParserVersion)) { SystemLogger.logger().info("The parser model " + getName() + ".mco has already the same version as the current version of MaltParser. \n"); return; } File mcoPath = new File(workingDirectory.getPath() + File.separator + getName() + ".mco"); File newMcoPath = new File(workingDirectory.getPath() + File.separator + getName() + "." + SystemInfo.getVersion().trim() + ".mco"); Versioning versioning = new Versioning(name, type, mcoPath, createdByMaltParserVersion); if (!versioning.support(createdByMaltParserVersion)) { SystemLogger.logger().warn("The parser model '" + name + ".mco' is created by MaltParser " + getCreatedByMaltParserVersion() + ", which cannot be converted to a MaltParser " + SystemInfo.getVersion() + " parser model.\n"); SystemLogger.logger().warn("Please retrain the parser model with MaltParser " + SystemInfo.getVersion() + " or download MaltParser " + getCreatedByMaltParserVersion() + " from http://maltparser.org/download.html\n"); return; } SystemLogger.logger().info("Converts the parser model '" + mcoPath.getName() + "' into '" + newMcoPath.getName() + "'....\n"); copyConfigFile(mcoPath, newMcoPath, versioning); } protected void checkNConvertConfigVersion() throws MaltChainedException { if (createdByMaltParserVersion.startsWith("1.0")) { SystemLogger.logger().info(" Converts the MaltParser configuration "); SystemLogger.logger().info("1.0"); SystemLogger.logger().info(" to "); SystemLogger.logger().info(SystemInfo.getVersion()); SystemLogger.logger().info("\n"); File[] configFiles = configDirectory.listFiles(); for (int i = 0, n = configFiles.length; i < n; i++) { if (configFiles[i].getName().endsWith(".mod")) { configFiles[i].renameTo(new File(configDirectory.getPath() + File.separator + "odm0." + configFiles[i].getName())); } if (configFiles[i].getName().endsWith(getName() + ".dsm")) { configFiles[i].renameTo(new File(configDirectory.getPath() + File.separator + "odm0.dsm")); } if (configFiles[i].getName().equals("savedoptions.sop")) { configFiles[i].renameTo(new File(configDirectory.getPath() + File.separator + "savedoptions.sop.old")); } if (configFiles[i].getName().equals("symboltables.sym")) { configFiles[i].renameTo(new File(configDirectory.getPath() + File.separator + "symboltables.sym.old")); } } try { BufferedReader br = new BufferedReader(new FileReader(configDirectory.getPath() + File.separator + "savedoptions.sop.old")); BufferedWriter bw = new BufferedWriter(new FileWriter(configDirectory.getPath() + File.separator + "savedoptions.sop")); String line; while ((line = br.readLine()) != null) { if (line.startsWith("0\tguide\tprediction_strategy")) { bw.write("0\tguide\tdecision_settings\tT.TRANS+A.DEPREL\n"); } else { bw.write(line); bw.write('\n'); } } br.close(); bw.flush(); bw.close(); new File(configDirectory.getPath() + File.separator + "savedoptions.sop.old").delete(); } catch (FileNotFoundException e) { throw new ConfigurationException("Could convert savedoptions.sop version 1.0.4 to version 1.1. ", e); } catch (IOException e) { throw new ConfigurationException("Could convert savedoptions.sop version 1.0.4 to version 1.1. ", e); } try { BufferedReader br = new BufferedReader(new FileReader(configDirectory.getPath() + File.separator + "symboltables.sym.old")); BufferedWriter bw = new BufferedWriter(new FileWriter(configDirectory.getPath() + File.separator + "symboltables.sym")); String line; while ((line = br.readLine()) != null) { if (line.startsWith("AllCombinedClassTable")) { bw.write("T.TRANS+A.DEPREL\n"); } else { bw.write(line); bw.write('\n'); } } br.close(); bw.flush(); bw.close(); new File(configDirectory.getPath() + File.separator + "symboltables.sym.old").delete(); } catch (FileNotFoundException e) { throw new ConfigurationException("Could convert symboltables.sym version 1.0.4 to version 1.1. ", e); } catch (IOException e) { throw new ConfigurationException("Could convert symboltables.sym version 1.0.4 to version 1.1. ", e); } } if (!createdByMaltParserVersion.startsWith("1.3")) { SystemLogger.logger().info(" Converts the MaltParser configuration "); SystemLogger.logger().info(createdByMaltParserVersion); SystemLogger.logger().info(" to "); SystemLogger.logger().info(SystemInfo.getVersion()); SystemLogger.logger().info("\n"); new File(configDirectory.getPath() + File.separator + "savedoptions.sop").renameTo(new File(configDirectory.getPath() + File.separator + "savedoptions.sop.old")); try { BufferedReader br = new BufferedReader(new FileReader(configDirectory.getPath() + File.separator + "savedoptions.sop.old")); BufferedWriter bw = new BufferedWriter(new FileWriter(configDirectory.getPath() + File.separator + "savedoptions.sop")); String line; while ((line = br.readLine()) != null) { int index = line.indexOf('\t'); int container = 0; if (index > -1) { container = Integer.parseInt(line.substring(0, index)); } if (line.startsWith(container + "\tnivre\tpost_processing")) { } else if (line.startsWith(container + "\tmalt0.4\tbehavior")) { if (line.endsWith("true")) { SystemLogger.logger().info("MaltParser 1.3 doesn't support MaltParser 0.4 emulation."); br.close(); bw.flush(); bw.close(); deleteConfigDirectory(); System.exit(0); } } else if (line.startsWith(container + "\tsinglemalt\tparsing_algorithm")) { bw.write(container); bw.write("\tsinglemalt\tparsing_algorithm\t"); if (line.endsWith("NivreStandard")) { bw.write("class org.maltparser.parser.algorithm.nivre.NivreArcStandardFactory"); } else if (line.endsWith("NivreEager")) { bw.write("class org.maltparser.parser.algorithm.nivre.NivreArcEagerFactory"); } else if (line.endsWith("CovingtonNonProjective")) { bw.write("class org.maltparser.parser.algorithm.covington.CovingtonNonProjFactory"); } else if (line.endsWith("CovingtonProjective")) { bw.write("class org.maltparser.parser.algorithm.covington.CovingtonProjFactory"); } bw.write('\n'); } else { bw.write(line); bw.write('\n'); } } br.close(); bw.flush(); bw.close(); new File(configDirectory.getPath() + File.separator + "savedoptions.sop.old").delete(); } catch (FileNotFoundException e) { throw new ConfigurationException("Could convert savedoptions.sop version 1.0.4 to version 1.1. ", e); } catch (IOException e) { throw new ConfigurationException("Could convert savedoptions.sop version 1.0.4 to version 1.1. ", e); } } } /** * Terminates the configuration directory * * @throws MaltChainedException */ public void terminate() throws MaltChainedException { if (infoFile != null) { try { infoFile.flush(); infoFile.close(); } catch (IOException e) { throw new ConfigurationException("Could not close configuration information file. ", e); } } symbolTables = null; // configuration = null; } /* * (non-Javadoc) @see java.lang.Object#finalize() */ @Override protected void finalize() throws Throwable { try { if (infoFile != null) { infoFile.flush(); infoFile.close(); } } finally { super.finalize(); } } public SymbolTableHandler getSymbolTables() { return symbolTables; } public void setSymbolTables(SymbolTableHandler symbolTables) { this.symbolTables = symbolTables; } public DataFormatManager getDataFormatManager() { return dataFormatManager; } public void setDataFormatManager(DataFormatManager dataFormatManager) { this.dataFormatManager = dataFormatManager; } public Set<String> getDataFormatInstanceKeys() { return dataFormatInstances.keySet(); } public boolean addDataFormatInstance(String key, DataFormatInstance dataFormatInstance) { if (!dataFormatInstances.containsKey(key)) { dataFormatInstances.put(key, dataFormatInstance); return true; } return false; } public DataFormatInstance getDataFormatInstance(String key) { return dataFormatInstances.get(key); } public int sizeDataFormatInstance() { return dataFormatInstances.size(); } public DataFormatInstance getInputDataFormatInstance() { return dataFormatInstances.get(dataFormatManager.getInputDataFormatSpec().getDataFormatName()); } public URL getInputFormatURL() { return inputFormatURL; } public URL getOutputFormatURL() { return outputFormatURL; } }