package com.cognitionis.nlp_files;
import com.cognitionis.utils_basickit.FileUtils;
import java.io.*;
import java.net.JarURLConnection;
import java.net.URL;
/**
 * Base class for files handled by the NLP tooling. The constructor resolves
 * a file name to a URL, opens an input stream on it and records the encoding
 * and extension reported by {@code FileUtils}.
 *
 * <p>Construction never throws: on any error the problem is reported on
 * {@code System.err} and {@link #getFile()} returns {@code null}.
 *
 * @author Héctor Llorens
 * @since 2011
 */
public abstract class NLPFile {

    /** Names of the concrete file types. */
    public enum Subclasses {
        PipesFile, PlainFile, TreebankFile, XMLFile
    }

    /** Backing file; {@code null} when loading failed. */
    protected File f;
    /** URL built from the (normalized) file name. */
    protected URL url;
    /** Stream opened on the resource; {@code null} when loading failed. */
    protected InputStream inputstream;
    protected String language;
    protected String encoding;
    protected String extension;
    protected Boolean isWellFormatted;

    /**
     * Creates the object and loads a file into it.
     *
     * <p>Only {@code file:} and {@code jar:} URLs are supported. Errors are
     * not propagated: they are printed to {@code System.err} (with a stack
     * trace when the system property {@code DEBUG} is {@code true}), any
     * stream that was already opened is closed, and the file reference is
     * reset to {@code null}.
     *
     * @param filename name of the file to load; it is passed through
     *                 {@code FileUtils.ensureURL} before being used as a URL
     */
    public NLPFile(String filename) {
        try {
            filename = FileUtils.ensureURL(filename);
            if (!FileUtils.URL_exists(filename)) {
                throw new FileNotFoundException("File does not exist: " + filename);
            }
            // this strategy implies creating a temp file in the filesystem instead of directly reading it as input stream
            // TODO: the ideal situation would be that the parent just checks for file existence and the child creates the InputStream and reads and loads the file
            // But it might not be the case for plain or XML files... will see
            url = new URL(filename);
            String protocol = url.getProtocol();
            if ("file".equals(protocol)) {
                f = new File(url.toURI());
                inputstream = new FileInputStream(f);
            } else if ("jar".equals(protocol)) {
                // NOTE(review): here f wraps the whole "jar:" URL string, so it is
                // presumably only a marker/name and not a readable filesystem path.
                f = new File(filename);
                JarURLConnection connection = (JarURLConnection) url.openConnection();
                inputstream = connection.getInputStream();
            } else {
                // Without this branch a null stream would reach the encoding detection.
                throw new IOException("Unsupported URL protocol '" + protocol + "': " + filename);
            }
            this.encoding = FileUtils.getEncoding(inputstream);
            this.extension = FileUtils.getExtension(filename);
        } catch (Exception e) {
            System.err.println("Errors found (" + this.getClass().getSimpleName() + "):\n\t" + e.toString() + "\n");
            if (Boolean.parseBoolean(System.getProperty("DEBUG"))) {
                e.printStackTrace(System.err);
            }
            // Do not leave a stream open on an object that is flagged as failed.
            closeInputStreamQuietly();
            this.f = null;
        }
    }

    /**
     * Closes the input stream, if one was opened, and clears the reference.
     * A failure while closing is ignored because this only runs on the error
     * path, where the original error has already been reported.
     */
    private void closeInputStreamQuietly() {
        if (inputstream != null) {
            try {
                inputstream.close();
            } catch (IOException ignored) {
                // Nothing useful can be done; the primary error was already reported.
            }
            inputstream = null;
        }
    }

    /**
     * Basic format check. The concrete check is defined by each subclass.
     *
     * @return the result of the format check (presumably {@code true} when
     *         the file follows the format expected by the subclass)
     */
    public abstract Boolean isWellFormatted();

    /**
     * Creates a plain file from any type of NLPFile.
     *
     * @param filename file name used by the subclass for the conversion
     *                 (presumably the name of the plain file to create)
     * @return String: the canonical path to the created file
     */
    public abstract String toPlain(String filename);

    /**
     * @return the backing file, or {@code null} if loading failed
     */
    public File getFile() {
        return this.f;
    }

    /**
     * @param e the encoding name to store
     */
    public void setEncoding(String e) {
        this.encoding = e;
    }

    /**
     * @return the stored encoding name
     */
    public String getEncoding() {
        return this.encoding;
    }

    /**
     * @param lang the language to store
     */
    public void setLanguage(String lang) {
        this.language = lang;
    }

    /**
     * @return the stored language; {@code null} until {@link #setLanguage} is called
     */
    public String getLanguage() {
        return this.language;
    }

    /**
     * @return the stored file extension
     */
    public String getExtension() {
        return this.extension;
    }

    /**
     * Replaces the extension detected at construction time.
     *
     * @param newext the extension to store
     */
    public void overrideExtension(String newext) {
        this.extension = newext;
    }
}