/*
* Concept profile generation tool suite
* Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
* Rotterdam, The Netherlands
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>
*/
/*
* @author Ariel Schwartz
* @author Gaurav Bhalotia
*/
package org.erasmusmc.dataimport.Medline.xmlparsers.medline;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.List;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import org.erasmusmc.dataimport.Medline.xmlparsers.GenericXMLParser;
/**
* This class the parses a medline document, extends the generic xml parser
*/
public class ParseAll extends GenericXMLParser {
private static String dir = null;
public static String server;
public static String database;
public static String port;
public static String username;
public static String password;
protected static void usage() {
System.err.println("Usage: [-server <servername>][-database <databasename>][-port <portnumber>][-user <username>][-password <password>] <dir containing medline.xml files>");
System.err.println("\tIf the extension is .gz it will unzip the file, afterwards deletes the unzipped file.");
System.err.println("\tAlso check out the README.txt in the org.erasmusmc.dataimport.Medline package");
System.err.println("\t-usage or -help displays this message");
System.exit(1);
}
static public void main(String[] args) throws Exception {
readParameters(args);
File directory = new File(dir);
List<File> fileList = getFileListing(directory);
boolean cont = false;
Iterator<File> filesIter = fileList.iterator();
while (filesIter.hasNext()) {
String fn = filesIter.next().getAbsolutePath();
//Use these lines if you don't want to start at the beginning:
//if (fn.contains("0769"))
// cont = true;
//if (cont)
if (fn.endsWith(".gz")) {
copyStream(new GZIPInputStream(new FileInputStream(fn)), new FileOutputStream(fn.substring(0, fn.length() - 3)));
MedlineParser.main(new String[] { "-validate", fn.substring(0, fn.length() - 3) });
File del = new File(fn.substring(0, fn.length() - 3));
del.delete();
}
}
}
private static void readParameters(String[] args) {
int i = 0;
while (i < args.length){
if (args[i].equals("-usage")) {
usage();
} else if (args[i].equals("-help")) {
usage();
} else if (args[i].equals("-server")) {
i++;
server = args[i];
} else if (args[i].equals("-database")) {
i++;
database = args[i];
} else if (args[i].equals("-port")) {
i++;
port = args[i];
} else if (args[i].equals("-user")) {
i++;
username = args[i];
} else if (args[i].equals("-password")) {
i++;
password = args[i];
} else {
dir = args[i];
if (i != args.length - 1) {
usage();
}
}
i++;
}
if (dir == null) {
usage();
}
}
static private List<File> getFileListing(File aStartingDir) throws FileNotFoundException {
validateDirectory(aStartingDir);
List<File> result = new ArrayList<File>();
File[] filesAndDirs = aStartingDir.listFiles();
List<File> filesDirs = Arrays.asList(filesAndDirs);
Iterator<File> filesIter = filesDirs.iterator();
File file = null;
while (filesIter.hasNext()) {
file = filesIter.next();
result.add(file); //always add, even if directory
if (!file.isFile()) {
//must be a directory
//recursive call!
List<File> deeperList = getFileListing(file);
result.addAll(deeperList);
}
}
Collections.sort(result);
return result;
}
/**
* Directory is valid if it exists, does not represent a file, and can be read.
*/
static private void validateDirectory(File aDirectory) throws FileNotFoundException {
if (aDirectory == null) {
throw new IllegalArgumentException("Directory should not be null.");
}
if (!aDirectory.exists()) {
throw new FileNotFoundException("Directory does not exist: " + aDirectory);
}
if (!aDirectory.isDirectory()) {
throw new IllegalArgumentException("Is not a directory: " + aDirectory);
}
if (!aDirectory.canRead()) {
throw new IllegalArgumentException("Directory cannot be read: " + aDirectory);
}
}
public static final void copyInputStream(InputStream in, OutputStream out) throws IOException {
byte[] buffer = new byte[1024];
int len;
while ((len = in.read(buffer)) >= 0)
out.write(buffer, 0, len);
in.close();
out.close();
}
static private String[] Unzip(String zippedfile) throws IOException {
Enumeration entries;
ZipFile zipFile;
zipFile = new ZipFile(zippedfile);
String[] result = new String[] {};
entries = zipFile.entries();
while (entries.hasMoreElements()) {
ZipEntry entry = (ZipEntry) entries.nextElement();
if (entry.isDirectory()) {
continue;
}
System.err.println("Extracting file: " + entry.getName());
copyInputStream(zipFile.getInputStream(entry), new BufferedOutputStream(new FileOutputStream(dir + "/" + entry.getName())));
String file = entry.getName();
File delfile = new File(file);
delfile.deleteOnExit();
String[] newResult = new String[result.length + 1];
System.arraycopy(result, 0, newResult, 0, result.length);
newResult[result.length] = dir + "/" + file;
result = newResult;
}
return result;
}
private static final void copyStream(InputStream source, OutputStream dest){
int bufferSize = 1024;
int bytes;
byte[] buffer;
buffer = new byte[bufferSize];
try {
while ((bytes = source.read(buffer)) != -1) {
if (bytes == 0) {
bytes = source.read();
if (bytes < 0)
break;
dest.write(bytes);
dest.flush();
continue;
}
dest.write(buffer, 0, bytes);
dest.flush();
}
} catch (IOException e) {
e.printStackTrace();
}
}
}