package com.alcatel_lucent.nz.wnmsextract.reader;
/*
* This file is part of wnmsextract.
*
* wnmsextract is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* wnmsextract is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import java.io.BufferedOutputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.log4j.Logger;
import com.alcatel_lucent.nz.wnmsextract.document.ALUFileFilter;
import com.alcatel_lucent.nz.wnmsextract.document.DocumentType;
/**
* FileSelector's purpose is to pull and unzip extract data from a defined source and leave
* copies of the required XML files in a common directory
* @author jnramsay
*
*/
public abstract class FileSelector {

    private static final Logger jlog = Logger.getLogger("com.alcatel_lucent.nz.wnmsextract.schedule.FileSelector");

    /** Date pattern used for the dated sub-directories. */
    private static final String FILENAME_DF = "yyyyMMdd";
    /** Size in bytes of the copy buffer and of the buffered output streams. */
    private static final int BUFFER = 2048;
    /** Extracted files shorter than this many bytes trigger a retry. */
    private static final int MIN_FILE_SIZE = 10;
    /** Upper bound on {@link #retry_counter} before retries stop. */
    private static final int MAX_UNGZIP_RETRIES = 10;

    /** The date being extracted; set through {@link #setCalendar(Calendar)}. */
    protected Calendar calendar;
    private File tdir;
    private File sdir;
    /**
     * Suffix inserted before ".xml" in extracted file names. Starts empty so that
     * files met before any directory entry are not named "...null.xml".
     */
    private String identifier = "";
    protected DocumentType doctype;
    /**
     * Number of retries performed so far. It is never reset by this class, so
     * it bounds the total retries over the life of the instance.
     */
    protected int retry_counter;
    /** Files produced by {@link #unzip1(File)} and {@link #untar(File)}. */
    protected List<File> allfiles = new ArrayList<File>();

    /** Performs the extraction; the behaviour is defined by the subclass. */
    public abstract void extract();

    /**
     * Returns the files in the dated temp directory that are accepted by the
     * file filter of the current doctype, in sorted order.
     *
     * @return the accepted files; empty if none match or the directory cannot be listed
     */
    public List<File> getFileList() {
        // Set the date to filter on
        ALUFileFilter ff = (ALUFileFilter) doctype.getFileFilter();
        ff.setAcceptDate(calendar);

        List<File> filteredFiles = new ArrayList<File>();
        File calTempPath = getCalTempPath();
        File[] candidates = calTempPath.listFiles();
        if (candidates == null) {
            // listFiles() returns null when the path is not a directory or an I/O error occurs
            jlog.warn("Unable to list files in " + calTempPath);
            return filteredFiles;
        }
        for (File candidate : candidates) {
            if (ff.accept(candidate)) {
                filteredFiles.add(candidate);
            }
        }
        Collections.sort(filteredFiles);
        return filteredFiles;
    }

    public void setDocType(DocumentType doctype) {
        this.doctype = doctype;
    }

    public DocumentType getDocType() {
        return this.doctype;
    }

    /**
     * Returns a path string defined by the subclass.
     * NOTE(review): the exact meaning is not visible in this class - confirm against subclasses.
     */
    public abstract String docPath();

    /**
     * Sets the required date and points the temp path at a sub-directory of
     * {@code Extractor.chooseTempPath()} named after that date.
     *
     * @param calendar the required date
     */
    public void setCalendar(Calendar calendar) {
        this.calendar = calendar;
        setTempPath(new File(Extractor.chooseTempPath() + File.separator + calendarToString(this.calendar)));
    }

    // getters/setters for temp/src paths
    public void setTempPath(File tdir) {
        this.tdir = tdir;
    }

    public File getTempPath() {
        return tdir;
    }

    /**
     * Returns the temp directory with the calendar date appended, creating it
     * (and any missing parents) if it does not exist.
     */
    public File getCalTempPath() {
        return datedDirectory(getTempPath());
    }

    public void setSourcePath(File sdir) {
        this.sdir = sdir;
    }

    public File getSourcePath() {
        return sdir;
    }

    /**
     * Returns the source directory with the calendar date appended, creating it
     * (and any missing parents) if it does not exist.
     */
    public File getCalSourcePath() {
        return datedDirectory(getSourcePath());
    }

    /**
     * Formats a calendar as yyyyMMdd.
     *
     * @param cal the calendar to format
     * @return the date string
     */
    public static String calendarToString(Calendar cal) {
        // A new formatter per call because SimpleDateFormat is not thread-safe
        DateFormat dateFormat = new SimpleDateFormat(FILENAME_DF);
        return dateFormat.format(cal.getTime());
    }

    // Unzip methods

    /**
     * Top unzip method. Walks a zip archive, copies every file entry into the
     * dated temp directory, expands it with {@link #unzip3(File)} (names ending
     * in "zip") or {@link #ungzip(File)} (everything else), records the result
     * in {@link #allfiles} and deletes the intermediate copy,
     * e.g. yyyyMMdd.zip->/yyyyMMdd/INode-CH_RNC01/A2010...zip
     * I/O errors are logged and end the processing of the archive.
     *
     * @param zipfile the archive to read
     * @throws FileNotFoundException declared for compatibility with existing callers
     */
    protected void unzip1(File zipfile) throws FileNotFoundException {
        ZipArchiveInputStream zais = null;
        try {
            zais = new ZipArchiveInputStream(new FileInputStream(zipfile));
            ZipArchiveEntry entry;
            while ((entry = zais.getNextZipEntry()) != null) {
                if (entry.isDirectory()) {
                    updateIdentifier(entry.getName());
                } else {
                    extractEntry(zais, entry.getName());
                }
            }
        } catch (IOException ioe) {
            jlog.fatal("IO read error :: " + ioe, ioe);
        } finally {
            closeQuietly(zais);
        }
    }

    /**
     * The nested unzip method. Decompresses the first entry of a zip file into a
     * sibling file whose ".zip" extension is replaced by the current identifier
     * plus ".xml". Superseded by {@link #unzip3(File)}.
     * A result smaller than the minimum size is deleted and re-extracted while
     * {@link #retry_counter} is below the retry limit.
     *
     * @param zf the zip file to expand
     * @return the extracted file, or null if an I/O error occurred
     * @throws FileNotFoundException declared for compatibility with existing callers
     */
    protected File unzip2(File zf) throws FileNotFoundException {
        File target = new File(xmlPathFor(zf, ".zip"));
        while (true) {
            if (!unzipFirstEntryTo(zf, target)) {
                return null;
            }
            if (!isTooSmall(target) || retry_counter >= MAX_UNGZIP_RETRIES) {
                return target;
            }
            // try again
            retry_counter++;
            target.delete();
        }
    }

    /**
     * Expands every file entry of a zip file into the dated temp directory.
     * Entries are stored under their base name only, so an entry name cannot
     * direct output outside the temp directory; directory entries are skipped.
     *
     * @param zf the zip file to expand
     * @return the last file extracted, or null if an I/O error occurred or the
     *         archive contained no file entries
     * @throws FileNotFoundException declared for compatibility with existing callers
     */
    protected File unzip3(File zf) throws FileNotFoundException {
        File last = null;
        ZipInputStream zin = null;
        try {
            zin = new ZipInputStream(new FileInputStream(zf));
            ZipEntry ze;
            while ((ze = zin.getNextEntry()) != null) {
                if (ze.isDirectory()) {
                    continue;
                }
                String baseName = safeBaseName(ze.getName());
                if (baseName == null) {
                    jlog.warn("Skipping entry with unusable name '" + ze.getName() + "' in " + zf);
                    continue;
                }
                File dest = new File(getCalTempPath(), baseName);
                copyToFile(zin, dest);
                last = dest;
            }
        } catch (IOException ioe) {
            jlog.error("Error processing Zip " + zf + " Excluding! :: " + ioe, ioe);
            return null;
        } finally {
            closeQuietly(zin);
        }
        if (last == null) {
            // Nothing was written, so there is no file to hand back to the caller
            jlog.warn("No file entries found in Zip " + zf + " Excluding!");
        }
        return last;
    }

    /**
     * Walks a tar archive, copies every file entry into the dated temp
     * directory, expands it with {@link #unzip3(File)} (names ending in "zip")
     * or {@link #ungzip(File)} (everything else), records the result in
     * {@link #allfiles} and deletes the intermediate copy,
     * e.g. yyyyMMdd.tar->/yyyyMMdd/INode-CH_RNC01/A2010...gz
     * I/O errors are logged and end the processing of the archive.
     *
     * @param tf the tar file to read
     * @throws FileNotFoundException declared for compatibility with existing callers
     */
    protected void untar(File tf) throws FileNotFoundException {
        TarArchiveInputStream tais = null;
        try {
            tais = new TarArchiveInputStream(new FileInputStream(tf));
            TarArchiveEntry entry;
            while ((entry = tais.getNextTarEntry()) != null) {
                if (entry.isDirectory()) {
                    updateIdentifier(entry.getName());
                } else {
                    extractEntry(tais, entry.getName());
                }
            }
        } catch (IOException ioe) {
            jlog.fatal("IO read error :: " + ioe, ioe);
        } finally {
            closeQuietly(tais);
        }
    }

    /**
     * Decompresses a gzip file into a sibling file whose ".gz" extension is
     * replaced by the current identifier plus ".xml".
     * A result smaller than the minimum size is deleted and re-extracted while
     * {@link #retry_counter} is below the retry limit.
     *
     * @param gzf the gzip file to expand
     * @return the extracted file, or null if an I/O error occurred
     * @throws FileNotFoundException declared for compatibility with existing callers
     */
    protected File ungzip(File gzf) throws FileNotFoundException {
        File target = new File(xmlPathFor(gzf, ".gz"));
        while (true) {
            if (!gunzipTo(gzf, target)) {
                return null;
            }
            if (!isTooSmall(target) || retry_counter >= MAX_UNGZIP_RETRIES) {
                return target;
            }
            // try again
            retry_counter++;
            target.delete();
        }
    }

    /** Returns base/yyyyMMdd for the current calendar, creating the directory when missing. */
    private File datedDirectory(File base) {
        File dir = new File(base.getAbsolutePath(), calendarToString(calendar));
        // mkdirs rather than mkdir: the parent may not exist yet.
        // The second isDirectory() check covers a concurrent creation.
        if (!dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory()) {
            jlog.warn("Unable to create directory " + dir);
        }
        return dir;
    }

    /**
     * Sets the file-name identifier from an archive directory name: files under
     * a directory whose name contains "account" get the ".vcc" marker because
     * the delivered file names do not carry it themselves.
     */
    private void updateIdentifier(String directoryName) {
        identifier = directoryName.contains("account") ? ".vcc" : "";
    }

    /**
     * Copies the current archive entry into the dated temp directory, expands
     * it, records the expanded file and removes the intermediate copy.
     */
    private void extractEntry(InputStream archive, String entryName) throws IOException {
        String baseName = safeBaseName(entryName);
        if (baseName == null) {
            jlog.warn("Skipping archive entry with unusable name '" + entryName + "'");
            return;
        }
        File staged = new File(getCalTempPath(), baseName);
        copyToFile(archive, staged);
        try {
            File expanded = staged.getName().endsWith("zip") ? unzip3(staged) : ungzip(staged);
            if (expanded != null) {
                allfiles.add(expanded);
            }
        } finally {
            staged.delete();
        }
    }

    /**
     * Returns the last path component of an archive entry name, or null when
     * there is no usable file name (empty, "." or "..").
     */
    private static String safeBaseName(String entryName) {
        String baseName = new File(entryName).getName();
        if (baseName.length() == 0 || ".".equals(baseName) || "..".equals(baseName)) {
            return null;
        }
        return baseName;
    }

    /**
     * Builds the output path for an archive: the given extension is stripped
     * from the end of the absolute path (if present) and the current identifier
     * plus ".xml" is appended. Only the suffix is touched so directory names are
     * never rewritten and the result always differs from the input path.
     */
    private String xmlPathFor(File archive, String extension) {
        String path = archive.getAbsolutePath();
        String stem = path.endsWith(extension)
                ? path.substring(0, path.length() - extension.length())
                : path;
        return stem + identifier + ".xml";
    }

    /** Gunzips gzf into target; on failure logs, removes the partial output and returns false. */
    private boolean gunzipTo(File gzf, File target) {
        boolean ok = false;
        FileInputStream fis = null;
        InputStream gzin = null;
        OutputStream out = null;
        try {
            fis = new FileInputStream(gzf);
            gzin = new GzipCompressorInputStream(fis);
            out = new BufferedOutputStream(new FileOutputStream(target), BUFFER);
            copyStream(gzin, out);
            out.flush();
            ok = true;
        } catch (IOException ioe) {
            jlog.error("Error processing GZip " + gzf + " Excluding! :: " + ioe, ioe);
        } finally {
            closeQuietly(out);
            closeQuietly(gzin);
            // Covers the case where the gzip stream constructor failed
            closeQuietly(fis);
        }
        if (!ok) {
            target.delete();
        }
        return ok;
    }

    /** Writes the first entry of zf into target; on failure logs, removes the partial output and returns false. */
    private boolean unzipFirstEntryTo(File zf, File target) {
        boolean ok = false;
        ZipInputStream zin = null;
        OutputStream out = null;
        try {
            zin = new ZipInputStream(new FileInputStream(zf));
            out = new BufferedOutputStream(new FileOutputStream(target), BUFFER);
            // A ZipInputStream yields no data until it is positioned on an entry
            zin.getNextEntry();
            copyStream(zin, out);
            out.flush();
            ok = true;
        } catch (IOException ioe) {
            jlog.error("Error processing Zip " + zf + " Excluding! :: " + ioe, ioe);
        } finally {
            closeQuietly(out);
            closeQuietly(zin);
        }
        if (!ok) {
            target.delete();
        }
        return ok;
    }

    /** Copies the rest of in to dest through a buffered stream; in is left open for the caller. */
    private static void copyToFile(InputStream in, File dest) throws IOException {
        OutputStream out = new BufferedOutputStream(new FileOutputStream(dest), BUFFER);
        try {
            copyStream(in, out);
            out.flush();
        } finally {
            closeQuietly(out);
        }
    }

    /** Copies bytes from in to out until end of stream; closes neither. */
    private static void copyStream(InputStream in, OutputStream out) throws IOException {
        byte[] content = new byte[BUFFER];
        int n;
        while (-1 != (n = in.read(content))) {
            out.write(content, 0, n);
        }
    }

    /** Closes a stream if it is non-null; a failure to close is only logged. */
    private static void closeQuietly(Closeable stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException ignored) {
            // Nothing useful can be done here; the data has already been flushed or abandoned
            jlog.debug("Ignoring error while closing stream :: " + ignored);
        }
    }

    /** Returns true when the file is shorter than the minimum accepted size. */
    private boolean isTooSmall(File f) {
        return f.length() < MIN_FILE_SIZE;
    }
}