package com.alcatel_lucent.nz.wnmsextract.reader; /* * This file is part of wnmsextract. * * wnmsextract is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * wnmsextract is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ import java.io.File; import java.io.FileNotFoundException; import java.util.ArrayList; import java.util.Calendar; import org.apache.log4j.Logger; /* * FileProcess workflow. * 1) Instantiate a new instance * 2) Set date of interest with setCalendar(Calendar c) * 3) Call extract() to process selected date .tar * [SD/yyyyMMdd.tar -> TD/yyyMMdd/*.gz -> TD/yyyyMMdd/*.xml] * 4) Call fetchFileList(Doctype d) to return a list of available XML files */ /** * TNZ extension of fileselector. Differences from the abstract include * URL location, SSL auth and scrape vs unzip */ public class TNZArchiveReader extends FileSelector { private static Logger jlog = Logger.getLogger("com.alcatel_lucent.nz.wnmsextract.schedule.TNZArchiveReader"); //private static final int MIN_FILE_SIZE = 10; //private static final int MAX_UNGZIP_RETRIES = 100; /** Constant defining the number of files expected in an archive to consider it to have unzipped * fully and/or not be corrupt */ private static final int ENOUGH_FILES = 10000; //private static final int BUFFER = 2048; //private List<File> allfiles; //private int retry_counter; private static TNZArchiveReader reader; /** Singleton getInstance method */ public static synchronized TNZArchiveReader getInstance() { if(reader == null){ reader = new TNZArchiveReader(); } return reader; } /** Private Constructor */ private TNZArchiveReader(){ this.retry_counter = 0; //default cal is yesterday this.calendar = Calendar.getInstance(); this.calendar.add(Calendar.DATE,-1); //setSourcePath(new File(Extractor.chooseSourcePath())); //setTempPath(new File(Extractor.chooseTempPath()+"/"+calendarToString(this.calendar))); this.allfiles = new ArrayList<File>(); } /** * Extractor method which when called will check source_dir for new valid tar * and process it to temp_dir for reading by parser */ @Override public void extract() { File f = null; try { //if the temp dir (eg c:\temp\TNZ) doesn't exist, create it if (checkAvailability(getTempPath().getAbsolutePath()) == null) getTempPath().mkdir(); //if the date specific temp dir (eg C:\temp\TNZ\20101231) doesn't exist, create it if (checkAvailability(getCalTempPath().getAbsolutePath()) == null) getCalTempPath().mkdir(); //if the temp dir is ~empty (less than N files in it) fill it if(getCalTempPath().listFiles().length<ENOUGH_FILES){ //if tar is not available check for zip if((f = checkAvailability(getCalSourcePath()+"-VCC.tar")) == null){ //if zip is not available quit if((f = checkAvailability(getCalSourcePath()+".zip")) == null){ jlog.error("no zip/tar found matching "+expectedFileName()); throw new FileNotFoundException("No tar/zip available for "+expectedFileName()); } else unzip1(f); } else untar(f); } } catch (FileNotFoundException fnfe){ System.err.println("Source file/data not avaliable "+fnfe); System.err.println("primary:"+getSourcePath()+File.separator+expectedFileName()+"(not necessarily the missing file)"); System.exit(1); } //return f1; } //utility classes. this functionality has moved to the superclass. these are kept as comments //since their operation is slightly different and might be needed if file formats change again /* *//** * extract tarfile to constituent parts processing gzips along the way * yyyyMMdd.tar->/yyyyMMdd/INode-CH_RNC01/A2010...gz *//* private void untar(File tarfile) throws FileNotFoundException { try { TarArchiveInputStream tais = new TarArchiveInputStream(new FileInputStream(tarfile)); TarArchiveEntry t1 = null; while ((t1 = tais.getNextTarEntry())!=null){ if(t1.isDirectory()){you can make directories here if you want} else{ String fn = t1.getName().substring(t1.getName().lastIndexOf("/")); File f = new File(getTempPath()+fn); FileOutputStream fos = new FileOutputStream(f); BufferedOutputStream bos = new BufferedOutputStream(fos, BUFFER); int n = 0; byte[] content = new byte[BUFFER]; while (-1 != (n = tais.read(content))) { fos.write(content, 0, n); } bos.flush(); bos.close(); fos.close(); File unz = ungzip(f); if(unz!=null) allfiles.add(unz); f.delete(); } } tais.close(); } catch(IOException ioe){ jlog.fatal("IO read error :: "+ioe); } } *//** * extract tarfile to constituent parts processing gzips along the way * yyyyMMdd.zip->/yyyyMMdd/INode-CH_RNC01/A2010...zip *//* private static void unzip1(File zipfile) throws FileNotFoundException { try { ZipArchiveInputStream zais = new ZipArchiveInputStream(new FileInputStream(zipfile)); ZipArchiveEntry z1 = null; while ((z1 = zais.getNextZipEntry())!=null){ if(z1.isDirectory()){you can make directories here if you want} else{ String fn = z1.getName().substring(z1.getName().lastIndexOf("/")); File f = new File(getTempPath()+fn); FileOutputStream fos = new FileOutputStream(f); BufferedOutputStream bos = new BufferedOutputStream(fos, BUFFER); int n = 0; byte[] content = new byte[BUFFER]; while (-1 != (n = zais.read(content))) { fos.write(content, 0, n); } bos.flush(); bos.close(); fos.close(); File unz = unzip2(f); if(unz!=null) allfiles.add(unz); f.delete(); } } zais.close(); } catch(IOException ioe){ jlog.fatal("IO read error :: "+ioe); } } *//** * ungzip. Given a gzip stream, decompress and store in file in temp_dir *//* private File ungzip(File gzf) throws FileNotFoundException { //File f = null; String rename = gzf.getAbsolutePath().replaceFirst("\\.gz", ".xml"); File f = new File(rename); try { FileInputStream fis = new FileInputStream(gzf); FileOutputStream fos = new FileOutputStream(rename); GzipCompressorInputStream gzin = new GzipCompressorInputStream(fis); final byte[] content = new byte[BUFFER]; int n = 0; while (-1 != (n = gzin.read(content))) { fos.write(content, 0, n); } fos.flush(); fos.close(); fis.close(); gzin.close(); } catch (IOException ioe) { jlog.error("Error processing GZip "+gzf+" Excluding! :: "+ioe); return null; } //try again... what could go wrong if (checkMinFileSize(f) && retry_counter<MAX_UNGZIP_RETRIES){ retry_counter++; f.delete(); f = ungzip(gzf); } return f; } *//** * ungzip. Given a gzip stream, decompress and store in file in temp_dir *//* private File unzip2(File zf) throws FileNotFoundException { //File f = null; String rename = zf.getAbsolutePath().replaceFirst("\\.zip", ".xml"); File f = new File(rename); try { FileInputStream fis = new FileInputStream(zf); FileOutputStream fos = new FileOutputStream(rename); ZipInputStream zin = new ZipInputStream(fis); final byte[] content = new byte[BUFFER]; int n = 0; while (-1 != (n = zin.read(content))) { fos.write(content, 0, n); } fos.flush(); fos.close(); fis.close(); zin.close(); } catch (IOException ioe) { jlog.error("Error processing Zip "+zf+" Excluding! :: "+ioe); return null; } //try again... what could go wrong if (checkMinFileSize(f) && retry_counter<MAX_UNGZIP_RETRIES){ retry_counter++; f.delete(); f = unzip2(zf); } return f; } */ /** * Check for file or directory in location. Return it if it exists * @param file * @return File returns if available */ private File checkAvailability(String file){ if ((new File(file)).exists()){ return new File(file); } else return null; } /* private boolean checkMinFileSize(File f){ if (f.length()<MIN_FILE_SIZE) return true; return false; } */ private String expectedFileName(){ return calendarToString(calendar); } /** * A method for identifying similarly named files based on the relative location. Used for * INodeVcc files which have the same name structure and INode files but are found in a different * directory * @return Path fragment */ @Override public String docPath(){ switch(doctype){ case WNMS_INodeVcc: return "account"; default: return "stats"; } } }