/*
 * The Kuali Financial System, a comprehensive financial management system for higher education.
 *
 * Copyright 2005-2014 The Kuali Foundation
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.kuali.kfs.gl.batch;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.UUID;

import org.apache.commons.io.FileUtils;
import org.kuali.kfs.sys.KFSConstants;
import org.kuali.kfs.sys.context.SpringContext;
import org.kuali.rice.core.api.config.property.ConfigurationService;

/**
 * Sorts the lines of a text file using an external merge sort.
 * <p>
 * The input is read in chunks of at most {@link #linesPerFile} lines. Each chunk is sorted in memory and
 * written to its own file in a uniquely named temporary directory; the chunk files are then merged into
 * the output file and the temporary directory is removed.
 * <p>
 * Files are read and written with {@link FileReader} / {@link FileWriter}, i.e. with the JVM's default
 * charset. Every output line is terminated with a single {@code '\n'}, whatever terminator the input used.
 */
public class BatchSortUtil {
    private static final org.apache.log4j.Logger LOG = org.apache.log4j.Logger.getLogger(BatchSortUtil.class);

    /** Lazily resolved base directory under which the per-sort temporary directories are created. */
    private static File tempDir;

    /**
     * Maximum number of lines held in memory at once, and therefore the maximum number of lines per
     * temporary chunk file. Must be positive.
     */
    static int linesPerFile = 10000;

    /**
     * Returns the base temporary directory, looking it up from the {@link ConfigurationService}
     * (property {@link KFSConstants#TEMP_DIRECTORY_KEY}) on first use and caching it afterwards.
     *
     * @return the configured temporary directory
     */
    private static File getTempDirectory() {
        if (tempDir == null) {
            tempDir = new File(SpringContext.getBean(ConfigurationService.class).getPropertyValueAsString(KFSConstants.TEMP_DIRECTORY_KEY));
        }
        return tempDir;
    }

    /**
     * Sorts the lines of {@code inputFileName} with {@code comparator} and writes them to
     * {@code outputFileName}.
     * <p>
     * Interim chunk files are kept in a randomly named directory below the configured temporary
     * directory. That directory is removed when the method exits, whether it succeeds or fails.
     *
     * @param inputFileName path of the file whose lines are to be sorted
     * @param outputFileName path of the file that receives the sorted lines
     * @param comparator ordering applied to the lines; it is used as a {@code Comparator<String>}
     * @throws RuntimeException if the temporary directory cannot be created, the input file cannot be
     *         found, or any I/O error occurs while sorting or merging
     */
    public static void sortTextFileWithFields(String inputFileName, String outputFileName, @SuppressWarnings("rawtypes") Comparator comparator) {
        // The public signature takes a raw Comparator; every value compared is a line of text.
        @SuppressWarnings("unchecked")
        final Comparator<String> lineComparator = comparator;

        // create a directory for the interim files
        File tempSortDir = new File(getTempDirectory(), UUID.randomUUID().toString());
        // ensure the directory is empty
        FileUtils.deleteQuietly(tempSortDir);
        try {
            FileUtils.forceMkdir(tempSortDir);
        }
        catch (IOException ex) {
            LOG.fatal("Unable to create temporary sort directory", ex);
            throw new RuntimeException("Unable to create temporary sort directory", ex);
        }

        try {
            int numFiles = sortToTempFiles(inputFileName, tempSortDir, lineComparator);
            // now that the sort is complete - merge the sorted files
            mergeFiles(tempSortDir, numFiles, outputFileName, lineComparator);
        }
        finally {
            // remove the temporary sort directory even when sorting or merging failed
            FileUtils.deleteQuietly(tempSortDir);
        }
    }

    /*
     * Code below derived from code originally written by Sammy Larbi and
     * downloaded from www.codeodor.com.
     *
     * http://www.codeodor.com/index.cfm/2007/5/14/Re-Sorting-really-BIG-files---the-Java-source-code/1208
     */

    /**
     * Splits the input file into sorted chunk files named {@code chunk_0}, {@code chunk_1}, ... inside
     * {@code tempSortDir}. Each chunk holds at most {@link #linesPerFile} lines. No chunk file is written
     * for an empty batch, so an empty input file produces zero chunks.
     *
     * @param inputFileName path of the file to split
     * @param tempSortDir existing directory that receives the chunk files
     * @param comparator ordering applied within each chunk
     * @return the number of chunk files written
     * @throws IllegalStateException if {@link #linesPerFile} is not positive
     * @throws RuntimeException if the input file cannot be found or an error occurs while reading,
     *         sorting or writing
     */
    private static int sortToTempFiles(String inputFileName, File tempSortDir, Comparator<String> comparator) {
        final int chunkSize = linesPerFile;
        if (chunkSize <= 0) {
            // a non-positive chunk size could never consume the input
            throw new IllegalStateException("linesPerFile must be positive but was " + chunkSize);
        }

        BufferedReader inputFile;
        try {
            inputFile = new BufferedReader(new FileReader(inputFileName));
        }
        catch (FileNotFoundException ex) {
            LOG.fatal("Unable to find input file: " + inputFileName, ex);
            throw new RuntimeException("Unable to find input file: " + inputFileName, ex);
        }

        try {
            List<String> batchLines = new ArrayList<String>(chunkSize);
            int numFiles = 0;

            for (String line = inputFile.readLine(); line != null; line = inputFile.readLine()) {
                batchLines.add(line);
                if (batchLines.size() == chunkSize) {
                    writeSortedChunk(batchLines, new File(tempSortDir, "chunk_" + numFiles), comparator);
                    numFiles++;
                    batchLines.clear(); // empty the list for the next pass
                }
            }

            // write whatever is left over after the last full chunk
            if (!batchLines.isEmpty()) {
                writeSortedChunk(batchLines, new File(tempSortDir, "chunk_" + numFiles), comparator);
                numFiles++;
            }
            return numFiles;
        }
        catch (Exception ex) {
            LOG.fatal("Exception processing sort to temp files.", ex);
            throw new RuntimeException(ex);
        }
        finally {
            closeQuietly(inputFile);
        }
    }

    /**
     * Sorts {@code lines} in place and writes them, one per line, to {@code chunkFile}.
     *
     * @param lines the lines of one chunk; reordered by this call
     * @param chunkFile the file to create
     * @param comparator ordering applied to the lines
     * @throws IOException if the chunk file cannot be written
     */
    private static void writeSortedChunk(List<String> lines, File chunkFile, Comparator<String> comparator) throws IOException {
        Collections.sort(lines, comparator);

        BufferedWriter bw = new BufferedWriter(new FileWriter(chunkFile));
        try {
            for (String chunkLine : lines) {
                bw.append(chunkLine).append('\n');
            }
            // close explicitly so that a failure to flush the buffered data is reported
            bw.close();
        }
        finally {
            // has no effect after a successful close; releases the file handle after a failure
            closeQuietly(bw);
        }
    }

    /**
     * Merges the sorted chunk files {@code chunk_0} .. {@code chunk_(numFiles-1)} from
     * {@code tempSortDir} into {@code outputFileName}. All chunk files are open at the same time.
     * When lines from several chunks compare as equal, the line from the lowest-numbered chunk is
     * written first. The output file is created even when {@code numFiles} is zero.
     *
     * @param tempSortDir directory holding the chunk files
     * @param numFiles number of chunk files to merge
     * @param outputFileName path of the merged output file
     * @param comparator ordering the chunk files were sorted with
     * @throws RuntimeException if any error occurs while reading the chunks or writing the output
     */
    private static void mergeFiles(File tempSortDir, int numFiles, String outputFileName, Comparator<String> comparator) {
        List<BufferedReader> chunkReaders = new ArrayList<BufferedReader>(numFiles);
        BufferedWriter bw = null;
        try {
            bw = new BufferedWriter(new FileWriter(outputFileName));

            // the current (smallest unwritten) line of each chunk; null once that chunk is exhausted
            List<String> fileRows = new ArrayList<String>(numFiles);
            for (int i = 0; i < numFiles; i++) {
                BufferedReader reader = new BufferedReader(new FileReader(new File(tempSortDir, "chunk_" + i)));
                chunkReaders.add(reader);
                fileRows.add(reader.readLine());
            }

            boolean someFileStillHasRows = true;
            while (someFileStillHasRows) {
                // find the smallest current line; the strict comparison keeps the earliest chunk on ties
                String min = null;
                int minIndex = -1;
                for (int i = 0; i < fileRows.size(); i++) {
                    String candidate = fileRows.get(i);
                    if (candidate != null && (min == null || comparator.compare(candidate, min) < 0)) {
                        min = candidate;
                        minIndex = i;
                    }
                }

                if (minIndex < 0) {
                    // every chunk is exhausted
                    someFileStillHasRows = false;
                }
                else {
                    // write to the sorted file, then advance the chunk that supplied the line
                    bw.append(min).append('\n');
                    fileRows.set(minIndex, chunkReaders.get(minIndex).readLine());
                }
            }

            // close explicitly so that a failure to flush the buffered data is reported
            bw.close();
        }
        catch (Exception ex) {
            LOG.error("Exception merging the sorted files", ex);
            throw new RuntimeException("Exception merging the sorted files", ex);
        }
        finally {
            closeQuietly(bw);
            for (BufferedReader reader : chunkReaders) {
                closeQuietly(reader);
            }
        }
    }

    /**
     * Closes {@code resource} if it is not null. A failure to close is logged and not propagated, so
     * that cleanup in a {@code finally} block cannot hide the exception that caused it.
     *
     * @param resource the resource to close; may be null
     */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        }
        catch (IOException ex) {
            LOG.warn("Unable to close resource during batch sort cleanup", ex);
        }
    }
}