/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloudera.knittingboar.utils;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.zip.GZIPInputStream;
import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveStreamFactory;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.mahout.math.Vector;
/**
 * Static helpers for unpacking tar / gzip archives and for dumping Mahout
 * {@link Vector} contents to standard output for debugging.
 */
public class Utils {

  /** File-name suffix that marks a gzip-compressed file. */
  private static final String GZIP_SUFFIX = ".gz";

  /**
   * Untars the {@code .tar} file that sits next to the given {@code .gz} file.
   * <p>
   * The tar path is derived from {@code inputFile}'s path by dropping a
   * trailing {@code .gz}. NOTE: the gunzip step is currently disabled (see the
   * commented-out call below), so this method relies on the {@code .tar} file
   * already being present on disk.
   *
   * @param inputFile
   *          the {@code .tar.gz} file whose path identifies the tar archive
   * @param outputDir
   *          the directory the archive entries are extracted into
   * @throws FileNotFoundException
   *           if the tar file cannot be opened or an output file cannot be
   *           created
   * @throws IOException
   *           if reading the archive or writing an entry fails
   * @throws ArchiveException
   *           if the tar archive stream cannot be created
   */
  public static void UnTarAndZipGZFile(final File inputFile,
      final File outputDir) throws FileNotFoundException, IOException,
      ArchiveException {
    System.out.println("Path: " + inputFile.getParent());
    // unGzip(inputFile, new File(inputFile.getParent()));

    // Strip only a literal trailing ".gz". A regex-based replaceFirst(".gz", "")
    // would treat '.' as a wildcard and hit the first match anywhere in the
    // path (e.g. the "igz" inside a directory named "bigzip").
    final String gzPath = inputFile.getPath();
    final String tarPath = gzPath.endsWith(GZIP_SUFFIX)
        ? gzPath.substring(0, gzPath.length() - GZIP_SUFFIX.length())
        : gzPath;
    System.out.println("Tar File: " + tarPath);
    unTar(new File(tarPath), outputDir);
    // now cleanup tmp .tar file
  }

  /**
   * Extracts every entry of a tar archive below the given output directory.
   * <p>
   * Directory entries are created if they do not exist yet; file entries are
   * written to a file named after the entry, and any missing parent
   * directories are created first. Entries whose name would resolve to a
   * location outside {@code outputDir} are rejected.
   *
   * @param inputFile
   *          the input .tar file
   * @param outputDir
   *          the output directory file.
   * @return The {@link List} of {@link File}s with the untared content, in
   *         archive order (directories included).
   * @throws FileNotFoundException
   *           if the archive cannot be opened or an output file cannot be
   *           created
   * @throws IOException
   *           if reading or writing fails, or an entry would be written
   *           outside {@code outputDir}
   * @throws ArchiveException
   *           if the tar archive stream cannot be created
   * @throws IllegalStateException
   *           if a required directory cannot be created
   */
  private static List<File> unTar(final File inputFile, final File outputDir)
      throws FileNotFoundException, IOException, ArchiveException {
    System.out.println(String.format("Untaring %s to dir %s.", inputFile
        .getAbsolutePath(), outputDir.getAbsolutePath()));

    final List<File> untaredFiles = new LinkedList<File>();
    final String canonicalOutputDir = outputDir.getCanonicalPath();
    final InputStream is = new FileInputStream(inputFile);
    try {
      final TarArchiveInputStream tarIn = (TarArchiveInputStream) new ArchiveStreamFactory()
          .createArchiveInputStream("tar", is);
      TarArchiveEntry entry = null;
      while ((entry = (TarArchiveEntry) tarIn.getNextEntry()) != null) {
        final File outputFile = new File(outputDir, entry.getName());
        ensureInsideDirectory(outputFile, canonicalOutputDir, entry.getName());

        if (entry.isDirectory()) {
          System.out.println(String.format(
              "Attempting to write output directory %s.", outputFile
                  .getAbsolutePath()));
          if (!outputFile.exists()) {
            System.out.println(String.format(
                "Attempting to create output directory %s.", outputFile
                    .getAbsolutePath()));
            if (!outputFile.mkdirs()) {
              throw new IllegalStateException(String.format(
                  "Couldn't create directory %s.", outputFile.getAbsolutePath()));
            }
          }
        } else {
          System.out.println(String.format("Creating output file %s.", outputFile
              .getAbsolutePath()));
          // An archive may list a file without a preceding entry for its
          // directory, so make sure the parent exists before opening the file.
          final File parent = outputFile.getParentFile();
          if (parent != null && !parent.isDirectory() && !parent.mkdirs()) {
            throw new IllegalStateException(String.format(
                "Couldn't create directory %s.", parent.getAbsolutePath()));
          }
          final OutputStream outputFileStream = new FileOutputStream(outputFile);
          try {
            IOUtils.copy(tarIn, outputFileStream);
          } finally {
            // Close even when the copy fails so the file handle is not leaked.
            outputFileStream.close();
          }
        }
        untaredFiles.add(outputFile);
      }
    } finally {
      // Releases the underlying file handle on both success and failure.
      is.close();
    }
    return untaredFiles;
  }

  /**
   * Verifies that an extraction target resolves to the output directory itself
   * or to a location beneath it.
   *
   * @param target
   *          the file an archive entry would be written to
   * @param canonicalOutputDir
   *          canonical path of the extraction directory
   * @param entryName
   *          the entry name as stored in the archive, used for the error
   *          message
   * @throws IOException
   *           if the target lies outside the output directory or its canonical
   *           path cannot be resolved
   */
  private static void ensureInsideDirectory(final File target,
      final String canonicalOutputDir, final String entryName)
      throws IOException {
    final String canonicalTarget = target.getCanonicalPath();
    // Compare against "dir + separator" so that a sibling such as "/out-evil"
    // is not mistaken for a child of "/out".
    final String dirPrefix = canonicalOutputDir.endsWith(File.separator)
        ? canonicalOutputDir
        : canonicalOutputDir + File.separator;
    if (!canonicalTarget.equals(canonicalOutputDir)
        && !canonicalTarget.startsWith(dirPrefix)) {
      throw new IOException(String.format(
          "Archive entry %s would be extracted outside of %s.", entryName,
          canonicalOutputDir));
    }
  }

  /**
   * Ungzip an input file into an output file.
   * <p>
   * The output file is created in the output folder, having the same name as
   * the input file with its last three characters (the '.gz' extension)
   * removed.
   *
   * @param inputFile
   *          the input .gz file
   * @param outputDir
   *          the output directory file.
   * @return The {@link File} with the ungzipped content.
   * @throws FileNotFoundException
   *           if the input cannot be opened or the output cannot be created
   * @throws IOException
   *           if decompressing or writing fails
   * @throws IllegalArgumentException
   *           if the input file name is too short to carry a '.gz' extension
   */
  private static File unGzip(final File inputFile, final File outputDir)
      throws FileNotFoundException, IOException {
    System.out.println(String.format("Ungzipping %s to dir %s.", inputFile
        .getAbsolutePath(), outputDir.getAbsolutePath()));

    final String inputName = inputFile.getName();
    if (inputName.length() <= GZIP_SUFFIX.length()) {
      // Stripping the extension would leave an empty (or negative-length) name.
      throw new IllegalArgumentException(String.format(
          "File name %s is too short to strip a %s extension.", inputName,
          GZIP_SUFFIX));
    }
    final File outputFile = new File(outputDir, inputName.substring(0,
        inputName.length() - GZIP_SUFFIX.length()));

    final InputStream rawIn = new FileInputStream(inputFile);
    try {
      final GZIPInputStream gzIn = new GZIPInputStream(rawIn);
      try {
        final OutputStream out = new FileOutputStream(outputFile);
        try {
          // Block-wise copy instead of one read()/write() call per byte.
          IOUtils.copy(gzIn, out);
        } finally {
          out.close();
        }
      } finally {
        gzIn.close();
      }
    } finally {
      // Also covers the case where the GZIPInputStream constructor itself
      // throws (e.g. the file is not gzip data).
      rawIn.close();
    }
    return outputFile;
  }

  /**
   * Prints every element value of the vector on one line, in the form
   * {@code " v0, v1, v2"}, followed by a line break.
   *
   * @param v
   *          the vector to print
   */
  public static void PrintVector(Vector v) {
    printElements(v.iterator(), Integer.MAX_VALUE);
    System.out.println("");
  }

  /**
   * Prints at most {@code num} leading element values of the vector on one
   * line, followed by the marker {@code " ######## "} and a line break.
   *
   * @param v
   *          the vector to print
   * @param num
   *          maximum number of elements to print; values of zero or less
   *          print no elements
   */
  public static void PrintVectorSection(Vector v, int num) {
    printElements(v.iterator(), num);
    System.out.println(" ######## ");
  }

  /**
   * Prints every element value returned by the vector's non-zero iterator on
   * one line, followed by a line break.
   *
   * @param v
   *          the vector to print
   */
  public static void PrintVectorNonZero(Vector v) {
    printElements(v.iterateNonZero(), Integer.MAX_VALUE);
    System.out.println("");
  }

  /**
   * Prints at most {@code size} element values returned by the vector's
   * non-zero iterator on one line, followed by a line break.
   *
   * @param v
   *          the vector to print
   * @param size
   *          maximum number of elements to print; values of zero or less
   *          print no elements
   */
  public static void PrintVectorSectionNonZero(Vector v, int size) {
    printElements(v.iterateNonZero(), size);
    System.out.println("");
  }

  /**
   * Writes up to {@code limit} element values to standard output, each one
   * preceded by a space and separated from the previous one by a comma. No
   * line break is written.
   *
   * @param elements
   *          source of the elements to print
   * @param limit
   *          maximum number of elements to print
   */
  private static void printElements(final Iterator<Vector.Element> elements,
      final int limit) {
    int printed = 0;
    // Check the limit before consuming an element so exactly 'limit' values
    // are printed at most.
    while (printed < limit && elements.hasNext()) {
      final Vector.Element element = elements.next();
      if (printed > 0) {
        System.out.print(",");
      }
      System.out.print(" " + element.get());
      printed++;
    }
  }
}