package org.bygle.utils.io;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.LinkedHashSet;
import java.util.TreeMap;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.helpers.DefaultHandler;
public class FileInfoReader {
private static int kDEFAULT_CHUNK_SIZE = 256;
public static String extractText(InputStream input) throws IOException {
String result = "";
String resultSplitted = "";
String words[] = null;
LinkedHashSet<String> uniqueWords = null;
try {
ContentHandler handler = new BodyContentHandler(10 * 1024 * 1024);
Metadata metadata = new Metadata();
Parser parser = new AutoDetectParser();
parser.parse(input, handler, metadata, new ParseContext());
result = handler.toString();
uniqueWords = new LinkedHashSet<String>();
words = result.split("[\\W]+");
for (int i = 0; i < words.length; i++) {
uniqueWords.add(words[i].toLowerCase());
}
int i = 0;
for (String unique : uniqueWords) {
if (((1 + i) % 100) == 0) {
resultSplitted += unique + ",\n";
} else
resultSplitted += unique + ", ";
i++;
}
} catch (Exception e) {
e.printStackTrace();
} finally {
if (input != null)
input.close();
}
return stripWhiteSpace(resultSplitted);
}
public static String stripWhiteSpace(String in) {
StringBuilder out = new StringBuilder();
char current;
if (in == null || ("".equals(in)))
return "";
for (int i = 0; i < in.length(); i++) {
current = in.charAt(i);
if ((current == 0x9) || (current == 0xA) || (current == 0xD) || ((current >= 0x20) && (current <= 0xD7FF)) || ((current >= 0xE000) && (current <= 0xFFFD)) || ((current >= 0x10000) && (current <= 0x10FFFF)))
out.append(current);
}
return out.toString();
}
public static String extractMD5(InputStream input) throws IOException {
String result = "";
try {
result = DigestUtils.md5Hex(input);
} catch (Exception e) {
e.printStackTrace();
} finally {
if (input != null)
input.close();
}
return result;
}
public static TreeMap<String, String> extractMetaData(InputStream input) throws IOException {
TreeMap<String, String> treeMap = new TreeMap<String, String>();
try {
ContentHandler handler = new DefaultHandler();
Metadata metadata = new Metadata();
Parser parser = new AutoDetectParser();
parser.parse(input, handler, metadata, new ParseContext());
for (int i = 0; i < metadata.names().length; i++) {
String name = metadata.names()[i];
treeMap.put(name, stripWhiteSpace(metadata.get(name)));
}
} catch (Exception e) {
e.printStackTrace();
} finally {
if (input != null)
input.close();
}
return treeMap;
}
public static String extractStringMetaData(InputStream input) throws IOException {
String result="";
try {
ContentHandler handler = new DefaultHandler();
Metadata metadata = new Metadata();
Parser parser = new AutoDetectParser();
parser.parse(input, handler, metadata, new ParseContext());
for (int i = 0; i < metadata.names().length; i++) {
String name = metadata.names()[i];
result+=name.toUpperCase()+" : "+stripWhiteSpace(metadata.get(name))+"\n";
}
} catch (Exception e) {
} finally {
if (input != null)
input.close();
}
return result;
}
public static String getMetaData(InputStream input, String metaData) throws IOException {
String result = "";
try {
ContentHandler handler = new DefaultHandler();
Metadata metadata = new Metadata();
Parser parser = new AutoDetectParser();
parser.parse(input, handler, metadata, new ParseContext());
result = metadata.get(metaData);
} catch (Exception e) {
} finally {
if (input != null)
input.close();
}
return result;
}
public static byte[] loadBytesFromURL(URL url) throws Exception {
byte[] b = null;
URLConnection con = url.openConnection();
int size = con.getContentLength();
InputStream in = null;
try {
if ((in = con.getInputStream()) != null)
b = (size != -1) ? loadBytesFromStreamForSize(in, size) : loadBytesFromStream(in);
} finally {
if (in != null)
try {
in.close();
} catch (IOException ioe) {
}
}
return b;
}
private static byte[] loadBytesFromStream(InputStream in) throws IOException {
return loadBytesFromStream(in, kDEFAULT_CHUNK_SIZE);
}
private static byte[] loadBytesFromStreamForSize(InputStream in, int size) throws IOException {
int count, index = 0;
byte[] b = new byte[size];
while ((count = in.read(b, index, size)) > 0) {
size -= count;
index += count;
}
return b;
}
private static byte[] loadBytesFromStream(InputStream in, int chunkSize) throws IOException {
if (chunkSize < 1)
chunkSize = kDEFAULT_CHUNK_SIZE;
int count;
ByteArrayOutputStream bo = new ByteArrayOutputStream();
byte[] b = new byte[chunkSize];
try {
while ((count = in.read(b, 0, chunkSize)) > 0)
bo.write(b, 0, count);
byte[] thebytes = bo.toByteArray();
return thebytes;
} finally {
bo.close();
bo = null;
}
}
public static String getFileSizeString(long sizeInBytes){
String result="";
double bytes = sizeInBytes;
double kilobytes = Math.round((bytes / 1024));
double megabytes = Math.round((kilobytes / 1024));
double gigabytes = Math.round((megabytes / 1024));
double terabytes = Math.round((gigabytes / 1024));
if(terabytes>=1)
result = StringUtils.substringBeforeLast(Double.toString(terabytes), ".")+" TB";
else if(gigabytes>=1)
result = StringUtils.substringBeforeLast(Double.toString(gigabytes), ".")+" GB";
else if(megabytes>=1)
result = StringUtils.substringBeforeLast(Double.toString(megabytes), ".")+" MB";
else if(kilobytes>=1)
result = StringUtils.substringBeforeLast(Double.toString(kilobytes), ".")+" KB";
else
result = StringUtils.substringBeforeLast(Double.toString(bytes),".")+" byte";
return result;
}
public static void main(String args[]) throws Exception {
try {
// byte[] bytes = FileInfoReader.loadBytesFromURL(new URL("http://www.repubblica.it"));
// InputStream input = new ByteArrayInputStream(bytes);
// System.out.println(FileInfoReader.extractMetaData(input).toString());
InputStream input = new FileInputStream(new File("C:\\Users\\sandro.REGESTAEXE\\Desktop\\usa.JPG"));
//System.out.println(FileInfoReader.extractMetaData(input).toString());
System.out.println(FileInfoReader.extractStringMetaData(input));
// String[] words = null;
// String test1 = "";
// LinkedHashSet<String> uniqueWords = null;
// File f = new File("/home/diego/Scaricati/D13104.pdf");
// InputStream input = new FileInputStream(f);
// System.out.println(FileInfoReader.extractMetaData(input).toString());
// input = new FileInputStream(f);
// try {
//
// String text = FileInfoReader.extractText(input);
// uniqueWords = new LinkedHashSet<String>();
// words = text.split("[\\W]+");
// System.err.println(words.length);
// for (int i = 0; i < words.length; i++) {
// if ((i % 100) == (i / 100))
// uniqueWords.add(words[i] + ",\n");
// else
// uniqueWords.add(words[i] + ",");
//
// }
// } catch (IOException e) {
// System.out.println("intercettata");
// }
//
// for (String test : uniqueWords) {
//
// test1 += test;
//
// }
//
// input = new FileInputStream(f);
// System.out.println(FileInfoReader.extractMD5(input));
// input = new FileInputStream(f);
// System.out.println(FileInfoReader.getMetaData(input,
// "Content-Type"));
/*
* byte[] bytes = FileInfoReader.loadBytesFromURL(new URL(
* "http://www.salute.gov.it/imgs/C_17_pubblicazioni_605_allegato.pdf"
* )); InputStream input = new ByteArrayInputStream(bytes);
* System.out
* .println(FileInfoReader.extractMetaData(input).toString()); input
* = new ByteArrayInputStream(bytes);
* System.out.println(FileInfoReader.extractText(input)); input =
* new ByteArrayInputStream(bytes);
* System.out.println(FileInfoReader.extractMD5(input)); input = new
* ByteArrayInputStream(bytes);
* System.out.println(FileInfoReader.getMetaData
* (input,"Content-Type"));
*/
} catch (IOException e) {
e.printStackTrace();
}
}
}