package utilities; import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; import java.util.ArrayList; import java.util.regex.Pattern; import javax.swing.JOptionPane; import org.apache.commons.io.FilenameUtils; import com.itextpdf.text.pdf.PdfReader; import com.itextpdf.text.pdf.parser.PdfReaderContentParser; import com.itextpdf.text.pdf.parser.SimpleTextExtractionStrategy; import com.itextpdf.text.pdf.parser.TextExtractionStrategy; public class TextSucheInOrdner { public static String fileorfolder; static String searchedString; static PrintWriter outputfile; static int stringfound; static String extension; static int MAXIMAL_HITS = 50; // implemented non-context-sensitive for methods // searchforStringinSimpleFiles and searchforStringinPdfFiles public static void main(String args[]) throws IOException { stringfound = 0; try { fileorfolder = utilities.BrowserDialogs.chooseFileOrFolder(); if (fileorfolder != null) { searchedString = JOptionPane.showInputDialog(null, "Bitte geben Sie den gesuchten Text ein. Gross- und Kleinschreibung wird nicht beachtet", "Eingabemaske", JOptionPane.PLAIN_MESSAGE); if ((searchedString == null) || (searchedString.length() == 0)) { JOptionPane.showMessageDialog(null, "You have not typed in any text", "Misbehaviour. Program stopped.", JOptionPane.PLAIN_MESSAGE); } else { if (new File(fileorfolder).isDirectory()) { searchStringinFolder(fileorfolder); } else if (new File(fileorfolder).isFile()) { searchStringinFile(fileorfolder); } if (stringfound == 0) { JOptionPane.showMessageDialog(null, "The searched String was not found", "Findings", JOptionPane.PLAIN_MESSAGE); outputfile.println("<StringWasNotFound>"); outputfile.println("<OccuranceSearchedTest>" + stringfound + "</OccuranceSearchedTest>"); outputfile.println("</StringWasNotFound>"); } else if (stringfound == MAXIMAL_HITS) { JOptionPane.showMessageDialog(null, "The searched String was found at least " + MAXIMAL_HITS + " times. Search stopped", "Findings", JOptionPane.PLAIN_MESSAGE); outputfile.println("<SearchedStopped>Der gesuchte Text ist mehr als 50 Mal in den gesuchten Dateien vorhanden</SearchedStopped>"); } else { JOptionPane.showMessageDialog(null, "The searched String was found " + stringfound + " times", "Findings", JOptionPane.PLAIN_MESSAGE); outputfile.println("<OccuranceSearchedTest>" + stringfound + "</OccuranceSearchedTest>"); } outputfile.println("</Textsuche>"); outputfile.close(); } } } catch (FileNotFoundException e) { JOptionPane.showMessageDialog(null, "An exception occured " + e); } } public static void searchStringinFile(String file) throws IOException { StringBuilder stringBuilder = new StringBuilder(); String[] parts = file.split(Pattern.quote("\\")); for (int i = 0; i < parts.length - 1; i++) { stringBuilder.append(parts[i]); stringBuilder.append("//"); } String finalString = stringBuilder.toString(); outputfile = new PrintWriter(new FileWriter(finalString + "ErgebnisTextSuche" + ".xml")); String xmlVersion = "xml version='1.0'"; String xmlEncoding = "encoding='ISO-8859-1'"; String xsltStyleSheet = "<?xml-stylesheet type=\"text/xsl\" href=\"TextSucheTagStyle.xsl\"?>"; String xsltLocation = finalString + "TextSucheTagStyle.xsl"; output.XslStyleSheets.TextSucheCustomizedXsl(xsltLocation); outputfile.println("<?" + xmlVersion + " " + xmlEncoding + "?>"); outputfile.println(xsltStyleSheet); outputfile.println("<Textsuche>"); outputfile.println("<Datei>"); // outputfile.println("<Durchsucht =\"file\">" + fileorfolder + // "</Durchsucht>"); outputfile.println("<Durchsucht>" + fileorfolder + "</Durchsucht>"); outputfile.println("<Art>File</Art>"); outputfile.println("<AnzahlDateien>" + "1" + "</AnzahlDateien>"); outputfile.println("<GesuchterText>" + searchedString + "</GesuchterText>"); String filename = FilenameUtils.getBaseName(file); if (!filename.startsWith("~")) { extension = FilenameUtils.getExtension(file.toString()).toLowerCase(); System.out.println(extension); if ((extension.equals("txt")) || (extension.equals("java")) || (extension.equals("yml"))) { if (fileorfolder.length() != 0) /** * important because otherwise not yet closed outpufile causes * neverending story */ { // TODO: There is a big performance // problem with too large Txt-Files, e. // g. more than 500 KB or a certain no. // of lines. File examinedfile = new File(file); searchforStringinSimpleFiles(examinedfile); } } else if (extension.equals("pdf")) { File pdffile = new File(fileorfolder); searchforStringinPdfFiles(pdffile); } else { JOptionPane.showMessageDialog(null, "Search for String is not implemented yet for this kind of file format.", "Findings", JOptionPane.PLAIN_MESSAGE); return; // TODO: this is not very elegant and // has to be improved } } outputfile.println("</Datei>"); } public static void searchStringinFolder(String folder) throws IOException { outputfile = new PrintWriter(new FileWriter(folder + "//" + "ErgebnisTextSuche" + ".xml")); String xmlVersion = "xml version='1.0'"; String xmlEncoding = "encoding='ISO-8859-1'"; String xsltStyleSheet = "<?xml-stylesheet type=\"text/xsl\" href=\"TextSucheTagStyle.xsl\"?>"; String xsltLocation = folder + "//" + "TextSucheTagStyle.xsl"; output.XslStyleSheets.TextSucheCustomizedXsl(xsltLocation); outputfile.println("<?" + xmlVersion + " " + xmlEncoding + "?>"); outputfile.println(xsltStyleSheet); outputfile.println("<Textsuche>"); // outputfile.println("<Durchsucht = \"folder\">" + fileorfolder + // "</Durchsucht>"); outputfile.println("<Durchsucht>" + fileorfolder + "</Durchsucht>"); outputfile.println("<Art>Folder</Art>"); ArrayList<File> files = utilities.ListsFiles.getPaths(new File(folder), new ArrayList<File>()); int anzahlDateien = files.size(); outputfile.println("<AnzahlDateien>" + anzahlDateien + "</AnzahlDateien>"); outputfile.println("<GesuchterText>" + searchedString + "</GesuchterText>"); if (files != null) { for (int i = 0; i < files.size(); i++) { String filename = FilenameUtils.getBaseName(files.get(i).toString()); if (!filename.startsWith("~")) { outputfile.println("<Datei>"); extension = FilenameUtils.getExtension(files.get(i).toString()).toLowerCase(); if ((extension.equals("txt")) || (extension.equals("java")) || (extension.equals("yml"))) { // TODO: add more extensions that can be // searched by a simple BufferedReader searchforStringinSimpleFiles(files.get(i)); } else if (extension.equals("pdf")) { searchforStringinPdfFiles(files.get(i)); } else { outputfile.println("<FileExtension>" + extension + "</FileExtension>"); outputfile.println("<Dateiname>" + files.get(i).getName() + "</Dateiname>"); outputfile.println("<Suchergebnis>" + "nicht durchsucht" + "</Suchergebnis>"); } outputfile.println("</Datei>"); } } } } // TODO: Add other file formats, e. g. MS Word to // search for string there, too // else if "doc" // else if "xls" // else if "xlsx" // else if "ppt" // else if "xml" // else if "html" // else if "pptx" public static void searchforStringinPdfFiles(File file) throws IOException { outputfile.println("<Dateiname>" + (file.getName()) + "</Dateiname>"); int trefferinDatei; if (filetools.pdf.PdfAnalysis.testPdfOk(file)) { try { PdfReader reader = new PdfReader(file.toString()); int pagesPdf = reader.getNumberOfPages(); StringBuffer buff = new StringBuffer(); String ExtractedText = null; PdfReaderContentParser parser = new PdfReaderContentParser(reader); TextExtractionStrategy strategy; trefferinDatei = 0; for (int i = 1; i <= pagesPdf; i++) { strategy = parser.processContent(i, new SimpleTextExtractionStrategy()); ExtractedText = strategy.getResultantText().toString(); buff.append(ExtractedText + "\n"); String[] LinesArray = buff.toString().split("\n"); int linesPdf = LinesArray.length; for (int j = 0; (j < linesPdf && (stringfound < MAXIMAL_HITS)); j++) { String paragraph = LinesArray[j].toLowerCase(); String searchStringlowerCase = searchedString.toLowerCase(); if (paragraph.contains(searchStringlowerCase)) { trefferinDatei++; stringfound++; outputfile.println("<Seitenzahl>" + i + "</Seitenzahl>"); outputfile.println("<GanzeZeile>" + (LinesArray[j]) + "</GanzeZeile>"); } } } outputfile.println("<TextinDatei>" + trefferinDatei + "</TextinDatei>"); outputfile.println("<Suchergebnis>" + trefferinDatei + " x " + "</Suchergebnis>"); reader.close(); } catch (Exception e) { outputfile.println("<Fehlermeldung>" + e + "</Fehlermeldung>"); } } } public static void searchforStringinSimpleFiles(File file) throws IOException { if (file.length() != 0) /** * important because otherwise not yet closed outputfile causes * neverending story */ outputfile.println("<Dateiname>" + (file.getName()) + "</Dateiname>"); { // TODO: There is a big performance // problem with too large Txt-Files, // e. // g. more than 500 KB or a certain // no. // of lines. BufferedReader txtreader = new BufferedReader(new FileReader(file)); String line; int trefferinDatei = 0; while (null != (line = txtreader.readLine()) && (stringfound < MAXIMAL_HITS)) { String linelowercase = line.toLowerCase(); String searchStringlowerCase = searchedString.toLowerCase(); if (linelowercase.contains(searchStringlowerCase)) { trefferinDatei++; stringfound++; outputfile.println("<GanzeZeile>" + (line) + "</GanzeZeile>"); } } outputfile.println("<TextinDatei>" + trefferinDatei + "</TextinDatei>"); outputfile.println("<Suchergebnis>" + trefferinDatei + " x gefunden" + "</Suchergebnis>"); txtreader.close(); } } }