/*
* The contents of this file are subject to the Mozilla Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is the Kowari Metadata Store.
*
* The Initial Developer of the Original Code is Plugged In Software Pty
* Ltd (http://www.pisoftware.com, mailto:info@pisoftware.com). Portions
* created by Plugged In Software Pty Ltd are Copyright (C) 2001,2002
* Plugged In Software Pty Ltd. All Rights Reserved.
*
* Contributor(s): N/A.
*
* [NOTE: The text of this Exhibit A may differ slightly from the text
* of the notices in the Source Code files of the Original Code. You
* should use the text of this Exhibit A rather than the text found in the
* Original Code Source Code for Your Modifications.]
*
*/
package org.mulgara.util.conversion.html;
// Java 2 standard packages
import java.io.*;
import java.util.*;
/**
* Tests the {@link HtmlToTextConverter}.
*
* @created 2002-08-01
*
* @author Ben Warren
*
* @version $Revision: 1.9 $
*
* @modified $Date: 2005/01/05 04:59:30 $
*
* @maintenanceAuthor $Author: newmana $
*
* @company <A href="mailto:info@PIsoftware.com">Plugged In Software</A>
*
* @copyright ©2002 <a href="http://www.pisoftware.com/">Plugged In
* Software Pty Ltd</a>
*
* @licence <a href="{@docRoot}/../../LICENCE">Mozilla Public License v1.1</a>
*/
public class HtmlToTextConverterTester {

  /** Command line usage, reported when the arguments are missing. */
  private static final String USAGE =
      "Usage: HtmlToTextConverterTester <html-file> | -dir <directory>";

  /**
   * Runs the {@link HtmlToTextConverter} over one HTML file, or over every
   * entry of a directory, printing the converted text to standard output.
   *
   * <p>Accepted argument forms:</p>
   * <ul>
   * <li><code>&lt;html-file&gt;</code> - convert a single file;</li>
   * <li><code>-dir &lt;directory&gt;</code> - convert each entry of the
   * directory in sorted name order, echoing each entry name to standard
   * error before it is converted. Sub-directories are skipped.</li>
   * </ul>
   *
   * @param args The command line args.
   * @throws IllegalArgumentException if no file is given, or if
   *   <code>-dir</code> is given without a directory.
   * @throws IOException if the directory given with <code>-dir</code> cannot
   *   be listed.
   * @throws Exception on any other error.
   */
  public static void main(String[] args) throws Exception {

    // Fail with a usage message rather than an ArrayIndexOutOfBoundsException
    if (args == null || args.length == 0) {
      throw new IllegalArgumentException(USAGE);
    }

    boolean directoryMode = "-dir".equals(args[0]);

    if (directoryMode && args.length < 2) {
      throw new IllegalArgumentException(USAGE);
    }

    // Use normal spaces instead of non-breaking unicode spaces
    HtmlToTextConverter.setUseNormalSpace(true);

    // Don't include titles
    HtmlToTextConverter.setIncludeTitle(false);

    // Don't include image alts
    HtmlToTextConverter.setIncludeImageAlts(false);

    // Directory
    if (directoryMode) {

      File directory = new File(args[1]);
      String[] files = directory.list();

      // File.list() returns null if the path is not a directory or an I/O
      // error occurs; report that instead of failing inside Arrays.sort()
      if (files == null) {
        throw new IOException("Cannot list directory: " +
            directory.getAbsolutePath());
      }

      Arrays.sort(files);

      for (int i = 0; i < files.length; i++) {

        File file = new File(directory, files[i]);

        // A sub-directory cannot be opened by FileReader and would abort
        // the whole run, so leave it out
        if (file.isDirectory()) {
          continue;
        }

        System.err.println(files[i]);
        convertFile(file);
      }
    }
    // One file
    else {
      convertFile(new File(args[0]));
    }
  }

  /**
   * Convert a HTML file to text and print the result to standard output,
   * preceded by a banner naming the file and followed by a separator line.
   *
   * <p>The file is read line by line using the platform default character
   * encoding, and each line is terminated with a single <code>\n</code>
   * before the content is handed to the converter.</p>
   *
   * @param file The file to convert.
   * @throws Exception on error.
   */
  private static void convertFile(File file) throws Exception {

    // Convert file to a string
    StringBuffer lines = new StringBuffer();
    BufferedReader reader = new BufferedReader(new FileReader(file));

    try {
      for (String line = reader.readLine(); line != null;
          line = reader.readLine()) {
        lines.append(line).append('\n');
      }
    }
    finally {
      // Always release the file handle, even if reading fails part way
      reader.close();
    }

    System.out.println("\nConverting file " + file.getAbsolutePath() + "\n\n");
    System.out.println(HtmlToTextConverter.convert(lines.toString()));
    System.out.println("======================================================" +
        "=======================================================\n");
  }
}