/*
* Copyright (c) 2008 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.dmozdl;
import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.security.NoSuchAlgorithmException;
import java.util.Map;
import java.util.zip.GZIPInputStream;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
public class DmozDriver {
/**
*/
public static void main(String[] args) throws Exception {
SAXParserFactory factory = SAXParserFactory.newInstance();
factory.setNamespaceAware(true);
factory.setValidating(false);
XMLReader parser = factory.newSAXParser().getXMLReader();
DmozHandler domzHandler = new DmozHandler();
parser.setContentHandler(domzHandler);
parser.setFeature("http://xml.org/sax/features/string-interning", true);
parser.setEntityResolver(new EntityResolver() {
public InputSource resolveEntity(String publicId, String systemId)
throws SAXException, IOException {
InputSource is = new InputSource(new ByteArrayInputStream(new byte[0]));
is.setPublicId(publicId);
is.setSystemId(systemId);
return is;
}
});
parser.setErrorHandler(new ErrorHandler() {
public void error(SAXParseException exception) throws SAXException {
}
public void fatalError(SAXParseException exception)
throws SAXException {
}
public void warning(SAXParseException exception)
throws SAXException {
}});
InputSource is = new InputSource(new GZIPInputStream(new FileInputStream(args[0])));
parser.parse(is);
Writer out = new OutputStreamWriter(new FileOutputStream(args[1]), "utf-8");
for (Map.Entry<String, String> entry : domzHandler.getTheMap().entrySet()) {
out.write(entry.getKey());
out.write('\t');
out.write(entry.getValue());
out.write('\n');
}
}
}