// This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
//
// TagSoup is licensed under the Apache License,
// Version 2.0. You may obtain a copy of this license at
// http://www.apache.org/licenses/LICENSE-2.0 . You may also have
// additional legal rights not granted by this license.
//
// TagSoup is distributed in the hope that it will be useful, but
// unless required by applicable law or agreed to in writing, TagSoup
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
// OF ANY KIND, either express or implied; not even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
//
// This file is part of TagSoup.
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version. You may also distribute
// and/or modify it under version 2.1 of the Academic Free License.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
//
//
// PYX Scanner

package org.ccil.cowan.tagsoup;

import java.io.*;
import org.xml.sax.SAXException;

/**
A Scanner that accepts PYX format instead of HTML.
Useful primarily for debugging.

<p>Input is read one line at a time; the first character of each line
selects the event reported to the ScanHandler:</p>
<ul>
<li><code>(</code> - element name, reported through <code>gi</code></li>
<li><code>)</code> - end tag, reported through <code>etag</code></li>
<li><code>?</code> - processing instruction, reported through <code>pi</code></li>
<li><code>A</code> - attribute "name value", reported through
    <code>aname</code> and <code>aval</code></li>
<li><code>-</code> - character data, reported through <code>pcdata</code></li>
<li><code>E</code> - entity, reported through <code>entity</code></li>
</ul>
<p>Lines starting with any other character, and empty lines, are ignored.</p>
**/
public class PYXScanner implements Scanner {

    /** Record types that terminate a start tag which is still open. */
    private static final String CLOSES_START_TAG = "()?-E";

    /**
    Does nothing.  This method is needed for interface compatibility,
    but note that PYXScanner does not implement Locator.
    @param publicid ignored
    @param systemid ignored
    **/
    public void resetDocumentLocator(String publicid, String systemid) {
        // Intentionally empty.
    }

    /**
    Reads PYX records from the reader and reports them to the handler.
    After the last line, <code>eof</code> is reported with a zero-length range.
    @param r source of PYX text; it is wrapped in a BufferedReader
    @param h handler that receives the scan events
    @throws IOException if reading from <code>r</code> fails
    @throws SAXException if the handler throws it
    **/
    public void scan(Reader r, ScanHandler h) throws IOException, SAXException {
        BufferedReader br = new BufferedReader(r);
        String s;
        // Start with an empty (non-null) buffer so that eof() never
        // receives null, even when the input has no lines at all.
        char[] buff = new char[0];
        // True between a '(' record and the next record that ends the start tag.
        boolean instag = false;
        while ((s = br.readLine()) != null) {
            int size = s.length();
            if (size == 0) {
                // An empty line has no record type; reading buff[0] here would
                // either fail or re-dispatch on the previous line's stale type.
                continue;
            }
            if (buff.length < size) {
                buff = new char[size];
            }
            s.getChars(0, size, buff, 0);
            char type = buff[0];

            // Every recognized record except an attribute ends a pending start tag.
            if (instag && CLOSES_START_TAG.indexOf(type) >= 0) {
                h.stagc(buff, 0, 0);
                instag = false;
            }

            switch (type) {
            case '(':
                h.gi(buff, 1, size - 1);
                instag = true;
                break;
            case ')':
                h.etag(buff, 1, size - 1);
                break;
            case '?':
                h.pi(buff, 1, size - 1);
                break;
            case 'A':
                int sp = s.indexOf(' ');
                if (sp < 0) {
                    // No separator: the rest of the line is the name
                    // and the value is empty.
                    h.aname(buff, 1, size - 1);
                    h.aval(buff, size, 0);
                }
                else {
                    h.aname(buff, 1, sp - 1);
                    h.aval(buff, sp + 1, size - sp - 1);
                }
                break;
            case '-':
                // Character data may carry \n, \t and \\ escapes.
                h.pcdata(buff, 1, decodeEscapes(buff, 1, size));
                break;
            case 'E':
                h.entity(buff, 1, size - 1);
                break;
            default:
                // Unrecognized record type: ignore the line.
                break;
            }
        }
        h.eof(buff, 0, 0);
    }

    /**
    Decodes the escapes <code>\n</code>, <code>\t</code> and <code>\\</code>
    in place within <code>buff[start..end)</code>.  A backslash followed by any
    other character, or a backslash at the very end, is kept unchanged.
    @param buff buffer holding the text; it is modified
    @param start index of the first character to decode
    @param end index one past the last character to decode
    @return number of decoded characters, stored from <code>start</code> onward
    **/
    private static int decodeEscapes(char[] buff, int start, int end) {
        int out = start;
        for (int in = start; in < end; in++) {
            char c = buff[in];
            if (c == '\\' && in + 1 < end) {
                switch (buff[in + 1]) {
                case 'n':
                    c = '\n';
                    in++;
                    break;
                case 't':
                    c = '\t';
                    in++;
                    break;
                case '\\':
                    // c is already a backslash; just consume the second one.
                    in++;
                    break;
                default:
                    break;
                }
            }
            // out never overtakes in, so unread input is never overwritten.
            buff[out++] = c;
        }
        return out - start;
    }

    /**
    Does nothing; this scanner has no CDATA mode.
    **/
    public void startCDATA() {
    }

    /**
    Command-line driver: scans PYX from standard input (decoded as UTF-8)
    and passes the events to a PYXWriter constructed on standard output
    (encoded as UTF-8).
    @param argv ignored
    @throws IOException if reading the input fails
    @throws SAXException if the handler throws it
    **/
    public static void main(String[] argv) throws IOException, SAXException {
        Scanner s = new PYXScanner();
        Reader r = new InputStreamReader(System.in, "UTF-8");
        Writer w = new BufferedWriter(new OutputStreamWriter(System.out, "UTF-8"));
        s.scan(r, new PYXWriter(w));
    }
}