/* * Copyright 2008 Jeff Dwyer * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package com.apress.progwt.server.lucene; import java.io.BufferedReader; import java.io.IOException; import java.io.Reader; import java.io.StringReader; import org.apache.log4j.Logger; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.compass.core.converter.ConversionException; import org.htmlparser.Parser; import org.htmlparser.beans.StringBean; import org.htmlparser.lexer.Lexer; import org.htmlparser.util.ParserException; /** * NOTE: Unused. Created during some confusion over the role of compass * Converters vs Analyzers * * * @author Jeff Dwyer * */ public class HTMLAnalyzer extends StandardAnalyzer { private static final Logger log = Logger .getLogger(HTMLAnalyzer.class); @Override public TokenStream tokenStream(String name, Reader reader) { if (log.isDebugEnabled()) { TokenStream ts = super.tokenStream(name, htmlReaderFromReader(reader)); Token t; if (log.isDebugEnabled()) { try { while ((t = ts.next()) != null) { log.debug("token: " + t); } } catch (IOException e) { e.printStackTrace(); } } } return super.tokenStream(name, htmlReaderFromReader(reader)); } private Reader htmlReaderFromReader(Reader reader) throws ConversionException { StringBuilder stringBuilder = new StringBuilder(); try { BufferedReader br = new BufferedReader(reader); String s; while ((s = br.readLine()) != null) { stringBuilder.append(s); } Lexer l = new Lexer(stringBuilder.toString()); Parser parser = new Parser(l); StringBean sb = new StringBean(); parser.visitAllNodesWith(sb); String ret = sb.getStrings(); return new StringReader(ret); } catch (ParserException e) { log.warn("Conversion Exception: " + e); throw new ConversionException(e.getMessage()); } catch (IOException e2) { log.warn("Conversion Exception: " + e2); throw new ConversionException(e2.getMessage()); } } }