// ContentTransformer.java // --------------------------------- // (C) by Michael Peter Christen; mc@yacy.net // first published on http://www.anomic.de // Frankfurt, Germany, 2004 // // $LastChangedDate$ // $LastChangedRevision$ // $LastChangedBy$ // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA package net.yacy.document.parser.html; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.TreeSet; import net.yacy.cora.document.encoding.ASCII; import net.yacy.kelondro.io.CharBuffer; @Deprecated // TODO: delete candidate, because not in use, (noticed 2014-12-02) public class ContentTransformer extends AbstractTransformer implements Transformer { // statics: for initialization of the HTMLFilterAbstractTransformer private static final TreeSet<String> linkTags0 = new TreeSet<String>(ASCII.insensitiveASCIIComparator); private static final TreeSet<String> linkTags1 = new TreeSet<String>(ASCII.insensitiveASCIIComparator); static { linkTags0.add("img"); linkTags0.add("input"); linkTags1.add("a"); } private ArrayList<String> bluelist = null; public ContentTransformer() { super(linkTags0, linkTags1); } @Override public void init(final String initarg) { if (this.bluelist == null) { // here, the init arg is used to load a list of blue-listed words this.bluelist = new ArrayList<String>(); final File f = new File(initarg); if (f.canRead()) { try { final BufferedReader r = new BufferedReader(new FileReader(f)); String s; while ((s = r.readLine()) != null) { if (!s.isEmpty() && s.charAt(0) != '#') this.bluelist.add(s.toLowerCase()); } r.close(); } catch (final IOException e) { } // if (bluelist.isEmpty()) System.out.println("BLUELIST is empty"); } } } @Override public boolean isIdentityTransformer() { return this.bluelist.isEmpty(); } private static char[] genBlueLetters(int length) { final CharBuffer bb = new CharBuffer(ContentScraper.MAX_DOCSIZE, " <FONT COLOR=#0000FF>".toCharArray()); length = length / 2; if (length > 10) length = 7; while (length-- > 0) { bb.append('X'); } bb.append("</FONT> "); final char[] result = bb.getChars(); bb.close(); return result; } private boolean bluelistHit(final char[] text) { if (text == null || this.bluelist == null) return false; final String lc = new String(text).toLowerCase(); for (int i = 0; i < this.bluelist.size(); i++) { if (lc.indexOf(this.bluelist.get(i)) >= 0) return true; } return false; } @Override public char[] transformText(final char[] text) { if (this.bluelist != null) { if (bluelistHit(text)) { // System.out.println("FILTERHIT: " + text); return genBlueLetters(text.length); } return text; } return text; } @Override public char[] transformTag0(final ContentScraper.Tag tag, final char quotechar) { if (tag.name.equals("img")) { // check bluelist if (bluelistHit(tag.opts.getProperty("src", "").toCharArray())) return genBlueLetters(5); if (bluelistHit(tag.opts.getProperty("alt", "").toCharArray())) return genBlueLetters(5); // replace image alternative name tag.opts.setProperty("alt", new String(transformText(tag.opts.getProperty("alt", "").toCharArray()))); } if (tag.name.equals("input") && (tag.opts.getProperty("type") != null && tag.opts.getProperty("type").equals("submit"))) { // rewrite button name tag.opts.setProperty("value", new String(transformText(tag.opts.getProperty("value", "").toCharArray()))); } return TransformerWriter.genTag0(tag.name, tag.opts, quotechar); } @Override public char[] transformTag1(final ContentScraper.Tag tag, final char quotechar) { if (bluelistHit(tag.opts.getProperty("href","").toCharArray())) return genBlueLetters(tag.content.length()); if (bluelistHit(tag.content.getChars())) return genBlueLetters(tag.content.length()); return TransformerWriter.genTag1(tag.name, tag.opts, tag.content.getChars(), quotechar); } @Override public synchronized void close() { // free resources super.close(); } }