/*
 * Encog(tm) Core v2.5 - Java Version
 * http://www.heatonresearch.com/encog/
 * http://code.google.com/p/encog-java/
 * Copyright 2008-2010 Heaton Research, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * For more information on Heaton Research copyrights, licenses
 * and trademarks visit:
 * http://www.heatonresearch.com/copyright
 */
package org.encog.bot.browse.range;

import java.util.ArrayList;
import java.util.List;

import org.encog.bot.browse.WebPage;
import org.encog.bot.dataunit.DataUnit;
import org.encog.bot.dataunit.TextDataUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Base class that represents a document range. A document range is a collection
 * of tags that all apply to one "concept". For example, a Form, or a Link. This
 * allows the form to collect the elements inside the form, or a link to collect
 * the text along with the link tag.
 *
 * @author jheaton
 *
 */
public class DocumentRange {

    /**
     * The logger. Held statically so that it is looked up once per class
     * rather than once for every range that is constructed.
     */
    @SuppressWarnings("unused")
    private static final Logger logger = LoggerFactory
            .getLogger(DocumentRange.class);

    /**
     * The beginning index for this range.
     */
    private int begin;

    /**
     * The ending index for this range.
     */
    private int end;

    /**
     * The source page for this range.
     */
    private WebPage source;

    /**
     * The id attribute, on the source tag. Useful for DIV tags.
     */
    private String idAttribute;

    /**
     * The class attribute, on the source tag.
     */
    private String classAttribute;

    /**
     * Sub elements of this range.
     */
    private final List<DocumentRange> elements = new ArrayList<DocumentRange>();

    /**
     * The parent to this range, or null if top.
     */
    private DocumentRange parent;

    /**
     * Construct a document range from the specified WebPage.
     *
     * @param source
     *            The web page that this range belongs to.
     */
    public DocumentRange(final WebPage source) {
        this.source = source;
    }

    /**
     * Add an element as a child of this range and make this range its
     * parent.
     *
     * @param element
     *            The element to add, must not be null.
     * @throws NullPointerException
     *             If the element is null. The list of elements is left
     *             unchanged in that case.
     */
    public void addElement(final DocumentRange element) {
        // Reject null before touching the list, so a failed call can never
        // leave a null entry behind in the child elements.
        if (element == null) {
            throw new NullPointerException("element");
        }
        this.elements.add(element);
        element.setParent(this);
    }

    /**
     * @return The beginning index.
     */
    public int getBegin() {
        return this.begin;
    }

    /**
     * @return the classAttribute
     */
    public String getClassAttribute() {
        return this.classAttribute;
    }

    /**
     * @return The elements of this document range. This is the live
     *         internal list, not a copy.
     */
    public List<DocumentRange> getElements() {
        return this.elements;
    }

    /**
     * @return The ending index.
     */
    public int getEnd() {
        return this.end;
    }

    /**
     * @return the idAttribute
     */
    public String getIdAttribute() {
        return this.idAttribute;
    }

    /**
     * @return The range that is the parent of this range, or null if this
     *         range has no parent.
     */
    public DocumentRange getParent() {
        return this.parent;
    }

    /**
     * @return The web page that this range is owned by.
     */
    public WebPage getSource() {
        return this.source;
    }

    /**
     * Get the text from this range. The data units of the source page are
     * scanned from the beginning index (inclusive) to the ending index
     * (exclusive); the string form of every {@link TextDataUnit} found is
     * appended to the result, followed by a newline. All other data units
     * are skipped.
     *
     * @return The text from this range, or an empty string if there is no
     *         source page or no text in the range.
     */
    public String getTextOnly() {
        // Without a source page there is nothing to read. Returning an empty
        // string keeps toString() from throwing on such a range.
        if (this.source == null) {
            return "";
        }

        final StringBuilder result = new StringBuilder();

        for (int i = getBegin(); i < getEnd(); i++) {
            final DataUnit du = this.source.getData().get(i);
            if (du instanceof TextDataUnit) {
                result.append(du.toString());
                result.append("\n");
            }
        }

        return result.toString();
    }

    /**
     * Set the beginning index.
     *
     * @param begin
     *            The beginning index.
     */
    public void setBegin(final int begin) {
        this.begin = begin;
    }

    /**
     * @param classAttribute
     *            the classAttribute to set
     */
    public void setClassAttribute(final String classAttribute) {
        this.classAttribute = classAttribute;
    }

    /**
     * Set the ending index.
     *
     * @param end
     *            The ending index.
     */
    public void setEnd(final int end) {
        this.end = end;
    }

    /**
     * @param idAttribute
     *            the idAttribute to set
     */
    public void setIdAttribute(final String idAttribute) {
        this.idAttribute = idAttribute;
    }

    /**
     * Set the parent.
     *
     * @param parent
     *            The parent.
     */
    public void setParent(final DocumentRange parent) {
        this.parent = parent;
    }

    /**
     * Set the source web page.
     *
     * @param source
     *            The source web page.
     */
    public void setSource(final WebPage source) {
        this.source = source;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public String toString() {
        return getTextOnly();
    }

}