/*
 * Encog(tm) Core v3.4 - Java Version
 * http://www.heatonresearch.com/encog/
 * https://github.com/encog/encog-java-core
 * Copyright 2008-2016 Heaton Research, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * For more information on Heaton Research copyrights, licenses
 * and trademarks visit:
 * http://www.heatonresearch.com/copyright
 */
package org.encog.bot.browse.range;

import java.util.ArrayList;
import java.util.List;

import org.encog.bot.browse.WebPage;
import org.encog.bot.dataunit.DataUnit;
import org.encog.bot.dataunit.TextDataUnit;

/**
 * Base class that represents a document range. A document range is a collection
 * of tags that all apply to one "concept". For example, a Form, or a Link. This
 * allows the form to collect the elements inside the form, or a link to collect
 * the text along with the link tag.
 *
 * A range is described by a pair of indexes ({@code begin} and {@code end})
 * into the data of its source {@link WebPage}, and may own child ranges.
 *
 * @author jheaton
 *
 */
public class DocumentRange {

    /**
     * The beginning index for this range. {@link #getTextOnly()} treats it as
     * inclusive.
     */
    private int begin;

    /**
     * The ending index for this range. {@link #getTextOnly()} treats it as
     * exclusive.
     */
    private int end;

    /**
     * The source page for this range.
     */
    private WebPage source;

    /**
     * The id attribute, on the source tag. Useful for DIV tags.
     */
    private String idAttribute;

    /**
     * The class attribute, on the source tag.
     */
    private String classAttribute;

    /**
     * Sub elements of this range.
     */
    private final List<DocumentRange> elements = new ArrayList<DocumentRange>();

    /**
     * The parent to this range, or null if top.
     */
    private DocumentRange parent;

    /**
     * Construct a document range from the specified WebPage.
     *
     * @param theSource
     *            The web page that this range belongs to.
     */
    public DocumentRange(final WebPage theSource) {
        this.source = theSource;
    }

    /**
     * Add a child element to this range and make this range its parent.
     *
     * @param element
     *            The element to add; must not be null.
     * @throws NullPointerException
     *             If {@code element} is null. The child list is left
     *             untouched in that case.
     */
    public final void addElement(final DocumentRange element) {
        // Validate before mutating: the backing ArrayList accepts null, so
        // adding first would leave a null child behind when the subsequent
        // setParent call failed.
        if (element == null) {
            throw new NullPointerException("element must not be null");
        }
        this.elements.add(element);
        element.setParent(this);
    }

    /**
     * @return The beginning index.
     */
    public final int getBegin() {
        return this.begin;
    }

    /**
     * @return the classAttribute
     */
    public final String getClassAttribute() {
        return this.classAttribute;
    }

    /**
     * Get the child ranges. The returned list is the internal list itself,
     * not a copy, so changes made through it are visible to this range.
     *
     * @return The elements of this document range.
     */
    public final List<DocumentRange> getElements() {
        return this.elements;
    }

    /**
     * @return The ending index.
     */
    public final int getEnd() {
        return this.end;
    }

    /**
     * @return the idAttribute
     */
    public final String getIdAttribute() {
        return this.idAttribute;
    }

    /**
     * @return The parent of this range, or null if this range has no parent.
     */
    public final DocumentRange getParent() {
        return this.parent;
    }

    /**
     * @return The web page that this range is owned by.
     */
    public final WebPage getSource() {
        return this.source;
    }

    /**
     * Get the text from this range. Walks the source page's data from the
     * beginning index (inclusive) to the ending index (exclusive) and
     * concatenates the string form of every {@link TextDataUnit} found, each
     * followed by a newline. Non-text data units are skipped.
     *
     * @return The text from this range; empty if the range holds no text
     *         units or if begin is not less than end.
     */
    public final String getTextOnly() {
        final StringBuilder result = new StringBuilder();

        for (int i = getBegin(); i < getEnd(); i++) {
            final DataUnit du = this.source.getData().get(i);
            // Only text contributes; anything else in the range is ignored.
            if (du instanceof TextDataUnit) {
                result.append(du.toString());
                result.append("\n");
            }
        }

        return result.toString();
    }

    /**
     * Set the beginning index.
     *
     * @param theBegin
     *            The beginning index.
     */
    public final void setBegin(final int theBegin) {
        this.begin = theBegin;
    }

    /**
     * @param theClassAttribute
     *            the classAttribute to set
     */
    public final void setClassAttribute(final String theClassAttribute) {
        this.classAttribute = theClassAttribute;
    }

    /**
     * Set the ending index.
     *
     * @param theEnd
     *            The ending index.
     */
    public final void setEnd(final int theEnd) {
        this.end = theEnd;
    }

    /**
     * @param id
     *            the idAttribute to set
     */
    public final void setIdAttribute(final String id) {
        this.idAttribute = id;
    }

    /**
     * Set the parent. This only records the reference; it does not add this
     * range to the parent's element list (use
     * {@link #addElement(DocumentRange)} on the parent for that).
     *
     * @param theParent
     *            The parent.
     */
    public final void setParent(final DocumentRange theParent) {
        this.parent = theParent;
    }

    /**
     * Set the source web page.
     *
     * @param theSource
     *            The source web page.
     */
    public final void setSource(final WebPage theSource) {
        this.source = theSource;
    }

}