/*
* Encog(tm) Core v2.5 - Java Version
* http://www.heatonresearch.com/encog/
* http://code.google.com/p/encog-java/
* Copyright 2008-2010 Heaton Research, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* For more information on Heaton Research copyrights, licenses
* and trademarks visit:
* http://www.heatonresearch.com/copyright
*/
package org.encog.bot;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.Locale;
/*
* Encog Artificial Intelligence Framework v2.x Java Version
* http://www.heatonresearch.com/encog/ http://code.google.com/p/encog-java/
*
* Copyright 2008-2009, Heaton Research Inc., and individual contributors. See
* the copyright.txt in the distribution for a full listing of individual
* contributors.
*
* This is free software; you can redistribute it and/or modify it under the
* terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 2.1 of the License, or (at your option)
* any later version.
*
* This software is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this software; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA, or see the FSF
* site: http://www.fsf.org.
*/
import org.encog.parse.tags.read.ReadHTML;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Utility class for bots.
*
* @author jheaton
*
*/
public final class BotUtil {
/**
 * Size, in bytes, of the buffer used for each read when loading a page.
 */
public static final int BUFFER_SIZE = 8192;
/**
 * The logging object for this class.
 */
private static final Logger LOGGER = LoggerFactory.getLogger(BotUtil.class);
/**
 * Extract the text that lies between two tokens. This method is very
 * useful for grabbing information from a HTML page. The tokens are matched
 * without regard to case; the returned text keeps the case of the original
 * string.
 *
 * @param str
 *            The string to search.
 * @param token1
 *            The text, or tag, that comes before the desired text
 * @param token2
 *            The text, or tag, that comes after the desired text
 * @param index
 *            Which occurrence of token1 to use, 1 for the first. Values
 *            below 1 are treated as 1.
 * @return The text found between token1 and the next token2 after it, or
 *         null if either token could not be found.
 */
public static String extract(final String str, final String token1,
        final String token2, final int index) {
    // Lower-case with a fixed locale so that matching does not depend on
    // the JVM default locale (e.g. Turkish maps 'I' to a dotless 'i').
    final String searchStr = str.toLowerCase(Locale.ROOT);
    final String token1Lower = token1.toLowerCase(Locale.ROOT);
    final String token2Lower = token2.toLowerCase(Locale.ROOT);

    // Walk forward to the requested occurrence of token1.
    int location1 = -1;
    int count = index;
    do {
        location1 = searchStr.indexOf(token1Lower, location1 + 1);
        if (location1 == -1) {
            return null;
        }
        count--;
    } while (count > 0);

    // The wanted text starts right after token1. token2 must be searched
    // for from there, otherwise a token2 that also occurs inside token1
    // (e.g. "<b>" and ">") would be found before the start of the text.
    // The search never starts earlier than location1 + 1, which keeps the
    // previous behaviour for an empty token1.
    final int contentStart = location1 + token1Lower.length();
    final int location2 = searchStr.indexOf(token2Lower,
            Math.max(contentStart, location1 + 1));
    if (location2 == -1) {
        return null;
    }

    // Return the result from the original, mixed case, string.
    return str.substring(contentStart, location2);
}
/**
 * Extract the text that lies between two tokens, starting the search at a
 * given position. This method is very useful for grabbing information from
 * a HTML page. The tokens are matched without regard to case; the returned
 * text keeps the case of the original string.
 *
 * @param str
 *            The string to search.
 * @param token1
 *            The text, or tag, that comes before the desired text
 * @param token2
 *            The text, or tag, that comes after the desired text
 * @param index
 *            Index in the string to start searching from.
 * @param occurence
 *            Which occurrence of token1, counted from index, to use; 1
 *            for the first. Values below 1 are treated as 1.
 * @return The text found between token1 and the next token2 after it, or
 *         null if either token could not be found.
 */
public static String extractFromIndex(final String str,
        final String token1, final String token2, final int index,
        final int occurence) {
    // Lower-case with a fixed locale so that matching does not depend on
    // the JVM default locale (e.g. Turkish maps 'I' to a dotless 'i').
    final String searchStr = str.toLowerCase(Locale.ROOT);
    final String token1Lower = token1.toLowerCase(Locale.ROOT);
    final String token2Lower = token2.toLowerCase(Locale.ROOT);

    // Walk forward from the start index to the requested occurrence of
    // token1. The first search begins exactly at index.
    int location1 = index - 1;
    int count = occurence;
    do {
        location1 = searchStr.indexOf(token1Lower, location1 + 1);
        if (location1 == -1) {
            return null;
        }
        count--;
    } while (count > 0);

    // The wanted text starts right after token1. token2 must be searched
    // for from there, otherwise a token2 that also occurs inside token1
    // (e.g. "<b>" and ">") would be found before the start of the text.
    // The search never starts earlier than location1 + 1, which keeps the
    // previous behaviour for an empty token1.
    final int contentStart = location1 + token1Lower.length();
    final int location2 = searchStr.indexOf(token2Lower,
            Math.max(contentStart, location1 + 1));
    if (location2 == -1) {
        return null;
    }

    // Return the result from the original, mixed case, string.
    return str.substring(contentStart, location2);
}
/**
 * Find the specified occurrence of one string in another string. The
 * comparison ignores case.
 *
 * @param search
 *            The string to search.
 * @param searchFor
 *            What we are searching for.
 * @param index
 *            The occurrence to find, counted from zero: 0 is the first
 *            occurrence, 1 the second, and so on. Negative values are
 *            treated as 0.
 * @return The index of the specified occurrence, or -1 if there are not
 *         that many occurrences.
 */
public static int findOccurance(final String search,
        final String searchFor, final int index) {
    // Lower-case both sides, with a fixed locale, so the comparison is
    // really case-insensitive and independent of the default locale.
    final String lowerSearch = search.toLowerCase(Locale.ROOT);
    final String lowerSearchFor = searchFor.toLowerCase(Locale.ROOT);

    int remaining = index;
    int result = -1;
    do {
        result = lowerSearch.indexOf(lowerSearchFor, result + 1);
        if (result == -1) {
            // Stop at the first miss; continuing would restart the search
            // from the beginning of the string and report a wrong match.
            return -1;
        }
    } while (remaining-- > 0);
    return result;
}
/**
 * Load all data from the specified input stream and return it as a string.
 * The bytes are decoded with the platform default character set. The
 * stream is read until end of stream; it is not closed by this method.
 *
 * @param is
 *            The input stream to load from.
 * @return The data loaded from the specified input stream.
 */
public static String loadPage(final InputStream is) {
    try {
        // Collect the raw bytes first and decode them in one step. Decoding
        // each chunk on its own would corrupt a multi-byte character that
        // happens to be split across two reads.
        final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        final byte[] buffer = new byte[BotUtil.BUFFER_SIZE];
        int length;
        while ((length = is.read(buffer)) >= 0) {
            bytes.write(buffer, 0, length);
        }
        return bytes.toString();
    } catch (final IOException e) {
        if (BotUtil.LOGGER.isErrorEnabled()) {
            BotUtil.LOGGER.error("Exception", e);
        }
        throw new BotError(e);
    }
}
/**
 * Load the specified web page into a string. The bytes are decoded with
 * the platform default character set. The stream opened on the URL is
 * always closed before this method returns.
 *
 * @param url
 *            The url to load.
 * @return The web page as a string.
 */
public static String loadPage(final URL url) {
    InputStream is = null;
    try {
        is = url.openStream();
        // Collect the raw bytes first and decode them in one step. Decoding
        // each chunk on its own would corrupt a multi-byte character that
        // happens to be split across two reads.
        final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        final byte[] buffer = new byte[BotUtil.BUFFER_SIZE];
        int length;
        while ((length = is.read(buffer)) >= 0) {
            bytes.write(buffer, 0, length);
        }
        return bytes.toString();
    } catch (final IOException e) {
        if (BotUtil.LOGGER.isErrorEnabled()) {
            BotUtil.LOGGER.error("Exception", e);
        }
        throw new BotError(e);
    } finally {
        // Release the connection in every case; the stream used to leak.
        if (is != null) {
            try {
                is.close();
            } catch (final IOException e) {
                // A failure to close must not replace the result or hide
                // an error that is already being reported, so only log it.
                if (BotUtil.LOGGER.isWarnEnabled()) {
                    BotUtil.LOGGER.warn("Exception", e);
                }
            }
        }
    }
}
/**
 * Remove any HTML or XML tags from the specified string, keeping only the
 * characters the HTML reader reports.
 *
 * @param str
 *            The string to process.
 * @return The string without tags.
 */
public static String stripTags(final String str) {
    final ReadHTML parser = new ReadHTML(
            new ByteArrayInputStream(str.getBytes()));
    final StringBuilder text = new StringBuilder();
    for (int c = parser.read(); c != -1; c = parser.read()) {
        // NOTE(review): a zero is presumably how ReadHTML signals that it
        // just consumed a tag - confirm against ReadHTML.
        if (c == 0) {
            continue;
        }
        text.append((char) c);
    }
    return text.toString();
}
/**
 * Private constructor. This class only offers static members, so it cannot
 * be instantiated from outside.
 */
private BotUtil() {
}
}