/*
* Copyright 2012 Javier Pérez Pacheco and Francisco Díaz Rodriguez
* TweetTopics 2.0
* javielinux@gmail.com
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.javielinux.infos;
import com.javielinux.utils.GuessEncodingInputStream;
import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.TagNode;
import org.htmlcleaner.XPatherException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLDecoder;
/**
 * Best-effort summary of a web page: its title, its meta description and the
 * preview image advertised through {@code <link rel="image_src">}.
 *
 * <p>All the work happens in the constructor, which performs blocking network
 * I/O. Any failure is printed and leaves the affected fields as empty strings;
 * the constructor itself never throws.
 */
public class InfoWeb {

    private String web = "";
    private String title = "";
    private String image = "";
    private String description = "";

    /**
     * Downloads {@code web} and extracts its title, description and preview image.
     *
     * <p>URLs ending in {@code .pdf}, {@code .jpg}, {@code .png} or {@code .gif}
     * are not downloaded, and neither is a {@code null} URL; in those cases every
     * extracted field stays empty.
     *
     * @param web address of the page to inspect
     */
    public InfoWeb(String web) {
        this.web = web;
        if (web == null || isBinaryResource(web)) {
            return;
        }
        try {
            HtmlCleaner cleaner = new HtmlCleaner();
            CleanerProperties props = cleaner.getProperties();
            props.setAllowHtmlInsideAttributes(true);
            props.setAllowMultiWordAttributes(true);
            props.setRecognizeUnicodeChars(true);
            props.setOmitComments(true);

            URL url = new URL(web);

            // Detect the encoding on a dedicated stream, and always release that
            // stream: the page itself is read through a second connection below.
            String encoding;
            InputStream probe = url.openStream();
            try {
                encoding = new GuessEncodingInputStream(probe).guess();
            } finally {
                probe.close();
            }

            TagNode node;
            URLConnection conn = url.openConnection();
            InputStream body = conn.getInputStream();
            try {
                InputStreamReader reader;
                if (encoding != null) {
                    reader = new InputStreamReader(body, encoding);
                } else {
                    reader = new InputStreamReader(body);
                }
                node = cleaner.clean(reader);
            } finally {
                // Closing the raw stream also covers the case where the reader
                // could not be created (unsupported encoding name).
                body.close();
            }

            // Each field is extracted independently so that a missing or odd
            // element does not prevent the remaining ones from being read.
            Object[] titles = node.evaluateXPath("//title");
            if (titles.length > 0) {
                TagNode titleNode = (TagNode) titles[0];
                if (!titleNode.getChildren().isEmpty()) {
                    Object first = titleNode.getChildren().iterator().next();
                    if (first != null) {
                        title = decodeOrRaw(first.toString().trim());
                    }
                }
            }

            Object[] descriptions = node.evaluateXPath("//meta[@name='description']");
            if (descriptions.length > 0) {
                String content = ((TagNode) descriptions[0]).getAttributeByName("content");
                if (content != null) {
                    description = decodeOrRaw(content.trim());
                }
            }

            Object[] icons = node.evaluateXPath("//link[@rel='image_src']");
            if (icons.length > 0) {
                String href = ((TagNode) icons[0]).getAttributeByName("href");
                if (href != null) {
                    image = resolveImageUrl(url, href.trim());
                }
            }
        } catch (OutOfMemoryError e) {
            // Kept from the original: parsing a page must not take the caller down.
            e.printStackTrace();
        } catch (StackOverflowError e) {
            e.printStackTrace();
        } catch (Exception e) {
            // Covers malformed URLs, I/O failures and XPath errors alike.
            e.printStackTrace();
        }
    }

    /** Tells whether the URL ends with an extension that is known not to be HTML. */
    private static boolean isBinaryResource(String address) {
        return address.endsWith(".pdf")
                || address.endsWith(".jpg")
                || address.endsWith(".png")
                || address.endsWith(".gif");
    }

    /**
     * URL-decodes {@code text} as UTF-8, returning it unchanged when it is not
     * valid percent-encoded input (for example a literal {@code %} in a title).
     */
    private static String decodeOrRaw(String text) {
        try {
            return URLDecoder.decode(text, "UTF-8");
        } catch (IllegalArgumentException e) {
            return text;
        } catch (IOException e) {
            // UnsupportedEncodingException: not expected for UTF-8.
            return text;
        }
    }

    /**
     * Turns the {@code href} of the preview image into an absolute URL.
     *
     * <p>Values starting with {@code http} are returned as they are, values
     * starting with {@code www} get an {@code http://} prefix, and anything else
     * is resolved against the address of the page. An empty value yields an
     * empty string.
     */
    private static String resolveImageUrl(URL page, String href) {
        if (href.length() == 0 || href.startsWith("http")) {
            return href;
        }
        if (href.startsWith("www")) {
            return "http://" + href;
        }
        try {
            return new URL(page, href).toString();
        } catch (MalformedURLException e) {
            return href;
        }
    }

    /** Returns the address this instance was created with. */
    public String getWeb() {
        return web;
    }

    /** Returns the absolute URL of the preview image, or an empty string if none was found. */
    public String getImage() {
        return image;
    }

    /** Returns the meta description of the page, or an empty string if none was found. */
    public String getDescription() {
        return description;
    }

    /** Returns the title of the page, or an empty string if none was found. */
    public String getTitle() {
        return title;
    }
}