/*
* Copyright (c) 2004-2011 Marco Maccaferri and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Marco Maccaferri - initial API and implementation
*/
package org.eclipsetrader.yahoo.internal.news;
import java.net.URL;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.StringTokenizer;
import org.eclipse.core.runtime.IProgressMonitor;
import org.htmlparser.Parser;
import org.htmlparser.filters.OrFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.nodes.TagNode;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.SimpleNodeIterator;
/**
 * {@link INewsHandler} that extracts headlines from news pages whose dates
 * are written with Italian month names.
 * <p>
 * Each page is parsed for <code>dt</code> and <code>li</code> elements. An
 * element is turned into a {@link HeadLine} only when it has exactly the
 * number of children this handler expects; everything else is ignored.
 */
public class ItalianNewsHandler implements INewsHandler {

    /** Italian month names; the month number is the array index plus one. */
    private static final String[] MONTH_NAMES = {
        "Gennaio", //$NON-NLS-1$
        "Febbraio", //$NON-NLS-1$
        "Marzo", //$NON-NLS-1$
        "Aprile", //$NON-NLS-1$
        "Maggio", //$NON-NLS-1$
        "Giugno", //$NON-NLS-1$
        "Luglio", //$NON-NLS-1$
        "Agosto", //$NON-NLS-1$
        "Settembre", //$NON-NLS-1$
        "Ottobre", //$NON-NLS-1$
        "Novembre", //$NON-NLS-1$
        "Dicembre", //$NON-NLS-1$
    };

    /**
     * Upper bound on the digits read for one numeric character reference.
     * Seven digits cover every Unicode code point (max 1114111) and keep the
     * accumulated value well inside the <code>int</code> range.
     */
    private static final int MAX_ENTITY_DIGITS = 7;

    public ItalianNewsHandler() {
    }

    /* (non-Javadoc)
     * @see org.eclipsetrader.yahoo.internal.news.INewsHandler#parseNewsPages(java.net.URL[], org.eclipse.core.runtime.IProgressMonitor)
     */
    @Override
    public HeadLine[] parseNewsPages(URL[] url, IProgressMonitor monitor) {
        List<HeadLine> headlinesList = new ArrayList<HeadLine>();

        for (int i = 0; i < url.length && !monitor.isCanceled(); i++) {
            monitor.subTask(url[i].toString());
            try {
                Parser parser = new Parser(url[i].toString());
                NodeList entries = parser.extractAllNodesThatMatch(new OrFilter(new TagNameFilter("dt"), new TagNameFilter("li"))); //$NON-NLS-1$ //$NON-NLS-2$
                for (SimpleNodeIterator iter = entries.elements(); iter.hasMoreNodes();) {
                    // Each entry is handled on its own so that one malformed
                    // element (unexpected node type, missing sibling, bad
                    // date) does not discard the remaining headlines of the
                    // page.
                    try {
                        HeadLine headLine = parseEntry((TagNode) iter.nextNode());
                        if (headLine != null) {
                            headlinesList.add(headLine);
                        }
                    } catch (RuntimeException e) {
                        e.printStackTrace();
                    }
                }
            } catch (Exception e) {
                // Page could not be fetched or parsed: report and move on to
                // the next URL.
                e.printStackTrace();
            }
            monitor.worked(1);
        }

        return headlinesList.toArray(new HeadLine[headlinesList.size()]);
    }

    /**
     * Builds a headline from a single <code>dt</code> or <code>li</code>
     * element.
     * <p>
     * The child counts and indexes used here are hard-wired to one specific
     * markup layout; elements with any other shape are not headlines as far
     * as this handler is concerned.
     *
     * @param root the element to inspect
     * @return the headline, or <code>null</code> if the element does not have
     *         the expected shape
     * @throws RuntimeException if the element has the expected child count
     *         but its content cannot be interpreted
     */
    private HeadLine parseEntry(TagNode root) {
        NodeList children = root.getChildren();
        if (children == null) {
            // Elements without content have no child list at all.
            return null;
        }

        String tagName = root.getTagName();
        if ("dt".equalsIgnoreCase(tagName) && children.size() == 12) { //$NON-NLS-1$
            LinkTag link = (LinkTag) children.elementAt(3);
            String source = cleanSource(children.elementAt(9).getText());
            // The date lives in the third sibling following the <dt>.
            Date publishedDate = parseDateString(root.getNextSibling().getNextSibling().getNextSibling().getChildren().elementAt(1).getText());
            return new HeadLine(publishedDate, source, decode(link.getLinkText().trim()), null, link.getLink());
        }
        if ("li".equalsIgnoreCase(tagName) && children.size() == 14) { //$NON-NLS-1$
            LinkTag link = (LinkTag) children.elementAt(1);
            String source = cleanSource(children.elementAt(6).getText());
            Date publishedDate = parseDateString(children.elementAt(10).getText());
            return new HeadLine(publishedDate, source, decode(link.getLinkText().trim()), null, link.getLink());
        }
        return null;
    }

    /**
     * Normalizes the text naming the news source: line breaks become spaces,
     * parentheses are removed, runs of spaces are collapsed and the result is
     * trimmed.
     *
     * @param text the raw node text
     * @return the cleaned-up source name
     */
    private String cleanSource(String text) {
        String source = text.replaceAll("[\r\n]", " "); //$NON-NLS-1$ //$NON-NLS-2$
        source = source.replaceAll("[()]", ""); //$NON-NLS-1$ //$NON-NLS-2$
        return source.replaceAll("[ ]{2,}", " ").trim(); //$NON-NLS-1$ //$NON-NLS-2$
    }

    /**
     * Parses a date made of the tokens
     * <code>&lt;ignored&gt; day month-name year hour minute</code>, separated
     * by spaces, commas or colons.
     *
     * @param date the text to parse
     * @return the parsed date, with seconds and milliseconds set to zero, in
     *         the default time zone
     * @throws IllegalArgumentException if the month name is not recognized
     *         (this includes {@link NumberFormatException} for non-numeric
     *         fields)
     * @throws java.util.NoSuchElementException if tokens are missing
     */
    private Date parseDateString(String date) {
        StringTokenizer st = new StringTokenizer(date, " ,:"); //$NON-NLS-1$
        st.nextToken(); // leading token is not used (presumably the weekday name)

        int day = Integer.parseInt(st.nextToken());
        String monthName = st.nextToken();
        int month = getMonth(monthName);
        if (month == 0) {
            // Without this check the calendar would be handed month -1 and,
            // being lenient, silently roll back to December of the previous
            // year.
            throw new IllegalArgumentException("Unknown month name: " + monthName); //$NON-NLS-1$
        }
        int year = Integer.parseInt(st.nextToken());
        int hour = Integer.parseInt(st.nextToken());
        int minute = Integer.parseInt(st.nextToken());

        Calendar gc = Calendar.getInstance(Locale.ITALY);
        gc.set(year, month - 1, day, hour, minute, 0);
        gc.set(Calendar.MILLISECOND, 0);
        return gc.getTime();
    }

    /**
     * Maps an Italian month name to its number.
     *
     * @param t the month name, compared ignoring case
     * @return 1 for "Gennaio" through 12 for "Dicembre", or 0 if the name is
     *         not recognized
     */
    private int getMonth(String t) {
        for (int i = 0; i < MONTH_NAMES.length; i++) {
            if (MONTH_NAMES[i].equalsIgnoreCase(t)) {
                return i + 1;
            }
        }
        return 0;
    }

    /**
     * Replaces decimal numeric character references (<code>&amp;#NNN;</code>)
     * with the characters they stand for and strips control characters
     * (anything below the space character).
     * <p>
     * The text is processed as characters rather than platform-encoded bytes,
     * so non-ASCII text and references above 127 are preserved. A trailing
     * semicolon is consumed only when present; a reference that resolves to a
     * control character or to an invalid code point is dropped. Named and
     * hexadecimal references, and a <code>&amp;#</code> not followed by a
     * digit, are copied through unchanged.
     *
     * @param s the text to decode
     * @return the decoded text
     */
    private String decode(String s) {
        int length = s.length();
        StringBuilder sb = new StringBuilder(length);

        int i = 0;
        while (i < length) {
            char c = s.charAt(i);

            if (c == '&' && i + 1 < length && s.charAt(i + 1) == '#') {
                int pos = i + 2;
                int codePoint = 0;
                int digits = 0;
                while (pos < length && digits < MAX_ENTITY_DIGITS) {
                    char d = s.charAt(pos);
                    if (d < '0' || d > '9') {
                        break;
                    }
                    codePoint = codePoint * 10 + (d - '0');
                    digits++;
                    pos++;
                }

                if (digits > 0) {
                    if (pos < length && s.charAt(pos) == ';') {
                        pos++;
                    }
                    if (codePoint >= ' ' && Character.isValidCodePoint(codePoint)) {
                        sb.appendCodePoint(codePoint);
                    }
                    i = pos;
                    continue;
                }
                // Not a decimal reference: fall through and copy the '&'.
            }

            if (c >= ' ') {
                sb.append(c);
            }
            i++;
        }

        return sb.toString();
    }
}