package name.abuchen.portfolio.online.impl;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.MessageFormat;
import java.text.ParseException;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Scanner;
import java.util.Set;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import name.abuchen.portfolio.Messages;
import name.abuchen.portfolio.model.Exchange;
import name.abuchen.portfolio.model.LatestSecurityPrice;
import name.abuchen.portfolio.model.Security;
import name.abuchen.portfolio.model.SecurityPrice;
import name.abuchen.portfolio.money.Values;
import name.abuchen.portfolio.online.QuoteFeed;
import name.abuchen.portfolio.util.Strings;
public class HTMLTableQuoteFeed implements QuoteFeed
{
/**
 * Describes one kind of table column: the header patterns that identify it
 * and how the text of a cell in that column is written into a
 * {@link LatestSecurityPrice}.
 */
private abstract static class Column
{
    /** Per-thread number format with German symbols (comma as decimal separator). */
    static final ThreadLocal<DecimalFormat> DECIMAL_FORMAT_GERMAN = ThreadLocal.withInitial(
                    () -> new DecimalFormat("#,##0.###", new DecimalFormatSymbols(Locale.GERMAN))); //$NON-NLS-1$

    /** Per-thread number format with English symbols (dot as decimal separator). */
    static final ThreadLocal<DecimalFormat> DECIMAL_FORMAT_ENGLISH = ThreadLocal.withInitial(
                    () -> new DecimalFormat("#,##0.###", new DecimalFormatSymbols(Locale.ENGLISH))); //$NON-NLS-1$

    /** Per-thread number format that uses an apostrophe as grouping separator. */
    static final ThreadLocal<DecimalFormat> DECIMAL_FORMAT_APOSTROPHE = ThreadLocal.withInitial(() -> {
        DecimalFormatSymbols symbols = new DecimalFormatSymbols(Locale.US);
        symbols.setGroupingSeparator('\'');
        return new DecimalFormat("#,##0.##", symbols); //$NON-NLS-1$
    });

    /** Compiled header patterns; a header belongs to this column if any of them matches fully. */
    private final Pattern[] patterns;

    /**
     * @param strings
     *            regular expressions that identify the header of this column
     */
    protected Column(String[] strings)
    {
        Pattern[] compiled = new Pattern[strings.length];
        int next = 0;
        for (String regex : strings)
            compiled[next++] = Pattern.compile(regex);
        this.patterns = compiled;
    }

    /**
     * Checks whether the text of the given header cell matches one of the
     * patterns of this column.
     *
     * @param header
     *            the header cell
     * @return true if at least one pattern matches the complete header text
     */
    protected boolean matches(Element header)
    {
        String headerText = header.text();
        for (int ii = 0; ii < patterns.length; ii++)
        {
            if (patterns[ii].matcher(headerText).matches())
                return true;
        }
        return false;
    }

    /**
     * Parses the cell and stores the result in the given price.
     *
     * @param value
     *            the table cell
     * @param price
     *            the price object to update
     * @param languageHint
     *            language of the page, if known
     * @throws ParseException
     *             if the cell text cannot be parsed
     */
    abstract void setValue(Element value, LatestSecurityPrice price, String languageHint) throws ParseException;

    /**
     * Parses the trimmed cell text as a decimal number and scales it by
     * {@code Values.Quote.factor()}.
     *
     * @param value
     *            the table cell
     * @param languageHint
     *            language of the page, if known
     * @return the rounded, scaled quote
     * @throws ParseException
     *             if the text is not a number in the selected format
     */
    protected long asQuote(Element value, String languageHint) throws ParseException
    {
        String text = value.text().trim();
        double quote = selectFormat(text, languageHint).parse(text).doubleValue();
        return Math.round(quote * Values.Quote.factor());
    }

    /**
     * Chooses the number format: an explicit "de"/"en" hint wins, then an
     * apostrophe in the text, and finally the relative position of the last
     * comma and the last dot.
     */
    private static DecimalFormat selectFormat(String text, String languageHint)
    {
        if ("de".equals(languageHint)) //$NON-NLS-1$
            return DECIMAL_FORMAT_GERMAN.get();
        if ("en".equals(languageHint)) //$NON-NLS-1$
            return DECIMAL_FORMAT_ENGLISH.get();

        if (text.indexOf('\'') >= 0)
            return DECIMAL_FORMAT_APOSTROPHE.get();

        // the separator that appears last is taken as the decimal
        // separator; a trailing comma (or no separator at all) selects
        // the German format
        int lastDot = text.lastIndexOf('.');
        int lastComma = text.lastIndexOf(',');
        return lastComma >= lastDot ? DECIMAL_FORMAT_GERMAN.get() : DECIMAL_FORMAT_ENGLISH.get();
    }
}
/**
 * Column holding the date of a quote. The header must read "Datum" or
 * "Date"; the cell text is tried against a fixed list of date patterns.
 */
private static class DateColumn extends Column
{
    /**
     * Date patterns in the order in which they are tried. Patterns created
     * without an explicit locale use the default locale of the JVM.
     */
    private final DateTimeFormatter[] formatters;

    @SuppressWarnings("nls")
    public DateColumn()
    {
        super(new String[] { "Datum", "Date" });

        formatters = new DateTimeFormatter[] { //
                        DateTimeFormatter.ofPattern("y-M-d"), //
                        DateTimeFormatter.ofPattern("d.M.yy"), //
                        DateTimeFormatter.ofPattern("d.M.y"), //
                        DateTimeFormatter.ofPattern("d. MMM y"), //
                        DateTimeFormatter.ofPattern("d. MMMM y"), //
                        DateTimeFormatter.ofPattern("d. MMM. y"), //
                        DateTimeFormatter.ofPattern("MMM d, y", Locale.ENGLISH), //
                        DateTimeFormatter.ofPattern("MMM dd, y", Locale.ENGLISH) };
    }

    /**
     * Sets the time of the price to the date parsed from the cell, using
     * the first pattern that accepts the text.
     *
     * @throws ParseException
     *             if none of the patterns matches the cell text
     */
    @Override
    void setValue(Element value, LatestSecurityPrice price, String languageHint) throws ParseException
    {
        String text = Strings.strip(value.text());

        for (DateTimeFormatter formatter : formatters)
        {
            try
            {
                LocalDate date = LocalDate.parse(text, formatter);
                price.setTime(date);
                return;
            }
            catch (DateTimeParseException ignored) // NOSONAR
            {
                // this pattern does not fit, try the next one
            }
        }

        throw new ParseException(text, 0);
    }
}
/**
 * Column holding the closing price; its value becomes the value of the
 * price.
 */
private static class CloseColumn extends Column
{
    @SuppressWarnings("nls")
    public CloseColumn()
    {
        super(new String[] { //
                        "Schluss.*", //
                        "Schluß.*", //
                        "Rücknahmepreis.*", //
                        "Close.*", //
                        "Zuletzt", //
                        "Price", //
                        "akt. Kurs" });
    }

    /**
     * Parses the cell as a quote and stores it as the value of the price.
     */
    @Override
    void setValue(Element value, LatestSecurityPrice price, String languageHint) throws ParseException
    {
        long quote = asQuote(value, languageHint);
        price.setValue(quote);
    }
}
/**
 * Column holding the high of the day.
 */
private static class HighColumn extends Column
{
    @SuppressWarnings("nls")
    public HighColumn()
    {
        super(new String[] { "Hoch.*", "Tageshoch.*", "Max.*", "High.*" });
    }

    /**
     * Stores the parsed cell as the high of the price; a cell that consists
     * only of a dash is recorded as not available.
     */
    @Override
    void setValue(Element value, LatestSecurityPrice price, String languageHint) throws ParseException
    {
        String text = value.text().trim();
        if (!"-".equals(text)) //$NON-NLS-1$
        {
            price.setHigh(asQuote(value, languageHint));
            return;
        }

        price.setHigh(LatestSecurityPrice.NOT_AVAILABLE);
    }
}
/**
 * Column holding the low of the day.
 */
private static class LowColumn extends Column
{
    @SuppressWarnings("nls")
    public LowColumn()
    {
        super(new String[] { "Tief.*", "Tagestief.*", "Low.*" });
    }

    /**
     * Stores the parsed cell as the low of the price; a cell that consists
     * only of a dash is recorded as not available.
     */
    @Override
    void setValue(Element value, LatestSecurityPrice price, String languageHint) throws ParseException
    {
        String text = value.text().trim();
        if (!"-".equals(text)) //$NON-NLS-1$
        {
            price.setLow(asQuote(value, languageHint));
            return;
        }

        price.setLow(LatestSecurityPrice.NOT_AVAILABLE);
    }
}
/**
 * Pairs a column type with the zero-based position of the header cell that
 * matched it; the same position is later used to pick the cell out of each
 * data row.
 */
private static class Spec
{
// the column type whose header pattern matched
private final Column column;
// zero-based position of the matching cell within its row
private final int index;
public Spec(Column column, int index)
{
this.column = column;
this.index = index;
}
}
/** Identifier of this feed, returned by {@link #getId()}. */
public static final String ID = "GENERIC_HTML_TABLE"; //$NON-NLS-1$
/** Column types that header cells are matched against when a table is analyzed. */
private static final Column[] COLUMNS = new Column[] { new DateColumn(), new CloseColumn(), new HighColumn(),
new LowColumn() };
/** Cache of parsed quotes, looked up and filled by feed URL. */
private final PageCache cache = new PageCache();
/**
 * Returns the identifier of this quote feed.
 *
 * @return the constant {@link #ID}
 */
@Override
public String getId()
{
return ID;
}
/**
 * Returns the name of this quote feed.
 *
 * @return the text of {@code Messages.LabelHTMLTable}
 */
@Override
public String getName()
{
return Messages.LabelHTMLTable;
}
/**
 * Loads the quotes of every given security and sets the most recent one
 * as its latest price. The previous close is taken from the second most
 * recent quote (or from the latest quote itself if there is only one);
 * the volume is set to not available.
 *
 * @param securities
 *            the securities to update
 * @param errors
 *            collects problems that occur while loading or parsing
 * @return true if the latest price of at least one security was changed
 */
@Override
public boolean updateLatestQuotes(List<Security> securities, List<Exception> errors)
{
    boolean anyChange = false;

    for (Security security : securities)
    {
        // if latestFeed is null, then the policy is 'use same configuration
        // as historic quotes'
        String feedURL;
        if (security.getLatestFeed() == null)
            feedURL = security.getFeedURL();
        else
            feedURL = security.getLatestFeedURL();

        List<LatestSecurityPrice> quotes = internalGetQuotes(security, feedURL, errors);
        if (quotes.isEmpty())
            continue;

        Collections.sort(quotes);

        int lastIndex = quotes.size() - 1;
        LatestSecurityPrice latest = quotes.get(lastIndex);
        LatestSecurityPrice reference = lastIndex > 0 ? quotes.get(lastIndex - 1) : latest;

        latest.setPreviousClose(reference.getValue());
        latest.setVolume(LatestSecurityPrice.NOT_AVAILABLE);

        // non-short-circuit on purpose: the latest price is always set
        anyChange |= security.setLatest(latest);
    }

    return anyChange;
}
/**
 * Loads the quotes from the feed URL of the security and adds each of
 * them to the security as a historical price.
 *
 * @param security
 *            the security to update
 * @param errors
 *            collects problems that occur while loading or parsing
 * @return true if at least one price was added
 */
@Override
public boolean updateHistoricalQuotes(Security security, List<Exception> errors)
{
    boolean changed = false;

    for (LatestSecurityPrice quote : internalGetQuotes(security, security.getFeedURL(), errors))
    {
        SecurityPrice price = new SecurityPrice(quote.getTime(), quote.getValue());
        // non-short-circuit on purpose: every price is offered to the security
        changed |= security.addPrice(price);
    }

    return changed;
}
/**
 * Returns the quotes found at the feed URL of the given security.
 *
 * @param security
 *            the security whose feed URL is loaded
 * @param start
 *            not used by this implementation; the quotes are returned as
 *            parsed, without filtering by date
 * @param errors
 *            collects problems that occur while loading or parsing
 * @return the parsed quotes; empty if nothing could be loaded
 */
@Override
public List<LatestSecurityPrice> getHistoricalQuotes(Security security, LocalDate start, List<Exception> errors)
{
return internalGetQuotes(security, security.getFeedURL(), errors);
}
/**
 * Returns the quotes for the given feed URL, from the cache if present
 * and otherwise by downloading and parsing the page. Only non-empty
 * results are cached.
 *
 * @param security
 *            used for the error message if the URL is missing
 * @param feedURL
 *            the URL to load; a null or empty URL is reported as an error
 * @param errors
 *            collects problems that occur while loading or parsing
 * @return the quotes; empty if the URL is missing or nothing was found
 */
private List<LatestSecurityPrice> internalGetQuotes(Security security, String feedURL, List<Exception> errors)
{
    boolean isMissing = feedURL == null || feedURL.isEmpty();
    if (isMissing)
    {
        String message = MessageFormat.format(Messages.MsgMissingFeedURL, security.getName());
        errors.add(new IOException(message));
        return Collections.emptyList();
    }

    List<LatestSecurityPrice> cached = cache.lookup(feedURL);
    if (cached != null)
        return cached;

    List<LatestSecurityPrice> parsed = parseFromURL(feedURL, errors);
    if (!parsed.isEmpty())
        cache.put(feedURL, parsed);

    return parsed;
}
/**
 * Parses quotes from HTML that has already been loaded.
 *
 * @param response
 *            the HTML text to parse
 * @param errors
 *            collects problems that occur while parsing
 * @return the parsed quotes
 */
@Override
public List<LatestSecurityPrice> getHistoricalQuotes(String response, List<Exception> errors)
{
return parseFromHTML(response, errors);
}
/**
 * Returns the exchanges for the given security; this implementation
 * always returns an empty list and uses neither argument.
 *
 * @return an empty list
 */
@Override
public List<Exchange> getExchanges(Security subject, List<Exception> errors)
{
return Collections.emptyList();
}
/**
 * Downloads the page at the given URL and parses the quotes from it.
 *
 * @param url
 *            the address of the page; converted to its ASCII form before
 *            the request is made
 * @param errors
 *            collects problems that occur while loading or parsing
 * @return the parsed quotes; empty if the URL is malformed or the
 *         download failed
 */
protected List<LatestSecurityPrice> parseFromURL(String url, List<Exception> errors)
{
    try
    {
        String escapedUrl = new URI(url).toASCIIString();

        Document document = Jsoup.connect(escapedUrl) //
                        .userAgent(OnlineHelper.getUserAgent()) //
                        .timeout(30000) //
                        .get();

        return parse(document, errors);
    }
    catch (URISyntaxException | IOException e)
    {
        errors.add(e);
        return Collections.emptyList();
    }
}
/**
 * Parses the quotes from the given HTML text.
 *
 * @param html
 *            the HTML text
 * @param errors
 *            collects problems that occur while parsing
 * @return the parsed quotes
 */
protected List<LatestSecurityPrice> parseFromHTML(String html, List<Exception> errors)
{
return parse(Jsoup.parse(html), errors);
}
/**
 * Extracts the quotes from the first table of the document that has at
 * least a date and a close column. Rows that fail to parse are reported
 * in {@code errors} and skipped.
 *
 * @param document
 *            the parsed HTML page
 * @param errors
 *            collects row-level problems; if no quote was found at all, an
 *            error containing the HTML of the document is added
 * @return the extracted quotes, possibly empty
 */
private List<LatestSecurityPrice> parse(Document document, List<Exception> errors)
{
    // the lang attribute of the html element, if any, is used as a hint
    // for the number format
    String language = document.select("html").attr("lang"); //$NON-NLS-1$ //$NON-NLS-2$

    List<LatestSecurityPrice> prices = new ArrayList<>();

    for (Element table : document.getElementsByTag("table")) //$NON-NLS-1$
    {
        List<Spec> specs = new ArrayList<>();
        int firstDataRow = buildSpecFromTable(table, specs);

        if (!isSpecValid(specs))
            continue;

        Elements rows = table.select("> tbody > tr"); //$NON-NLS-1$
        for (int ii = firstDataRow; ii < rows.size(); ii++)
        {
            try
            {
                LatestSecurityPrice price = extractPrice(rows.get(ii), specs, language);
                if (price != null)
                    prices.add(price);
            }
            catch (Exception e)
            {
                errors.add(e);
            }
        }

        // only the first table with a valid spec is used
        break;
    }

    // if no quotes could be extracted, report the HTML for further analysis
    if (prices.isEmpty())
    {
        String message = MessageFormat.format(Messages.MsgNoQuotesFoundInHTML, document.html());
        errors.add(new IOException(message));
    }

    return prices;
}
/**
 * Finds the header of the table and fills {@code specs} with the columns
 * recognized in it. The header is looked for, in this order, in the
 * {@code th} cells of {@code thead}, the {@code td} cells of
 * {@code thead}, the {@code th} cells of {@code tbody}, and finally in the
 * first and (if the first yields nothing) second regular body row.
 *
 * @param table
 *            the table element to analyze
 * @param specs
 *            receives the recognized columns
 * @return the index of the first body row to read data from; 0 if the
 *         header was found in dedicated header cells, otherwise the number
 *         of body rows inspected as header candidates
 */
@SuppressWarnings("nls")
private int buildSpecFromTable(Element table, List<Spec> specs)
{
    // dedicated header cells, tried in order of preference
    String[] headerSelectors = { "> thead > tr > th", "> thead > tr > td", "> tbody > tr > th" };
    for (String selector : headerSelectors)
    {
        Elements header = table.select(selector);
        if (!header.isEmpty())
        {
            buildSpecFromRow(header, specs);
            return 0;
        }
    }

    // no header cells: inspect the first two regular rows
    Elements rows = table.select("> tbody > tr");
    if (rows.isEmpty())
        return 0;

    buildSpecFromRow(rows.get(0).select("> td"), specs);
    if (!specs.isEmpty() || rows.size() <= 1)
        return 1;

    buildSpecFromRow(rows.get(1).select("> td"), specs);
    return 2;
}
/**
 * Matches every cell of a header row against the column types that have
 * not been assigned yet and records a spec for each match. Every column
 * type is assigned to at most one cell per call.
 *
 * @param row
 *            the header cells
 * @param specs
 *            receives one entry per recognized column
 */
private void buildSpecFromRow(Elements row, List<Spec> specs)
{
    Set<Column> available = new HashSet<>();
    Collections.addAll(available, COLUMNS);

    int position = 0;
    for (Element element : row)
    {
        Column match = null;
        for (Column candidate : available)
        {
            if (candidate.matches(element))
            {
                match = candidate;
                break;
            }
        }

        if (match != null)
        {
            specs.add(new Spec(match, position));
            available.remove(match);
        }

        position++;
    }
}
/**
 * A spec is usable only if it contains both a date column and a close
 * column.
 *
 * @param specs
 *            the recognized columns; may be null
 * @return true if a date column and a close column are present
 */
private boolean isSpecValid(List<Spec> specs)
{
    if (specs == null || specs.isEmpty())
        return false;

    boolean dateSeen = false;
    boolean closeSeen = false;

    for (Spec spec : specs)
    {
        dateSeen |= spec.column instanceof DateColumn;
        closeSeen |= spec.column instanceof CloseColumn;
    }

    return dateSeen && closeSeen;
}
/**
 * Builds a price from one table row by handing the cell at each spec's
 * position to the spec's column.
 *
 * @param row
 *            the table row
 * @param specs
 *            the recognized columns and their cell positions
 * @param languageHint
 *            language of the page, if known
 * @return the price, or null if the row has at most one {@code td} cell
 *         (for example a row that contains only {@code th} elements)
 * @throws ParseException
 *             if a cell cannot be parsed
 */
private LatestSecurityPrice extractPrice(Element row, List<Spec> specs, String languageHint) throws ParseException
{
    Elements cells = row.select("> td"); //$NON-NLS-1$

    if (cells.size() > 1)
    {
        LatestSecurityPrice price = new LatestSecurityPrice();
        for (Spec spec : specs)
        {
            Element cell = cells.get(spec.index);
            spec.column.setValue(cell, price, languageHint);
        }
        return price;
    }

    // nothing to read from rows without at least two data cells
    return null;
}
/**
 * Test method to parse HTML tables. Each argument is loaded and the
 * extracted quotes are printed to standard output. Empty arguments and
 * arguments starting with '#' are skipped.
 *
 * @param args
 *            list of URLs and/or local files
 * @throws IOException
 *             if a local file cannot be read
 */
public static void main(String[] args) throws IOException
{
    PrintWriter writer = new PrintWriter(System.out); // NOSONAR
    try
    {
        for (String arg : args)
        {
            // an empty argument has no first character to inspect, so it
            // is skipped together with the '#' comment entries
            if (arg.isEmpty() || arg.charAt(0) == '#')
                continue;

            doLoad(arg, writer);
        }
    }
    finally
    {
        // flush even if loading failed so that the output produced so far
        // is not lost; System.out itself is deliberately not closed
        writer.flush();
    }
}
/**
 * Loads one source, parses the quotes from it and prints the collected
 * errors followed by one tab-separated line per quote (date, value, low,
 * high).
 *
 * @param source
 *            a URL if it starts with "http", otherwise the path of a local
 *            UTF-8 encoded HTML file
 * @param writer
 *            receives the output
 * @throws IOException
 *             if the local file cannot be opened
 */
@SuppressWarnings("nls")
private static void doLoad(String source, PrintWriter writer) throws IOException
{
    writer.println("--------");
    writer.println(source);
    writer.println("--------");

    List<LatestSecurityPrice> prices;
    List<Exception> errors = new ArrayList<>();

    if (source.startsWith("http"))
    {
        prices = new HTMLTableQuoteFeed().parseFromURL(source, errors);
    }
    else
    {
        try (Scanner scanner = new Scanner(new File(source), StandardCharsets.UTF_8.name()))
        {
            // "\\A" matches only at the start of the input, so the whole
            // file is one token; an empty file has no token at all and
            // next() would throw NoSuchElementException
            scanner.useDelimiter("\\A");
            String html = scanner.hasNext() ? scanner.next() : "";
            prices = new HTMLTableQuoteFeed().parseFromHTML(html, errors);
        }
    }

    for (Exception error : errors)
        error.printStackTrace(writer); // NOSONAR

    for (LatestSecurityPrice p : prices)
    {
        writer.print(Values.Date.format(p.getTime()));
        writer.print("\t");
        writer.print(Values.Quote.format(p.getValue()));
        writer.print("\t");
        writer.print(Values.Quote.format(p.getLow()));
        writer.print("\t");
        writer.println(Values.Quote.format(p.getHigh()));
    }
}
}