package name.abuchen.portfolio.datatransfer.pdf;

import java.io.File;
import java.io.IOException;
import java.math.BigDecimal;
import java.text.MessageFormat;
import java.text.NumberFormat;
import java.text.ParseException;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;

import name.abuchen.portfolio.Messages;
import name.abuchen.portfolio.datatransfer.Extractor;
import name.abuchen.portfolio.datatransfer.SecurityCache;
import name.abuchen.portfolio.datatransfer.pdf.PDFParser.DocumentType;
import name.abuchen.portfolio.model.Client;
import name.abuchen.portfolio.model.Security;
import name.abuchen.portfolio.money.CurrencyUnit;
import name.abuchen.portfolio.money.Values;

/**
 * Base class for extractors that import items from PDF files.
 * <p>
 * For every file the text is stripped from the PDF, checked against the
 * registered bank identifiers and then handed to every registered
 * {@link DocumentType} whose pattern matches. Subclasses register their
 * identifiers and document types via {@link #addBankIdentifier(String)} and
 * {@link #addDocumentTyp(DocumentType)}.
 * <p>
 * The extractor is stateful while {@link #extract(List, List)} is running
 * because it holds a {@link SecurityCache} for the duration of that call.
 */
/* package */abstract class AbstractPDFExtractor implements Extractor {
    private static final DateTimeFormatter DATE_FORMAT = DateTimeFormatter.ofPattern("dd.MM.yyyy", Locale.GERMANY); //$NON-NLS-1$

    private final NumberFormat numberFormat = NumberFormat.getInstance(Locale.GERMANY);

    private final Client client;
    private SecurityCache securityCache;
    private final PDFTextStripper textStripper;
    private final List<String> bankIdentifier = new ArrayList<>();
    private final List<DocumentType> documentTypes = new ArrayList<>();

    /**
     * Creates the extractor and a position-sorting text stripper.
     *
     * @param client
     *            the client used for the security cache and base currency
     * @throws IOException
     *             if the PDF text stripper cannot be created
     */
    public AbstractPDFExtractor(Client client) throws IOException {
        this.client = client;

        textStripper = new PDFTextStripper();
        textStripper.setSortByPosition(true);
    }

    /**
     * Registers a document type that is tried against every extracted text.
     *
     * @param type
     *            the document type to add
     */
    protected final void addDocumentTyp(DocumentType type) {
        this.documentTypes.add(type);
    }

    /**
     * Registers a string that identifies documents supported by this
     * extractor.
     *
     * @param identifier
     *            text that must be contained in a supported document
     */
    protected final void addBankIdentifier(String identifier) {
        this.bankIdentifier.add(identifier);
    }

    @Override
    public String getFilterExtension() {
        return "*.pdf"; //$NON-NLS-1$
    }

    /**
     * Extracts items from all given files.
     * <p>
     * Failures for a single file are appended to {@code errors}; the
     * remaining files are still processed.
     *
     * @param files
     *            the PDF files to read
     * @param errors
     *            collector for problems encountered while extracting
     * @return the extracted items plus the items created by the security
     *         cache for securities that were missing
     */
    @Override
    public List<Item> extract(List<File> files, List<Exception> errors) {
        // careful: security cache makes extractor stateful
        securityCache = new SecurityCache(client);

        try {
            List<Item> results = new ArrayList<>();

            for (File f : files) {
                try {
                    String text = strip(f);
                    results.addAll(extract(f.getName(), text, errors));
                } catch (IOException e) {
                    errors.add(new IOException(f.getName() + ": " + e.getMessage(), e)); //$NON-NLS-1$
                }
            }

            results.addAll(securityCache.createMissingSecurityItems(results));
            return results;
        } finally {
            // always drop the cache so that an unexpected failure does not
            // leave state from this run behind
            securityCache = null;
        }
    }

    /**
     * Reads the text content of a PDF file.
     *
     * @param file
     *            the PDF file
     * @return the text as produced by the text stripper
     * @throws IOException
     *             if the document cannot be loaded or read
     */
    /* testing */ protected String strip(File file) throws IOException {
        try (PDDocument doc = PDDocument.load(file)) {
            return textStripper.getText(doc);
        }
    }

    /**
     * Extracts the items of a single document text.
     * <p>
     * Parsing problems (invalid arguments, unparseable dates, unsupported
     * documents) are added to {@code errors} and yield an empty list instead
     * of aborting the extraction of the other files.
     */
    private List<Item> extract(String filename, String text, List<Exception> errors) {
        try {
            checkBankIdentifier(filename, text);

            List<Item> items = new ArrayList<>();

            for (DocumentType type : documentTypes) {
                if (type.matches(text)) {
                    type.parse(filename, items, text);
                }
            }

            if (items.isEmpty()) {
                errors.add(new UnsupportedOperationException(
                                MessageFormat.format(Messages.PDFdbMsgCannotDetermineFileType, filename)));
            }

            for (Item item : items) {
                item.getSubject().setNote(filename);
            }

            return items;
        } catch (IllegalArgumentException | DateTimeParseException e) {
            // asDate reports bad dates as DateTimeParseException; treat them
            // like every other value that could not be parsed
            errors.add(new IllegalArgumentException(e.getMessage() + " @ " + filename, e)); //$NON-NLS-1$
            return Collections.emptyList();
        } catch (UnsupportedOperationException e) {
            errors.add(e);
            return Collections.emptyList();
        }
    }

    /**
     * Verifies that the text contains at least one registered bank
     * identifier. If no identifier has been registered, the extractor label
     * is registered and used as identifier.
     *
     * @throws UnsupportedOperationException
     *             if none of the identifiers is contained in the text
     */
    private void checkBankIdentifier(String filename, String text) {
        if (bankIdentifier.isEmpty()) {
            bankIdentifier.add(getLabel());
        }

        for (String identifier : bankIdentifier) {
            if (text.contains(identifier)) {
                return;
            }
        }

        throw new UnsupportedOperationException( //
                        MessageFormat.format(Messages.PDFMsgFileNotSupported, filename, getLabel()));
    }

    /**
     * Looks up a security in the security cache by the trimmed values of the
     * keys {@code isin}, {@code tickerSymbol}, {@code wkn} and {@code name}.
     * The supplier handed to the cache creates a new security whose currency
     * is derived from the key {@code currency}.
     *
     * @param values
     *            the values parsed from the document
     * @return the security, never {@code null}
     * @throws IllegalArgumentException
     *             if the cache does not return a security
     */
    protected Security getOrCreateSecurity(Map<String, String> values) {
        String isin = trimmed(values.get("isin")); //$NON-NLS-1$
        String tickerSymbol = trimmed(values.get("tickerSymbol")); //$NON-NLS-1$
        String wkn = trimmed(values.get("wkn")); //$NON-NLS-1$
        String name = trimmed(values.get("name")); //$NON-NLS-1$

        Security security = securityCache.lookup(isin, tickerSymbol, wkn, name, () -> {
            Security s = new Security();
            s.setCurrencyCode(asCurrencyCode(values.get("currency"))); //$NON-NLS-1$
            return s;
        });

        if (security == null) {
            throw new IllegalArgumentException("Unable to construct security: " + values.toString()); //$NON-NLS-1$
        }

        return security;
    }

    /**
     * Parses a number in German notation and scales it by the share factor.
     *
     * @param value
     *            the text to parse
     * @return the rounded, scaled number of shares
     * @throws IllegalArgumentException
     *             if the text cannot be parsed as a number
     */
    protected long asShares(String value) {
        return Math.round(parseNumber(value).doubleValue() * Values.Share.factor());
    }

    /**
     * Converts a currency text into a currency code.
     *
     * @param currency
     *            the currency as found in the document, may be {@code null}
     * @return the code of the matching currency unit, or the base currency
     *         of the client if the text is {@code null} or no currency unit
     *         is found for it
     */
    protected String asCurrencyCode(String currency) {
        // ensure that the security is always created with a valid currency
        // code
        if (currency == null) {
            return client.getBaseCurrency();
        }

        CurrencyUnit unit = CurrencyUnit.getInstance(currency.trim());
        return unit == null ? client.getBaseCurrency() : unit.getCurrencyCode();
    }

    /**
     * Parses a number in German notation and scales it by the amount factor.
     *
     * @param value
     *            the text to parse
     * @return the rounded, scaled amount as absolute (non-negative) value
     * @throws IllegalArgumentException
     *             if the text cannot be parsed as a number
     */
    /* protected */long asAmount(String value) {
        return Math.abs(Math.round(parseNumber(value).doubleValue() * Values.Amount.factor()));
    }

    /**
     * Parses an exchange rate in German notation.
     *
     * @param value
     *            the text to parse
     * @return the exchange rate
     * @throws IllegalArgumentException
     *             if the text cannot be parsed as a number
     */
    /* protected */BigDecimal asExchangeRate(String value) {
        return BigDecimal.valueOf(parseNumber(value).doubleValue());
    }

    /**
     * Parses a date in the format {@code dd.MM.yyyy}.
     *
     * @param value
     *            the text to parse
     * @return the parsed date
     * @throws DateTimeParseException
     *             if the text is not a valid date in that format
     */
    /* protected */LocalDate asDate(String value) {
        return LocalDate.parse(value, DATE_FORMAT);
    }

    /**
     * Parses a number with the German number format and converts the checked
     * parse failure into an {@link IllegalArgumentException}.
     */
    private Number parseNumber(String value) {
        try {
            return numberFormat.parse(value);
        } catch (ParseException e) {
            throw new IllegalArgumentException(e);
        }
    }

    /**
     * Returns the trimmed text, or {@code null} if the text is {@code null}.
     */
    private static String trimmed(String value) {
        return value == null ? null : value.trim();
    }
}