OvidImporter.java example

Explorer
jabref-master
- src
package org.jabref.logic.importer.fileformat;

import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jabref.logic.importer.Importer;
import org.jabref.logic.importer.ParserResult;
import org.jabref.logic.util.FileExtensions;
import org.jabref.model.entry.AuthorList;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.FieldName;

/**
 * Imports an Ovid file.
 * <p>
 * The input is treated as a sequence of records, each introduced by a marker of the form
 * {@code <n>} (digits in angle brackets). Within a record, every non-empty line that does not
 * start with a space begins a new field: the first line of the field is its name and the
 * remaining lines are its content.
 */
public class OvidImporter extends Importer {

    /** "Source" layout yielding journal, volume, issue, pages and year (groups 1-5). */
    private static final Pattern OVID_SOURCE_PATTERN = Pattern
            .compile("Source ([ \\w&\\-,:]+)\\.[ ]+([0-9]+)\\(([\\w\\-]+)\\):([0-9]+\\-?[0-9]+?)\\,.*([0-9][0-9][0-9][0-9])");

    /** Same as {@link #OVID_SOURCE_PATTERN} but without an issue: journal, volume, pages, year (groups 1-4). */
    private static final Pattern OVID_SOURCE_PATTERN_NO_ISSUE = Pattern
            .compile("Source ([ \\w&\\-,:]+)\\.[ ]+([0-9]+):([0-9]+\\-?[0-9]+?)\\,.*([0-9][0-9][0-9][0-9])");

    /** "Vol" layout yielding journal, volume, issue, month, year and pages (groups 1-6). */
    private static final Pattern OVID_SOURCE_PATTERN_2 = Pattern.compile(
            "([ \\w&\\-,]+)\\. Vol ([0-9]+)\\(([\\w\\-]+)\\) ([A-Za-z]+) ([0-9][0-9][0-9][0-9]), ([0-9]+\\-?[0-9]+)");

    /** Layout yielding editor, year, book title, pages, address and publisher (groups 1-6). */
    private static final Pattern INCOLLECTION_PATTERN = Pattern.compile(
            "(.+)\\(([0-9][0-9][0-9][0-9])\\)\\. ([ \\w&\\-,:]+)\\.[ ]+\\(pp. ([0-9]+\\-?[0-9]+?)\\).[A-Za-z0-9, ]+pp\\. "
                    + "([\\w, ]+): ([\\w, ]+)");

    /** Layout yielding year, page count, address and publisher (groups 1-4). */
    private static final Pattern BOOK_PATTERN = Pattern.compile(
            "\\(([0-9][0-9][0-9][0-9])\\)\\. [A-Za-z, ]+([0-9]+) pp\\. ([\\w, ]+): ([\\w, ]+)");

    /** Marker that introduces a record, e.g. {@code <12>}. */
    private static final String OVID_PATTERN_STRING = "<[0-9]+>";
    private static final Pattern OVID_PATTERN = Pattern.compile(OVID_PATTERN_STRING);

    /** Maximum number of lines inspected by {@link #isRecognizedFormat(BufferedReader)}. */
    private static final int MAX_ITEMS = 50;

    /** Internal token inserted in front of every line that starts a new field. */
    private static final String FIELD_SEPARATOR = "__NEWFIELD__";

    /** Temporary map key holding a chapter title until the entry type is known. */
    private static final String CHAPTER_TITLE = "chaptertitle";

    @Override
    public String getName() {
        return "Ovid";
    }

    @Override
    public FileExtensions getExtensions() {
        return FileExtensions.OVID;
    }

    @Override
    public String getDescription() {
        return "Imports an Ovid file.";
    }

    /**
     * Reports whether one of the first {@link #MAX_ITEMS} lines contains a record marker
     * of the form {@code <n>}.
     *
     * @param reader the input to inspect
     * @return {@code true} if a record marker was found, {@code false} otherwise
     * @throws IOException if reading from {@code reader} fails
     */
    @Override
    public boolean isRecognizedFormat(BufferedReader reader) throws IOException {
        String str;
        int i = 0;
        while (((str = reader.readLine()) != null) && (i < MAX_ITEMS)) {

            if (OvidImporter.OVID_PATTERN.matcher(str).find()) {
                return true;
            }

            i++;
        }
        return false;
    }

    /**
     * Parses all records found in the input into entries.
     *
     * @param reader the input to parse
     * @return a parser result holding one entry per record marker found
     * @throws IOException if reading from {@code reader} fails
     */
    @Override
    public ParserResult importDatabase(BufferedReader reader) throws IOException {
        String[] items = OVID_PATTERN.split(readMarkedUpContent(reader));

        List<BibEntry> bibitems = new ArrayList<>();
        // items[0] is whatever precedes the first record marker, so it is not a record.
        for (int i = 1; i < items.length; i++) {
            bibitems.add(parseEntry(items[i]));
        }

        return new ParserResult(bibitems);
    }

    /**
     * Reads the whole input and prefixes every line that starts a new field with {@link #FIELD_SEPARATOR}.
     */
    private static String readMarkedUpContent(BufferedReader reader) throws IOException {
        StringBuilder sb = new StringBuilder();
        String line;
        while ((line = reader.readLine()) != null) {
            // A non-empty line that is not indented by a space is a field header.
            if (!line.isEmpty() && (line.charAt(0) != ' ')) {
                sb.append(FIELD_SEPARATOR);
            }
            sb.append(line);
            sb.append('\n');
        }
        return sb.toString();
    }

    /**
     * Converts the text of a single record (everything between two record markers) into an entry.
     */
    private static BibEntry parseEntry(String item) {
        Map<String, String> h = new HashMap<>();
        for (String field : item.split(FIELD_SEPARATOR)) {
            int linebreak = field.indexOf('\n');
            if (linebreak < 0) {
                // A fragment without a line break has no content (e.g. two record markers on
                // the same line); substring(0, -1) would throw, so skip it.
                continue;
            }
            String fieldName = field.substring(0, linebreak).trim();
            String content = field.substring(linebreak).trim();
            storeField(h, fieldName, content);
        }

        // Now we need to check if a book entry has given editors in the author field;
        // if so, rearrange:
        String auth = h.get(FieldName.AUTHOR);
        if ((auth != null) && auth.contains(" [Ed]")) {
            h.remove(FieldName.AUTHOR);
            h.put(FieldName.EDITOR, auth.replace(" [Ed]", ""));
        }

        // Rearrange names properly:
        auth = h.get(FieldName.AUTHOR);
        if (auth != null) {
            h.put(FieldName.AUTHOR, fixNames(auth));
        }
        auth = h.get(FieldName.EDITOR);
        if (auth != null) {
            h.put(FieldName.EDITOR, fixNames(auth));
        }

        // Set the entrytype properly; the type is not a regular field, so take it out of the map:
        String entryType = h.remove(BibEntry.TYPE_HEADER);
        if (entryType == null) {
            entryType = BibEntry.DEFAULT_TYPE;
        }
        if ("book".equals(entryType) && h.containsKey(CHAPTER_TITLE)) {
            // This means we have an "incollection" entry.
            entryType = "incollection";
            // Move the "chaptertitle" to just "title":
            h.put(FieldName.TITLE, h.remove(CHAPTER_TITLE));
        }
        BibEntry b = new BibEntry(entryType);
        b.setField(h);
        return b;
    }

    /**
     * Stores one field of a record in the map, keyed by the corresponding entry field name.
     * Fields with an unrecognized name are ignored.
     *
     * @param h          the map collecting the fields of the current record
     * @param fieldName  the trimmed header line of the field
     * @param rawContent the trimmed content following the header line
     */
    private static void storeField(Map<String, String> h, String fieldName, String rawContent) {
        String content = rawContent;

        // Check if this is the author field (due to a minor special treatment for this field):
        boolean isAuthor = fieldName.startsWith("Author")
                && !fieldName.contains("Author Keywords")
                && !fieldName.contains("Author e-mail");

        // Remove unnecessary dots at the end of lines, unless this is the author field,
        // in which case a dot at the end could be significant:
        if (!isAuthor && content.endsWith(".")) {
            content = content.substring(0, content.length() - 1);
        }

        if (isAuthor) {
            h.put(FieldName.AUTHOR, content);
        } else if (fieldName.startsWith("Title")) {
            // Drop bracketed parts and a dot that may be left at the end afterwards.
            content = content.replaceAll("\\[.+\\]", "").trim();
            if (content.endsWith(".")) {
                content = content.substring(0, content.length() - 1);
            }
            h.put(FieldName.TITLE, content);
        } else if (fieldName.startsWith("Chapter Title")) {
            h.put(CHAPTER_TITLE, content);
        } else if (fieldName.startsWith("Source")) {
            parseSource(h, content);
        } else if ("Abstract".equals(fieldName)) {
            h.put(FieldName.ABSTRACT, content);
        } else if ("Publication Type".equals(fieldName)) {
            if (content.contains("Book")) {
                h.put(BibEntry.TYPE_HEADER, "book");
            } else if (content.contains("Journal")) {
                h.put(BibEntry.TYPE_HEADER, "article");
            } else if (content.contains("Conference Paper")) {
                h.put(BibEntry.TYPE_HEADER, "inproceedings");
            }
        } else if (fieldName.startsWith("Language")) {
            h.put(FieldName.LANGUAGE, content);
        } else if (fieldName.startsWith("Author Keywords")) {
            content = content.replace(";", ",").replace("  ", " ");
            h.put(FieldName.KEYWORDS, content);
        } else if (fieldName.startsWith("ISSN")) {
            h.put(FieldName.ISSN, content);
        } else if (fieldName.startsWith("DOI Number")) {
            h.put(FieldName.DOI, content);
        }
    }

    /**
     * Extracts the bibliographic details from the content of a "Source" field, trying the known
     * layouts in order and using the first one that matches. If none matches, the map is left
     * untouched.
     *
     * @param h       the map collecting the fields of the current record
     * @param content the content of the "Source" field
     */
    private static void parseSource(Map<String, String> h, String content) {
        Matcher matcher;
        if ((matcher = OVID_SOURCE_PATTERN.matcher(content)).find()) {
            h.put(FieldName.JOURNAL, matcher.group(1));
            h.put(FieldName.VOLUME, matcher.group(2));
            h.put(FieldName.ISSUE, matcher.group(3));
            h.put(FieldName.PAGES, matcher.group(4));
            h.put(FieldName.YEAR, matcher.group(5));
        } else if ((matcher = OVID_SOURCE_PATTERN_NO_ISSUE.matcher(content)).find()) { // may be missing the issue
            h.put(FieldName.JOURNAL, matcher.group(1));
            h.put(FieldName.VOLUME, matcher.group(2));
            h.put(FieldName.PAGES, matcher.group(3));
            h.put(FieldName.YEAR, matcher.group(4));
        } else if ((matcher = OVID_SOURCE_PATTERN_2.matcher(content)).find()) {
            h.put(FieldName.JOURNAL, matcher.group(1));
            h.put(FieldName.VOLUME, matcher.group(2));
            h.put(FieldName.ISSUE, matcher.group(3));
            h.put(FieldName.MONTH, matcher.group(4));
            h.put(FieldName.YEAR, matcher.group(5));
            h.put(FieldName.PAGES, matcher.group(6));
        } else if ((matcher = INCOLLECTION_PATTERN.matcher(content)).find()) {
            h.put(FieldName.EDITOR, matcher.group(1).replace(" (Ed)", ""));
            h.put(FieldName.YEAR, matcher.group(2));
            h.put(FieldName.BOOKTITLE, matcher.group(3));
            h.put(FieldName.PAGES, matcher.group(4));
            h.put(FieldName.ADDRESS, matcher.group(5));
            h.put(FieldName.PUBLISHER, matcher.group(6));
        } else if ((matcher = BOOK_PATTERN.matcher(content)).find()) {
            h.put(FieldName.YEAR, matcher.group(1));
            h.put(FieldName.PAGES, matcher.group(2));
            h.put(FieldName.ADDRESS, matcher.group(3));
            h.put(FieldName.PUBLISHER, matcher.group(4));
        } else {
            // Unknown layout: nothing was stored, and pages stored by an earlier "Source"
            // field must not have their hyphens doubled a second time.
            return;
        }

        // Add double hyphens to page ranges (every branch above has just stored the pages):
        h.put(FieldName.PAGES, h.get(FieldName.PAGES).replace("-", "--"));
    }

    /**
     * Convert a string of author names into a BibTeX-compatible format.
     * <p>
     * Three input shapes are distinguished: names separated by semicolons, names separated by
     * two spaces (where the first single space inside each name is turned into a comma), and
     * anything else, which is passed on unchanged. The result is finally handed to
     * {@link AuthorList#fixAuthorLastNameFirst(String)}.
     *
     * @param content The name string.
     * @return The formatted names.
     */
    private static String fixNames(String content) {
        String names;
        if (content.indexOf(';') > 0) { //LN FN; [LN FN;]*
            // Keep only letters, dots, commas, semicolons, hyphens and spaces, then join with "and".
            names = content.replaceAll("[^\\.A-Za-z,;\\- ]", "").replace(";", " and");
        } else if (content.indexOf("  ") > 0) {
            String[] sNames = content.split("  ");
            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < sNames.length; i++) {
                if (i > 0) {
                    sb.append(" and ");
                }
                sb.append(sNames[i].replaceFirst(" ", ", "));
            }
            names = sb.toString();
        } else {
            names = content;
        }
        return AuthorList.fixAuthorLastNameFirst(names);
    }

}