package org.jabref.logic.importer.fileformat; import java.io.BufferedReader; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; import java.util.regex.Pattern; import java.util.stream.Collectors; import javax.xml.bind.JAXBContext; import javax.xml.bind.JAXBElement; import javax.xml.bind.JAXBException; import javax.xml.bind.Unmarshaller; import org.jabref.logic.importer.Importer; import org.jabref.logic.importer.ParserResult; import org.jabref.logic.importer.fileformat.mods.AbstractDefinition; import org.jabref.logic.importer.fileformat.mods.DateDefinition; import org.jabref.logic.importer.fileformat.mods.DetailDefinition; import org.jabref.logic.importer.fileformat.mods.ExtentDefinition; import org.jabref.logic.importer.fileformat.mods.GenreDefinition; import org.jabref.logic.importer.fileformat.mods.HierarchicalGeographicDefinition; import org.jabref.logic.importer.fileformat.mods.IdentifierDefinition; import org.jabref.logic.importer.fileformat.mods.IssuanceDefinition; import org.jabref.logic.importer.fileformat.mods.LanguageDefinition; import org.jabref.logic.importer.fileformat.mods.LanguageTermDefinition; import org.jabref.logic.importer.fileformat.mods.LocationDefinition; import org.jabref.logic.importer.fileformat.mods.ModsCollectionDefinition; import org.jabref.logic.importer.fileformat.mods.ModsDefinition; import org.jabref.logic.importer.fileformat.mods.NameDefinition; import org.jabref.logic.importer.fileformat.mods.NamePartDefinition; import org.jabref.logic.importer.fileformat.mods.NoteDefinition; import org.jabref.logic.importer.fileformat.mods.OriginInfoDefinition; import org.jabref.logic.importer.fileformat.mods.PartDefinition; import org.jabref.logic.importer.fileformat.mods.PhysicalLocationDefinition; import org.jabref.logic.importer.fileformat.mods.PlaceDefinition; import org.jabref.logic.importer.fileformat.mods.PlaceTermDefinition; import org.jabref.logic.importer.fileformat.mods.RecordInfoDefinition; import org.jabref.logic.importer.fileformat.mods.RelatedItemDefinition; import org.jabref.logic.importer.fileformat.mods.StringPlusLanguage; import org.jabref.logic.importer.fileformat.mods.StringPlusLanguagePlusAuthority; import org.jabref.logic.importer.fileformat.mods.StringPlusLanguagePlusSupplied; import org.jabref.logic.importer.fileformat.mods.SubjectDefinition; import org.jabref.logic.importer.fileformat.mods.TitleInfoDefinition; import org.jabref.logic.importer.fileformat.mods.UrlDefinition; import org.jabref.logic.util.FileExtensions; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.FieldName; import org.jabref.preferences.JabRefPreferences; import com.google.common.base.Joiner; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; /** * Importer for the MODS format.<br> * More details about the format can be found here <a href="http://www.loc.gov/standards/mods/">http://www.loc.gov/standards/mods/</a>. <br> * The newest xml schema can also be found here <a href="www.loc.gov/standards/mods/mods-schemas.html.">www.loc.gov/standards/mods/mods-schemas.html.</a>. */ public class ModsImporter extends Importer { private static final Log LOGGER = LogFactory.getLog(ModsImporter.class); private static final String KEYWORD_SEPARATOR = JabRefPreferences.getInstance().getImportFormatPreferences() .getKeywordSeparator() + " "; private static final Pattern MODS_PATTERN = Pattern.compile("<mods .*>"); private JAXBContext context; @Override public boolean isRecognizedFormat(BufferedReader input) throws IOException { return input.lines().anyMatch(line -> MODS_PATTERN.matcher(line).find()); } @Override public ParserResult importDatabase(BufferedReader input) throws IOException { Objects.requireNonNull(input); List<BibEntry> bibItems = new ArrayList<>(); try { if (context == null) { context = JAXBContext.newInstance("org.jabref.logic.importer.fileformat.mods"); } Unmarshaller unmarshaller = context.createUnmarshaller(); //The unmarshalled object is a jaxbElement. JAXBElement<?> unmarshalledObject = (JAXBElement<?>) unmarshaller.unmarshal(input); Optional<ModsCollectionDefinition> collection = getElement(unmarshalledObject.getValue(), ModsCollectionDefinition.class); Optional<ModsDefinition> mods = getElement(unmarshalledObject.getValue(), ModsDefinition.class); if (collection.isPresent()) { List<ModsDefinition> modsDefinitions = collection.get().getMods(); parseModsCollection(bibItems, modsDefinitions); } else if (mods.isPresent()) { ModsDefinition modsDefinition = mods.get(); parseMods(bibItems, modsDefinition); } else { LOGGER.warn("Not expected root element found"); } } catch (JAXBException e) { LOGGER.debug("could not parse document", e); return ParserResult.fromError(e); } return new ParserResult(bibItems); } private void parseModsCollection(List<BibEntry> bibItems, List<ModsDefinition> mods) { for (ModsDefinition modsDefinition : mods) { parseMods(bibItems, modsDefinition); } } private void parseMods(List<BibEntry> bibItems, ModsDefinition modsDefinition) { BibEntry entry = new BibEntry(); Map<String, String> fields = new HashMap<>(); if (modsDefinition.getID() != null) { entry.setCiteKey(modsDefinition.getID()); } if (modsDefinition.getModsGroup() != null) { parseModsGroup(fields, modsDefinition.getModsGroup(), entry); } entry.setField(fields); bibItems.add(entry); } private void parseModsGroup(Map<String, String> fields, List<Object> modsGroup, BibEntry entry) { List<String> keywords = new ArrayList<>(); List<String> authors = new ArrayList<>(); List<String> notes = new ArrayList<>(); for (Object groupElement : modsGroup) { //Get the element. Only one of the elements should be not an empty optional. Optional<AbstractDefinition> abstractDefinition = getElement(groupElement, AbstractDefinition.class); Optional<GenreDefinition> genreDefinition = getElement(groupElement, GenreDefinition.class); Optional<LanguageDefinition> languageDefinition = getElement(groupElement, LanguageDefinition.class); Optional<LocationDefinition> locationDefinition = getElement(groupElement, LocationDefinition.class); Optional<NameDefinition> nameDefinition = getElement(groupElement, NameDefinition.class); Optional<OriginInfoDefinition> originInfoDefinition = getElement(groupElement, OriginInfoDefinition.class); Optional<RecordInfoDefinition> recordInfoDefinition = getElement(groupElement, RecordInfoDefinition.class); Optional<NoteDefinition> noteDefinition = getElement(groupElement, NoteDefinition.class); Optional<RelatedItemDefinition> relatedItemDefinition = getElement(groupElement, RelatedItemDefinition.class); Optional<SubjectDefinition> subjectDefinition = getElement(groupElement, SubjectDefinition.class); Optional<IdentifierDefinition> identifierDefinition = getElement(groupElement, IdentifierDefinition.class); Optional<TitleInfoDefinition> titleInfoDefinition = getElement(groupElement, TitleInfoDefinition.class); //Now parse the information if the element is present abstractDefinition .ifPresent(abstractDef -> putIfValueNotNull(fields, FieldName.ABSTRACT, abstractDef.getValue())); genreDefinition.ifPresent(genre -> entry.setType(genre.getValue())); languageDefinition.ifPresent( languageDef -> languageDef.getLanguageTerm().stream().map(LanguageTermDefinition::getValue) .forEach(language -> putIfValueNotNull(fields, FieldName.LANGUAGE, language))); locationDefinition.ifPresent(location -> parseLocationAndUrl(fields, location)); nameDefinition.ifPresent(name -> handleAuthorsInNamePart(name, authors, fields)); originInfoDefinition.ifPresent(originInfo -> originInfo.getPlaceOrPublisherOrDateIssued().stream() .forEach(element -> putPlaceOrPublisherOrDate(fields, element.getName().getLocalPart(), element.getValue()))); recordInfoDefinition.ifPresent(recordInfo -> parseRecordInfo(fields, recordInfo)); noteDefinition.ifPresent(note -> notes.add(note.getValue())); relatedItemDefinition.ifPresent(relatedItem -> parseRelatedModsGroup(fields, relatedItem.getModsGroup())); subjectDefinition .ifPresent(subject -> parseTopic(fields, subject.getTopicOrGeographicOrTemporal(), keywords)); identifierDefinition.ifPresent(identifier -> parseIdentifier(fields, identifier, entry)); titleInfoDefinition.ifPresent(titleInfo -> parseTitle(fields, titleInfo.getTitleOrSubTitleOrPartNumber())); } //The element subject can appear more than one time, that's why the keywords has to be put out of the for loop putIfListIsNotEmpty(fields, keywords, FieldName.KEYWORDS, KEYWORD_SEPARATOR); //same goes for authors and notes putIfListIsNotEmpty(fields, authors, FieldName.AUTHOR, " and "); putIfListIsNotEmpty(fields, notes, FieldName.NOTE, ", "); } private void parseTitle(Map<String, String> fields, List<Object> titleOrSubTitleOrPartNumber) { for (Object object : titleOrSubTitleOrPartNumber) { if (object instanceof JAXBElement) { @SuppressWarnings("unchecked") JAXBElement<StringPlusLanguage> element = (JAXBElement<StringPlusLanguage>) object; if ("title".equals(element.getName().getLocalPart())) { StringPlusLanguage title = element.getValue(); fields.put(FieldName.TITLE, title.getValue()); } } } } private void parseIdentifier(Map<String, String> fields, IdentifierDefinition identifier, BibEntry entry) { String type = identifier.getType(); if ("citekey".equals(type) && !entry.getCiteKeyOptional().isPresent()) { entry.setCiteKey(identifier.getValue()); } else if (!"local".equals(type) && !"citekey".equals(type)) { //put all identifiers (doi, issn, isbn,...) except of local and citekey putIfValueNotNull(fields, identifier.getType(), identifier.getValue()); } } private void parseTopic(Map<String, String> fields, List<JAXBElement<?>> topicOrGeographicOrTemporal, List<String> keywords) { for (JAXBElement<?> jaxbElement : topicOrGeographicOrTemporal) { Object value = jaxbElement.getValue(); String elementName = jaxbElement.getName().getLocalPart(); if (value instanceof HierarchicalGeographicDefinition) { HierarchicalGeographicDefinition hierarchichalGeographic = (HierarchicalGeographicDefinition) value; parseGeographicInformation(fields, hierarchichalGeographic); } else if ((value instanceof StringPlusLanguagePlusAuthority) && "topic".equals(elementName)) { StringPlusLanguagePlusAuthority topic = (StringPlusLanguagePlusAuthority) value; keywords.add(topic.getValue().trim()); } } } /** * Returns an Optional which contains an instance of the given class, if the given element can be cast to this class. * If the element cannot be cast to the given class, then an empty optional will be returned. * * @param groupElement The element that should be cast * @param clazz The class to which groupElement should be cast * @return An Optional, that contains the groupElement as instance of clazz, if groupElement can be cast to clazz. * An empty Optional, if groupElement cannot be cast to clazz */ private <T> Optional<T> getElement(Object groupElement, Class<T> clazz) { if (clazz.isAssignableFrom(groupElement.getClass())) { return Optional.of(clazz.cast(groupElement)); } return Optional.empty(); } private void parseGeographicInformation(Map<String, String> fields, HierarchicalGeographicDefinition hierarchichalGeographic) { List<JAXBElement<? extends StringPlusLanguage>> areaOrContinentOrCountry = hierarchichalGeographic .getExtraTerrestrialAreaOrContinentOrCountry(); for (JAXBElement<? extends StringPlusLanguage> element : areaOrContinentOrCountry) { String localName = element.getName().getLocalPart(); if ("city".equals(localName)) { StringPlusLanguage city = element.getValue(); putIfValueNotNull(fields, "city", city.getValue()); } else if ("country".equals(localName)) { StringPlusLanguage country = element.getValue(); putIfValueNotNull(fields, "country", country.getValue()); } } } private void parseLocationAndUrl(Map<String, String> fields, LocationDefinition locationDefinition) { List<String> locations = locationDefinition.getPhysicalLocation().stream() .map(PhysicalLocationDefinition::getValue).collect(Collectors.toList()); putIfListIsNotEmpty(fields, locations, FieldName.LOCATION, ", "); List<String> urls = locationDefinition.getUrl().stream().map(UrlDefinition::getValue) .collect(Collectors.toList()); putIfListIsNotEmpty(fields, urls, FieldName.URL, ", "); } private void parseRecordInfo(Map<String, String> fields, RecordInfoDefinition recordInfo) { List<JAXBElement<?>> recordContent = recordInfo.getRecordContentSourceOrRecordCreationDateOrRecordChangeDate(); for (JAXBElement<?> jaxbElement : recordContent) { Object value = jaxbElement.getValue(); if (value instanceof StringPlusLanguagePlusAuthority) { StringPlusLanguagePlusAuthority source = (StringPlusLanguagePlusAuthority) value; putIfValueNotNull(fields, "source", source.getValue()); } else if (value instanceof LanguageDefinition) { LanguageDefinition language = (LanguageDefinition) value; List<LanguageTermDefinition> languageTerms = language.getLanguageTerm(); List<String> languages = languageTerms.stream().map(LanguageTermDefinition::getValue) .collect(Collectors.toList()); putIfListIsNotEmpty(fields, languages, FieldName.LANGUAGE, ", "); } } } /** * Puts the Information from the RelatedModsGroup. It has the same elements like the ModsGroup. * But Informations like volume, issue and the pages appear here instead of in the ModsGroup. * Also if there appears a title field, then this indicates that is the name of journal which the article belongs to. */ private void parseRelatedModsGroup(Map<String, String> fields, List<Object> relatedModsGroup) { for (Object groupElement : relatedModsGroup) { if (groupElement instanceof PartDefinition) { PartDefinition part = (PartDefinition) groupElement; List<Object> detailOrExtentOrDate = part.getDetailOrExtentOrDate(); for (Object object : detailOrExtentOrDate) { if (object instanceof DetailDefinition) { DetailDefinition detail = (DetailDefinition) object; List<JAXBElement<StringPlusLanguage>> numberOrCaptionOrTitle = detail .getNumberOrCaptionOrTitle(); //In the for loop should only be the value of the element that belongs to the detail not be null for (JAXBElement<StringPlusLanguage> jaxbElement : numberOrCaptionOrTitle) { StringPlusLanguage value = jaxbElement.getValue(); //put details like volume, issue,... putIfValueNotNull(fields, detail.getType(), value.getValue()); } } else if (object instanceof ExtentDefinition) { ExtentDefinition extentDefinition = (ExtentDefinition) object; putPageInformation(extentDefinition, fields); } } } else if (groupElement instanceof TitleInfoDefinition) { TitleInfoDefinition titleInfo = (TitleInfoDefinition) groupElement; List<Object> titleOrSubTitleOrPartNumber = titleInfo.getTitleOrSubTitleOrPartNumber(); for (Object object : titleOrSubTitleOrPartNumber) { if (object instanceof JAXBElement) { @SuppressWarnings("unchecked") JAXBElement<StringPlusLanguage> element = (JAXBElement<StringPlusLanguage>) object; if ("title".equals(element.getName().getLocalPart())) { StringPlusLanguage journal = element.getValue(); fields.put(FieldName.JOURNAL, journal.getValue()); } } } } } } private void putPageInformation(ExtentDefinition extentDefinition, Map<String, String> fields) { if (extentDefinition.getTotal() != null) { putIfValueNotNull(fields, FieldName.PAGES, String.valueOf(extentDefinition.getTotal())); } else if (extentDefinition.getStart() != null) { putIfValueNotNull(fields, FieldName.PAGES, extentDefinition.getStart().getValue()); if (extentDefinition.getEnd() != null) { String endPage = extentDefinition.getEnd().getValue(); //if end appears, then there has to be a start page appeared, so get it and put it together with //the end page String startPage = fields.get(FieldName.PAGES); fields.put(FieldName.PAGES, startPage + "-" + endPage); } } } private void putPlaceOrPublisherOrDate(Map<String, String> fields, String elementName, Object object) { Optional<IssuanceDefinition> issuanceDefinition = getElement(object, IssuanceDefinition.class); Optional<PlaceDefinition> placeDefinition = getElement(object, PlaceDefinition.class); Optional<DateDefinition> dateDefinition = getElement(object, DateDefinition.class); Optional<StringPlusLanguagePlusSupplied> publisherOrEdition = getElement(object, StringPlusLanguagePlusSupplied.class); issuanceDefinition.ifPresent(issuance -> putIfValueNotNull(fields, "issuance", issuance.value())); List<String> places = new ArrayList<>(); placeDefinition .ifPresent(place -> place.getPlaceTerm().stream().filter(placeTerm -> placeTerm.getValue() != null) .map(PlaceTermDefinition::getValue).forEach(element -> places.add(element))); putIfListIsNotEmpty(fields, places, FieldName.ADDRESS, ", "); dateDefinition.ifPresent(date -> putDate(fields, elementName, date)); publisherOrEdition.ifPresent(pubOrEd -> putPublisherOrEdition(fields, elementName, pubOrEd)); } private void putPublisherOrEdition(Map<String, String> fields, String elementName, StringPlusLanguagePlusSupplied pubOrEd) { if ("publisher".equals(elementName)) { putIfValueNotNull(fields, FieldName.PUBLISHER, pubOrEd.getValue()); } else if ("edition".equals(elementName)) { putIfValueNotNull(fields, FieldName.EDITION, pubOrEd.getValue()); } } private void putDate(Map<String, String> fields, String elementName, DateDefinition date) { if (date.getValue() != null) { switch (elementName) { case "dateIssued": //The first 4 digits of dateIssued should be the year fields.put(FieldName.YEAR, date.getValue().substring(0, 4)); break; case "dateCreated": //If there was no year in date issued, then take the year from date created if (fields.get(FieldName.YEAR) == null) { fields.put(FieldName.YEAR, date.getValue().substring(0, 4)); } fields.put("created", date.getValue()); break; case "dateCaptured": fields.put("captured", date.getValue()); break; case "dateModified": fields.put("modified", date.getValue()); break; default: break; } } } private void putIfListIsNotEmpty(Map<String, String> fields, List<String> list, String key, String separator) { if (!list.isEmpty()) { fields.put(key, Joiner.on(separator).join(list)); } } private void handleAuthorsInNamePart(NameDefinition name, List<String> authors, Map<String, String> fields) { List<JAXBElement<?>> namePartOrDisplayFormOrAffiliation = name.getNamePartOrDisplayFormOrAffiliation(); List<String> foreName = new ArrayList<>(); String familyName = ""; String author = ""; for (JAXBElement<?> element : namePartOrDisplayFormOrAffiliation) { Object value = element.getValue(); String elementName = element.getName().getLocalPart(); if (value instanceof NamePartDefinition) { NamePartDefinition namePart = (NamePartDefinition) value; String type = namePart.getAtType(); if ((type == null) && (namePart.getValue() != null)) { authors.add(namePart.getValue()); } else if ("family".equals(type) && (namePart.getValue() != null)) { //family should come first, so if family appears we can set the author then comes before //we have to check if forename and family name are not empty in case it's the first author if (!foreName.isEmpty() && !familyName.isEmpty()) { //now set and add the old author author = familyName + ", " + Joiner.on(" ").join(foreName); authors.add(author); //remove old forenames foreName.clear(); } else if (foreName.isEmpty() && !familyName.isEmpty()) { authors.add(familyName); } familyName = namePart.getValue(); } else if ("given".equals(type) && (namePart.getValue() != null)) { foreName.add(namePart.getValue()); } } else if ((value instanceof StringPlusLanguage) && "affiliation".equals(elementName)) { StringPlusLanguage affiliation = (StringPlusLanguage) value; putIfValueNotNull(fields, "affiliation", affiliation.getValue()); } } //last author is not added, so do it here if (!foreName.isEmpty() && !familyName.isEmpty()) { author = familyName + ", " + Joiner.on(" ").join(foreName); authors.add(author.trim()); foreName.clear(); } else if (foreName.isEmpty() && !familyName.isEmpty()) { authors.add(familyName.trim()); } } private void putIfValueNotNull(Map<String, String> fields, String modsKey, String value) { if (value != null) { fields.put(modsKey, value); } } @Override public String getName() { return "MODS"; } @Override public FileExtensions getExtensions() { return FileExtensions.MODS; } @Override public String getDescription() { return "Importer for the MODS format"; } }