package org.jabref.logic.importer.fileformat;
import java.io.BufferedReader;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Path;
import java.util.Objects;
import java.util.Optional;
import org.jabref.logic.exporter.SavePreferences;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.Importer;
import org.jabref.logic.importer.ParserResult;
import org.jabref.logic.util.FileExtensions;
/**
* This importer exists only to enable `--importToOpen someEntry.bib`
*
* It is NOT intended to import a BIB file. This is done via the option action, which treats the metadata fields
* The metadata is not required to be read here, as this class is NOT called at --import
*/
public class BibtexImporter extends Importer {
// Signature written at the top of the .bib file in earlier versions.
private static final String SIGNATURE = "This file was created with JabRef";
private final ImportFormatPreferences importFormatPreferences;
public BibtexImporter(ImportFormatPreferences importFormatPreferences) {
this.importFormatPreferences = importFormatPreferences;
}
/**
* @return true as we have no effective way to decide whether a file is in bibtex format or not. See
* https://github.com/JabRef/jabref/pull/379#issuecomment-158685726 for more details.
*/
@Override
public boolean isRecognizedFormat(BufferedReader reader) {
Objects.requireNonNull(reader);
return true;
}
@Override
public ParserResult importDatabase(Path filePath, Charset defaultEncoding) throws IOException {
// We want to check if there is a JabRef signature in the file, because that would tell us
// which character encoding is used. However, to read the signature we must be using a compatible
// encoding in the first place. Since the signature doesn't contain any fancy characters, we can
// read it regardless of encoding, with either UTF-8 or UTF-16. That's the hypothesis, at any rate.
// 8 bit is most likely, so we try that first:
Optional<Charset> suppliedEncoding;
try (BufferedReader utf8Reader = getUTF8Reader(filePath)) {
suppliedEncoding = getSuppliedEncoding(utf8Reader);
}
// Now if that did not get us anywhere, we check with the 16 bit encoding:
if (!suppliedEncoding.isPresent()) {
try (BufferedReader utf16Reader = getUTF16Reader(filePath)) {
suppliedEncoding = getSuppliedEncoding(utf16Reader);
}
}
if (suppliedEncoding.isPresent()) {
return super.importDatabase(filePath, suppliedEncoding.get());
} else {
return super.importDatabase(filePath, defaultEncoding);
}
}
@Override
public ParserResult importDatabase(BufferedReader reader) throws IOException {
return new BibtexParser(importFormatPreferences).parse(reader);
}
@Override
public String getName() {
return "BibTeX";
}
@Override
public FileExtensions getExtensions() {
return FileExtensions.BIBTEX_DB;
}
@Override
public String getDescription() {
return "This importer exists only to enable `--importToOpen someEntry.bib`\n" +
"It is NOT intended to import a BIB file. This is done via the option action, which treats the metadata fields.\n" +
"The metadata is not required to be read here, as this class is NOT called at --import.";
}
/**
* Searches the file for "Encoding: myEncoding" and returns the found supplied encoding.
*/
private static Optional<Charset> getSuppliedEncoding(BufferedReader reader) {
try {
String line;
while ((line = reader.readLine()) != null) {
line = line.trim();
// Line does not start with %, so there are no comment lines for us and we can stop parsing
if (!line.startsWith("%")) {
return Optional.empty();
}
// Only keep the part after %
line = line.substring(1).trim();
if (line.startsWith(BibtexImporter.SIGNATURE)) {
// Signature line, so keep reading and skip to next line
} else if (line.startsWith(SavePreferences.ENCODING_PREFIX)) {
// Line starts with "Encoding: ", so the rest of the line should contain the name of the encoding
// Except if there is already a @ symbol signaling the starting of a BibEntry
Integer atSymbolIndex = line.indexOf('@');
String encoding;
if (atSymbolIndex > 0) {
encoding = line.substring(SavePreferences.ENCODING_PREFIX.length(), atSymbolIndex);
} else {
encoding = line.substring(SavePreferences.ENCODING_PREFIX.length());
}
return Optional.of(Charset.forName(encoding));
} else {
// Line not recognized so stop parsing
return Optional.empty();
}
}
} catch (IOException ignored) {
// Ignored
}
return Optional.empty();
}
}