package edu.byu.cs.roots.opg.io;
import java.io.IOException;
import static edu.byu.cs.roots.opg.io.FSM.*;
import edu.byu.cs.roots.opg.model.Event;
import edu.byu.cs.roots.opg.model.EventType;
import edu.byu.cs.roots.opg.model.Family;
import edu.byu.cs.roots.opg.model.Gender;
import edu.byu.cs.roots.opg.model.Individual;
/**
 * GEDCOM parser - parses any valid GEDCOM file.
 * @author Graham Henry
 */
public class GedcomParser {
private FSM tokenizer;
private GedcomRecord record;
/**
 * Creates a GedcomParser and initializes the lexer that reads the file.
 * @param fileToParse - the file name of the gedcom you wish to parse,
 * note: the file name doesn't have to end in .ged
 * @throws IOException - propagated from the FSM constructor
 */
public GedcomParser(String fileToParse) throws IOException {
    tokenizer = new FSM(fileToParse);
}
/**
 * Parses the gedcom file that was given in the constructor.
 * <p>
 * The head is parsed first, then the body, and finally linkRecord() is called
 * on the record produced by the body parser. Parsing is best-effort: an
 * IOException is treated as the end of the input, and any other unexpected
 * exception is reported on System.err instead of being silently discarded.
 * @return the GedcomRecord holding the families and individuals found in the
 * gedcom file; null if parsing was aborted before the body was fully parsed
 * @throws InvalidSyntaxException if the file is not syntactically valid; the
 * exception is printed to System.err before it is rethrown
 * @throws IOException declared for callers; IOExceptions raised while parsing
 * are caught and reported inside this method
 */
public GedcomRecord parseGedcom() throws InvalidSyntaxException, IOException {
    // Local result: stays null until parseBody() has returned normally.
    GedcomRecord parsed = null;
    try {
        parseHead();
        parsed = parseBody();
        parsed.linkRecord();
    } catch (InvalidSyntaxException e) {
        System.err.println("INVALID SYNTAX:\n" + e);
        throw e;
    } catch (IOException e) {
        // Kept from the original: an I/O failure is reported as end of file.
        System.err.println("EOF");
    } catch (Exception e) {
        // Previously swallowed without a trace; keep the best-effort return
        // but make the failure visible to whoever is watching stderr.
        System.err.println("PARSE ABORTED on line " + tokenizer.lineNumber + ": " + e);
    }
    return parsed;
}
/**
 * Parses the head of the gedcom file by assuring it starts with "0 HEAD",
 * and skips the rest of the head record.
 * @throws IOException propagated from the tokenizer
 * @throws InvalidSyntaxException if the first token is not LEVEL_0 or the
 * token after it is not HEAD
 */
private void parseHead() throws IOException, InvalidSyntaxException {
    System.out.println("Parsing HEAD");
    // Both tokens must match. With "||" the second token is only read when the
    // first one is LEVEL_0; the former "&&" never checked HEAD in that case.
    if (tokenizer.nextTokenId() != LEVEL_0 || tokenizer.nextTokenId() != HEAD) {
        throw new InvalidSyntaxException("INVALID HEADER", tokenizer.lineNumber);
    }
    skipRecord(LEVEL_0);
}
/**
 * Parses the body of the gedcom file, which contains individuals and
 * families, in whatever order they appear.
 * <p>
 * Each pass reads one word, and if it contains an '@' takes the part before
 * the first '@' as the record id. The token that follows decides whether the
 * record is parsed as an individual, parsed as a family, or skipped.
 * @return An unlinked GedcomRecord (also stored in the record field) that
 * holds the families and individuals of the gedcom file. The linking is done
 * in the parseGedcom() method
 * @throws IOException propagated from the tokenizer
 * @throws InvalidSyntaxException if splitting the id word on '@' yields nothing,
 * or if a nested record is invalid
 */
private GedcomRecord parseBody() throws IOException, InvalidSyntaxException {
    System.out.println("Parsing BODY");
    record = new GedcomRecord();
    while (true) {
        // Two successive peeks, exactly as the loop guard has always done:
        // the second one only happens when the first is not TRLR.
        if (tokenizer.peekTokenId() == TRLR) {
            break;
        }
        if (tokenizer.peekTokenId() == TRAILER) {
            break;
        }
        tokenizer.resetToken();

        String xref = tokenizer.nextWord();
        if (!xref.contains("@")) {
            // Not a GEDCOM cross-reference id delimited by '@' characters:
            // skip to the next record.
            skipRecord(LEVEL_0);
            continue;
        }
        String[] parts = xref.split("@");
        if (parts.length == 0) {
            throw new InvalidSyntaxException("Invalid Individual ID", tokenizer.lineNumber);
        }
        String id = parts[0];

        int tag = tokenizer.peekTokenId();
        if (tag == INDIVIDUAL || tag == INDI) {
            record.addIndividual(id, parseIndividual(id));
        } else if (tag == FAMILY || tag == FAM) {
            record.addFamily(id, parseFamily(id));
        } else if (tag == IGNORE) {
            System.out.println("IGNORE: " + tag + " On Line " + (tokenizer.lineNumber - 1));
            skipRecord(LEVEL_0);
        } else if (tag == EOF) {
            return record;
        } else {
            // Unknown record type: report the token id and skip the record.
            System.out.println(tag + " On Line " + (tokenizer.lineNumber - 1));
            skipRecord(LEVEL_0);
        }
    }
    return record;
}
/**
 * Parses one individual record of the gedcom file.
 * <p>
 * Loops until the tokenizer peeks LEVEL_0, dispatching on the token peeked
 * right after it. Both the long and the abbreviated form of a tag share one
 * handler; CONL is handled by the same routine as BAPL.
 * @param id - the individual ID of the person to parse
 * @return - the populated Individual
 * @throws IOException propagated from the tokenizer
 * @throws InvalidSyntaxException if a spouse-family line has no '@'-delimited
 * id or an unrecognized token is met
 */
private Individual parseIndividual(String id) throws IOException, InvalidSyntaxException {
    Individual person = new Individual(id);
    while (tokenizer.peekTokenId() != LEVEL_0) {
        switch (tokenizer.peekTokenId()) {
            case NAME:
                parseName(person);
                break;
            case SEX:
                person.gender = parseGender(tokenizer.nextLine());
                break;
            case BIRTH:
            case BIRT:
                person.birth = parseEvent(EventType.BIRTH);
                break;
            case DEATH:
            case DEAT:
                person.death = parseEvent(EventType.DEATH);
                break;
            case FAMILY_SPOUSE:
            case FAMS: {
                // The id is the text between the first pair of '@' characters.
                String[] xrefParts = tokenizer.nextLine().split("@");
                if (xrefParts.length < 2) {
                    throw new InvalidSyntaxException("Invalid Individual ID in your gedcom file for individual " + id, tokenizer.lineNumber);
                }
                person.famsIds.add(xrefParts[1]);
                break;
            }
            case FAMILY_CHILD:
            case FAMC:
                parseFamc(person);
                break;
            case IGNORE:
                skipRecord(LEVEL_1);
                tokenizer.resetToken();
                break;
            case BAPL:
            case CONL:
                parseBapl(person);
                break;
            case ENDL:
                parseEndl(person);
                break;
            case SLGC:
                parseSlgc(person);
                break;
            case OBJE:
                parseObje(person);
                break;
            default:
                // Prints the id returned by one further peek, then aborts.
                System.out.println(tokenizer.peekTokenId());
                throw new InvalidSyntaxException("Gedcom error with Individual " + id, tokenizer.lineNumber);
        }
    }
    return person;
}
/**
 * Reads one or more consecutive child-family lines and records a single
 * family id in the individual's famcIds.
 * <p>
 * The first id read is kept, unless a _PRIMARY token is seen later, in which
 * case the id read most recently before it replaces the kept one.
 * @param ind - the individual whose famcIds list receives the chosen id
 * @throws IOException propagated from the tokenizer
 * @throws InvalidSyntaxException if a line has no '@'-delimited id
 */
private void parseFamc(Individual ind) throws IOException, InvalidSyntaxException {
    String keptId = "";
    String latestId = "";
    boolean haveFirst = false;
    int lookahead = 0;
    while (true) {
        if (lookahead != _PRIMARY) {
            String[] parts = tokenizer.nextLine().split("@");
            if (parts.length < 2) {
                throw new InvalidSyntaxException("Invalid Individual ID in your gedcom file for individual " + ind.id, tokenizer.lineNumber);
            }
            latestId = parts[1];
            if (!haveFirst) {
                keptId = parts[1];
                haveFirst = true;
            }
        } else {
            if (!haveFirst) {
                throw new InvalidSyntaxException("Invalid Individual ID in your gedcom file for individual " + ind.id, tokenizer.lineNumber);
            }
            // A primary marker promotes the most recently read family id.
            keptId = latestId;
        }
        // Keep going only while the two-token lookahead is FAMC or _PRIMARY.
        lookahead = tokenizer.peekTokenId(2);
        if (lookahead != FAMC && lookahead != _PRIMARY) {
            break;
        }
    }
    tokenizer.resetToken();
    ind.famcIds.add(keptId);
}
/**
 * Reads the lines under a baptism record and sets the individual's baptism
 * flags from its STAT value.
 * <p>
 * CHILD, INFANT, STILLBORN, COMPLETED and PRE_1970 set both baptismComplete
 * and baptism; CLEARED, SUBMITTED and QUALIFIED set only baptism. When no STAT
 * token is seen at all, both flags are set.
 * @param ind - the individual whose baptism flags are updated
 * @throws IOException propagated from the tokenizer
 */
private void parseBapl(Individual ind) throws IOException {
    boolean sawStat = false;
    while (true) {
        int lead = tokenizer.peekTokenId();
        if (lead == LEVEL_1 || lead == LEVEL_0) {
            break;
        }
        if (tokenizer.peekTokenId() != STAT) {
            tokenizer.skipLine();
            continue;
        }
        sawStat = true;
        int status = tokenizer.peekTokenId();
        boolean done = status == CHILD || status == INFANT || status == STILLBORN
                || status == COMPLETED || status == PRE_1970;
        boolean underway = status == CLEARED || status == SUBMITTED || status == QUALIFIED;
        if (done) {
            ind.baptismComplete = true;
        }
        // A completed status implies the in-progress flag as well.
        if (done || underway) {
            ind.baptism = true;
        }
    }
    if (!sawStat) {
        ind.baptism = true;
        ind.baptismComplete = true;
    }
    tokenizer.resetToken();
}
//TODO Add TEMP (temple)
/**
 * Reads the lines under an endowment record and sets the individual's
 * endowment flags from its STAT value.
 * <p>
 * BIC, DNS, STILLBORN, COMPLETED and PRE_1970 set both endowmentComplete and
 * endowment; CLEARED, SUBMITTED and QUALIFIED set only endowment. When no STAT
 * token is seen at all, both flags are set.
 * @param ind - the individual whose endowment flags are updated
 * @throws IOException propagated from the tokenizer
 */
private void parseEndl(Individual ind) throws IOException {
    boolean sawStat = false;
    while (true) {
        int lead = tokenizer.peekTokenId();
        if (lead == LEVEL_1 || lead == LEVEL_0) {
            break;
        }
        if (tokenizer.peekTokenId() != STAT) {
            tokenizer.skipLine();
            continue;
        }
        sawStat = true;
        int status = tokenizer.peekTokenId();
        boolean done = status == BIC || status == DNS || status == STILLBORN
                || status == COMPLETED || status == PRE_1970;
        boolean underway = status == CLEARED || status == SUBMITTED || status == QUALIFIED;
        if (done) {
            ind.endowmentComplete = true;
        }
        // A completed status implies the in-progress flag as well.
        if (done || underway) {
            ind.endowment = true;
        }
    }
    if (!sawStat) {
        ind.endowment = true;
        ind.endowmentComplete = true;
    }
    tokenizer.resetToken();
}
/**
 * Reads the lines under a sealing-to-parents record and sets the individual's
 * sealingToParents flags from its STAT value.
 * <p>
 * BIC, DNS, STILLBORN, COMPLETED and PRE_1970 set both sealingToParentsComplete
 * and sealingToParents; CLEARED, SUBMITTED and QUALIFIED set only
 * sealingToParents. When no STAT token is seen at all, both flags are set.
 * @param ind - the individual whose sealing-to-parents flags are updated
 * @throws IOException propagated from the tokenizer
 */
private void parseSlgc(Individual ind) throws IOException {
    boolean sawStat = false;
    while (true) {
        int lead = tokenizer.peekTokenId();
        if (lead == LEVEL_1 || lead == LEVEL_0) {
            break;
        }
        if (tokenizer.peekTokenId() != STAT) {
            tokenizer.skipLine();
            continue;
        }
        sawStat = true;
        int status = tokenizer.peekTokenId();
        boolean done = status == BIC || status == DNS || status == STILLBORN
                || status == COMPLETED || status == PRE_1970;
        boolean underway = status == CLEARED || status == SUBMITTED || status == QUALIFIED;
        if (done) {
            ind.sealingToParentsComplete = true;
        }
        // A completed status implies the in-progress flag as well.
        if (done || underway) {
            ind.sealingToParents = true;
        }
    }
    if (!sawStat) {
        ind.sealingToParents = true;
        ind.sealingToParentsComplete = true;
    }
    tokenizer.resetToken();
}
/**
 * Reads the lines under a spouse-sealing record and sets the family's sealing
 * flags from its STAT value.
 * <p>
 * DNS, DNS_CAN, COMPLETED and PRE_1970 set both sealingComplete and sealing;
 * CLEARED, SUBMITTED and QUALIFIED set only sealing. When no STAT token is
 * seen at all, both flags are set.
 * @param fam - the family whose sealing flags are updated
 * @throws IOException propagated from the tokenizer
 */
private void parseSlgs(Family fam) throws IOException {
    boolean sawStat = false;
    while (true) {
        int lead = tokenizer.peekTokenId();
        if (lead == LEVEL_1 || lead == LEVEL_0) {
            break;
        }
        if (tokenizer.peekTokenId() != STAT) {
            tokenizer.skipLine();
            continue;
        }
        sawStat = true;
        int status = tokenizer.peekTokenId();
        boolean done = status == DNS || status == DNS_CAN
                || status == COMPLETED || status == PRE_1970;
        boolean underway = status == CLEARED || status == SUBMITTED || status == QUALIFIED;
        if (done) {
            fam.sealingComplete = true;
        }
        // A completed status implies the in-progress flag as well.
        if (done || underway) {
            fam.sealing = true;
        }
    }
    if (!sawStat) {
        fam.sealing = true;
        fam.sealingComplete = true;
    }
    tokenizer.resetToken();
}
/**
 * Reads the lines under a multimedia object record and, when a FILE line names
 * a jpg/jpeg, gif or png image, stores it as the individual's photo.
 * <p>
 * The file name is trimmed and its extension is matched case-insensitively,
 * so "PHOTO.JPG" and names followed by stray whitespace are accepted. If
 * several FILE lines qualify, the last one wins.
 * @param ind - the individual whose photoPath and hasPhoto are updated
 * @throws IOException propagated from the tokenizer
 */
private void parseObje(Individual ind) throws IOException {
    int token;
    while ((token = tokenizer.peekTokenId()) != LEVEL_1 && token != LEVEL_0) {
        switch (tokenizer.peekTokenId()) {
            case FILE:
                // Image types with built-in support in Java.
                String fileName = tokenizer.nextLine().trim();
                if (hasImageExtension(fileName)) {
                    ind.photoPath = fileName;
                    ind.hasPhoto = true;
                }
                break;
            default:
                tokenizer.skipLine();
        }
    }
    tokenizer.resetToken();
}

/** Returns true if fileName ends with a supported image extension, ignoring case. */
private static boolean hasImageExtension(String fileName) {
    String[] extensions = {".jpg", ".jpeg", ".gif", ".png"};
    for (String ext : extensions) {
        int from = fileName.length() - ext.length();
        // regionMatches(ignoreCase=true) avoids locale-sensitive case mapping.
        if (from >= 0 && fileName.regionMatches(true, from, ext, 0, ext.length())) {
            return true;
        }
    }
    return false;
}
/**
 * Parses a String to determine the gender of an individual. It is case
 * insensitive, ignores surrounding whitespace, and accepts a single letter
 * ex. 'M' or the entire word ex. 'MALE'.
 * @param gender - A string containing the gender of an individual; may be null
 * @return - A Gender enum with possible values MALE, FEMALE, or UNKNOWN
 * (UNKNOWN for null or any unrecognized text)
 */
private Gender parseGender(String gender) {
    if (gender == null) {
        return Gender.UNKNOWN;
    }
    // The value comes straight from the rest of a line, so trim it first.
    String g = gender.trim();
    if (g.equalsIgnoreCase("M") || g.equalsIgnoreCase("MALE")) {
        return Gender.MALE;
    }
    if (g.equalsIgnoreCase("F") || g.equalsIgnoreCase("FEMALE")) {
        return Gender.FEMALE;
    }
    return Gender.UNKNOWN;
}
/**
 * Parses an Event in the gedcom file, including its date and place.
 * <p>
 * One token is peeked first; unless it is Y, resetToken() is called. Then,
 * while the peeked token is greater than LEVEL_1, the next token is consumed:
 * DATE stores the trimmed rest of the line and calls parseDateParts(), PLACE
 * or PLAC stores the trimmed rest of the line, anything else is read and
 * discarded.
 * @param type - the kind of event; callers in this class pass BIRTH, DEATH
 * and MARRIAGE
 * @return - the populated Event
 * @throws IOException propagated from the tokenizer
 */
private Event parseEvent(EventType type) throws IOException {
    Event parsed = new Event(type);
    if (tokenizer.peekTokenId() != Y) {
        tokenizer.resetToken();
    }
    while (tokenizer.peekTokenId() > LEVEL_1) {
        int tag = tokenizer.nextTokenId();
        if (tag == DATE) {
            parsed.date = tokenizer.nextLine().trim();
            parsed.parseDateParts();
        } else if (tag == PLACE || tag == PLAC) {
            parsed.place = tokenizer.nextLine().trim();
        } else {
            // Unhandled sub-tag: consume its line and move on.
            tokenizer.nextLine();
        }
    }
    tokenizer.resetToken();
    return parsed;
}
/**
 * Parses the name of an individual, including given name, surname,
 * name-prefix, surname-prefix and name-suffix.
 * <p>
 * Given name, surname and suffix are first taken from the name line itself;
 * SURN and NSFX sub-lines then overwrite surname and suffix, and NPFX and
 * SPFX fill the two prefixes. Other sub-lines are read and discarded.
 * @param individual - the Individual the parser is currently filling in
 * @throws IOException propagated from the tokenizer
 * @throws InvalidSyntaxException declared for callers; not thrown by this body
 */
private void parseName(Individual individual) throws IOException, InvalidSyntaxException {
    String nameline = tokenizer.nextLine();
    individual.givenName = getGivenName(nameline);
    individual.surname = getSurname(nameline);
    individual.nameSuffix = getNameSuffix(nameline);

    while (tokenizer.peekTokenId() > LEVEL_1) {
        int tag = tokenizer.peekTokenId();
        // Every branch consumes exactly one line; only known tags keep it.
        String value = tokenizer.nextLine();
        if (tag == NPFX) {
            individual.namePrefix = value.trim();
        } else if (tag == SPFX) {
            individual.surnamePrefix = value.trim();
        } else if (tag == SURN) {
            individual.surname = value.trim();
        } else if (tag == NSFX) {
            individual.nameSuffix = value.trim();
        }
    }
    tokenizer.resetToken();
}
/**
 * Extracts the name suffix: the trimmed text after the closing '/' of the
 * surname, e.g. "Jr." from "John /Smith/ Jr.".
 * <p>
 * A suffix exists only when the surname is delimited by two slashes; with a
 * single slash the text after it is the (unterminated) surname, not a suffix.
 * @param nameline - the value of a NAME line
 * @return the suffix, or "" if there is none
 */
private String getNameSuffix(String nameline) {
    int open = nameline.indexOf('/');
    int close = nameline.lastIndexOf('/');
    if (open == -1 || close == open) {
        return "";
    }
    return nameline.substring(close + 1).trim();
}
/**
 * Extracts the surname: the trimmed text between the first and the last '/'
 * of the name line, e.g. "Smith" from "John /Smith/ Jr.".
 * <p>
 * When only one slash is present the surname is taken to run to the end of
 * the line ("John /Smith" gives "Smith").
 * @param nameline - the value of a NAME line
 * @return the surname, or "" if the line contains no '/'
 */
private String getSurname(String nameline) {
    int start = nameline.indexOf('/');
    if (start == -1) {
        return "";
    }
    int end = nameline.lastIndexOf('/');
    // end == start means the closing slash is missing.
    String raw = (end > start)
            ? nameline.substring(start + 1, end)
            : nameline.substring(start + 1);
    return raw.trim();
}
/**
 * Extracts the given name from a name line.
 * @param nameline - the value of a NAME line
 * @return the text before the first '/' exactly as it appears (not trimmed),
 * or the whole line trimmed when it contains no '/'
 */
private String getGivenName(String nameline) {
    int slash = nameline.indexOf("/");
    return (slash == -1) ? nameline.trim() : nameline.substring(0, slash);
}
/**
 * Parses a family in the gedcom file.
 * <p>
 * Only ids are stored; the actual husband, wife and children are attached
 * later, when the record is linked. A child is added only if it has already
 * been parsed and the first entry of its famcIds is exactly this family's id.
 * @param id - The ID of the family the parser is about to parse
 * @return - An instance of the Family class with ids, marriage event and
 * sealing flags filled in
 * @throws IOException propagated from the tokenizer
 * @throws InvalidSyntaxException if a husband, wife or child line carries no
 * '@'-delimited id
 */
private Family parseFamily(String id) throws IOException, InvalidSyntaxException {
    int token;
    Family family = new Family(id);
    while ((token = tokenizer.peekTokenId()) > LEVEL_0) {
        if (token != LEVEL_1) {
            tokenizer.skipLine();
            continue;
        }
        switch (tokenizer.peekTokenId()) {
            case HUSBAND:
            case HUSB:
                family.husbandId = xrefFromLine(tokenizer.nextLine(), id);
                break;
            case WIFE:
                family.wifeId = xrefFromLine(tokenizer.nextLine(), id);
                break;
            case CHILD:
            case CHIL: {
                String code = xrefFromLine(tokenizer.nextLine(), id);
                Individual child = record.getIndividual(code);
                // Guard against children without any FAMC entry, and compare
                // ids exactly so that e.g. "F12" is not mistaken for "F1".
                if (child != null
                        && !child.famcIds.isEmpty()
                        && child.famcIds.get(0).equals(id)) {
                    family.childrenXRefIds.add(code);
                }
                break;
            }
            case MARRIAGE:
            case MARR:
                family.marriage = parseEvent(EventType.MARRIAGE);
                break;
            case SLGS:
                parseSlgs(family);
                break;
            default:
                tokenizer.skipLine();
        }
    }
    return family;
}

/** Returns the id between the first pair of '@' characters in line, or throws if there is none. */
private String xrefFromLine(String line, String familyId) throws InvalidSyntaxException {
    String[] parts = line.split("@");
    if (parts.length < 2) {
        throw new InvalidSyntaxException("Invalid cross-reference ID in your gedcom file for family " + familyId, tokenizer.lineNumber);
    }
    return parts[1];
}
/**
 * Skips a record: one line is always skipped, and further lines are skipped
 * for as long as the peeked token is greater than the stop level.
 * @param stopLevel - The level you wish to stop skipping tags at.
 * @throws IOException propagated from the tokenizer
 */
private void skipRecord(int stopLevel) throws IOException {
    do {
        tokenizer.skipLine();
    } while (tokenizer.peekTokenId() > stopLevel);
}
/**
 * Command-line entry point: parses a gedcom file and prints how many
 * individuals and families it contains.
 * <p>
 * Exits with status 1 if parsing throws or produces no record.
 * @param args - args[0], when present, is the gedcom file to parse; without
 * arguments the empty file name is used, as before
 */
public static void main(String[] args) {
    System.err.println("here");
    String fileName = args.length > 0 ? args[0] : "";
    GedcomRecord record = null;
    try {
        GedcomParser parser = new GedcomParser(fileName);
        record = parser.parseGedcom();
    } catch (Exception e) {
        e.printStackTrace();
        // A failed parse is an error, so do not report success to the shell.
        System.exit(1);
    }
    if (record == null) {
        // parseGedcom() returns null when parsing was cut short.
        System.err.println("No record could be parsed from \"" + fileName + "\"");
        System.exit(1);
    }
    System.out.println("Individuals: " + record.getIndividuals().size());
    System.out.println("Families: " + record.getFamilies().size());
}
}