SimpleMMcifConsumer.java example

Explorer
biojava-master
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * created at Apr 26, 2008
 */
package org.biojava.nbio.structure.io.mmcif;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import javax.vecmath.Matrix4d;

import org.biojava.nbio.structure.AminoAcid;
import org.biojava.nbio.structure.AminoAcidImpl;
import org.biojava.nbio.structure.Atom;
import org.biojava.nbio.structure.AtomImpl;
import org.biojava.nbio.structure.Chain;
import org.biojava.nbio.structure.ChainImpl;
import org.biojava.nbio.structure.EntityInfo;
import org.biojava.nbio.structure.EntityType;
import org.biojava.nbio.structure.DBRef;
import org.biojava.nbio.structure.Element;
import org.biojava.nbio.structure.Group;
import org.biojava.nbio.structure.GroupType;
import org.biojava.nbio.structure.HetatomImpl;
import org.biojava.nbio.structure.NucleotideImpl;
import org.biojava.nbio.structure.PDBCrystallographicInfo;
import org.biojava.nbio.structure.PDBHeader;
import org.biojava.nbio.structure.ResidueNumber;
import org.biojava.nbio.structure.SeqMisMatch;
import org.biojava.nbio.structure.SeqMisMatchImpl;
import org.biojava.nbio.structure.Site;
import org.biojava.nbio.structure.Structure;
import org.biojava.nbio.structure.StructureException;
import org.biojava.nbio.structure.StructureImpl;
import org.biojava.nbio.structure.StructureTools;
import org.biojava.nbio.structure.io.BondMaker;
import org.biojava.nbio.structure.io.ChargeAdder;
import org.biojava.nbio.structure.io.EntityFinder;
import org.biojava.nbio.structure.io.FileParsingParameters;
import org.biojava.nbio.structure.io.SeqRes2AtomAligner;
import org.biojava.nbio.structure.io.mmcif.model.AtomSite;
import org.biojava.nbio.structure.io.mmcif.model.AtomSites;
import org.biojava.nbio.structure.io.mmcif.model.AuditAuthor;
import org.biojava.nbio.structure.io.mmcif.model.Cell;
import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
import org.biojava.nbio.structure.io.mmcif.model.ChemCompAtom;
import org.biojava.nbio.structure.io.mmcif.model.ChemCompBond;
import org.biojava.nbio.structure.io.mmcif.model.ChemCompDescriptor;
import org.biojava.nbio.structure.io.mmcif.model.DatabasePDBremark;
import org.biojava.nbio.structure.io.mmcif.model.DatabasePDBrev;
import org.biojava.nbio.structure.io.mmcif.model.DatabasePdbrevRecord;
import org.biojava.nbio.structure.io.mmcif.model.Entity;
import org.biojava.nbio.structure.io.mmcif.model.EntityPoly;
import org.biojava.nbio.structure.io.mmcif.model.EntityPolySeq;
import org.biojava.nbio.structure.io.mmcif.model.EntitySrcGen;
import org.biojava.nbio.structure.io.mmcif.model.EntitySrcNat;
import org.biojava.nbio.structure.io.mmcif.model.EntitySrcSyn;
import org.biojava.nbio.structure.io.mmcif.model.Exptl;
import org.biojava.nbio.structure.io.mmcif.model.PdbxChemCompDescriptor;
import org.biojava.nbio.structure.io.mmcif.model.PdbxChemCompIdentifier;
import org.biojava.nbio.structure.io.mmcif.model.PdbxEntityNonPoly;
import org.biojava.nbio.structure.io.mmcif.model.PdbxNonPolyScheme;
import org.biojava.nbio.structure.io.mmcif.model.PdbxPolySeqScheme;
import org.biojava.nbio.structure.io.mmcif.model.PdbxStructAssembly;
import org.biojava.nbio.structure.io.mmcif.model.PdbxStructAssemblyGen;
import org.biojava.nbio.structure.io.mmcif.model.PdbxStructOperList;
import org.biojava.nbio.structure.io.mmcif.model.Refine;
import org.biojava.nbio.structure.io.mmcif.model.Struct;
import org.biojava.nbio.structure.io.mmcif.model.StructAsym;
import org.biojava.nbio.structure.io.mmcif.model.StructConn;
import org.biojava.nbio.structure.io.mmcif.model.StructKeywords;
import org.biojava.nbio.structure.io.mmcif.model.StructNcsOper;
import org.biojava.nbio.structure.io.mmcif.model.StructRef;
import org.biojava.nbio.structure.io.mmcif.model.StructRefSeq;
import org.biojava.nbio.structure.io.mmcif.model.StructRefSeqDif;
import org.biojava.nbio.structure.io.mmcif.model.StructSite;
import org.biojava.nbio.structure.io.mmcif.model.StructSiteGen;
import org.biojava.nbio.structure.io.mmcif.model.Symmetry;
import org.biojava.nbio.structure.quaternary.BioAssemblyInfo;
import org.biojava.nbio.structure.quaternary.BiologicalAssemblyBuilder;
import org.biojava.nbio.structure.quaternary.BiologicalAssemblyTransformation;
import org.biojava.nbio.structure.xtal.CrystalCell;
import org.biojava.nbio.structure.xtal.SpaceGroup;
import org.biojava.nbio.structure.xtal.SymoplibParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A MMcifConsumer implementation that builds an in-memory representation of the
 * content of a mmcif file as a BioJava Structure object.
 *
 * @author Andreas Prlic
 * @since 1.7
 */

public class SimpleMMcifConsumer implements MMcifConsumer {

	private static final Logger logger = LoggerFactory.getLogger(SimpleMMcifConsumer.class);

	private Structure structure;
	private Chain currentChain;
	private Group currentGroup;

	/**
	 * A temporary data structure to hold all parsed chains
	 */
	private ArrayList<List<Chain>> allModels; 
	/**
	 * The current set of chains per model
	 */
	private List<Chain>      currentModel;
	private List<Entity>     entities;
	/**
	 * Needed in header only mode to get mapping between asym ids and author ids
	 */
	private List<EntityPoly> entityPolys;
	private List<StructRef>  strucRefs;
	private List<Chain>      seqResChains;
	private List<Chain>      entityChains; // needed to link entities, chains and compounds...
	private List<StructAsym> structAsyms;  // needed to link entities, chains and compounds...
	private List<PdbxStructOperList> structOpers ; //
	private List<PdbxStructAssembly> strucAssemblies;
	private List<PdbxStructAssemblyGen> strucAssemblyGens;
	private List<EntitySrcGen> entitySrcGens;
	private List<EntitySrcNat> entitySrcNats;
	private List<EntitySrcSyn> entitySrcSyns;
	private List<StructConn> structConn;
	private List<StructNcsOper> structNcsOper;
	private List<StructRefSeqDif> sequenceDifs;
	private List<StructSiteGen> structSiteGens;
	
	private Matrix4d parsedScaleMatrix;



	/**
	 * A map of asym ids (internal chain ids) to entity ids extracted from
	 * the _struct_asym category
	 */
	private Map<String,String> asymId2entityId;

	/**
	 * A map of asym ids (internal chain ids) to author ids extracted from 
	 * the _entity_poly category. Used in header only parsing.
	 */
	private Map<String,String> asymId2authorId;

	private String currentNmrModelNumber ;

	private FileParsingParameters params;

	public  SimpleMMcifConsumer(){
		params = new FileParsingParameters();
		documentStart();

	}

	@Override
	public void newEntity(Entity entity) {
		logger.debug("New entity: {}",entity.toString());
		entities.add(entity);
	}

	@Override
	public void newEntityPoly(EntityPoly entityPoly) {
		entityPolys.add(entityPoly);
	}

	@Override
	public void newPdbxStructOperList(PdbxStructOperList structOper){

		structOpers.add(structOper);
	}

	@Override
	public void newStructAsym(StructAsym sasym){

		structAsyms.add(sasym);
	}

	private Entity getEntity(int entity_id){
		try {
			for (Entity e: entities){
				int eId = Integer.parseInt(e.getId());
				if  (eId== entity_id){
					return e;
				}
			}
		} catch (NumberFormatException e) {
			logger.warn("Entity id does not look like a number:", e.getMessage());
		}
		return null;
	}

	@Override
	public void newStructKeywords(StructKeywords kw){
		PDBHeader header = structure.getPDBHeader();
		if ( header == null)
			header = new PDBHeader();
		header.setDescription(kw.getPdbx_keywords());
		header.setClassification(kw.getPdbx_keywords());
	}

	@Override
	public void setStruct(Struct struct) {

		PDBHeader header = structure.getPDBHeader();
		if ( header == null)
			header = new PDBHeader();

		header.setTitle(struct.getTitle());
		header.setIdCode(struct.getEntry_id());
		//header.setDescription(struct.getPdbx_descriptor());
		//header.setClassification(struct.getPdbx_descriptor());
		//header.setDescription(struct.getPdbx_descriptor());



		structure.setPDBHeader(header);
		structure.setPDBCode(struct.getEntry_id());
	}

	/** initiate new group, either Hetatom, Nucleotide, or AminoAcid */
	private Group getNewGroup(String recordName,Character aminoCode1, long seq_id,String groupCode3) {

		Group g = ChemCompGroupFactory.getGroupFromChemCompDictionary(groupCode3);
		if ( g != null && !g.getChemComp().isEmpty()) {
			if ( g instanceof AminoAcidImpl) {
				AminoAcidImpl aa = (AminoAcidImpl) g;
				aa.setId(seq_id);
			} else if ( g instanceof NucleotideImpl) {
				NucleotideImpl nuc =  (NucleotideImpl) g;
				nuc.setId(seq_id);
			} else if ( g instanceof HetatomImpl) {
				HetatomImpl het = (HetatomImpl)g;
				het.setId(seq_id);
			}
			return g;
		}



		Group group;
		if ( recordName.equals("ATOM") ) {
			if (StructureTools.isNucleotide(groupCode3))  {
				// it is a nucleotide
				NucleotideImpl nu = new NucleotideImpl();
				group = nu;
				nu.setId(seq_id);

			} else if (aminoCode1==null || aminoCode1 == StructureTools.UNKNOWN_GROUP_LABEL){
				HetatomImpl h = new HetatomImpl();
				h.setId(seq_id);
				group = h;

			} else {
				AminoAcidImpl aa = new AminoAcidImpl() ;
				aa.setAminoType(aminoCode1);
				aa.setId(seq_id);
				group = aa ;
			}
		}
		else {
			if (StructureTools.isNucleotide(groupCode3))  {
				// it is a nucleotide
				NucleotideImpl nu = new NucleotideImpl();
				group = nu;
				nu.setId(seq_id);
			}
			else if (aminoCode1 != null ) {
				AminoAcidImpl aa = new AminoAcidImpl() ;
				aa.setAminoType(aminoCode1);
				aa.setId(seq_id);
				group = aa ;
			} else {
				HetatomImpl h = new HetatomImpl();
				h.setId(seq_id);
				group = h;
			}
		}
		return  group ;
	}

	/**
	 * Test if the given asymId is already present in the list of chains given. If yes, returns the chain
	 * otherwise returns null.
	 */
	private static Chain isKnownChain(String asymId, List<Chain> chains){

		for (int i = 0; i< chains.size();i++){
			Chain testchain =  chains.get(i);
			//System.out.println("comparing chainID >"+chainID+"< against testchain " + i+" >" +testchain.getName()+"<");
			if (asymId.equals(testchain.getId())) {
				//System.out.println("chain "+ chainID+" already known ...");
				return testchain;
			}
		}

		return null;
	}

	@Override
	public void newAtomSite(AtomSite atom) {

		if (params.isHeaderOnly()) return;

		// Warning: getLabel_asym_id is not the "chain id" in the PDB file
		// it is the internally used chain id.
		// later on we will fix this...

		// later one needs to map the asym id to the pdb_strand_id

		//TODO: add support for FileParsingParams.getMaxAtoms()

		boolean startOfNewChain = false;

		String asymId = atom.getLabel_asym_id();
		String authId = atom.getAuth_asym_id();

		String recordName    = atom.getGroup_PDB();
		String residueNumberS = atom.getAuth_seq_id();
		Integer residueNrInt = Integer.parseInt(residueNumberS);

		// the 3-letter name of the group:
		String groupCode3    = atom.getLabel_comp_id();

		boolean isHetAtomInFile = false;

		Character aminoCode1 = null;
		if ( recordName.equals("ATOM") )
			aminoCode1 = StructureTools.get1LetterCodeAmino(groupCode3);
		else {
			aminoCode1 = StructureTools.get1LetterCodeAmino(groupCode3);

			// for nucleotides this will be null..
			if (aminoCode1 != null &&  aminoCode1.equals(StructureTools.UNKNOWN_GROUP_LABEL))
				aminoCode1 = null;

			isHetAtomInFile = true;
		}
		String insCodeS = atom.getPdbx_PDB_ins_code();
		Character insCode = null;
		if (!  insCodeS.equals("?")) {
			insCode = insCodeS.charAt(0);
		}
		// we store the internal seq id in the Atom._id field
		// this is not a PDB file field but we need this to internally assign the insertion codes later
		// from the pdbx_poly_seq entries..

		long seq_id = -1;
		try {
			seq_id = Long.parseLong(atom.getLabel_seq_id());
		} catch (NumberFormatException e){
			// non polymer chains (ligands and small molecules) will have a label_seq_id set to '.', thus it is ok to
			// silently ignore this
			//logger.debug("Could not parse number for _atom_site.label_seq_id: "+e.getMessage());
		}

		String nmrModelNumber = atom.getPdbx_PDB_model_num();

		if ( currentNmrModelNumber == null) {
			currentNmrModelNumber = nmrModelNumber;
		}

		if (! currentNmrModelNumber.equals(nmrModelNumber)){
			currentNmrModelNumber = nmrModelNumber;

			// add previous data
			if ( currentChain != null ) {
				currentChain.addGroup(currentGroup);
				currentGroup.trimToSize();
			}

			// we came to the beginning of a new NMR model
			allModels.add(currentModel);
			currentModel = new ArrayList<Chain>();
			currentChain = null;
			currentGroup = null;
		}


		if (currentChain == null) {

			currentChain = new ChainImpl();
			currentChain.setName(authId);
			currentChain.setId(asymId);
			currentModel.add(currentChain);
			startOfNewChain = true;
		}

		//System.out.println("BEFORE: " + chain_id + " " + current_chain.getName());
		if ( ! asymId.equals(currentChain.getId()) ) {
			//logger.info("unknown chain. creating new chain. authId:" + authId + " asymId: " + asymId);
			startOfNewChain = true;

			// end up old chain...
			currentChain.addGroup(currentGroup);

			// see if old chain is known ...
			Chain testchain = isKnownChain(asymId,currentModel);

			if ( testchain == null) {
				//logger.info("unknown chain. creating new chain. authId:" + authId + " asymId: " + asymId);

				currentChain = new ChainImpl();
				currentChain.setName(authId);
				currentChain.setId(asymId);

			}   else {
				currentChain = testchain;
			}

			if ( ! currentModel.contains(currentChain))
				currentModel.add(currentChain);

		}


		ResidueNumber residueNumber = new ResidueNumber(authId,residueNrInt, insCode);

		if (currentGroup == null) {


			currentGroup = getNewGroup(recordName,aminoCode1,seq_id, groupCode3);

			currentGroup.setResidueNumber(residueNumber);
			currentGroup.setPDBName(groupCode3);
			currentGroup.setHetAtomInFile(isHetAtomInFile);
		}

		// SET UP THE ALT LOC GROUP
		Group altGroup = null;
		String altLocS = atom.getLabel_alt_id();
		Character altLoc = ' ';
		if ( altLocS.length()>0) {
			altLoc = altLocS.charAt(0);
			if ( altLoc.equals('.') )
				altLoc = ' ';

		}
		// If it's the start of the new chain 
		if ( startOfNewChain){
			currentGroup = getNewGroup(recordName,aminoCode1,seq_id, groupCode3);
			currentGroup.setResidueNumber(residueNumber);
			currentGroup.setPDBName(groupCode3);
			currentGroup.setHetAtomInFile(isHetAtomInFile);
		}
		// ANTHONY BRADLEY ADDED THIS -> WE ONLY WAN'T TO CHECK FOR ALT LOCS WHEN IT's NOT THE FIRST GROUP IN CHAIN
		else{
			// check if residue number is the same ...
			// insertion code is part of residue number
			if ( ! residueNumber.equals(currentGroup.getResidueNumber())) {
				//System.out.println("end of residue: "+current_group.getPDBCode()+" "+residueNrInt);
				currentChain.addGroup(currentGroup);
				currentGroup.trimToSize();
				currentGroup = getNewGroup(recordName,aminoCode1,seq_id,groupCode3);
				currentGroup.setPDBName(groupCode3);
				currentGroup.setResidueNumber(residueNumber);
				currentGroup.setHetAtomInFile(isHetAtomInFile);


			} else {
				// same residueNumber, but altLocs...
				// test altLoc

				if ( ! altLoc.equals(' ') && ( ! altLoc.equals('.'))) {
					logger.debug("found altLoc! " + altLoc + " " + currentGroup + " " + altGroup);
					altGroup = getCorrectAltLocGroup( altLoc,recordName,aminoCode1,groupCode3, seq_id);
					if (altGroup.getChain()==null) {
						altGroup.setChain(currentChain);
					}
				}
			}
		}
		//atomCount++;
		//System.out.println("fixing atom name for  >" + atom.getLabel_atom_id() + "< >" + fullname + "<");


		if ( params.isParseCAOnly() ){
			// yes , user wants to get CA only
			// only parse CA atoms...
			if (! (atom.getLabel_atom_id().equals(StructureTools.CA_ATOM_NAME) && atom.getType_symbol().equals("C"))) {
				//System.out.println("ignoring " + line);
				//atomCount--;
				return;
			}
		}

		//see if chain_id is one of the previous chains ...

		Atom a = convertAtom(atom);

		//see if chain_id is one of the previous chains ...
		if ( altGroup != null) {
			altGroup.addAtom(a);
			altGroup = null;
		}
		else {
			currentGroup.addAtom(a);
		}


		String atomName = a.getName();
		// make sure that main group has all atoms 
		// GitHub issue: #76
		if ( ! currentGroup.hasAtom(atomName)) {
			// Unless it's microheterogenity https://github.com/rcsb/codec-devel/issues/81
			if (currentGroup.getPDBName().equals(a.getGroup().getPDBName())) {
				if(!StructureTools.hasNonDeuteratedEquiv(a,currentGroup)){
					currentGroup.addAtom(a);
				}
			}

		}
	}

	/** 
	 * Convert a mmCIF AtomSite object to a BioJava Atom object
	 *
	 * @param atom the mmmcif AtomSite record
	 * @return an Atom
	 */
	private Atom convertAtom(AtomSite atom){


		Atom a = new AtomImpl();

		a.setPDBserial(Integer.parseInt(atom.getId()));
		a.setName(atom.getLabel_atom_id());

		double x = Double.parseDouble (atom.getCartn_x());
		double y = Double.parseDouble (atom.getCartn_y());
		double z = Double.parseDouble (atom.getCartn_z());
		a.setX(x);
		a.setY(y);
		a.setZ(z);

		float occupancy = Float.parseFloat (atom.getOccupancy());
		a.setOccupancy(occupancy);

		float temp = Float.parseFloat (atom.getB_iso_or_equiv());
		a.setTempFactor(temp);

		String alt = atom.getLabel_alt_id();
		if (( alt != null ) && ( alt.length() > 0) && (! alt.equals("."))){
			a.setAltLoc(new Character(alt.charAt(0)));
		} else {
			a.setAltLoc(new Character(' '));
		}

		Element element = Element.R;
		try {
			element = Element.valueOfIgnoreCase(atom.getType_symbol());
		}  catch (IllegalArgumentException e) {
			logger.info("Element {} was not recognised as a BioJava-known element, the element will be represented as the generic element {}", atom.getType_symbol(), Element.R.name());
		}
		a.setElement(element);

		return a;

	}


	private Group getCorrectAltLocGroup( Character altLoc,
			String recordName,
			Character aminoCode1,
			String groupCode3,
			long seq_id) {

		// see if we know this altLoc already;
		List<Atom> atoms = currentGroup.getAtoms();
		if ( atoms.size() > 0) {
			Atom a1 = atoms.get(0);
			// we are just adding atoms to the current group
			// probably there is a second group following later...
			if (a1.getAltLoc().equals(altLoc)) {

				return currentGroup;
			}
		}

		List<Group> altLocs = currentGroup.getAltLocs();
		for ( Group altLocG : altLocs ){
			atoms = altLocG.getAtoms();
			if ( atoms.size() > 0) {
				for ( Atom a1 : atoms) {
					if (a1.getAltLoc().equals( altLoc)) {

						return altLocG;
					}
				}
			}
		}

		// no matching altLoc group found.
		// build it up.

		if ( groupCode3.equals(currentGroup.getPDBName())) {
			if ( currentGroup.getAtoms().size() == 0) {
				//System.out.println("current group is empty " + current_group + " " + altLoc);
				return currentGroup;
			}
			//System.out.println("cloning current group " + current_group + " " + current_group.getAtoms().get(0).getAltLoc() + " altLoc " + altLoc);
			Group altLocG = (Group) currentGroup.clone();
			// drop atoms from cloned group...
			// https://redmine.open-bio.org/issues/3307
			altLocG.setAtoms(new ArrayList<Atom>());
			altLocG.getAltLocs().clear();
			currentGroup.addAltLoc(altLocG);
			return altLocG;
		}

		//	System.out.println("new  group " + recordName + " " + aminoCode1 + " " +groupCode3);
		//String recordName,Character aminoCode1, long seq_id,String groupCode3) {
		Group altLocG = getNewGroup(recordName,aminoCode1,seq_id,groupCode3);

		altLocG.setPDBName(groupCode3);
		altLocG.setResidueNumber(currentGroup.getResidueNumber());
		currentGroup.addAltLoc(altLocG);
		return altLocG;
	}

	/** 
	 * Start the parsing
	 */
	@Override
	public void documentStart() {
		structure = new StructureImpl();

		currentChain        = null;
		currentGroup 		= null;
		currentNmrModelNumber 	= null;
		//atomCount     		= 0;

		allModels     = new ArrayList<List<Chain>>();
		currentModel  = new ArrayList<Chain>();
		entities      = new ArrayList<Entity>();
		entityPolys   = new ArrayList<>();
		strucRefs     = new ArrayList<StructRef>();
		seqResChains  = new ArrayList<Chain>();
		entityChains  = new ArrayList<Chain>();
		structAsyms   = new ArrayList<StructAsym>();

		asymId2entityId = new HashMap<String,String>();
		asymId2authorId = new HashMap<>();
		structOpers   = new ArrayList<PdbxStructOperList>();
		strucAssemblies = new ArrayList<PdbxStructAssembly>();
		strucAssemblyGens = new ArrayList<PdbxStructAssemblyGen>();
		entitySrcGens = new ArrayList<EntitySrcGen>();
		entitySrcNats = new ArrayList<EntitySrcNat>();
		entitySrcSyns = new ArrayList<EntitySrcSyn>();
		structConn = new ArrayList<StructConn>();
		structNcsOper = new ArrayList<StructNcsOper>();
		sequenceDifs = new ArrayList<StructRefSeqDif>();
		structSiteGens = new ArrayList<StructSiteGen>();
	}


	@Override
	public void documentEnd() {

		// Expected that there is one current_chain that needs to be added to the model
		// When in headerOnly mode, no Atoms are read, and there will not be an active
		// current_chain.
		if ( currentChain != null ) {

			currentChain.addGroup(currentGroup);
			if (isKnownChain(currentChain.getId(),currentModel) == null) {
				currentModel.add(currentChain);
			}
		} else if (!params.isHeaderOnly()){
			logger.warn("current chain is null at end of document.");
		}

		allModels.add(currentModel);

		// this populates the asymId2authorId and asymId2entityId maps, needed in header only mode to get the mapping 
		// between the 2 chain identifiers.
		initMaps();

		for (StructAsym asym : structAsyms) {

			logger.debug("Entity {} matches asym_id: {}", asym.getEntity_id(), asym.getId() );

			Chain s = getEntityChain(asym.getEntity_id());
			Chain seqres = (Chain)s.clone();
			// to solve issue #160 (e.g. 3u7t)
			seqres = removeSeqResHeterogeneity(seqres);
			seqres.setId(asym.getId());
			if (asymId2authorId.get(asym.getId()) !=null ){ 
				seqres.setName(asymId2authorId.get(asym.getId()));
			} else {
				seqres.setName(asym.getId());
			}

			EntityType type = null;
			try {
				Entity ent = getEntity(Integer.parseInt(asym.getEntity_id()));
				type = EntityType.entityTypeFromString(ent.getType());
			} catch (NumberFormatException e) {
				logger.debug("Could not parse integer from entity id field {}", asym.getEntity_id());
			}

			// we'll only add seqres chains that are polymeric or unknown
			if (type==null || type==EntityType.POLYMER ) {
				seqResChains.add(seqres);	
			}

			logger.debug(" seqres: " + asym.getId() + " " + seqres + "<") ;
			// adding the entities to structure
			addEntities(asym);

		}

		if (structAsyms.isEmpty()) {
			logger.warn("No _struct_asym category in file, no SEQRES groups will be added.");
		}

		// entities
		// In addEntities above we created the entities if they were present in the file
		// Now we need to make sure that they are linked to chains and also that if they are not present in the file we need to add them now
		linkEntities();

		// now that we know the entities, we can add all chains to structure so that they are stored
		// properly as polymer/nonpolymer/water chains inside structure
		for (List<Chain> model:allModels) {
			structure.addModel(model);
		}

		// Only align if requested (default) and not when headerOnly mode with no Atoms.
		// Otherwise, we store the empty SeqRes Groups unchanged in the right chains.
		if ( params.isAlignSeqRes() && !params.isHeaderOnly() ){
			logger.debug("Parsing mode align_seqres, will parse SEQRES and align to ATOM sequence");
			alignSeqRes();
		} else {
			logger.debug("Parsing mode unalign_seqres, will parse SEQRES but not align it to ATOM sequence");
			SeqRes2AtomAligner.storeUnAlignedSeqRes(structure, seqResChains, params.isHeaderOnly());
		}


		// Now make sure all altlocgroups have all the atoms in all the groups
		StructureTools.cleanUpAltLocs(structure);


		// NOTE bonds and charges can only be done at this point that the chain id mapping is properly sorted out
		if (!params.isHeaderOnly()) {
			if ( params.shouldCreateAtomBonds()) {
				addBonds();
			}

			if ( params.shouldCreateAtomCharges()) {
				addCharges();
			}
		}

		if (!params.isHeaderOnly()) {

			// Do structure.setSites(sites) after any chain renaming to be like PDB.
			addSites();
		}



		// set the oligomeric state info in the header...
		if (params.isParseBioAssembly()) {

			// the more detailed mapping of chains to rotation operations happens in StructureIO...

			Map<Integer,BioAssemblyInfo> bioAssemblies = new HashMap<Integer, BioAssemblyInfo>();

			for ( PdbxStructAssembly psa : strucAssemblies){

				List<PdbxStructAssemblyGen> psags = new ArrayList<PdbxStructAssemblyGen>(1);

				for ( PdbxStructAssemblyGen psag: strucAssemblyGens ) {
					if ( psag.getAssembly_id().equals(psa.getId())) {
						psags.add(psag);
					}
				}

				BiologicalAssemblyBuilder builder = new BiologicalAssemblyBuilder();

				// these are the transformations that need to be applied to our model
				List<BiologicalAssemblyTransformation> transformations = builder.getBioUnitTransformationList(psa, psags, structOpers);

				int bioAssemblyId = -1;
				try {
					bioAssemblyId = Integer.parseInt(psa.getId());
				} catch (NumberFormatException e) {
					logger.info("Could not parse a numerical bio assembly id from '{}'",psa.getId());
				}

				// if bioassembly id is not numerical we throw it away
				// this happens usually for viral capsid entries, like 1ei7
				// see issue #230 in github
				if (bioAssemblyId!=-1) {
					int mmSize = 0;
					// note that the transforms contain asym ids of both polymers and non-polymers
					// For the mmsize, we are only interested in the polymers
					for (BiologicalAssemblyTransformation transf:transformations) {
						Chain c = structure.getChain(transf.getChainId());
						if (c==null) {
							logger.info("Could not find asym id {} specified in struct_assembly_gen", transf.getChainId());
							continue;
						}
						if (c.getEntityType() == EntityType.POLYMER &&
							// for entries like 4kro, sugars are annotated as polymers but we
							// don't want them in the macromolecularSize count
							!c.getEntityInfo().getDescription().contains("SUGAR") ) {
								
								mmSize++;
							}
					}
					
					BioAssemblyInfo bioAssembly = new BioAssemblyInfo();
					bioAssembly.setId(bioAssemblyId);
					bioAssembly.setMacromolecularSize(mmSize);
					bioAssembly.setTransforms(transformations);
					bioAssemblies.put(bioAssemblyId,bioAssembly);
				}

			}
			structure.getPDBHeader().setBioAssemblies(bioAssemblies);
		}

		setStructNcsOps();
		
		setCrystallographicInfoMetadata();


		Map<String,List<SeqMisMatch>> misMatchMap = new HashMap<String, List<SeqMisMatch>>();
		for (StructRefSeqDif sdif : sequenceDifs) {
			SeqMisMatch misMatch = new SeqMisMatchImpl();
			misMatch.setDetails(sdif.getDetails());

			String insCode = sdif.getPdbx_pdb_ins_code();
			if ( insCode != null && insCode.equals("?"))
				insCode = null;
			misMatch.setInsCode(insCode);
			misMatch.setOrigGroup(sdif.getDb_mon_id());
			misMatch.setPdbGroup(sdif.getMon_id());
			misMatch.setPdbResNum(sdif.getPdbx_auth_seq_num());
			misMatch.setUniProtId(sdif.getPdbx_seq_db_accession_code());
			misMatch.setSeqNum(sdif.getSeq_num());


			List<SeqMisMatch> mms = misMatchMap.get(sdif.getPdbx_pdb_strand_id());
			if ( mms == null) {
				mms = new ArrayList<SeqMisMatch>();
				misMatchMap.put(sdif.getPdbx_pdb_strand_id(),mms);
			}
			mms.add(misMatch);

		}

		for (String chainId : misMatchMap.keySet()){

			Chain chain = structure.getPolyChainByPDB(chainId);

			if ( chain == null) {
				logger.warn("Could not set mismatches for chain with author id" + chainId);
				continue;
			}

			chain.setSeqMisMatches(misMatchMap.get(chainId));


		}

	}

	/**
	 * Here we link entities to chains.
	 * Also if entities are not present in file, this initialises the entities with some heuristics, see {@link org.biojava.nbio.structure.io.EntityFinder}
	 */
	private void linkEntities() {

		for (int i =0; i< allModels.size() ; i++){
			for (Chain chain : allModels.get(i)) {
				//logger.info("linking entities for " + chain.getId() + " "  + chain.getName());
				String entityId = asymId2entityId.get(chain.getId());

				if (entityId==null) {
					// this can happen for instance if the cif file didn't have _struct_asym category at all
					// and thus we have no asymId2entityId mapping at all
					logger.info("No entity id could be found for chain {}", chain.getId());
					continue;
				}
				int eId = Integer.parseInt(entityId);

				// Entities are not added for non-polymeric entities, if a chain is non-polymeric its entity won't be found.
				// TODO: add all entities and unique compounds and add methods to directly get polymer or non-polymer
				// asyms (chains).  Either create a unique StructureImpl or modify existing for a better representation of the
				// mmCIF internal data structures but is compatible with Structure interface.
				// Some examples of PDB entries with this kind of problem:
				//   - 2uub: asym_id X, chainName Z, entity_id 24: fully non-polymeric but still with its own chainName
				//   - 3o6j: asym_id K, chainName Z, entity_id 6 : a single water molecule
				//   - 1dz9: asym_id K, chainName K, entity_id 6 : a potassium ion alone

				EntityInfo entityInfo = structure.getEntityById(eId);
				if (entityInfo==null) {
					// Supports the case where the only chain members were from non-polymeric entity that is missing.
					// Solved by creating a new Compound(entity) to which this chain will belong.
					logger.info("Could not find an Entity for entity_id {}, for chain id {}, creating a new Entity.",
							eId, chain.getId());
					entityInfo = new EntityInfo();
					entityInfo.setMolId(eId);
					entityInfo.addChain(chain);
					if (StructureTools.isChainWaterOnly(chain)) {
						entityInfo.setType(EntityType.WATER);
					} else {
						entityInfo.setType(EntityType.NONPOLYMER);
					}
					chain.setEntityInfo(entityInfo);
					structure.addEntityInfo(entityInfo);
				} else {
					logger.debug("Adding chain with chain id {} (auth id {}) to Entity with entity_id {}",
							chain.getId(), chain.getName(), eId);
					entityInfo.addChain(chain);
					chain.setEntityInfo(entityInfo);
				}

			}

		}

		// if no entity information was present in file we then go and find the entities heuristically with EntityFinder
		List<EntityInfo> entityInfos = structure.getEntityInfos();
		if (entityInfos==null || entityInfos.isEmpty()) {

			List<List<Chain>> polyModels = new ArrayList<>();
			List<List<Chain>> nonPolyModels = new ArrayList<>();
			List<List<Chain>> waterModels = new ArrayList<>();

			for (List<Chain> model:allModels) {

				List<Chain> polyChains = new ArrayList<>();
				List<Chain> nonPolyChains = new ArrayList<>();
				List<Chain> waterChains = new ArrayList<>();

				polyModels.add(polyChains);
				nonPolyModels.add(nonPolyChains);
				waterModels.add(waterChains);

				for (Chain c:model) {

					// we only have entities for polymeric chains, all others are ignored for assigning entities
					if (StructureTools.isChainWaterOnly(c)) {
						waterChains.add(c);

					} else if (StructureTools.isChainPureNonPolymer(c)) {
						nonPolyChains.add(c);

					} else {
						polyChains.add(c);
					}
				}
			}

			entityInfos = EntityFinder.findPolyEntities(polyModels);
			EntityFinder.createPurelyNonPolyEntities(nonPolyModels, waterModels, entityInfos);


			structure.setEntityInfos(entityInfos);
		}

		// final sanity check: it can happen that from the annotated entities some are not linked to any chains
		// e.g. 3s26: a sugar entity does not have any chains associated to it (it seems to be happening with many sugar compounds)
		// we simply log it, this can sign some other problems if the entities are used down the line
		for (EntityInfo e:entityInfos) {
			if (e.getChains().isEmpty()) {
				logger.info("Entity {} '{}' has no chains associated to it",
						e.getMolId()<0?"with no entity id":e.getMolId(), e.getDescription());
			}
		}

	}

	private void addCharges() {
		ChargeAdder.addCharges(structure);
	}

	/**
	 * The method will return a new reference to a Chain with any consecutive groups
	 * having same residue numbers removed.
	 * This is necessary to solve the microheterogeneity issue in entries like 3u7t (see github issue #160)
	 * @param c
	 * @return
	 */
	private static Chain removeSeqResHeterogeneity(Chain c) {

		Chain trimmedChain = new ChainImpl();

		ResidueNumber lastResNum = null;

		for (Group g:c.getAtomGroups()) {

			// note we have to deep copy this, otherwise they stay linked and would get altered in addGroup(g)
			ResidueNumber currentResNum = new ResidueNumber(
					g.getResidueNumber().getChainName(),
					g.getResidueNumber().getSeqNum(),
					g.getResidueNumber().getInsCode());

			if (lastResNum == null || !lastResNum.equals(currentResNum) ) {
				trimmedChain.addGroup(g);
			} else {
				logger.debug("Removing seqres group because it seems to be repeated in entity_poly_seq, most likely has hetero='y': "+g);
			}

			lastResNum = currentResNum;

		}
		return trimmedChain;
	}

	private void addBonds() {
		BondMaker maker = new BondMaker(structure, params);
		maker.makeBonds();
		maker.formBondsFromStructConn(structConn);
	}

	private void alignSeqRes() {

		logger.debug("Parsing mode align_seqres, will align to ATOM to SEQRES sequence");

		// fix SEQRES residue numbering for all models

		for (int model=0;model<structure.nrModels();model++) {

			List<Chain> atomList   = structure.getModel(model);

			for (Chain seqResChain: seqResChains){

				// this extracts the matching atom chain from atomList
				Chain atomChain = SeqRes2AtomAligner.getMatchingAtomRes(seqResChain, atomList, true);

				if (atomChain == null) {
					// most likely there's no observed residues at all for the seqres chain: can't map
					// e.g. 3zyb: chains with asym_id L,M,N,O,P have no observed residues
					logger.info("Could not map SEQRES chain with asym_id={} to any ATOM chain. Most likely there's no observed residues in the chain.",
							seqResChain.getId());
					continue;
				}

				//map the atoms to the seqres...

				// we need to first clone the seqres so that they stay independent for different models
				List<Group> seqResGroups = new ArrayList<Group>();
				for (int i=0;i<seqResChain.getAtomGroups().size();i++) {
					seqResGroups.add((Group)seqResChain.getAtomGroups().get(i).clone());
				}

				for ( int seqResPos = 0 ; seqResPos < seqResGroups.size(); seqResPos++) {
					Group seqresG = seqResGroups.get(seqResPos);
					boolean found = false;
					for ( Group atomG: atomChain.getAtomGroups()) {

						int internalNr = getInternalNr (atomG);

						if (seqresG.getResidueNumber().getSeqNum() == internalNr ) {
							seqResGroups.set(seqResPos, atomG);
							found = true;
							break;
						}


					}
					if ( ! found)
						// so far the residue number has tracked internal numbering.
						// however there are no atom records, as such this can't be a PDB residue number...
						seqresG.setResidueNumber(null);
				}
				atomChain.setSeqResGroups(seqResGroups);

			}
		}
	}

	private int getInternalNr(Group atomG) {
		if ( atomG.getType().equals(GroupType.AMINOACID)) {
			AminoAcidImpl aa = (AminoAcidImpl) atomG;
			return new Long(aa.getId()).intValue();
		} else if ( atomG.getType().equals(GroupType.NUCLEOTIDE)) {
			NucleotideImpl nu = (NucleotideImpl) atomG;
			return new Long(nu.getId()).intValue();
		} else {
			HetatomImpl he = (HetatomImpl) atomG;
			return new Long(he.getId()).intValue();
		}
	}

	private void addEntities(StructAsym asym) {
		int eId = 0;
		try {
			eId = Integer.parseInt(asym.getEntity_id());
		} catch (NumberFormatException e) {
			logger.warn("Could not parse mol_id from string {}. Will use 0 for creating Entity",asym.getEntity_id());
		}
		Entity e = getEntity(eId);

		// for some mmCIF files like 1yrm all 3 of _entity_src_gen, _entity_src_nat and _pdbx_entity_src_syn are missing
		// we need to fill the Compounds in some other way:

		EntityInfo entityInfo = structure.getEntityById(eId);

		if (entityInfo==null) {
			//logger.info("Creating new EntityInfo " + eId + " " + e.getId() + " " + e.getPdbx_description());
			entityInfo = new EntityInfo();
			entityInfo.setMolId(eId);
			// we only add the compound if a polymeric one (to match what the PDB parser does)
			if (e!=null) {
				entityInfo.setDescription(e.getPdbx_description());

				EntityType eType = EntityType.entityTypeFromString(e.getType());
				if (eType!=null) {
					entityInfo.setType(eType);
				} else {
					logger.warn("Type '{}' is not recognised as a valid entity type for entity {}", e.getType(), eId);
				}
				addAncilliaryEntityData(asym, eId, e, entityInfo);
				structure.addEntityInfo(entityInfo);
				logger.debug("Adding Entity with entity id {} from _entity, with name: {}",eId, entityInfo.getDescription());
			}
		}
	}


	/**
	 * Add any extra information to the entity information.
	 * @param asym 
	 * @param entityId 
	 * @param entity 
	 * @param entityInfo 
	 */
	private void addAncilliaryEntityData(StructAsym asym, int entityId, Entity entity, EntityInfo entityInfo) {
		// Loop through each of the entity types and add the corresponding data
		// We're assuming if data is duplicated between sources it is consistent
		// This is a potentially huge assumption...


		for (EntitySrcGen esg : entitySrcGens) {

			if (! esg.getEntity_id().equals(asym.getEntity_id()))
				continue;

			addInformationFromESG(esg, entityId, entityInfo);

		}

		for (EntitySrcNat esn : entitySrcNats) {
			if (! esn.getEntity_id().equals(asym.getEntity_id()))
				continue;
			addInformationFromESN(esn, entityId, entityInfo);

		}

		for (EntitySrcSyn ess : entitySrcSyns) {
			if (! ess.getEntity_id().equals(asym.getEntity_id()))
				continue;
			addInfoFromESS(ess, entityId, entityInfo);

		}		
	}

	/**
	 * Add the information from an ESG to a compound.
	 * @param entitySrcInfo
	 * @param entityId
	 * @param c
	 */
	private void addInformationFromESG(EntitySrcGen entitySrcInfo, int entityId, EntityInfo c) {
		c.setAtcc(entitySrcInfo.getPdbx_gene_src_atcc());
		c.setCell(entitySrcInfo.getPdbx_gene_src_cell());
		c.setOrganismCommon(entitySrcInfo.getGene_src_common_name());
		c.setOrganismScientific(entitySrcInfo.getPdbx_gene_src_scientific_name());
		c.setOrganismTaxId(entitySrcInfo.getPdbx_gene_src_ncbi_taxonomy_id());
		c.setExpressionSystemTaxId(entitySrcInfo.getPdbx_host_org_ncbi_taxonomy_id());
		c.setExpressionSystem(entitySrcInfo.getPdbx_host_org_scientific_name());
	}

	/**
	 * Add the information to entity info from ESN.
	 * @param esn
	 * @param eId
	 * @param c
	 */
	private void addInformationFromESN(EntitySrcNat esn, int eId, EntityInfo c) {

		c.setAtcc(esn.getPdbx_atcc());
		c.setCell(esn.getPdbx_cell());
		c.setOrganismCommon(esn.getCommon_name());
		c.setOrganismScientific(esn.getPdbx_organism_scientific());
		c.setOrganismTaxId(esn.getPdbx_ncbi_taxonomy_id());

	}
	/**
	 * Add the information from ESS to Entity info.
	 * @param ess
	 * @param eId
	 * @param c
	 */
	private void addInfoFromESS(EntitySrcSyn ess, int eId, EntityInfo c) {
		c.setOrganismCommon(ess.getOrganism_common_name());
		c.setOrganismScientific(ess.getOrganism_scientific());
		c.setOrganismTaxId(ess.getNcbi_taxonomy_id());

	}

	private void initMaps() {


		if (structAsyms == null || structAsyms.isEmpty()) {
			logger.info("No _struct_asym category found in file. No asym id to entity_id mapping will be available");
			return;
		}

		Map<String, List<String>> entityId2asymId = new HashMap<>();

		for (StructAsym asym : structAsyms) {

			logger.debug("Entity {} matches asym_id: {}", asym.getEntity_id(), asym.getId() );

			asymId2entityId.put(asym.getId(), asym.getEntity_id());

			if (entityId2asymId.containsKey(asym.getEntity_id())) {
				List<String> asymIds = entityId2asymId.get(asym.getEntity_id());
				asymIds.add(asym.getId());
			} else {
				List<String> asymIds = new ArrayList<>();
				asymIds.add(asym.getId());
				entityId2asymId.put(asym.getEntity_id(), asymIds);
			}
		}

		if (entityPolys==null || entityPolys.isEmpty()) {
			logger.info("No _entity_poly category found in file. No asym id to author id mapping will be available for header only parsing");
			return;
		}

		for (EntityPoly ep:entityPolys) {
			if (ep.getPdbx_strand_id()==null) {
				logger.info("_entity_poly.pdbx_strand_id is null for entity {}. Won't be able to map asym ids to author ids for this entity.", ep.getEntity_id());
				continue;
			}
			String[] chainNames = ep.getPdbx_strand_id().split(",");
			List<String> asymIds = entityId2asymId.get(ep.getEntity_id());
			if (chainNames.length!=asymIds.size()) {
				logger.warn("The list of asym ids (from _struct_asym) and the list of author ids (from _entity_poly) for entity {} have different lengths! Can't provide a mapping from asym ids to author chain ids", ep.getEntity_id());
				continue;
			}
			for (int i=0; i<chainNames.length; i++) {
				asymId2authorId.put(asymIds.get(i), chainNames[i]);
			}
		}
	}
	
	private void setStructNcsOps() {
		
		ArrayList<Matrix4d> ncsOperators = new ArrayList<Matrix4d>();
		
		for (StructNcsOper sNcsOper:structNcsOper) {
			
			if (!sNcsOper.getCode().equals("generate")) continue;
			
			try {
				Matrix4d op = new Matrix4d();
				op.setElement(3, 0, 0.0);
				op.setElement(3, 1, 0.0);
				op.setElement(3, 2, 0.0);
				op.setElement(3, 3, 1.0);


				op.setElement(0, 0, Double.parseDouble(sNcsOper.getMatrix11()));
				op.setElement(0, 1, Double.parseDouble(sNcsOper.getMatrix12()));
				op.setElement(0, 2, Double.parseDouble(sNcsOper.getMatrix13()));

				op.setElement(1, 0, Double.parseDouble(sNcsOper.getMatrix21()));
				op.setElement(1, 1, Double.parseDouble(sNcsOper.getMatrix22()));
				op.setElement(1, 2, Double.parseDouble(sNcsOper.getMatrix23()));

				op.setElement(2, 0, Double.parseDouble(sNcsOper.getMatrix31()));
				op.setElement(2, 1, Double.parseDouble(sNcsOper.getMatrix32()));
				op.setElement(2, 2, Double.parseDouble(sNcsOper.getMatrix33()));

				op.setElement(0, 3, Double.parseDouble(sNcsOper.getVector1()));
				op.setElement(1, 3, Double.parseDouble(sNcsOper.getVector2()));
				op.setElement(2, 3, Double.parseDouble(sNcsOper.getVector3()));

				ncsOperators.add(op);
				
			} catch (NumberFormatException e) {
				logger.warn("Error parsing doubles in NCS operator list, skipping operator {}", structNcsOper.indexOf(sNcsOper)+1); 
			}

		}
		
		// we only set it if not empty, otherwise remains null
		if (ncsOperators.size()>0) {
			structure.getCrystallographicInfo().setNcsOperators(
					ncsOperators.toArray(new Matrix4d[ncsOperators.size()]));
		}
	}
	
	private void setCrystallographicInfoMetadata() {
		if (parsedScaleMatrix!=null) {
			
			PDBCrystallographicInfo crystalInfo = structure.getCrystallographicInfo();
			
			boolean nonStd = false;
			if (crystalInfo.getCrystalCell()!=null && !crystalInfo.getCrystalCell().checkScaleMatrix(parsedScaleMatrix)) {
				nonStd = true;
			}
			
			crystalInfo.setNonStandardCoordFrameConvention(nonStd); 
		}
	}


	/** This method will return the parsed protein structure, once the parsing has been finished
	 *
	 * @return a BioJava protein structure object
	 */
	public Structure getStructure() {

		return structure;
	}

	@Override
	public void newDatabasePDBrevRecord(DatabasePdbrevRecord record) {

		PDBHeader header = structure.getPDBHeader();

		if ( header == null) {
			header = new PDBHeader();
			structure.setPDBHeader(header);
		}

		List<DatabasePdbrevRecord> revRecords = header.getRevisionRecords();
		if ( revRecords == null) {
			revRecords = new ArrayList<DatabasePdbrevRecord>();
			header.setRevisionRecords(revRecords);
		}
		revRecords.add(record);


	}


	@Override
	public void newDatabasePDBrev(DatabasePDBrev dbrev) {
		//System.out.println("got a database revision:" + dbrev);
		SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd",Locale.US);
		PDBHeader header = structure.getPDBHeader();

		if ( header == null) {
			header = new PDBHeader();
		}


		if (dbrev.getNum().equals("1")){

			try {
				Date dep = dateFormat.parse(dbrev.getDate_original());
				header.setDepDate(dep);

			} catch (ParseException e){
				logger.warn("Could not parse date string '{}', deposition date will be unavailable", dbrev.getDate_original());
			}

			try {
				Date mod = dateFormat.parse(dbrev.getDate());
				header.setModDate(mod);

			} catch (ParseException e){
				logger.warn("Could not parse date string '{}', modification date will be unavailable", dbrev.getDate());
			}


		} else {
			try {

				Date mod = dateFormat.parse(dbrev.getDate());
				header.setModDate(mod);

			} catch (ParseException e){
				logger.warn("Could not parse date string '{}', modification date will be unavailable", dbrev.getDate());
			}
		}

		structure.setPDBHeader(header);
	}

	@Override
	public void newDatabasePDBremark(DatabasePDBremark remark) {
		//System.out.println(remark);
		String id = remark.getId();
		if (id.equals("2")){

			//this remark field contains the resolution information:
			String line = remark.getText();

			int i = line.indexOf("ANGSTROM");
			if ( i > 5) {
				// line contains ANGSTROM info...
				String resolution = line.substring(i-5,i).trim();
				// convert string to float
				float res = 99 ;
				try {
					res = Float.parseFloat(resolution);

				} catch (NumberFormatException e) {
					logger.info("could not parse resolution from line and ignoring it " + line);
					return ;


				}
				// support for old style header

				PDBHeader pdbHeader = structure.getPDBHeader();
				pdbHeader.setResolution(res);

			}

		}
	}

	@Override
	public void newRefine(Refine r){

		PDBHeader pdbHeader = structure.getPDBHeader();
		// RESOLUTION
		// in very rare cases (for instance hybrid methods x-ray + neutron diffraction, e.g. 3ins, 4n9m)
		// there are 2 resolution values, one for each method
		// we take the last one found so that behaviour is like in PDB file parsing
		if (pdbHeader.getResolution()!=PDBHeader.DEFAULT_RESOLUTION) {
			logger.warn("More than 1 resolution value present, will use last one {} and discard previous {} "
					,r.getLs_d_res_high(), String.format("%4.2f",pdbHeader.getResolution()));
		}
		try {
			pdbHeader.setResolution(Float.parseFloat(r.getLs_d_res_high()));
		} catch (NumberFormatException e){
			logger.info("Could not parse resolution from " + r.getLs_d_res_high() + " " + e.getMessage());
		}


		// RFREE
		if (pdbHeader.getRfree()!=PDBHeader.DEFAULT_RFREE) {
			logger.warn("More than 1 Rfree value present, will use last one {} and discard previous {} ",
					r.getLs_R_factor_R_free(), String.format("%4.2f",pdbHeader.getRfree()));
		}
		if (r.getLs_R_factor_R_free()==null) {
			// some entries like 2ifo haven't got this field at all
			logger.info("_refine.ls_R_factor_R_free not present, not parsing Rfree value");
		} else {
			try {
				pdbHeader.setRfree(Float.parseFloat(r.getLs_R_factor_R_free()));
			} catch (NumberFormatException e){
				// no rfree present ('?') is very usual, that's why we set it to debug
				logger.debug("Could not parse Rfree from string '{}'", r.getLs_R_factor_R_free());
			}
		}

		// RWORK
		if(pdbHeader.getRwork()!=PDBHeader.DEFAULT_RFREE) {
			logger.warn("More than 1 R work value present, will use last one {} and discard previous {} ",
					r.getLs_R_factor_R_work(), String.format("%4.2f",pdbHeader.getRwork()));
		}
		if(r.getLs_R_factor_R_work()==null){
			logger.info("_refine.ls_R_factor_R_work not present, not parsing R-work value");
		}
		else{
			try{
				pdbHeader.setRwork(Float.parseFloat(r.getLs_R_factor_R_work()));
			}
			catch (NumberFormatException e){
				logger.debug("Could not parse R-work from string '{}'", r.getLs_R_factor_R_work());
			}

		}

	}


	@Override
	public void newAuditAuthor(AuditAuthor aa){

		String name =  aa.getName();

		StringBuffer famName = new StringBuffer();
		StringBuffer initials = new StringBuffer();
		boolean afterComma = false;
		for ( char c: name.toCharArray()) {
			if ( c == ' ')
				continue;
			if ( c == ','){
				afterComma = true;
				continue;
			}

			if ( afterComma)
				initials.append(c);
			else
				famName.append(c);
		}

		StringBuffer newaa = new StringBuffer();
		newaa.append(initials);
		newaa.append(famName);

		PDBHeader header = structure.getPDBHeader();
		String auth = header.getAuthors();
		if (auth == null) {
			header.setAuthors(newaa.toString());
		}else {
			auth += "," + newaa.toString();
			header.setAuthors(auth);

		}
	}

	@Override
	public void newExptl(Exptl exptl) {

		PDBHeader pdbHeader = structure.getPDBHeader();
		String method = exptl.getMethod();
		pdbHeader.setExperimentalTechnique(method);

	}

	@Override
	public void newCell(Cell cell) {

		try {
			float a = Float.parseFloat(cell.getLength_a());
			float b = Float.parseFloat(cell.getLength_b());
			float c = Float.parseFloat(cell.getLength_c());
			float alpha = Float.parseFloat(cell.getAngle_alpha());
			float beta = Float.parseFloat(cell.getAngle_beta());
			float gamma = Float.parseFloat(cell.getAngle_gamma());

			CrystalCell xtalCell = new CrystalCell();
			xtalCell.setA(a);
			xtalCell.setB(b);
			xtalCell.setC(c);
			xtalCell.setAlpha(alpha);
			xtalCell.setBeta(beta);
			xtalCell.setGamma(gamma);

			if (!xtalCell.isCellReasonable()) {
				// If the entry describes a structure determined by a technique other than X-ray crystallography,
				// cell is (sometimes!) a = b = c = 1.0, alpha = beta = gamma = 90 degrees
				// if so we don't add and CrystalCell will be null
				logger.debug("The crystal cell read from file does not have reasonable dimensions (at least one dimension is below {}), discarding it.",
						CrystalCell.MIN_VALID_CELL_SIZE);
				return;
			}

			structure.getPDBHeader().getCrystallographicInfo().setCrystalCell(xtalCell);

		} catch (NumberFormatException e){
			structure.getPDBHeader().getCrystallographicInfo().setCrystalCell(null);
			logger.info("could not parse some cell parameters ("+e.getMessage()+"), ignoring _cell ");
		}
	}

	@Override
	public void newSymmetry(Symmetry symmetry) {
		String spaceGroup = symmetry.getSpace_group_name_H_M();
		SpaceGroup sg = SymoplibParser.getSpaceGroup(spaceGroup);
		if (sg==null) {
			logger.warn("Space group '"+spaceGroup+"' not recognised as a standard space group");
			structure.getPDBHeader().getCrystallographicInfo().setNonStandardSg(true);
		} else {
			structure.getPDBHeader().getCrystallographicInfo().setSpaceGroup(sg);
			structure.getPDBHeader().getCrystallographicInfo().setNonStandardSg(false);
		}
	}

	@Override
	public void newStructNcsOper(StructNcsOper sNcsOper) {
		structNcsOper.add(sNcsOper);
	}
	
	public void newAtomSites(AtomSites atomSites) {
		
		try {
			Matrix4d m = new Matrix4d(
				Double.parseDouble(atomSites.getFract_transf_matrix11()), Double.parseDouble(atomSites.getFract_transf_matrix12()), Double.parseDouble(atomSites.getFract_transf_matrix13()), Double.parseDouble(atomSites.getFract_transf_vector1()),
				Double.parseDouble(atomSites.getFract_transf_matrix21()), Double.parseDouble(atomSites.getFract_transf_matrix22()), Double.parseDouble(atomSites.getFract_transf_matrix23()), Double.parseDouble(atomSites.getFract_transf_vector2()),
				Double.parseDouble(atomSites.getFract_transf_matrix31()), Double.parseDouble(atomSites.getFract_transf_matrix32()), Double.parseDouble(atomSites.getFract_transf_matrix33()), Double.parseDouble(atomSites.getFract_transf_vector3()),
				0,0,0,1);

			parsedScaleMatrix = m;
		
		} catch (NumberFormatException e) {
			logger.warn("Some values in _atom_sites.fract_transf_matrix or _atom_sites.fract_transf_vector could not be parsed as numbers. Can't check whether coordinate frame convention is correct! Error: {}", e.getMessage());
			structure.getPDBHeader().getCrystallographicInfo().setNonStandardCoordFrameConvention(false);
			
			// in this case parsedScaleMatrix stays null and can't be used in documentEnd()
		}
	}

	@Override
	public void newStructRef(StructRef sref) {
		logger.debug(sref.toString());
		strucRefs.add(sref);
	}

	private StructRef getStructRef(String ref_id){
		for (StructRef structRef : strucRefs) {

			if (structRef.getId().equals(ref_id)){
				return structRef;
			}

		}
		return null;

	}

	/**
	 * create a DBRef record from the StrucRefSeq record:
	 * <pre>
	 * PDB record                    DBREF
	 * Field Name                    mmCIF Data Item
	 * Section                       n.a.
	 * PDB_ID_Code                   _struct_ref_seq.pdbx_PDB_id_code
	 * Strand_ID                     _struct_ref_seq.pdbx_strand_id
	 * Begin_Residue_Number          _struct_ref_seq.pdbx_auth_seq_align_beg
	 * Begin_Ins_Code                _struct_ref_seq.pdbx_seq_align_beg_ins_code
	 * End_Residue_Number            _struct_ref_seq.pdbx_auth_seq_align_end
	 * End_Ins_Code                  _struct_ref_seq.pdbx_seq_align_end_ins_code
	 * Database                      _struct_ref.db_name
	 * Database_Accession_No         _struct_ref_seq.pdbx_db_accession
	 * Database_ID_Code              _struct_ref.db_code
	 * Database_Begin_Residue_Number _struct_ref_seq.db_align_beg
	 * Databaes_Begin_Ins_Code       _struct_ref_seq.pdbx_db_align_beg_ins_code
	 * Database_End_Residue_Number   _struct_ref_seq.db_align_end
	 * Databaes_End_Ins_Code         _struct_ref_seq.pdbx_db_align_end_ins_code
	 * </pre>
	 *
	 *
	 */
	@Override
	public void newStructRefSeq(StructRefSeq sref) {
		//if (DEBUG)
		//	System.out.println(sref);
		DBRef r = new DBRef();


		//if (DEBUG)
		//	System.out.println( " " + sref.getPdbx_PDB_id_code() + " " + sref.getPdbx_db_accession());
		r.setIdCode(sref.getPdbx_PDB_id_code());
		r.setDbAccession(sref.getPdbx_db_accession());
		r.setDbIdCode(sref.getPdbx_db_accession());

		r.setChainId(sref.getPdbx_strand_id());
		StructRef structRef = getStructRef(sref.getRef_id());
		if (structRef == null){
			logger.info("could not find StructRef " + sref.getRef_id() + " for StructRefSeq " + sref);
		} else {
			r.setDatabase(structRef.getDb_name());
			r.setDbIdCode(structRef.getDb_code());
		}

		int seqbegin;
		int seqend;
		try{
			seqbegin = Integer.parseInt(sref.getPdbx_auth_seq_align_beg());
			seqend   = Integer.parseInt(sref.getPdbx_auth_seq_align_end());
		}
		catch(NumberFormatException e){
			logger.info("Couldn't parse sequence alignment positions.");
			logger.debug(e.toString());
			return;
		}
		Character begin_ins_code = new Character(sref.getPdbx_seq_align_beg_ins_code().charAt(0));
		Character end_ins_code   = new Character(sref.getPdbx_seq_align_end_ins_code().charAt(0));

		if (begin_ins_code == '?')
			begin_ins_code = ' ';

		if (end_ins_code == '?')
			end_ins_code = ' ';

		r.setSeqBegin(seqbegin);
		r.setInsertBegin(begin_ins_code);

		r.setSeqEnd(seqend);
		r.setInsertEnd(end_ins_code);

		int dbseqbegin = Integer.parseInt(sref.getDb_align_beg());
		int dbseqend   = Integer.parseInt(sref.getDb_align_end());
		Character db_begin_in_code = new Character(sref.getPdbx_db_align_beg_ins_code().charAt(0));
		Character db_end_in_code   = new Character(sref.getPdbx_db_align_end_ins_code().charAt(0));

		if (db_begin_in_code == '?')
			db_begin_in_code = ' ';

		if (db_end_in_code == '?')
			db_end_in_code = ' ';


		r.setDbSeqBegin(dbseqbegin);
		r.setIdbnsBegin(db_begin_in_code);

		r.setDbSeqEnd(dbseqend);
		r.setIdbnsEnd(db_end_in_code);

		List<DBRef> dbrefs = structure.getDBRefs();
		if ( dbrefs == null)
			dbrefs = new ArrayList<DBRef>();
		dbrefs.add(r);

		logger.debug(r.toPDB());

		structure.setDBRefs(dbrefs);

	}

	@Override
	public void newStructRefSeqDif(StructRefSeqDif sref) {
		sequenceDifs.add(sref);
	}

	private Chain getEntityChain(String entity_id){

		for (Chain chain : entityChains) {
			if ( chain.getId().equals(entity_id)){

				return chain;
			}
		}
		// does not exist yet, so create...

		Chain	chain = new ChainImpl();
		chain.setId(entity_id);
		entityChains.add(chain);

		return chain;

	}

	//private Chain getSeqResChain(String chainID){
	//	return getChainFromList(seqResChains, chainID);
	//}


	/**
	 * Data items in the ENTITY_SRC_GEN category record details of
	 * the source from which the entity was obtained in cases
	 * where the source was genetically manipulated.  The
	 * following are treated separately:  items pertaining to the tissue
	 * from which the gene was obtained, items pertaining to the host
	 * organism for gene expression and items pertaining to the actual
	 * producing organism (plasmid).
	 */
	@Override
	public void newEntitySrcGen(EntitySrcGen entitySrcGen){

		// add to internal list. Map to Compound object later on...
		entitySrcGens.add(entitySrcGen);
	}

	@Override
	public void newEntitySrcNat(EntitySrcNat entitySrcNat){

		// add to internal list. Map to Compound object later on...
		entitySrcNats.add(entitySrcNat);
	}

	@Override
	public void newEntitySrcSyn(EntitySrcSyn entitySrcSyn){

		// add to internal list. Map to Compound object later on...
		entitySrcSyns.add(entitySrcSyn);
	}

	/**
	 * The EntityPolySeq object provide the amino acid sequence objects for the Entities.
	 * Later on the entities are mapped to the BioJava {@link Chain} and {@link EntityInfo} objects.
	 * @param epolseq the EntityPolySeq record for one amino acid
	 */
	@Override
	public void newEntityPolySeq(EntityPolySeq epolseq) {

		logger.debug("NEW entity poly seq " + epolseq);

		int eId = -1;
		try {
			eId = Integer.parseInt(epolseq.getEntity_id());
		} catch (NumberFormatException e) {
			logger.warn("Could not parse entity id from EntityPolySeq: "+e.getMessage());
		}
		Entity e = getEntity(eId);

		if (e == null){
			logger.info("Could not find entity "+ epolseq.getEntity_id()+". Can not match sequence to it.");
			return;
		}

		Chain entityChain = getEntityChain(epolseq.getEntity_id());

		// first we check through the chemcomp provider, if it fails we do some heuristics to guess the type of group
		// TODO some of this code is analogous to getNewGroup() and we should try to unify them - JD 2016-03-08

		Group g = ChemCompGroupFactory.getGroupFromChemCompDictionary(epolseq.getMon_id());
		//int seqId = Integer.parseInt(epolseq.getNum());
		if ( g != null && !g.getChemComp().isEmpty()) {
			if ( g instanceof AminoAcidImpl) {
				AminoAcidImpl aa = (AminoAcidImpl) g;
				aa.setRecordType(AminoAcid.SEQRESRECORD);
				//aa.setId(seqId);
			}
		} else {

			if (epolseq.getMon_id().length()==3 && StructureTools.get1LetterCodeAmino(epolseq.getMon_id())!=null){
				AminoAcidImpl a = new AminoAcidImpl();
				a.setRecordType(AminoAcid.SEQRESRECORD);
				Character code1 = StructureTools.get1LetterCodeAmino(epolseq.getMon_id());
				a.setAminoType(code1);
				g = a;

			} else if ( StructureTools.isNucleotide(epolseq.getMon_id())) {
				// the group is actually a nucleotide group...
				NucleotideImpl n = new NucleotideImpl();
				g = n;

			} else {
				logger.debug("Residue {} {} is not a standard aminoacid or nucleotide, will create a het group for it", epolseq.getNum(),epolseq.getMon_id());
				HetatomImpl h = new HetatomImpl();
				g = h;

			}


		}
		// at this stage we don't know about author residue numbers (insertion codes)
		// we abuse now the ResidueNumber field setting the internal residue numbers (label_seq_id, strictly sequential and follow the seqres sequence 1 to n)
		// later the actual ResidueNumbers (author residue numbers) have to be corrected in alignSeqRes()
		g.setResidueNumber(ResidueNumber.fromString(epolseq.getNum()));

		g.setPDBName(epolseq.getMon_id());

		entityChain.addGroup(g);

	}

	@Override
	public void newPdbxPolySeqScheme(PdbxPolySeqScheme ppss) {

		//if ( headerOnly)
		//	return;

		// replace the group asym ids with the real PDB ids!
		// replaceGroupSeqPos(ppss);  // This might be incorrect in some pdb, to use auth_seq_id of the pdbx_poly_seq_scheme.


	}


	@Override
	public void newPdbxNonPolyScheme(PdbxNonPolyScheme ppss) {

		//if (headerOnly)
		//	return;

		// merge the EntityPolySeq info and the AtomSite chains into one...
		//already known ignore:

	}

	@Override
	public void newPdbxEntityNonPoly(PdbxEntityNonPoly pen){
		// TODO: do something with them...
		// not implemented yet...
		logger.debug(pen.getEntity_id() + " " + pen.getName() + " " + pen.getComp_id());

	}

	@Override
	public void newChemComp(ChemComp c) {
		// TODO: do something with them...

	}

	@Override
	public void newGenericData(String category, List<String> loopFields,
			List<String> lineData) {

		//logger.debug("unhandled category so far: " + category);
	}

	@Override
	public FileParsingParameters getFileParsingParameters()
	{
		return params;
	}

	@Override
	public void setFileParsingParameters(FileParsingParameters params)
	{
		this.params = params;

	}

	@Override
	public void newChemCompDescriptor(ChemCompDescriptor ccd) {

		// TODO nothing happening here yet.

	}



	public List<PdbxStructOperList> getStructOpers() {
		return structOpers;
	}

	@Override
	public void newPdbxStrucAssembly(PdbxStructAssembly strucAssembly) {
		strucAssemblies.add(strucAssembly);

	}

	public List<PdbxStructAssembly> getStructAssemblies(){
		return strucAssemblies;
	}

	@Override
	public void newPdbxStrucAssemblyGen(PdbxStructAssemblyGen strucAssembly) {
		strucAssemblyGens.add(strucAssembly);

	}

	public List<PdbxStructAssemblyGen> getStructAssemblyGens(){
		return strucAssemblyGens;
	}

	@Override
	public void newChemCompAtom(ChemCompAtom atom) {

	}

	@Override
	public void newPdbxChemCompIndentifier(PdbxChemCompIdentifier id) {

	}

	@Override
	public void newChemCompBond(ChemCompBond bond) {

	}

	@Override
	public void newPdbxChemCompDescriptor(PdbxChemCompDescriptor desc) {

	}

	@Override
	public void newStructConn(StructConn structConn) {
		this.structConn.add(structConn);
	}

	@Override
	public void newStructSiteGen(StructSiteGen siteGen) { this.structSiteGens.add(siteGen);	}

	@Override
	public void newStructSite(StructSite structSite) {

		if (params.isHeaderOnly()) {
			return;
		}

		// Simply implement the method.
		List<Site> sites = structure.getSites();
		if (sites == null) sites = new ArrayList<Site>();

		Site site = null;
		for (Site asite : sites) {
			if (asite.getSiteID().equals(structSite.getId())) {
				site = asite; 		// Prevent duplicate siteIds
			}
		}
		boolean addSite = false;
		if (site == null) { site = new Site(); addSite = true; }
		site.setSiteID(structSite.getId());
		site.setDescription(structSite.getDetails());
		// site.setPdbxEvidenceCode(structSite.getPdbxEvidenceCode()); // TODO - add addition fields in Sites
		if (addSite) sites.add(site);

		structure.setSites(sites);
	}

	/**
	 * Build sites in a BioJava Structure using the original author chain id & residue numbers.
	 * Sites are built from struct_site_gen records that have been parsed.
	 */
	private void addSites() {
		List<Site> sites = structure.getSites();
		if (sites == null) sites = new ArrayList<Site>();

		for (StructSiteGen siteGen : structSiteGens) {
			// For each StructSiteGen, find the residues involved, if they exist then
			String site_id = siteGen.getSite_id(); // multiple could be in same site.
			if (site_id == null) site_id = "";
			String comp_id = siteGen.getLabel_comp_id();  // PDBName

			// Assumption: the author chain ID and residue number for the site is consistent with the original
			// author chain id and residue numbers.

			String asymId = siteGen.getLabel_asym_id(); // chain name
			String authId = siteGen.getAuth_asym_id(); // chain Id
			String auth_seq_id = siteGen.getAuth_seq_id(); // Res num

			String insCode = siteGen.getPdbx_auth_ins_code();
			if ( insCode != null && insCode.equals("?"))
				insCode = null;

			// Look for asymID = chainID and seqID = seq_ID.  Check that comp_id matches the resname.
			Group g = null;
			try {
				Chain chain = structure.getChain(asymId);

				if (null != chain) {
					try {
						Character insChar = null;
						if (null != insCode && insCode.length() > 0) insChar = insCode.charAt(0);
						g = chain.getGroupByPDB(new ResidueNumber(null, Integer.parseInt(auth_seq_id), insChar));
					} catch (NumberFormatException e) {
						logger.warn("Could not lookup residue : " + authId + auth_seq_id);
					}
				}
			} catch (StructureException e) {
				logger.warn("Problem finding residue in site entry " + siteGen.getSite_id() + " - " + e.getMessage(), e.getMessage());
			}

			if (g != null) {
				// 2. find the site_id, if not existing, create anew.
				Site site = null;
				for (Site asite: sites) {
					if (site_id.equals(asite.getSiteID())) site = asite;
				}

				boolean addSite = false;

				// 3. add this residue to the site.
				if (site == null) {
					addSite = true;
					site = new Site();
					site.setSiteID(site_id);
				}

				List<Group> groups = site.getGroups();
				if (groups == null) groups = new ArrayList<Group>();

				// Check the self-consistency of the residue reference from auth_seq_id and chain_id
				if (!comp_id.equals(g.getPDBName())) {
					logger.warn("comp_id doesn't match the residue at " + authId + " " + auth_seq_id + " - skipping");
				} else {
					groups.add(g);
					site.setGroups(groups);
				}
				if (addSite) sites.add(site);
			}
		}
		structure.setSites(sites);
	}
}