/*
The MIT License (MIT)
Copyright (c) 2015 Pierre Lindenbaum
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
History:
* 2015 creation
*/
package com.github.lindenb.jvarkit.util;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import com.github.lindenb.jvarkit.io.IOUtils;
import com.github.lindenb.jvarkit.util.log.Logger;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLine;
public class Pedigree
{
/** logger of this class, used to report malformed VCF header lines */
private static final Logger LOG = Logger.build(Pedigree.class).make();
/** help text describing the columns of a tab-delimited pedigree file */
public static final String OPT_DESCRIPTION="A pedigree is a text file delimited with tabs. No header. Columns are (1) Family (2) Individual-ID (3) Father Id or '0' (4) Mother Id or '0' (5) Sex : 1 male/2 female / 0 unknown (6) Status : 0 unaffected, 1 affected,-9 unknown ";
/** families of this pedigree, indexed by family identifier and kept sorted on that identifier */
private Map<String,FamilyImpl > families=new TreeMap<String, Pedigree.FamilyImpl>();
/** Affection status of an individual, together with its numeric code. */
public enum Status
{
    missing(-9),
    unaffected(0),
    affected(1);

    /** numeric code associated with this status */
    private final int code;

    Status(final int code)
    {
        this.code = code;
    }

    /** @return -9 for {@code missing}, 0 for {@code unaffected}, 1 for {@code affected} */
    public int intValue()
    {
        return this.code;
    }
}
/** Sex of an individual, together with its numeric code. */
public enum Sex
{
    male(1),
    female(2),
    unknown(0);

    /** numeric code associated with this sex */
    private final int code;

    Sex(final int code)
    {
        this.code = code;
    }

    /** @return 1 for {@code male}, 2 for {@code female}, 0 for {@code unknown} */
    public int intValue()
    {
        return this.code;
    }
}
/** A family of the pedigree: an identifier and a group of individuals. Families are ordered on their identifier. */
public interface Family extends Comparable<Family>
{
    /** @return the identifier of this family */
    String getId();

    /** @return the individual of this family having the identifier {@code s} */
    Person getPersonById(String s);

    /** @return the individuals of this family */
    java.util.Collection<? extends Person> getIndividuals();

    /**
     * Checks this family.
     * @return a family (the implementation visible in this file returns itself)
     * @throws IllegalStateException if the family is not valid
     */
    Family validate() throws IllegalStateException;

    /** Orders families on their identifier. */
    @Override
    default int compareTo(final Family o) {
        final String mine = this.getId();
        final String theirs = o.getId();
        return mine.compareTo(theirs);
    }
}
/**
 * Implementation of {@link Family} : an identifier and the individuals of the family
 * indexed by their identifier. Equality and hash code rely on the family identifier only.
 */
private class FamilyImpl
    implements Family
{
    /** identifier of this family */
    private String id;
    /** individuals of this family, sorted on their identifier */
    private Map<String,PersonImpl> individuals=new TreeMap<String,PersonImpl>();

    @Override
    public String getId()
    {
        return this.id;
    }

    /** @return the individual having this identifier in this family, or null if there is none */
    @Override
    public Person getPersonById(String s)
    {
        final PersonImpl found = this.individuals.get(s);
        return found;
    }

    /** @return a view of the individuals of this family, in the order of their identifiers */
    @Override
    public java.util.Collection<? extends Person> getIndividuals()
    {
        return this.individuals.values();
    }

    @Override
    public int hashCode() {
        return this.id.hashCode();
    }

    @Override
    public boolean equals(Object obj) {
        if (obj == this) return true;
        if (obj == null) return false;
        if (this.getClass() != obj.getClass()) return false;
        return this.id.equals(((FamilyImpl) obj).id);
    }

    /** @return the family identifier */
    @Override
    public String toString() {
        return this.id;
    }

    /** Validates each individual of this family, then returns this family. */
    @Override
    public Family validate() throws IllegalStateException {
        this.individuals.values().forEach(PersonImpl::validate);
        return this;
    }
}
/** An individual of the pedigree. Individuals are ordered on their family, then on their identifier. */
public interface Person
    extends Comparable<Person>
{
    /** @return the identifier of this individual */
    String getId();
    /** @return the family of this individual */
    Family getFamily();
    /** @return true if a father is declared for this individual */
    boolean hasFather();
    /** @return the father of this individual */
    Person getFather();
    /** @return true if a mother is declared for this individual */
    boolean hasMother();
    /** @return the mother of this individual */
    Person getMother();

    /** returns the father id or null if there is no father */
    default String getFatherId() {
        final Person father = getFather();
        if(father==null) return null;
        return father.getId();
    }

    /** returns the mother id or null if there is no mother */
    default String getMotherId() {
        final Person mother = getMother();
        if(mother==null) return null;
        return mother.getId();
    }

    /** @return the sex of this individual */
    Sex getSex();
    /** @return true if this individual is a male */
    boolean isMale();
    /** @return true if this individual is a female */
    boolean isFemale();
    /** @return the affection status of this individual */
    Status getStatus();
    /** return i-th parent 0=father 1=mother */
    Person getParent( int zeroOrOne);
    /**
     * Checks this individual.
     * @throws IllegalStateException if the individual is not valid
     */
    Person validate() throws IllegalStateException;
    /** @return true if this individual is affected */
    boolean isAffected();
    /** @return true if this individual is unaffected */
    boolean isUnaffected();

    /** Orders individuals on their family first, then on their identifier. */
    @Override
    default int compareTo(final Person o) {
        final int byFamily = getFamily().compareTo(o.getFamily());
        return byFamily!=0 ? byFamily : this.getId().compareTo(o.getId());
    }
}
/**
 * Implementation of {@link Person}. The parents are stored as identifiers and are
 * resolved on demand in the family of the individual. A parent identifier that is
 * null, empty or "0" means 'no parent'. Equality and hash code rely on the family
 * and on the identifier of the individual.
 */
private class PersonImpl
    implements Person
{
    FamilyImpl family;
    String id;
    String fatherId=null;
    String motherId=null;
    Sex sex=Sex.unknown;
    // NOTE(review): the default is 'unaffected', not 'missing': an individual whose status
    // is never set is reported as unaffected. Confirm this is intended.
    Status status=Status.unaffected;

    @Override
    public Family getFamily()
    {
        return this.family;
    }

    @Override
    public String getId()
    {
        return this.id;
    }

    @Override
    public boolean isMale() { return Sex.male.equals(this.getSex());}

    @Override
    public boolean isFemale() { return Sex.female.equals(this.getSex());}

    @Override
    public boolean isAffected() { return Status.affected.equals(this.getStatus());}

    @Override
    public boolean isUnaffected() { return Status.unaffected.equals(this.getStatus());}

    /** @return true if {@code s} designates a parent, that is to say if it is not null, not empty and not "0" */
    private boolean hasParent(final String s)
    {
        return s!=null && !s.isEmpty() && !s.equals("0");
    }

    /** @return the individual of the family having the identifier {@code s}, or null if {@code s} is not a parent id or if that individual is not in the family */
    private Person getParent(final String s)
    {
        return hasParent(s) ? getFamily().getPersonById(s) : null;
    }

    /**
     * return i-th parent 0=father 1=mother
     * @throws IllegalArgumentException if the index is neither 0 nor 1
     */
    @Override
    public Person getParent( int zeroOrOne) {
        switch(zeroOrOne) {
            case 0: return getFather();
            case 1: return getMother();
            default: throw new IllegalArgumentException("0 or 1 but got "+zeroOrOne);
        }
    }

    @Override
    public boolean hasFather() {
        return hasParent(this.fatherId);
    }

    @Override
    public boolean hasMother() {
        return hasParent(this.motherId);
    }

    /** @return the father, or null if no father is declared or if he is not in the family */
    @Override
    public Person getFather()
    {
        return getParent(this.fatherId);
    }

    /** @return the mother, or null if no mother is declared or if she is not in the family */
    @Override
    public Person getMother()
    {
        return getParent(this.motherId);
    }

    @Override
    public Status getStatus()
    {
        return this.status;
    }

    @Override
    public Sex getSex()
    {
        return this.sex;
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + this.family.hashCode();
        result = prime * result + this.id.hashCode();
        return result;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) return true;
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        final PersonImpl p =(PersonImpl)obj;
        return this.family.equals(p.family) &&
            this.id.equals(p.id);
    }

    /**
     * Checks that each declared parent exists in the family and that its sex is
     * compatible with its role (a parent of unknown sex is accepted).
     * @return this individual
     * @throws IllegalStateException if a declared parent is missing or has the wrong sex
     */
    @Override
    public Person validate() throws IllegalStateException {
        validateParent(this.fatherId, "father", "he", Sex.female, "woman");
        validateParent(this.motherId, "mother", "she", Sex.male, "man");
        return this;
    }

    /** checks one parent; does nothing when {@code parentId} is null, empty or "0", the same rule as hasFather/hasMother */
    private void validateParent(final String parentId,final String role,final String pronoun,final Sex forbiddenSex,final String forbiddenLabel) {
        if(!hasParent(parentId)) return;
        final PersonImpl parent = this.family.individuals.get(parentId);
        final String prefix = "Individual "+this.toString()+" has "+role+" "+parentId+" but "+pronoun+" is ";
        if(parent==null) throw new IllegalStateException(prefix+"missing in family.");
        if(parent.sex == forbiddenSex) throw new IllegalStateException(prefix+"declared as a "+forbiddenLabel+".");
    }

    /** @return the family and the identifier of the individual, separated by a colon */
    @Override
    public String toString() {
        return this.family+":"+this.id;
    }
}
/** private constructor: in this file, instances are only created by the nested {@link Parser} */
private Pedigree()
{
}
/** @return true if this pedigree contains no family */
public boolean isEmpty() {
    return this.families.size() == 0;
}
/** utility function for vcf, returns true if all the person IDs in the pedigree
 * are unique (no ID shared by two individuals, even when they belong to different families)
 */
public boolean verifyPersonsHaveUniqueNames() {
    final Set<String> seen = new HashSet<>();
    for(final Family family : this.families.values()) {
        for(final Person person : family.getIndividuals()) {
            // Set.add returns false when the identifier was already recorded
            if(!seen.add(person.getId())) return false;
        }
    }
    return true;
}
/** utility function for vcf, returns a new map associating each person ID to its Person.
 * @return Map<person.id,Person>, sorted on the person ID
 * @throws IllegalStateException if two individuals of different families share the same ID
 */
public Map<String, Person> getPersonsMap() {
    final Map<String, Person> id2person = new TreeMap<>();
    for(final Family family : this.families.values()) {
        for(final Person person : family.getIndividuals()) {
            // putIfAbsent leaves the map unchanged and returns the previous owner of the ID, if any
            final Person prev = id2person.putIfAbsent(person.getId(), person);
            if(prev!=null) {
                throw new IllegalStateException(
                    "Cannot create a Map<String, Person> because "+prev+" and "+person + " share the same ID : "+person.getId()
                    );
            }
        }
    }
    return id2person;
}
/**
 * Validates every family of this pedigree.
 * @return this pedigree
 * @throws IllegalStateException if a family is not valid
 */
public Pedigree validate() throws IllegalStateException {
    getFamilies().forEach(Family::validate);
    return this;
}
/** @return a view of all the families in this pedigree, in the order of their identifiers */
public java.util.Collection<? extends Family> getFamilies()
{
    final java.util.Collection<FamilyImpl> all = this.families.values();
    return all;
}
/** @return the family having this identifier, or null if there is none */
public Family getFamilyById(final String famId) {
    final Family family = this.families.get(famId);
    return family;
}
/**
 * get an individual by id, assume individual-ID are unique.
 * The families are scanned in the order of their identifiers and the first match is returned.
 * @return the individual, or null if no family contains this id
 */
public Person getPersonById(final String id) {
    return this.families.values().stream()
        .map(fam -> fam.getPersonById(id))
        .filter(person -> person != null)
        .findFirst()
        .orElse(null);
}
/** @return a new set containing all the individuals of all the families in this pedigree */
public java.util.Set<Person> getPersons()
{
    return this.families.values().stream()
        .<Person>flatMap(family -> family.getIndividuals().stream())
        .collect(Collectors.toCollection(HashSet::new));
}
/** @return a new set containing the individuals whose status is {@code Status.affected} */
public java.util.Set<Person> getAffected()
{
    final java.util.Set<Person> selection = getPersons();
    selection.removeIf(person -> person.getStatus() != Status.affected);
    return selection;
}
/** @return a new set containing the individuals whose status is {@code Status.unaffected} */
public java.util.Set<Person> getUnaffected()
{
    final java.util.Set<Person> selection = getPersons();
    selection.removeIf(person -> person.getStatus() != Status.unaffected);
    return selection;
}
/**
 * Reads a pedigree from a file with a parser using the default settings.
 * @deprecated use {@link Parser#parse(File)}
 */
@Deprecated
public static Pedigree readPedigree(final File f) throws IOException
{
    final Parser parser = newParser();
    return parser.parse(f);
}
/**
 * Reads a pedigree from a reader with a parser using the default settings.
 * @deprecated use {@link Parser#parse(BufferedReader)}
 */
@Deprecated
public static Pedigree readPedigree(final BufferedReader r) throws IOException
{
    final Parser parser = newParser();
    return parser.parse(r);
}
/** key of the VCF header lines describing the individuals: written by toVCFHeaderLines() and searched by Parser.parse(Collection) */
public static final String VcfHeaderKey="Sample";
/**
 * Reads a pedigree from a VCF header with a parser using the default settings.
 * @deprecated use {@link Parser#parse(VCFHeader)}
 */
@Deprecated
public static Pedigree readPedigree(final VCFHeader header) {
    final Parser parser = newParser();
    return parser.parse(header);
}
/**
 * Reads a pedigree from VCF header lines with a parser using the default settings.
 * @deprecated use {@link Parser#parse(Collection)}
 */
@Deprecated
public static Pedigree readPedigree(final Collection<VCFHeaderLine> metadata) {
    final Parser parser = newParser();
    return parser.parse(metadata);
}
/**
 * Converts this pedigree to VCF header lines, one line per individual, using the key
 * {@link #VcfHeaderKey} and a value of the form
 * {@code <Family=..,ID=..,Father=..,Mother=..,Sex=..,Status=..>}.
 * A parent that cannot be resolved is written as "0".
 * @return the header lines, ordered by family then by individual
 */
public Set<VCFHeaderLine> toVCFHeaderLines() {
    final Set<VCFHeaderLine> lines = new LinkedHashSet<>();
    for(final Family family : this.families.values()) {
        for(final Person person : family.getIndividuals()) {
            final Person father = person.getFather();
            final Person mother = person.getMother();
            final String value =
                "<Family=" + family.getId() +
                ",ID=" + person.getId() +
                ",Father=" + (father==null ? "0" : father.getId()) +
                ",Mother=" + (mother==null ? "0" : mother.getId()) +
                ",Sex=" + person.getSex().intValue() +
                ",Status=" + person.getStatus().intValue() +
                ">";
            lines.add(new VCFHeaderLine(VcfHeaderKey, value));
        }
    }
    return lines;
}
/** @return a new pedigree parser with the default settings */
public static Parser newParser()
{
    final Parser parser = new Parser();
    return parser;
}
/** how to interpret the 'affected' column: converts the text of the column to a Status, or to null when the text is not interpreted */
public enum StatusModel implements Function<String, Status>{
    /** "0" is unaffected and "1" is affected; any other text, or null, gives null */
    un0af1() {
        @Override
        public Status apply(String status) {
            if(status==null) return null;
            switch(status) {
                case "1": return Status.affected;
                case "0": return Status.unaffected;
                default: return null;
            }
        }
    };
}
/** status model used by a Parser as long as Parser.statusModel(StatusModel) is not called */
public static final StatusModel DefaultStatusModel = StatusModel.un0af1;
/**
 * Builds a {@link Pedigree} from a tab-delimited text (see {@link Pedigree#OPT_DESCRIPTION})
 * or from the lines of a VCF header having the key {@link Pedigree#VcfHeaderKey}.
 * The interpretation of the status can be configured with {@link #statusModel(StatusModel)}
 * and {@link #statusIsRequired(boolean)}.
 */
public static class Parser
{
    /** delimiter of the columns in a text pedigree */
    private static final Pattern TAB = Pattern.compile("[\t]");
    /** delimiter of the attributes in the value of a VCF header line */
    private static final Pattern COMMA = Pattern.compile("[,]");
    /** how the text of the status is converted to a Status */
    private StatusModel statusModel = Pedigree.DefaultStatusModel;
    /** when true, an individual without status is rejected */
    private boolean statusRequired=false;

    /**
     * Parses a pedigree file, opened with IOUtils.openFileForBufferedReading.
     * @throws IOException on I/O error
     * @throws IllegalArgumentException if a line is malformed, see {@link #parse(BufferedReader)}
     */
    public Pedigree parse(final File f) throws IOException
    {
        try(BufferedReader r= IOUtils.openFileForBufferedReading(f)) {
            return this.parse(r);
        }
    }

    /**
     * Parses a tab-delimited pedigree. Empty lines and lines starting with '#' are skipped.
     * The reader is not closed.
     * @throws IOException on I/O error
     * @throws IllegalArgumentException if a line has less than 4 columns, if an individual is
     *   declared twice in a family, or if the status is required but missing
     */
    public Pedigree parse(final BufferedReader r)throws IOException
    {
        final Pedigree ped = new Pedigree();
        // readLine() is used rather than lines(): an I/O error is then reported with the
        // declared IOException instead of an UncheckedIOException
        String line;
        while((line=r.readLine())!=null) {
            if(line.isEmpty() || line.startsWith("#")) continue;
            read(ped,TAB.split(line));
        }
        return ped;
    }

    /** sets how the text of the status is converted to a Status. @return this parser */
    public Parser statusModel(StatusModel statusModel)
    {
        this.statusModel = statusModel;
        return this;
    }

    /** sets whether an individual without status must be rejected. @return this parser */
    public Parser statusIsRequired(boolean statusRequired)
    {
        this.statusRequired = statusRequired;
        return this;
    }

    /** adds to the pedigree the individual described by the columns of one line: family, individual, father, mother and optionally sex and status */
    private void read(final Pedigree ped,final String tokens[])
    {
        if(tokens.length<4) {
            throw new IllegalArgumentException(
                "expected at least 4 tab-delimited columns (family, individual, father, mother) but got "+
                tokens.length+" in: "+String.join("\t", tokens)
                );
        }
        final String fam= tokens[0];
        final String indi = tokens[1];
        final String father = tokens[2];
        final String mother = tokens[3];
        // the two last columns are optional: an absent column is forwarded as an empty string
        final String sex = (tokens.length>4?tokens[4]:"");
        final String status = (tokens.length>5?tokens[5]:"");
        build(ped,fam,indi,father,mother,sex,status);
    }

    /** Parses the pedigree stored in the meta-data lines of a VCF header, see {@link #parse(Collection)} */
    public Pedigree parse(final VCFHeader h)
    {
        return this.parse(h.getMetaDataInInputOrder());
    }

    /**
     * Creates an individual and registers it in its family; the family is created if needed.
     * A sex other than "1" or "2" leaves the sex unknown.
     * @throws IllegalArgumentException if the individual already exists in the family, or if
     *   the status is required but is null or empty
     */
    private void build(final Pedigree ped,final String famId,final String indiId,final String fatherId,final String motherId,final String sexxx,final String status)
    {
        FamilyImpl fam=ped.families.get(famId);
        if(fam==null)
        {
            fam=ped.new FamilyImpl();
            fam.id = famId;
            ped.families.put(famId, fam);
        }
        if(fam.getPersonById(indiId)!=null) throw new IllegalArgumentException("duplicate individual: "+String.join(" ; ", famId,indiId,fatherId,motherId,sexxx,status));
        final PersonImpl p=ped.new PersonImpl();
        p.family=fam;
        p.id=indiId;
        p.fatherId=fatherId;
        p.motherId=motherId;
        if(sexxx!=null)
        {
            if(sexxx.equals("1")) p.sex=Sex.male;
            else if(sexxx.equals("2")) p.sex=Sex.female;
        }
        // a text pedigree forwards a missing status as an empty string, a VCF header as null:
        // both mean 'not declared'
        final boolean statusDeclared = (status!=null && !status.isEmpty());
        if(statusDeclared)
        {
            // NOTE(review): a status that the model does not interpret (e.g. "-9") leaves the
            // default status of the individual unchanged. Confirm this is intended.
            final Status st= this.statusModel.apply(status);
            if(st!=null ) p.status=st;
        }
        else if(this.statusRequired) {
            throw new IllegalArgumentException("status must be declared");
        }
        fam.individuals.put(p.id, p);
    }

    /**
     * Parses the pedigree stored in VCF header lines; should be called with
     * header.getMetaDataInInputOrder(). Only the lines having the key
     * {@link Pedigree#VcfHeaderKey} are used. Their value must look like
     * {@code <Family=..,ID=..,Father=..,Mother=..,Sex=..,Status=..>}; a value not enclosed in
     * '&lt;' and '&gt;' is skipped with a warning. Unknown attributes are ignored.
     * @throws IllegalArgumentException if an attribute is defined twice, if 'Family' or 'ID'
     *   is missing, if an individual is declared twice in a family, or if the status is
     *   required but missing
     */
    public Pedigree parse(final Collection<VCFHeaderLine> metadata)
    {
        final Pedigree ped=new Pedigree();
        for(final VCFHeaderLine h:metadata) {
            final String key = h.getKey();
            if(!VcfHeaderKey.equals(key)) continue;
            final String value =h.getValue();
            if(!value.startsWith("<")) {
                LOG.warn("in "+VcfHeaderKey+" value doesn't start with '<' "+value);
                continue;
            }
            if(!value.endsWith(">")) {
                LOG.warn("in "+VcfHeaderKey+" value doesn't end with '>' "+value);
                continue;
            }
            String familyId=null;
            String indiId=null;
            String fatherId=null;
            String motherId=null;
            String sexx=null;
            String status=null;
            for(final String t:COMMA.split(value.substring(1, value.length()-1))) {
                final int eq = t.indexOf('=');
                if(eq==-1)
                {
                    LOG.warn("'=' missing in "+t+" of "+value);
                    continue;
                }
                // the name is trimmed like the value, so that "ID=x" and " ID=x" are both accepted
                final String left = t.substring(0,eq).trim();
                final String right = t.substring(eq+1).trim();
                switch(left) {
                    case "Family":
                        if(familyId!=null) throw new IllegalArgumentException("Family defined twice in " +value);
                        familyId= right;
                        break;
                    case "ID":
                        if(indiId!=null) throw new IllegalArgumentException("ID defined twice in " +value);
                        indiId= right;
                        break;
                    case "Father":
                        if(fatherId!=null) throw new IllegalArgumentException("fatherId defined twice in " +value);
                        fatherId= right;
                        break;
                    case "Mother":
                        if(motherId!=null) throw new IllegalArgumentException("mother defined twice in " +value);
                        motherId= right;
                        break;
                    case "Sex":
                        if(sexx!=null) throw new IllegalArgumentException("sex defined twice in " +value);
                        sexx= right;
                        break;
                    case "Status":
                        if(status!=null) throw new IllegalArgumentException("status defined twice in " +value);
                        status= right;
                        break;
                    default:
                        // other attributes are not part of the pedigree
                        break;
                }
            }
            if(familyId==null) throw new IllegalArgumentException("Family undefined in " +value);
            if(indiId==null) throw new IllegalArgumentException("ID undefined in " +value);
            build(ped,familyId,indiId,fatherId,motherId,sexx,status);
        }
        return ped;
    }
}
}