package statalign.io.input.plugins; import java.io.BufferedReader; import java.io.FileWriter; import java.io.IOException; import java.io.Reader; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; import statalign.io.ProteinSkeletons; import statalign.io.RawSequences; import statalign.io.input.DataReader; import statalign.io.input.IllegalFormatException; public class PDBReader extends DataReader { public PDBReader() { } @Override public List<String> supportedExtensions() { return Arrays.asList(new String[] { "pdb", "PDB" }); } @Override public ProteinSkeletons read(Reader reader) throws IOException { ProteinSkeletons data = new ProteinSkeletons(); String sequence = ""; BufferedReader br = new BufferedReader(reader); String line = null; int lineNumber = 0; try { data.names.add(new String()); data.coords.add(new ArrayList<double[]>()); data.bFactors.add(new ArrayList<Double>()); HashMap<Integer,Boolean> seen = new HashMap<Integer,Boolean>(); while((line = br.readLine()) != null) { if(++lineNumber == 1) { if (line.startsWith("HEADER") && line.length() > 62) { if (line.length() > 66) data.names.set(0,line.substring(62,67).toLowerCase().replaceAll("\\s+","")); else data.names.set(0,line.substring(62,66).toLowerCase()); } else { // If HEADER lines have been removed from PDB if (filename.length() > 4) data.names.set(0,filename.toLowerCase().substring(0,5)); else data.names.set(0,filename.toLowerCase().substring(0,4)); // then use the filename to define the name for this structure } continue; } if(line.startsWith("ATOM")) { int resn = Integer.parseInt(line.substring(22, 26).replaceAll("\\s+","")); if (seen.containsKey(resn)) continue; // so we don't include alt. conformers if (line.substring(12, 16).equals(" CA ")) { sequence += oneLetter(line.substring(17, 20)); double x = Double.parseDouble(line.substring(30, 38)); double y = Double.parseDouble(line.substring(38, 46)); double z = Double.parseDouble(line.substring(46, 54)); data.coords.get(0).add(new double[]{x,y,z}); if (line.length() > 61) { data.bFactors.get(0).add(Double.parseDouble(line.substring(60,66))); } seen.put(resn,true); } } } } catch (NumberFormatException e) { throw new IllegalFormatException("PDBReader: number formatting error: "+e.getMessage()); } if(lineNumber < 1) throw new IllegalFormatException("PDBReader: empty file"); if(data.coords.get(0).size() == 0) throw new IllegalFormatException("PDBReader: sequence "+data.names.get(0)+" is without a structure"); data.seqs = new RawSequences(sequence,data.names.get(0)); // for (int i=0; i<data.bFactors.get(0).size(); i++) { // System.out.println(i+"\t"+data.seqs.getSequence(0).charAt(i)+"\t" // +data.coords.get(0).get(i)[0]+"\t" // +data.coords.get(0).get(i)[1]+"\t" // +data.coords.get(0).get(i)[2]+"\t" // +data.bFactors.get(0).get(i)); // } return data; } public static void writePDB(double[][][] coors, String[] seqs, String[] names, FileWriter fw) throws IOException { initialiseMp13(); String format = "ATOM %5d CA %3s %1c%4d %8.3f%8.3f%8.3f\n"; String format2 = "TER %5d %3s %1c%4d\n"; System.out.println(coors.length); try { for (int i=0; i<coors.length; i++) { if (coors[i]==null) continue; double coorSum = 0; for (int j=0; j<coors[i].length; j++) { coorSum += coors[i][j][0]; } if (coorSum==0) continue; char chain = (char) ('A' + i); fw.write("HEADER "+names[i]+"\n"); int j=0; for (; j<coors[i].length; j++) { fw.write(String.format(format, j+1,mp13.get(seqs[i].substring(j,j+1)),chain,j+1,coors[i][j][0],coors[i][j][1],coors[i][j][2])); } fw.write(String.format(format2,j+1,mp13.get(seqs[i].substring(j-1,j)),chain,j)); } fw.write("END\n"); } catch (IOException e) { e.printStackTrace(); } } public HashMap<String,String> mp31 = null; public String oneLetter(String threeLetter) { if (mp31 == null) initialiseMp31(); String one = mp31.get(threeLetter); if (one == null) throw new RuntimeException("Unrecognised amino acid: "+threeLetter); return one; } private void initialiseMp31() { mp31 = new HashMap<String,String>(); mp31.put("ALA","A"); mp31.put("ARG","R"); mp31.put("ASN","N"); mp31.put("ASP","D"); mp31.put("ASX","B"); mp31.put("CYS","C"); mp31.put("GLU","E"); mp31.put("GLN","Q"); mp31.put("GLX","Z"); mp31.put("GLY","G"); mp31.put("HIS","H"); mp31.put("HSD","H"); mp31.put("ILE","I"); mp31.put("LEU","L"); mp31.put("LYS","K"); mp31.put("MET","M"); mp31.put("PHE","F"); mp31.put("PRO","P"); mp31.put("SER","S"); mp31.put("THR","T"); mp31.put("TRP","W"); mp31.put("TYR","Y"); mp31.put("VAL","V"); } public static HashMap<String,String> mp13 = null; private static void initialiseMp13() { if (mp13 != null) return; mp13 = new HashMap<String,String>(); mp13.put("A","ALA"); mp13.put("R","ARG"); mp13.put("N","ASN"); mp13.put("D","ASP"); mp13.put("B","ASX"); mp13.put("C","CYS"); mp13.put("E","GLU"); mp13.put("Q","GLN"); mp13.put("Z","GLX"); mp13.put("G","GLY"); mp13.put("H","HIS"); mp13.put("H","HSD"); mp13.put("I","ILE"); mp13.put("L","LEU"); mp13.put("K","LYS"); mp13.put("M","MET"); mp13.put("F","PHE"); mp13.put("P","PRO"); mp13.put("S","SER"); mp13.put("T","THR"); mp13.put("W","TRP"); mp13.put("Y","TYR"); mp13.put("V","VAL"); mp13.put("X","XXX"); } }