Source code for ssbio.protein.structure.properties.freesasa

import subprocess
import ssbio.utils
import os
import os.path as op
from collections import OrderedDict


def run_freesasa(infile, outfile, include_hetatms=True, outdir=None, force_rerun=False):
    """Run freesasa on a PDB file, output using the NACCESS RSA format.

    The ``freesasa`` executable is invoked as an argument list without a
    shell, so paths containing spaces or shell metacharacters are passed
    through verbatim. Failures (executable could not be launched, non-zero
    exit status) are logged as warnings rather than raised; the output path
    is returned in every case, so callers should be prepared for the file
    not to exist.

    Args:
        infile (str): Path to PDB file (only PDB file format is accepted)
        outfile (str): Path or filename of output file
        include_hetatms (bool): If heteroatoms should be included in the SASA calculations
        outdir (str): Path to output file if not specified in outfile
        force_rerun (bool): If freesasa should be rerun even if outfile exists

    Returns:
        str: Path to output SASA file

    """
    # Imported locally so this function does not depend on a module-level logger.
    import logging
    log = logging.getLogger(__name__)

    # An empty/None outdir makes op.join return outfile unchanged.
    if not outdir:
        outdir = ''
    outfile = op.join(outdir, outfile)

    # ssbio.utils.force_rerun decides whether the calculation has to be run.
    # NOTE(review): presumably it is also true when outfile is missing -- confirm in ssbio.utils.
    if ssbio.utils.force_rerun(flag=force_rerun, outfile=outfile):
        # Remove any existing output first so the file on disk comes from this run only.
        if op.exists(outfile):
            os.remove(outfile)

        # Same argument order as the former shell string:
        #   freesasa --format=rsa [--hetatm] <infile> -o <outfile>
        command = ['freesasa', '--format=rsa']
        if include_hetatms:
            command.append('--hetatm')
        command.extend([infile, '-o', outfile])

        try:
            process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            out, err = process.communicate()
        except OSError as launch_error:
            # Without a shell a missing executable raises here; keep the
            # historical non-raising contract but make the failure visible.
            log.warning('%s: unable to launch freesasa (%s)', infile, launch_error)
        else:
            if process.returncode != 0:
                log.warning('%s: freesasa exited with status %s: %s',
                            infile, process.returncode,
                            err.decode('utf-8', 'replace').strip())

    return outfile
def parse_rsa_data(rsa_outfile, ignore_hets=True):
    """Process a NACCESS or freesasa RSA output file. Adapted from Biopython NACCESS module.

    Only lines starting with ``RES`` are read. Every field is taken from a
    fixed character column of the line; the ten accessibility fields are
    whitespace-stripped and converted with
    ``ssbio.utils.conv_to_float(value, inf_str='N/A')`` (how ``'N/A'`` is
    represented in the result is defined by that helper).

    Args:
        rsa_outfile (str): Path to RSA output file
        ignore_hets (bool): If HETATMs should be excluded from the final dictionary. This is
            extremely important when loading this information into a ChainProp's SeqRecord,
            since this will throw off the sequence matching. A residue is treated as a HETATM
            when all five of its relative accessibility fields read ``N/A``.

    Returns:
        dict: Per-residue dictionary of RSA values (an ``OrderedDict`` in file order), keyed by
        ``(chain_id, (' ', resseq, icode))``. Each value holds ``res_name`` plus the
        ``*_abs`` / ``*_rel`` entries for all atoms, side chain, main chain, non-polar and
        all-polar.

    """
    # (dictionary key, slice start, slice stop) for each accessibility column.
    column_layout = (
        ('all_atoms_abs', 16, 22),
        ('all_atoms_rel', 23, 28),
        ('side_chain_abs', 29, 35),
        ('side_chain_rel', 36, 41),
        ('main_chain_abs', 42, 48),
        ('main_chain_rel', 49, 54),
        ('non_polar_abs', 55, 61),
        ('non_polar_rel', 62, 67),
        ('all_polar_abs', 68, 74),
        ('all_polar_rel', 75, 80),
    )

    per_residue = OrderedDict()

    with open(rsa_outfile, 'r') as handle:
        for record in handle:
            # Header, summary and trailer lines carry no per-residue data.
            if not record.startswith('RES'):
                continue

            residue_name = record[4:7]
            chain = record[8]
            number = int(record[9:13])
            insertion_code = record[13]

            raw_fields = [(key, record[start:stop].strip())
                          for key, start, stop in column_layout]

            # Residues without any relative value are skipped on request.
            if ignore_hets and all(text == 'N/A'
                                   for key, text in raw_fields
                                   if key.endswith('_rel')):
                continue

            entry = {'res_name': residue_name}
            for key, text in raw_fields:
                entry[key] = ssbio.utils.conv_to_float(text, inf_str='N/A')

            # Blank first element of the residue id, as in the Biopython parser this is adapted from.
            per_residue[(chain, (' ', number, insertion_code))] = entry

    return per_residue