Source code for ssbio.protein.structure.properties.freesasa

import subprocess
import ssbio.utils
import os
import os.path as op
from collections import OrderedDict


[docs]def run_freesasa(infile, outfile, include_hetatms=True, outdir=None, force_rerun=False):
    """Run freesasa on a PDB file, output using the NACCESS RSA format.
    
    Args:
        infile (str): Path to PDB file (only PDB file format is accepted) 
        outfile (str): Path or filename of output file
        include_hetatms (bool): If heteroatoms should be included in the SASA calculations
        outdir (str): Path to output file if not specified in outfile
        force_rerun (bool): If freesasa should be rerun even if outfile exists

    Returns:
        str: Path to output SASA file

    """
    if not outdir:
        outdir = ''

    outfile = op.join(outdir, outfile)

    if ssbio.utils.force_rerun(flag=force_rerun, outfile=outfile):
        if op.exists(outfile):
            os.remove(outfile)
        if include_hetatms:
            shell_command = 'freesasa --format=rsa --hetatm {} -o {}'.format(infile, outfile)
        else:
            shell_command = 'freesasa --format=rsa {} -o {}'.format(infile, outfile)
        command = subprocess.Popen(shell_command,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   shell=True)
        out, err = command.communicate()

    return outfile


[docs]def parse_rsa_data(rsa_outfile, ignore_hets=True):
    """Process a NACCESS or freesasa RSA output file. Adapted from Biopython NACCESS modele.
    
    Args:
        rsa_outfile (str): Path to RSA output file
        ignore_hets (bool): If HETATMs should be excluded from the final dictionary. This is extremely important
            when loading this information into a ChainProp's SeqRecord, since this will throw off the sequence matching.

    Returns:
        dict: Per-residue dictionary of RSA values

    """

    naccess_rel_dict = OrderedDict()

    with open(rsa_outfile, 'r') as f:
        for line in f:
            if line.startswith('RES'):
                res_name = line[4:7]
                chain_id = line[8]
                resseq = int(line[9:13])
                icode = line[13]
                res_id = (' ', resseq, icode)
                all_atoms_abs = line[16:22].strip()
                all_atoms_rel = line[23:28].strip()
                side_chain_abs = line[29:35].strip()
                side_chain_rel = line[36:41].strip()
                main_chain_abs = line[42:48].strip()
                main_chain_rel = line[49:54].strip()
                non_polar_abs = line[55:61].strip()
                non_polar_rel = line[62:67].strip()
                all_polar_abs = line[68:74].strip()
                all_polar_rel = line[75:80].strip()

                if all_atoms_rel =='N/A' and main_chain_rel =='N/A' and all_polar_rel =='N/A' and non_polar_rel =='N/A' and side_chain_rel =='N/A' and ignore_hets:
                    continue

                naccess_rel_dict[(chain_id, res_id)] = {
                    'res_name'      : res_name,
                    'all_atoms_abs' : ssbio.utils.conv_to_float(all_atoms_abs, inf_str='N/A'),
                    'all_atoms_rel' : ssbio.utils.conv_to_float(all_atoms_rel, inf_str='N/A'),
                    'side_chain_abs': ssbio.utils.conv_to_float(side_chain_abs, inf_str='N/A'),
                    'side_chain_rel': ssbio.utils.conv_to_float(side_chain_rel, inf_str='N/A'),
                    'main_chain_abs': ssbio.utils.conv_to_float(main_chain_abs, inf_str='N/A'),
                    'main_chain_rel': ssbio.utils.conv_to_float(main_chain_rel, inf_str='N/A'),
                    'non_polar_abs' : ssbio.utils.conv_to_float(non_polar_abs, inf_str='N/A'),
                    'non_polar_rel' : ssbio.utils.conv_to_float(non_polar_rel, inf_str='N/A'),
                    'all_polar_abs' : ssbio.utils.conv_to_float(all_polar_abs, inf_str='N/A'),
                    'all_polar_rel' : ssbio.utils.conv_to_float(all_polar_rel, inf_str='N/A')}

    return naccess_rel_dict