Coverage for biobb_mem/lipyphilic_biobb/lpp_assign_leaflets.py: 62%
84 statements
« prev ^ index » next coverage.py v7.6.11, created at 2025-02-10 11:25 +0000
« prev ^ index » next coverage.py v7.6.11, created at 2025-02-10 11:25 +0000
1#!/usr/bin/env python3
3"""Module containing the Lipyphilic AssignLeaflets class and the command line interface."""
4import argparse
5from biobb_common.generic.biobb_object import BiobbObject
6from biobb_common.configuration import settings
7from biobb_common.tools.file_utils import launchlogger
8import MDAnalysis as mda
9from biobb_mem.lipyphilic_biobb.common import calculate_box
10from lipyphilic.lib.assign_leaflets import AssignLeaflets
11import pandas as pd
12import numpy as np
15class LPPAssignLeaflets(BiobbObject):
16 """
17 | biobb_mem LPPAssignLeaflets
18 | Wrapper of the LiPyphilic AssignLeaflets module for assigning lipids to leaflets in a bilayer.
19 | LiPyphilic is a Python package for analyzing MD simulations of lipid bilayers. The parameter names and defaults are the same as the ones in the official `Lipyphilic documentation <https://lipyphilic.readthedocs.io/en/latest/reference/analysis/leaflets.html>`_.
21 Args:
22 input_top_path (str): Path to the input structure or topology file. File type: input. `Sample file <https://github.com/bioexcel/biobb_mem/raw/main/biobb_mem/test/data/A01JD/A01JD.pdb>`_. Accepted formats: crd (edam:3878), gro (edam:2033), mdcrd (edam:3878), mol2 (edam:3816), pdb (edam:1476), pdbqt (edam:1476), prmtop (edam:3881), psf (edam:3882), top (edam:3881), tpr (edam:2333), xml (edam:2332), xyz (edam:3887).
23 input_traj_path (str): Path to the input trajectory to be processed. File type: input. `Sample file <https://github.com/bioexcel/biobb_mem/raw/main/biobb_mem/test/data/A01JD/A01JD.xtc>`_. Accepted formats: arc (edam:2333), crd (edam:3878), dcd (edam:3878), ent (edam:1476), gro (edam:2033), inpcrd (edam:3878), mdcrd (edam:3878), mol2 (edam:3816), nc (edam:3650), pdb (edam:1476), pdbqt (edam:1476), restrt (edam:3886), tng (edam:3876), trr (edam:3910), xtc (edam:3875), xyz (edam:3887).
24 output_leaflets_path (str): Path to the output leaflet assignments. File type: output. `Sample file <https://github.com/bioexcel/biobb_mem/raw/main/biobb_mem/test/reference/lipyphilic_biobb/leaflets_data.csv>`_. Accepted formats: csv (edam:format_3752), npy (edam:format_4003).
25 properties (dic - Python dictionary object containing the tool parameters, not input/output files):
26 * **start** (*int*) - (None) Starting frame for slicing.
27 * **stop** (*int*) - (None) Ending frame for slicing.
28 * **steps** (*int*) - (None) Step for slicing.
29 * **lipid_sel** (*str*) - ("all") Selection string for the lipids in a membrane. The selection should cover **all** residues in the membrane, including cholesterol.
30 * **midplane_sel** (*str*) - (None) Selection string for residues that may be midplane. Any residues not in this selection will be assigned to a leaflet regardless of its proximity to the midplane. The default is `None`, in which case all lipids will be assigned to either the upper or lower leaflet.
31 * **midplane_cutoff** (*float*) - (0) Minimum distance in *z* an atom must be from the midplane to be assigned to a leaflet rather than the midplane. The default is `0`, in which case all lipids will be assigned to either the upper or lower leaflet. Must be non-negative.
32 * **n_bins** (*int*) - (1) Number of bins in *x* and *y* to use to create a grid of membrane patches. Local membrane midpoints are computed for each patch, and lipids assigned a leaflet based on the distance to their local membrane midpoint. The default is `1`, which is equivalent to computing a single global midpoint.
33 * **ignore_no_box** (*bool*) - (False) Ignore the absence of box information in the trajectory. If the trajectory does not contain box information, the box will be set to the minimum and maximum positions of the atoms in the trajectory.
34 * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
35 * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
36 * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
38 Examples:
39 This is a use example of how to use the building block from Python::
41 from biobb_mem.lipyphilic_biobb.lpp_assign_leaflets import lpp_assign_leaflets
42 prop = {
43 'lipid_sel': 'name GL1 GL2 ROH',
44 }
45 lpp_assign_leaflets(input_top_path='/path/to/myTopology.tpr',
46 input_traj_path='/path/to/myTrajectory.xtc',
47 output_leaflets_path='/path/to/leaflets.csv',
48 properties=prop)
50 Info:
51 * wrapped_software:
52 * name: LiPyphilic
53 * version: 0.10.0
54 * license: GPL-2.0
55 * ontology:
56 * name: EDAM
57 * schema: http://edamontology.org/EDAM.owl
59 """
61 def __init__(self, input_top_path, input_traj_path, output_leaflets_path,
62 properties=None, **kwargs) -> None:
63 properties = properties or {}
65 # Call parent class constructor
66 super().__init__(properties)
67 self.locals_var_dict = locals().copy()
69 # Input/Output files
70 self.io_dict = {
71 "in": {"input_top_path": input_top_path, "input_traj_path": input_traj_path},
72 "out": {"output_leaflets_path": output_leaflets_path}
73 }
74 self.start = properties.get('start', None)
75 self.stop = properties.get('stop', None)
76 self.steps = properties.get('steps', None)
77 self.lipid_sel = properties.get('lipid_sel', 'all')
78 self.midplane_sel = properties.get('midplane_sel', None)
79 self.midplane_cutoff = properties.get('midplane_cutoff', None)
80 self.n_bins = properties.get('n_bins', 1)
81 # Properties specific for BB
82 self.ignore_no_box = properties.get('ignore_no_box', True)
83 self.properties = properties
85 # Check the properties
86 self.check_properties(properties)
87 self.check_arguments()
89 @launchlogger
90 def launch(self) -> int:
91 """Execute the :class:`LPPAssignLeaflets <lipyphilic_biobb.lpp_assign_leaflets.LPPAssignLeaflets>` lipyphilic_biobb.lpp_assign_leaflets.LPPAssignLeaflets object."""
93 # Setup Biobb
94 if self.check_restart():
95 return 0
96 self.stage_files()
98 # Load the trajectory
99 u = mda.Universe(self.stage_io_dict["in"]["input_top_path"], self.stage_io_dict["in"]["input_traj_path"])
100 if u.dimensions is None:
101 if self.ignore_no_box:
102 calculate_box(u)
103 else:
104 raise ValueError('The trajectory does not contain box information. Please set the ignore_no_box property to True to ignore this error.')
106 # Create AssignLeaflets object
107 leaflets = AssignLeaflets(
108 universe=u,
109 lipid_sel=self.lipid_sel,
110 midplane_sel=self.midplane_sel,
111 midplane_cutoff=self.midplane_cutoff,
112 n_bins=self.n_bins
113 )
114 # Run the analysis
115 leaflets.run(
116 start=self.start,
117 stop=self.stop,
118 step=self.steps
119 )
121 out_format = self.stage_io_dict["out"]["output_leaflets_path"].split('.')[-1]
122 if out_format == 'csv':
123 # Save the results
124 frames = leaflets.leaflets.shape[1]
125 resnames = np.repeat(leaflets.membrane.resnames, frames)
126 resindices = np.tile(leaflets.membrane.resindices, frames)
127 frame_numbers = np.repeat(np.arange(frames), leaflets.membrane.n_residues)
129 df = pd.DataFrame({
130 'resname': resnames,
131 'resindex': resindices,
132 'frame': frame_numbers,
133 'leaflet_index': leaflets.leaflets.T.flatten()
134 })
136 # Save the DataFrame to a CSV file
137 df.to_csv(self.stage_io_dict["out"]["output_leaflets_path"], index=False)
138 elif out_format == 'npy':
139 np.save(self.stage_io_dict["out"]["output_leaflets_path"], leaflets.leaflets)
140 # Copy files to host
141 self.copy_to_host()
142 # remove temporary folder(s)
143 self.tmp_files.extend([
144 self.stage_io_dict.get("unique_dir")
145 ])
146 self.remove_tmp_files()
148 self.check_arguments(output_files_created=True, raise_exception=False)
150 return self.return_code
153def lpp_assign_leaflets(input_top_path: str, input_traj_path: str, output_leaflets_path: str = None, properties: dict = None, **kwargs) -> int:
154 """Execute the :class:`LPPAssignLeaflets <lipyphilic_biobb.lpp_assign_leaflets.LPPAssignLeaflets>` class and
155 execute the :meth:`launch() <lipyphilic_biobb.lpp_assign_leaflets.LPPAssignLeaflets.launch>` method."""
157 return LPPAssignLeaflets(input_top_path=input_top_path,
158 input_traj_path=input_traj_path,
159 output_leaflets_path=output_leaflets_path,
160 properties=properties, **kwargs).launch()
163def main():
164 """Command line execution of this building block. Please check the command line documentation."""
165 parser = argparse.ArgumentParser(description="Assign lipids to leaflets in a bilayer.", formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999))
166 parser.add_argument('--config', required=False, help='Configuration file')
168 # Specific args of each building block
169 required_args = parser.add_argument_group('required arguments')
170 required_args.add_argument('--input_top_path', required=True, help='Path to the input structure or topology file. Accepted formats: crd, gro, mdcrd, mol2, pdb, pdbqt, prmtop, psf, top, tpr, xml, xyz.')
171 required_args.add_argument('--input_traj_path', required=True, help='Path to the input trajectory to be processed. Accepted formats: arc, crd, dcd, ent, gro, inpcrd, mdcrd, mol2, nc, pdb, pdbqt, restrt, tng, trr, xtc, xyz.')
172 required_args.add_argument('--output_leaflets_path', required=True, help='Path to the output processed analysis.')
174 args = parser.parse_args()
175 args.config = args.config or "{}"
176 properties = settings.ConfReader(config=args.config).get_prop_dic()
178 # Specific call of each building block
179 lpp_assign_leaflets(input_top_path=args.input_top_path,
180 input_traj_path=args.input_traj_path,
181 output_leaflets_path=args.output_leaflets_path,
182 properties=properties)
185def display_nglview(input_top_path: str, output_leaflets_path: str, frame: int = 0):
186 """
187 Visualize the leaflets of a membrane using NGLView.
189 Args:
190 input_top_path (str): Path to the input topology file.
191 output_leaflets_path (str): Path to the CSV file containing leaflet assignments.
192 frame (int, optional): Frame number to visualize. Default is 0.
193 Returns:
194 nglview.NGLWidget: An NGLView widget displaying the membrane leaflets.
195 """
197 try:
198 import nglview as nv
199 except ImportError:
200 raise ImportError('Please install the nglview package to visualize the leaflets.')
201 # Read the leaflets DataFrame
202 df = pd.read_csv(output_leaflets_path)
203 top_idx = df[(df['frame'] == frame) & (df['leaflet_index'] == 1)]['resindex'].values
204 bot_idx = df[(df['frame'] == frame) & (df['leaflet_index'] == -1)]['resindex'].values
205 # Load the topology and convert the resindices to resnums (nglview uses resnums)
206 u = mda.Universe(input_top_path)
207 top_resnum = u.residues[top_idx].resnums
208 bot_resnum = u.residues[bot_idx].resnums
209 # Create the view
210 view = nv.show_file(input_top_path)
211 view.update_ball_and_stick(selection='all', opacity=0.0) # delete membrane
212 view.add_ball_and_stick(selection=", ".join(map(str, top_resnum)), color='blue')
213 view.add_ball_and_stick(selection=", ".join(map(str, bot_resnum)), color='yellow')
214 return view
217if __name__ == '__main__':
218 main()