Coverage for biobb_mem/lipyphilic_biobb/lpp_assign_leaflets.py: 62%

84 statements  

« prev     ^ index     » next       coverage.py v7.6.11, created at 2025-02-10 11:25 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the Lipyphilic AssignLeaflets class and the command line interface.""" 

4import argparse 

5from biobb_common.generic.biobb_object import BiobbObject 

6from biobb_common.configuration import settings 

7from biobb_common.tools.file_utils import launchlogger 

8import MDAnalysis as mda 

9from biobb_mem.lipyphilic_biobb.common import calculate_box 

10from lipyphilic.lib.assign_leaflets import AssignLeaflets 

11import pandas as pd 

12import numpy as np 

13 

14 

15class LPPAssignLeaflets(BiobbObject): 

16 """ 

17 | biobb_mem LPPAssignLeaflets 

18 | Wrapper of the LiPyphilic AssignLeaflets module for assigning lipids to leaflets in a bilayer. 

19 | LiPyphilic is a Python package for analyzing MD simulations of lipid bilayers. The parameter names and defaults are the same as the ones in the official `Lipyphilic documentation <https://lipyphilic.readthedocs.io/en/latest/reference/analysis/leaflets.html>`_. 

20 

21 Args: 

22 input_top_path (str): Path to the input structure or topology file. File type: input. `Sample file <https://github.com/bioexcel/biobb_mem/raw/main/biobb_mem/test/data/A01JD/A01JD.pdb>`_. Accepted formats: crd (edam:3878), gro (edam:2033), mdcrd (edam:3878), mol2 (edam:3816), pdb (edam:1476), pdbqt (edam:1476), prmtop (edam:3881), psf (edam:3882), top (edam:3881), tpr (edam:2333), xml (edam:2332), xyz (edam:3887). 

23 input_traj_path (str): Path to the input trajectory to be processed. File type: input. `Sample file <https://github.com/bioexcel/biobb_mem/raw/main/biobb_mem/test/data/A01JD/A01JD.xtc>`_. Accepted formats: arc (edam:2333), crd (edam:3878), dcd (edam:3878), ent (edam:1476), gro (edam:2033), inpcrd (edam:3878), mdcrd (edam:3878), mol2 (edam:3816), nc (edam:3650), pdb (edam:1476), pdbqt (edam:1476), restrt (edam:3886), tng (edam:3876), trr (edam:3910), xtc (edam:3875), xyz (edam:3887). 

24 output_leaflets_path (str): Path to the output leaflet assignments. File type: output. `Sample file <https://github.com/bioexcel/biobb_mem/raw/main/biobb_mem/test/reference/lipyphilic_biobb/leaflets_data.csv>`_. Accepted formats: csv (edam:format_3752), npy (edam:format_4003). 

25 properties (dic - Python dictionary object containing the tool parameters, not input/output files): 

26 * **start** (*int*) - (None) Starting frame for slicing. 

27 * **stop** (*int*) - (None) Ending frame for slicing. 

28 * **steps** (*int*) - (None) Step for slicing. 

29 * **lipid_sel** (*str*) - ("all") Selection string for the lipids in a membrane. The selection should cover **all** residues in the membrane, including cholesterol. 

30 * **midplane_sel** (*str*) - (None) Selection string for residues that may be midplane. Any residues not in this selection will be assigned to a leaflet regardless of its proximity to the midplane. The default is `None`, in which case all lipids will be assigned to either the upper or lower leaflet. 

31 * **midplane_cutoff** (*float*) - (0) Minimum distance in *z* an atom must be from the midplane to be assigned to a leaflet rather than the midplane. The default is `0`, in which case all lipids will be assigned to either the upper or lower leaflet. Must be non-negative. 

32 * **n_bins** (*int*) - (1) Number of bins in *x* and *y* to use to create a grid of membrane patches. Local membrane midpoints are computed for each patch, and lipids assigned a leaflet based on the distance to their local membrane midpoint. The default is `1`, which is equivalent to computing a single global midpoint. 

33 * **ignore_no_box** (*bool*) - (False) Ignore the absence of box information in the trajectory. If the trajectory does not contain box information, the box will be set to the minimum and maximum positions of the atoms in the trajectory. 

34 * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files. 

35 * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist. 

36 * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory. 

37 

38 Examples: 

39 This is a use example of how to use the building block from Python:: 

40 

41 from biobb_mem.lipyphilic_biobb.lpp_assign_leaflets import lpp_assign_leaflets 

42 prop = { 

43 'lipid_sel': 'name GL1 GL2 ROH', 

44 } 

45 lpp_assign_leaflets(input_top_path='/path/to/myTopology.tpr', 

46 input_traj_path='/path/to/myTrajectory.xtc', 

47 output_leaflets_path='/path/to/leaflets.csv', 

48 properties=prop) 

49 

50 Info: 

51 * wrapped_software: 

52 * name: LiPyphilic 

53 * version: 0.10.0 

54 * license: GPL-2.0 

55 * ontology: 

56 * name: EDAM 

57 * schema: http://edamontology.org/EDAM.owl 

58 

59 """ 

60 

61 def __init__(self, input_top_path, input_traj_path, output_leaflets_path, 

62 properties=None, **kwargs) -> None: 

63 properties = properties or {} 

64 

65 # Call parent class constructor 

66 super().__init__(properties) 

67 self.locals_var_dict = locals().copy() 

68 

69 # Input/Output files 

70 self.io_dict = { 

71 "in": {"input_top_path": input_top_path, "input_traj_path": input_traj_path}, 

72 "out": {"output_leaflets_path": output_leaflets_path} 

73 } 

74 self.start = properties.get('start', None) 

75 self.stop = properties.get('stop', None) 

76 self.steps = properties.get('steps', None) 

77 self.lipid_sel = properties.get('lipid_sel', 'all') 

78 self.midplane_sel = properties.get('midplane_sel', None) 

79 self.midplane_cutoff = properties.get('midplane_cutoff', None) 

80 self.n_bins = properties.get('n_bins', 1) 

81 # Properties specific for BB 

82 self.ignore_no_box = properties.get('ignore_no_box', True) 

83 self.properties = properties 

84 

85 # Check the properties 

86 self.check_properties(properties) 

87 self.check_arguments() 

88 

89 @launchlogger 

90 def launch(self) -> int: 

91 """Execute the :class:`LPPAssignLeaflets <lipyphilic_biobb.lpp_assign_leaflets.LPPAssignLeaflets>` lipyphilic_biobb.lpp_assign_leaflets.LPPAssignLeaflets object.""" 

92 

93 # Setup Biobb 

94 if self.check_restart(): 

95 return 0 

96 self.stage_files() 

97 

98 # Load the trajectory 

99 u = mda.Universe(self.stage_io_dict["in"]["input_top_path"], self.stage_io_dict["in"]["input_traj_path"]) 

100 if u.dimensions is None: 

101 if self.ignore_no_box: 

102 calculate_box(u) 

103 else: 

104 raise ValueError('The trajectory does not contain box information. Please set the ignore_no_box property to True to ignore this error.') 

105 

106 # Create AssignLeaflets object 

107 leaflets = AssignLeaflets( 

108 universe=u, 

109 lipid_sel=self.lipid_sel, 

110 midplane_sel=self.midplane_sel, 

111 midplane_cutoff=self.midplane_cutoff, 

112 n_bins=self.n_bins 

113 ) 

114 # Run the analysis 

115 leaflets.run( 

116 start=self.start, 

117 stop=self.stop, 

118 step=self.steps 

119 ) 

120 

121 out_format = self.stage_io_dict["out"]["output_leaflets_path"].split('.')[-1] 

122 if out_format == 'csv': 

123 # Save the results 

124 frames = leaflets.leaflets.shape[1] 

125 resnames = np.repeat(leaflets.membrane.resnames, frames) 

126 resindices = np.tile(leaflets.membrane.resindices, frames) 

127 frame_numbers = np.repeat(np.arange(frames), leaflets.membrane.n_residues) 

128 

129 df = pd.DataFrame({ 

130 'resname': resnames, 

131 'resindex': resindices, 

132 'frame': frame_numbers, 

133 'leaflet_index': leaflets.leaflets.T.flatten() 

134 }) 

135 

136 # Save the DataFrame to a CSV file 

137 df.to_csv(self.stage_io_dict["out"]["output_leaflets_path"], index=False) 

138 elif out_format == 'npy': 

139 np.save(self.stage_io_dict["out"]["output_leaflets_path"], leaflets.leaflets) 

140 # Copy files to host 

141 self.copy_to_host() 

142 # remove temporary folder(s) 

143 self.tmp_files.extend([ 

144 self.stage_io_dict.get("unique_dir") 

145 ]) 

146 self.remove_tmp_files() 

147 

148 self.check_arguments(output_files_created=True, raise_exception=False) 

149 

150 return self.return_code 

151 

152 

153def lpp_assign_leaflets(input_top_path: str, input_traj_path: str, output_leaflets_path: str = None, properties: dict = None, **kwargs) -> int: 

154 """Execute the :class:`LPPAssignLeaflets <lipyphilic_biobb.lpp_assign_leaflets.LPPAssignLeaflets>` class and 

155 execute the :meth:`launch() <lipyphilic_biobb.lpp_assign_leaflets.LPPAssignLeaflets.launch>` method.""" 

156 

157 return LPPAssignLeaflets(input_top_path=input_top_path, 

158 input_traj_path=input_traj_path, 

159 output_leaflets_path=output_leaflets_path, 

160 properties=properties, **kwargs).launch() 

161 

162 

163def main(): 

164 """Command line execution of this building block. Please check the command line documentation.""" 

165 parser = argparse.ArgumentParser(description="Assign lipids to leaflets in a bilayer.", formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999)) 

166 parser.add_argument('--config', required=False, help='Configuration file') 

167 

168 # Specific args of each building block 

169 required_args = parser.add_argument_group('required arguments') 

170 required_args.add_argument('--input_top_path', required=True, help='Path to the input structure or topology file. Accepted formats: crd, gro, mdcrd, mol2, pdb, pdbqt, prmtop, psf, top, tpr, xml, xyz.') 

171 required_args.add_argument('--input_traj_path', required=True, help='Path to the input trajectory to be processed. Accepted formats: arc, crd, dcd, ent, gro, inpcrd, mdcrd, mol2, nc, pdb, pdbqt, restrt, tng, trr, xtc, xyz.') 

172 required_args.add_argument('--output_leaflets_path', required=True, help='Path to the output processed analysis.') 

173 

174 args = parser.parse_args() 

175 args.config = args.config or "{}" 

176 properties = settings.ConfReader(config=args.config).get_prop_dic() 

177 

178 # Specific call of each building block 

179 lpp_assign_leaflets(input_top_path=args.input_top_path, 

180 input_traj_path=args.input_traj_path, 

181 output_leaflets_path=args.output_leaflets_path, 

182 properties=properties) 

183 

184 

185def display_nglview(input_top_path: str, output_leaflets_path: str, frame: int = 0): 

186 """ 

187 Visualize the leaflets of a membrane using NGLView. 

188 

189 Args: 

190 input_top_path (str): Path to the input topology file. 

191 output_leaflets_path (str): Path to the CSV file containing leaflet assignments. 

192 frame (int, optional): Frame number to visualize. Default is 0. 

193 Returns: 

194 nglview.NGLWidget: An NGLView widget displaying the membrane leaflets. 

195 """ 

196 

197 try: 

198 import nglview as nv 

199 except ImportError: 

200 raise ImportError('Please install the nglview package to visualize the leaflets.') 

201 # Read the leaflets DataFrame 

202 df = pd.read_csv(output_leaflets_path) 

203 top_idx = df[(df['frame'] == frame) & (df['leaflet_index'] == 1)]['resindex'].values 

204 bot_idx = df[(df['frame'] == frame) & (df['leaflet_index'] == -1)]['resindex'].values 

205 # Load the topology and convert the resindices to resnums (nglview uses resnums) 

206 u = mda.Universe(input_top_path) 

207 top_resnum = u.residues[top_idx].resnums 

208 bot_resnum = u.residues[bot_idx].resnums 

209 # Create the view 

210 view = nv.show_file(input_top_path) 

211 view.update_ball_and_stick(selection='all', opacity=0.0) # delete membrane 

212 view.add_ball_and_stick(selection=", ".join(map(str, top_resnum)), color='blue') 

213 view.add_ball_and_stick(selection=", ".join(map(str, bot_resnum)), color='yellow') 

214 return view 

215 

216 

217if __name__ == '__main__': 

218 main()