Coverage for biobb_mem/lipyphilic_biobb/lpp_zpositions.py: 72%

71 statements  

« prev     ^ index     » next       coverage.py v7.6.11, created at 2025-02-10 11:25 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the Lipyphilic ZPositions class and the command line interface.""" 

4import argparse 

5from biobb_common.generic.biobb_object import BiobbObject 

6from biobb_common.configuration import settings 

7from biobb_common.tools.file_utils import launchlogger 

8import MDAnalysis as mda 

9from biobb_mem.lipyphilic_biobb.common import calculate_box 

10from lipyphilic.lib.z_positions import ZPositions 

11import pandas as pd 

12import numpy as np 

13 

14 

15class LPPZPositions(BiobbObject): 

16 """ 

17 | biobb_mem LPPZPositions 

18 | Wrapper of the LiPyphilic ZPositions module for calculating the z distance of lipids to the bilayer center. 

19 | LiPyphilic is a Python package for analyzing MD simulations of lipid bilayers. The parameter names and defaults are the same as the ones in the official `Lipyphilic documentation <https://lipyphilic.readthedocs.io/en/latest/reference/analysis/z_pos.html>`_. 

20 

21 Args: 

22 input_top_path (str): Path to the input structure or topology file. File type: input. `Sample file <https://github.com/bioexcel/biobb_mem/raw/main/biobb_mem/test/data/A01JD/A01JD.pdb>`_. Accepted formats: crd (edam:3878), gro (edam:2033), mdcrd (edam:3878), mol2 (edam:3816), pdb (edam:1476), pdbqt (edam:1476), prmtop (edam:3881), psf (edam:3882), top (edam:3881), tpr (edam:2333), xml (edam:2332), xyz (edam:3887). 

23 input_traj_path (str): Path to the input trajectory to be processed. File type: input. `Sample file <https://github.com/bioexcel/biobb_mem/raw/main/biobb_mem/test/data/A01JD/A01JD.xtc>`_. Accepted formats: arc (edam:2333), crd (edam:3878), dcd (edam:3878), ent (edam:1476), gro (edam:2033), inpcrd (edam:3878), mdcrd (edam:3878), mol2 (edam:3816), nc (edam:3650), pdb (edam:1476), pdbqt (edam:1476), restrt (edam:3886), tng (edam:3876), trr (edam:3910), xtc (edam:3875), xyz (edam:3887). 

24 output_positions_path (str): Path to the output z positions. File type: output. `Sample file <https://github.com/bioexcel/biobb_mem/raw/main/biobb_mem/test/reference/lipyphilic_biobb/zpositions.csv>`_. Accepted formats: csv (edam:format_3752). 

25 properties (dic - Python dictionary object containing the tool parameters, not input/output files): 

26 * **start** (*int*) - (None) Starting frame for slicing. 

27 * **stop** (*int*) - (None) Ending frame for slicing. 

28 * **steps** (*int*) - (None) Step for slicing. 

29 * **lipid_sel** (*str*) - ("all") Selection string for the lipids in a membrane. The selection should cover **all** residues in the membrane, including cholesterol. 

30 * **height_sel** (*str*) - ("all") Atom selection for the molecules for which the z position will be calculated. 

31 * **n_bins** (*int*) - (1) Number of bins in *x* and *y* to use to create a grid of membrane patches. Local membrane midpoints are computed for each patch, and lipids assigned a leaflet based on the distance to their local membrane midpoint. The default is `1`, which is equivalent to computing a single global midpoint. 

32 * **ignore_no_box** (*bool*) - (False) Ignore the absence of box information in the trajectory. If the trajectory does not contain box information, the box will be set to the minimum and maximum positions of the atoms in the trajectory. 

33 * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files. 

34 * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist. 

35 * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory. 

36 

37 Examples: 

38 This is a use example of how to use the building block from Python:: 

39 

40 from biobb_mem.lipyphilic_biobb.lpp_zpositions import lpp_zpositions 

41 prop = { 

42 'lipid_sel': 'name GL1 GL2 ROH', 

43 } 

44 lpp_zpositions(input_top_path='/path/to/myTopology.tpr', 

45 input_traj_path='/path/to/myTrajectory.xtc', 

46 output_positions_path='/path/to/zpositions.csv.csv', 

47 properties=prop) 

48 

49 Info: 

50 * wrapped_software: 

51 * name: LiPyphilic 

52 * version: 0.10.0 

53 * license: GPL-2.0 

54 * ontology: 

55 * name: EDAM 

56 * schema: http://edamontology.org/EDAM.owl 

57 

58 """ 

59 

60 def __init__(self, input_top_path, input_traj_path, output_positions_path, 

61 properties=None, **kwargs) -> None: 

62 properties = properties or {} 

63 

64 # Call parent class constructor 

65 super().__init__(properties) 

66 self.locals_var_dict = locals().copy() 

67 

68 # Input/Output files 

69 self.io_dict = { 

70 "in": {"input_top_path": input_top_path, "input_traj_path": input_traj_path}, 

71 "out": {"output_positions_path": output_positions_path} 

72 } 

73 

74 self.start = properties.get('start', None) 

75 self.stop = properties.get('stop', None) 

76 self.steps = properties.get('steps', None) 

77 self.lipid_sel = properties.get('lipid_sel', 'all') 

78 self.height_sel = properties.get('height_sel', 'all') 

79 self.n_bins = properties.get('n_bins', 1) 

80 self.ignore_no_box = properties.get('ignore_no_box', False) 

81 # Properties specific for BB 

82 self.remove_tmp = properties.get('remove_tmp', True) 

83 self.restart = properties.get('restart', False) 

84 self.sandbox_path = properties.get('sandbox_path', "./") 

85 

86 # Check the properties 

87 self.check_properties(properties) 

88 self.check_arguments() 

89 

90 @launchlogger 

91 def launch(self) -> int: 

92 """Execute the :class:`LPPZPositions <lipyphilic_biobb.lpp_zpositions.LPPZPositions>` lipyphilic_biobb.lpp_zpositions.LPPZPositions object.""" 

93 

94 # Setup Biobb 

95 if self.check_restart(): 

96 return 0 

97 self.stage_files() 

98 

99 # Load the trajectory 

100 u = mda.Universe(self.stage_io_dict["in"]["input_top_path"], self.stage_io_dict["in"]["input_traj_path"]) 

101 if u.dimensions is None: 

102 if self.ignore_no_box: 

103 calculate_box(u) 

104 else: 

105 raise ValueError('The trajectory does not contain box information. Please set the ignore_no_box property to True to ignore this error.') 

106 

107 # Create ZPositions object 

108 positions = ZPositions( 

109 universe=u, 

110 lipid_sel=self.lipid_sel, 

111 height_sel=self.height_sel, 

112 n_bins=self.n_bins 

113 ) 

114 # Run the analysis 

115 positions.run( 

116 start=self.start, 

117 stop=self.stop, 

118 step=self.steps 

119 ) 

120 # Save the results 

121 frames = positions.z_positions.shape[1] 

122 resnames = np.repeat(positions._height_atoms.resnames, frames) 

123 resindices = np.tile(positions._height_atoms.resindices, frames) 

124 frame_numbers = np.repeat(np.arange(frames), positions._height_atoms.n_residues) 

125 

126 df = pd.DataFrame({ 

127 'resname': resnames, 

128 'resindex': resindices, 

129 'frame': frame_numbers, 

130 'zposition': positions.z_positions.T.flatten() 

131 }) 

132 

133 # Save the DataFrame to a CSV file 

134 df.to_csv(self.stage_io_dict["out"]["output_positions_path"], index=False) 

135 

136 # Copy files to host 

137 self.copy_to_host() 

138 # remove temporary folder(s) 

139 self.tmp_files.extend([ 

140 self.stage_io_dict.get("unique_dir") 

141 ]) 

142 self.remove_tmp_files() 

143 

144 self.check_arguments(output_files_created=True, raise_exception=False) 

145 

146 return self.return_code 

147 

148 

149def lpp_zpositions(input_top_path: str, input_traj_path: str, output_positions_path: str = None, properties: dict = None, **kwargs) -> int: 

150 """Execute the :class:`LPPZPositions <lipyphilic_biobb.lpp_zpositions.LPPZPositions>` class and 

151 execute the :meth:`launch() <lipyphilic_biobb.lpp_zpositions.LPPZPositions.launch>` method.""" 

152 

153 return LPPZPositions(input_top_path=input_top_path, 

154 input_traj_path=input_traj_path, 

155 output_positions_path=output_positions_path, 

156 properties=properties, **kwargs).launch() 

157 

158 

159def main(): 

160 """Command line execution of this building block. Please check the command line documentation.""" 

161 parser = argparse.ArgumentParser(description="Calculate the z distance in of lipids to the bilayer center.", formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999)) 

162 parser.add_argument('--config', required=False, help='Configuration file') 

163 

164 # Specific args of each building block 

165 required_args = parser.add_argument_group('required arguments') 

166 required_args.add_argument('--input_top_path', required=True, help='Path to the input structure or topology file. Accepted formats: crd, gro, mdcrd, mol2, pdb, pdbqt, prmtop, psf, top, tpr, xml, xyz.') 

167 required_args.add_argument('--input_traj_path', required=True, help='Path to the input trajectory to be processed. Accepted formats: arc, crd, dcd, ent, gro, inpcrd, mdcrd, mol2, nc, pdb, pdbqt, restrt, tng, trr, xtc, xyz.') 

168 required_args.add_argument('--output_positions_path', required=True, help=' Path to the output z positions.') 

169 

170 args = parser.parse_args() 

171 args.config = args.config or "{}" 

172 properties = settings.ConfReader(config=args.config).get_prop_dic() 

173 

174 # Specific call of each building block 

175 lpp_zpositions(input_top_path=args.input_top_path, 

176 input_traj_path=args.input_traj_path, 

177 output_positions_path=args.output_positions_path, 

178 properties=properties) 

179 

180 

181def frame_df(output_positions_path): 

182 """ 

183 Processes a CSV file containing z-position data and calculates the mean positive, mean negative, 

184 thickness, and standard deviation of thickness for each frame. 

185 

186 Args: 

187 output_positions_path (str): Path to the CSV file containing z-position data. 

188 Returns: 

189 pandas.DataFrame: A DataFrame with the following columns: 

190 - mean_positive: Mean of positive z-positions for each frame. 

191 - mean_negative: Mean of negative z-positions for each frame. 

192 - thickness: Difference between mean_positive and mean_negative for each frame. 

193 - std_thickness: Standard deviation of the absolute z-positions for each frame. 

194 """ 

195 

196 df = pd.read_csv(output_positions_path) 

197 grouped = df.groupby('frame')['zposition'].agg( 

198 mean_positive=lambda x: x[x > 0].mean(), 

199 mean_negative=lambda x: x[x < 0].mean(), 

200 std_positive=lambda x: x[x > 0].std(), 

201 std_negative=lambda x: x[x < 0].std() 

202 ) 

203 grouped['thickness'] = grouped['mean_positive'] - grouped['mean_negative'] 

204 grouped['std_thickness'] = df.groupby('frame')['zposition'].apply(lambda x: x.abs().std()) 

205 return grouped 

206 

207 

208if __name__ == '__main__': 

209 main()