Coverage for biobb_mem/fatslim/fatslim_membranes.py: 50%

117 statements  

« prev     ^ index     » next       coverage.py v7.10.1, created at 2025-08-01 02:58 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the FATSLiM Membranes class and the command line interface.""" 

4import argparse 

5from pathlib import PurePath 

6from biobb_common.generic.biobb_object import BiobbObject 

7from biobb_common.configuration import settings 

8from biobb_common.tools.file_utils import launchlogger 

9from biobb_common.tools import file_utils as fu 

10import MDAnalysis as mda 

11from biobb_mem.fatslim.common import calculate_box 

12import shutil 

13import numpy as np 

14 

15 

16class FatslimMembranes(BiobbObject): 

17 """ 

18 | biobb_mem FatslimMembranes 

19 | Wrapper of the `FATSLiM membranes <https://pythonhosted.org/fatslim/documentation/leaflets.html>`_ module for leaflet and membrane identification. 

20 | FATSLiM is designed to provide efficient and robust analysis of physical parameters from MD trajectories, with a focus on processing large trajectory files quickly. 

21 

22 Args: 

23 input_top_path (str): Path to the input topology file. File type: input. `Sample file <https://github.com/bioexcel/biobb_mem/raw/main/biobb_mem/test/data/A01JD/A01JD.pdb>`_. Accepted formats: tpr (edam:format_2333), gro (edam:format_2033), g96 (edam:format_2033), pdb (edam:format_1476), brk (edam:format_2033), ent (edam:format_1476). 

24 input_traj_path (str) (Optional): Path to the GROMACS trajectory file. File type: input. `Sample file <https://github.com/bioexcel/biobb_mem/raw/main/biobb_mem/test/data/A01JD/A01JD.xtc>`_. Accepted formats: xtc (edam:format_3875), trr (edam:format_3910), cpt (edam:format_2333), gro (edam:format_2033), g96 (edam:format_2033), pdb (edam:format_1476), tng (edam:format_3876). 

25 input_ndx_path (str) (Optional): Path to the input lipid headgroups index NDX file. File type: input. `Sample file <https://github.com/bioexcel/biobb_mem/raw/main/biobb_mem/test/data/A01JD/A01JD.ndx>`_. Accepted formats: ndx (edam:format_2033). 

26 output_ndx_path (str): Path to the output index NDX file. File type: output. `Sample file <https://github.com/bioexcel/biobb_mem/raw/main/biobb_mem/test/reference/fatslim/leaflets.ndx>`_. Accepted formats: ndx (edam:format_2033). 

27 properties (dic - Python dictionary object containing the tool parameters, not input/output files): 

28 * **selection** (*str*) - ("not protein and element P") Alternative ot the NDX file for choosing the Headgroups used in the identification using MDAnalysis `selection language <https://docs.mdanalysis.org/stable/documentation_pages/selections.html>`_. 

29 * **cutoff** (*float*) - (2) Cutoff distance (in nm) to be used when leaflet identification is performed. 

30 * **begin_frame** (*int*) - (-1) First frame index to be used for analysis. 

31 * **end_frame** (*int*) - (-1) Last frame index to be used for analysis. 

32 * **ignore_no_box** (*bool*) - (False) Ignore the absence of box information in the topology. If the topology does not contain box information, the box will be set to the minimum and maximum positions of the atoms. 

33 * **return_hydrogen** (*bool*) - (False) Include hydrogen atoms in the output index file. 

34 * **binary_path** (*str*) - ("fatslim") Path to the fatslim executable binary. 

35 * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files. 

36 * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist. 

37 * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory. 

38 

39 Examples: 

40 This is a use example of how to use the building block from Python:: 

41 

42 from biobb_mem.fatslim.fatslim_membranes import fatslim_membranes 

43 prop = { 

44 'selection': '(resname DPPC and name P8)', 

45 'cutoff': 2.2 

46 } 

47 fatslim_membranes(input_top_path='/path/to/myTopology.tpr', 

48 input_traj_path='/path/to/myTrajectory.xtc', 

49 output_ndx_path='/path/to/newIndex.ndx', 

50 properties=prop) 

51 

52 Info: 

53 * wrapped_software: 

54 * name: FATSLiM 

55 * version: 0.2.2 

56 * license: GNU 

57 * ontology: 

58 * name: EDAM 

59 * schema: http://edamontology.org/EDAM.owl 

60 

61 """ 

62 

63 def __init__(self, input_top_path, output_ndx_path, input_traj_path=None, 

64 input_ndx_path=None, properties=None, **kwargs) -> None: 

65 properties = properties or {} 

66 

67 # Call parent class constructor 

68 super().__init__(properties) 

69 self.locals_var_dict = locals().copy() 

70 

71 # Input/Output files 

72 self.io_dict = { 

73 "in": {"input_top_path": input_top_path, 

74 "input_traj_path": input_traj_path, 

75 "input_ndx_path": input_ndx_path 

76 }, 

77 "out": {"output_ndx_path": output_ndx_path} 

78 } 

79 

80 # Properties specific for BB 

81 self.selection = properties.get('selection', "not protein and element P") 

82 self.cutoff = properties.get('cutoff', 2) 

83 self.begin_frame = properties.get('begin_frame', -1) 

84 self.end_frame = properties.get('end_frame', -1) 

85 self.ignore_no_box = properties.get('ignore_no_box', False) 

86 self.return_hydrogen = properties.get('return_hydrogen', False) 

87 self.binary_path = properties.get('binary_path', 'fatslim') 

88 self.properties = properties 

89 

90 # Check the properties 

91 self.check_properties(properties) 

92 self.check_arguments() 

93 

94 @launchlogger 

95 def launch(self) -> int: 

96 """Execute the :class:`FatslimMembranes <fatslim.fatslim_membranes.FatslimMembranes>` fatslim.fatslim_membranes.FatslimMembranes object.""" 

97 

98 # Setup Biobb 

99 if self.check_restart(): 

100 return 0 

101 self.stage_files() 

102 

103 # Create index file using MDAnalysis 

104 u = mda.Universe(topology=self.stage_io_dict["in"]["input_top_path"], 

105 coordinates=self.stage_io_dict["in"].get("input_traj_path")) 

106 if u.dimensions is None: 

107 # FATSLiM ValueError: Box does not correspond to PBC=xyz 

108 if self.ignore_no_box: 

109 fu.log('Setting box dimensions using the minimum and maximum positions of the atoms.', 

110 self.out_log, self.global_log) 

111 calculate_box(u) 

112 else: 

113 fu.log('The trajectory does not contain box information. ' 

114 'Please set the ignore_no_box property to True to ignore this error.', 

115 self.out_log, self.global_log) 

116 

117 # Build the index to select the atoms from the membrane 

118 if self.stage_io_dict["in"].get('input_ndx_path', None): 

119 self.tmp_ndx = self.stage_io_dict["in"]["input_ndx_path"] 

120 else: 

121 self.tmp_ndx = str(PurePath(fu.create_unique_dir()).joinpath('headgroups.ndx')) 

122 with mda.selections.gromacs.SelectionWriter(self.tmp_ndx, mode='w') as ndx: 

123 ndx.write(u.select_atoms(self.selection), name='headgroups') 

124 

125 if self.stage_io_dict["in"]["input_top_path"].endswith('gro'): 

126 self.cfg = self.stage_io_dict["in"]["input_top_path"] 

127 self.cmd = [] 

128 else: 

129 # Convert topology .gro and add box dimensions if not available in the topology 

130 self.cfg = str(PurePath(fu.create_unique_dir()).joinpath('output.gro')) 

131 self.tmp_files.extend([PurePath(self.cfg).parent]) 

132 self.cmd = ['gmx', 'editconf', 

133 '-f', self.stage_io_dict["in"]["input_top_path"], 

134 '-o', self.cfg, 

135 '-box', ' '.join(map(str, u.dimensions[:3])), ';', 

136 ] 

137 self.tmp_out = str(PurePath(fu.create_unique_dir()).joinpath('output.ndx')) 

138 

139 # Build command 

140 self.cmd.extend([ 

141 self.binary_path, "membranes", 

142 "-n", self.tmp_ndx, 

143 "-c", self.cfg, 

144 "--output-index", self.tmp_out, 

145 "--cutoff", str(self.cutoff), 

146 "--begin-frame", str(self.begin_frame), 

147 "--end-frame", str(self.end_frame) 

148 ]) 

149 

150 # Run Biobb block 

151 self.run_biobb() 

152 # Fatslim ignore H atoms so we add them manually 

153 if self.return_hydrogen: 

154 # Parse the atoms indices of the membrane without Hs 

155 leaflet_groups = parse_index(self.tmp_out[:-4]+'_0000.ndx') 

156 with mda.selections.gromacs.SelectionWriter(self.stage_io_dict["out"]["output_ndx_path"], mode='w') as ndx: 

157 for key, value in leaflet_groups.items(): 

158 # Select the residues using atom indexes 

159 res_sele = set(u.atoms[np.array(value)-1].residues.resindices) 

160 # Use the rexindex to select all the atoms of the residue 

161 sele = f"resindex {' '.join(map(str, res_sele))}" 

162 ndx.write(u.select_atoms(sele), name=key) 

163 else: 

164 shutil.move(self.tmp_out[:-4]+'_0000.ndx', self.stage_io_dict["out"]["output_ndx_path"]) 

165 # Copy files to host 

166 self.copy_to_host() 

167 

168 # Remove temporary files 

169 self.tmp_files.extend([ 

170 self.stage_io_dict.get("unique_dir"), 

171 PurePath(self.tmp_ndx).parent, 

172 PurePath(self.tmp_out).parent 

173 ]) 

174 self.remove_tmp_files() 

175 

176 self.check_arguments(output_files_created=True, raise_exception=False) 

177 

178 return self.return_code 

179 

180 

181def fatslim_membranes(input_top_path: str, output_ndx_path: str, input_traj_path: str = None, input_ndx_path: str = None, properties: dict = None, **kwargs) -> int: 

182 """Execute the :class:`FatslimMembranes <fatslim.fatslim_membranes.FatslimMembranes>` class and 

183 execute the :meth:`launch() <fatslim.fatslim_membranes.FatslimMembranes.launch>` method.""" 

184 

185 return FatslimMembranes(input_top_path=input_top_path, 

186 input_traj_path=input_traj_path, 

187 input_ndx_path=input_ndx_path, 

188 output_ndx_path=output_ndx_path, 

189 properties=properties, **kwargs).launch() 

190 

191 

192def main(): 

193 """Command line execution of this building block. Please check the command line documentation.""" 

194 parser = argparse.ArgumentParser(description="Calculates the density along an axis of a given cpptraj compatible trajectory.", formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999)) 

195 parser.add_argument('--config', required=False, help='Configuration file') 

196 

197 # Specific args of each building block 

198 required_args = parser.add_argument_group('required arguments') 

199 required_args.add_argument('--input_top_path', required=True, help='Path to the input structure or topology file. Accepted formats: ent, gro, pdb, tpr.') 

200 required_args.add_argument('--output_ndx_path', required=True, help='Path to the GROMACS index file. Accepted formats: ndx') 

201 parser.add_argument('--input_traj_path', required=False, help='Path to the input trajectory to be processed. Accepted formats: gro, pdb, tng, trr, xtc.') 

202 parser.add_argument('--input_ndx_path', required=False, help='Path to the input lipid headgroups index NDX file. Accepted formats: ndx.') 

203 

204 args = parser.parse_args() 

205 args.config = args.config or "{}" 

206 properties = settings.ConfReader(config=args.config).get_prop_dic() 

207 

208 # Specific call of each building block 

209 fatslim_membranes(input_top_path=args.input_top_path, 

210 output_ndx_path=args.output_ndx_path, 

211 input_traj_path=args.input_traj_path, 

212 input_ndx_path=args.input_ndx_path, 

213 properties=properties) 

214 

215 

216def parse_index(ndx): 

217 """ 

218 Parses a GROMACS index file (.ndx) to extract leaflet groups. 

219 

220 Args: 

221 ndx (str): Path to the GROMACS index file (.ndx). 

222 Returns: 

223 dict: A dictionary where keys are group names for each leaflet in format "membrane_1_leaflet_1" and values are lists of integers representing atom indices starting from 1. 

224 """ 

225 

226 # Read the leaflet.ndx file 

227 with open(ndx, 'r') as file: 

228 leaflet_data = file.readlines() 

229 

230 # Initialize dictionaries to store leaflet groups 

231 leaflet_groups = {} 

232 current_group = None 

233 

234 # Parse the leaflet.ndx file 

235 for line in leaflet_data: 

236 line = line.strip() 

237 if line.startswith('[') and line.endswith(']'): 

238 current_group = line[1:-1].strip() 

239 leaflet_groups[current_group] = [] 

240 elif current_group is not None: 

241 leaflet_groups[current_group].extend(map(int, line.split())) 

242 return leaflet_groups 

243 

244 

245def display_fatslim(input_top_path: str, lipid_sel: str, input_traj_path: str = None, output_ndx_path="leaflets.ndx", leaflets=True, 

246 colors=['blue', 'cyan', 'yellow', 'orange', 'purple', 'magenta'], non_mem_color='red'): 

247 """ 

248 Visualize the leaflets of a membrane using NGLView. The lipids in the membrane are colored according to their leaflet. The ones not in the membrane are colored in red. 

249 

250 Args: 

251 input_top_path (str): Path to the input topology file. 

252 input_traj_path (str, optional): Path to the input trajectory file. Default is None. 

253 output_ndx_path (str, optional): Path to the output index file containing leaflet information. Default is "leaflets.ndx". 

254 leaflets (bool, optional): If True, visualize individual leaflets. If False, visualize entire membranes. Default is True. 

255 colors (list of str, optional): List of colors to use for visualizing the leaflets or membranes. Default is ['blue', 'cyan', 'yellow', 'orange', 'purple', 'magenta']. 

256 non_mem_color (str, optional): Color to use for visualizing lipids not in the membrane. Default is 'red'. 

257 Returns: 

258 nglview.NGLWidget: An NGLView widget displaying the membrane leaflets. 

259 """ 

260 try: 

261 import nglview as nv 

262 except ImportError: 

263 raise ImportError('Please install the nglview package to visualize the membrane/s.') 

264 

265 u = mda.Universe(topology=input_top_path, 

266 coordinates=input_traj_path) 

267 # Visualize the system with NGLView 

268 view = nv.show_mdanalysis(u) 

269 view.clear_representations() 

270 

271 leaflet_groups = parse_index(output_ndx_path) 

272 n_mems = len(leaflet_groups.keys())//2 

273 

274 non_mem_resn = set(u.select_atoms(lipid_sel).residues.resnums) 

275 for n in range(n_mems): 

276 # Convert atoms list to resnums (nglview uses cannot use resindex) 

277 top_resn = u.atoms[np.array(leaflet_groups[f'membrane_{n+1}_leaflet_1'])-1].residues.resnums 

278 bot_resn = u.atoms[np.array(leaflet_groups[f'membrane_{n+1}_leaflet_2'])-1].residues.resnums 

279 non_mem_resn -= set(top_resn) 

280 non_mem_resn -= set(bot_resn) 

281 if leaflets: 

282 view.add_point(selection=", ".join(map(str, top_resn)), color=colors[n*2]) # lipids in top leaflet 

283 view.add_point(selection=", ".join(map(str, bot_resn)), color=colors[n*2+1]) # lipids in bot leaflet 

284 else: 

285 mem_resn = np.concatenate((top_resn, bot_resn)) 

286 view.add_point(selection=", ".join(map(str, mem_resn)), color=colors[n*2]) # lipids in membrane 

287 if len(non_mem_resn) > 0: 

288 view.add_point(selection=", ".join(map(str, non_mem_resn)), color=non_mem_color) # lipids without membrane 

289 return view 

290 

291 

292if __name__ == '__main__': 

293 main()