Coverage for biobb_mem/fatslim/fatslim

1#!/usr/bin/env python3

3"""Module containing the FATSLiM Membranes class and the command line interface."""

4import argparse

5from pathlib import PurePath

6from biobb_common.generic.biobb_object import BiobbObject

7from biobb_common.configuration import settings

8from biobb_common.tools.file_utils import launchlogger

9from biobb_common.tools import file_utils as fu

10import MDAnalysis as mda

11from biobb_mem.fatslim.common import calculate_box

12import shutil

13import numpy as np

16class FatslimMembranes(BiobbObject):

17 """

18 | biobb_mem FatslimMembranes

19 | Wrapper of the `FATSLiM membranes <https://pythonhosted.org/fatslim/documentation/leaflets.html>`_ module for leaflet and membrane identification.

20 | FATSLiM is designed to provide efficient and robust analysis of physical parameters from MD trajectories, with a focus on processing large trajectory files quickly.

22 Args:

23 input_top_path (str): Path to the input topology file. File type: input. `Sample file <https://github.com/bioexcel/biobb_mem/raw/main/biobb_mem/test/data/A01JD/A01JD.pdb>`_. Accepted formats: tpr (edam:format_2333), gro (edam:format_2033), g96 (edam:format_2033), pdb (edam:format_1476), brk (edam:format_2033), ent (edam:format_1476).

24 input_traj_path (str) (Optional): Path to the GROMACS trajectory file. File type: input. `Sample file <https://github.com/bioexcel/biobb_mem/raw/main/biobb_mem/test/data/A01JD/A01JD.xtc>`_. Accepted formats: xtc (edam:format_3875), trr (edam:format_3910), cpt (edam:format_2333), gro (edam:format_2033), g96 (edam:format_2033), pdb (edam:format_1476), tng (edam:format_3876).

25 input_ndx_path (str) (Optional): Path to the input lipid headgroups index NDX file. File type: input. `Sample file <https://github.com/bioexcel/biobb_mem/raw/main/biobb_mem/test/data/A01JD/A01JD.ndx>`_. Accepted formats: ndx (edam:format_2033).

26 output_ndx_path (str): Path to the output index NDX file. File type: output. `Sample file <https://github.com/bioexcel/biobb_mem/raw/main/biobb_mem/test/reference/fatslim/leaflets.ndx>`_. Accepted formats: ndx (edam:format_2033).

27 properties (dic - Python dictionary object containing the tool parameters, not input/output files):

28 * **selection** (*str*) - ("not protein and element P") Alternative ot the NDX file for choosing the Headgroups used in the identification using MDAnalysis `selection language <https://docs.mdanalysis.org/stable/documentation_pages/selections.html>`_.

29 * **cutoff** (*float*) - (2) Cutoff distance (in nm) to be used when leaflet identification is performed.

30 * **begin_frame** (*int*) - (-1) First frame index to be used for analysis.

31 * **end_frame** (*int*) - (-1) Last frame index to be used for analysis.

32 * **ignore_no_box** (*bool*) - (False) Ignore the absence of box information in the topology. If the topology does not contain box information, the box will be set to the minimum and maximum positions of the atoms.

33 * **return_hydrogen** (*bool*) - (False) Include hydrogen atoms in the output index file.

34 * **binary_path** (*str*) - ("fatslim") Path to the fatslim executable binary.

35 * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.

36 * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.

37 * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

39 Examples:

40 This is a use example of how to use the building block from Python::

42 from biobb_mem.fatslim.fatslim_membranes import fatslim_membranes

43 prop = {

44 'selection': '(resname DPPC and name P8)',

45 'cutoff': 2.2

46 }

47 fatslim_membranes(input_top_path='/path/to/myTopology.tpr',

48 input_traj_path='/path/to/myTrajectory.xtc',

49 output_ndx_path='/path/to/newIndex.ndx',

50 properties=prop)

52 Info:

53 * wrapped_software:

54 * name: FATSLiM

55 * version: 0.2.2

56 * license: GNU

57 * ontology:

58 * name: EDAM

59 * schema: http://edamontology.org/EDAM.owl

61 """

63 def __init__(self, input_top_path, output_ndx_path, input_traj_path=None,

64 input_ndx_path=None, properties=None, **kwargs) -> None:

65 properties = properties or {}

67 # Call parent class constructor

68 super().__init__(properties)

69 self.locals_var_dict = locals().copy()

71 # Input/Output files

72 self.io_dict = {

73 "in": {"input_top_path": input_top_path,

74 "input_traj_path": input_traj_path,

75 "input_ndx_path": input_ndx_path

76 },

77 "out": {"output_ndx_path": output_ndx_path}

78 }

80 # Properties specific for BB

81 self.selection = properties.get('selection', "not protein and element P")

82 self.cutoff = properties.get('cutoff', 2)

83 self.begin_frame = properties.get('begin_frame', -1)

84 self.end_frame = properties.get('end_frame', -1)

85 self.ignore_no_box = properties.get('ignore_no_box', False)

86 self.return_hydrogen = properties.get('return_hydrogen', False)

87 self.binary_path = properties.get('binary_path', 'fatslim')

88 self.properties = properties

90 # Check the properties

91 self.check_properties(properties)

92 self.check_arguments()

94 @launchlogger

95 def launch(self) -> int:

96 """Execute the :class:`FatslimMembranes <fatslim.fatslim_membranes.FatslimMembranes>` fatslim.fatslim_membranes.FatslimMembranes object."""

98 # Setup Biobb

99 if self.check_restart():

100 return 0

101 self.stage_files()

102

103 # Create index file using MDAnalysis

104 u = mda.Universe(topology=self.stage_io_dict["in"]["input_top_path"],

105 coordinates=self.stage_io_dict["in"].get("input_traj_path"))

106 if u.dimensions is None:

107 # FATSLiM ValueError: Box does not correspond to PBC=xyz

108 if self.ignore_no_box:

109 calculate_box(u)

110 else:

111 print('The trajectory does not contain box information. Please set the ignore_no_box property to True to ignore this error.')

112

113 # Build the index to select the atoms from the membrane

114 if self.stage_io_dict["in"].get('input_ndx_path', None):

115 self.tmp_ndx = self.stage_io_dict["in"]["input_ndx_path"]

116 else:

117 self.tmp_ndx = str(PurePath(fu.create_unique_dir()).joinpath('headgroups.ndx'))

118 with mda.selections.gromacs.SelectionWriter(self.tmp_ndx, mode='w') as ndx:

119 ndx.write(u.select_atoms(self.selection), name='headgroups')

120

121 if self.stage_io_dict["in"]["input_top_path"].endswith('gro'):

122 self.cfg = self.stage_io_dict["in"]["input_top_path"]

123 self.cmd = []

124 else:

125 # Convert topology .gro and add box dimensions if not available in the topology

126 self.cfg = str(PurePath(fu.create_unique_dir()).joinpath('output.gro'))

127 self.tmp_files.extend([PurePath(self.cfg).parent])

128 self.cmd = ['gmx', 'editconf',

129 '-f', self.stage_io_dict["in"]["input_top_path"],

130 '-o', self.cfg,

131 '-box', ' '.join(map(str, u.dimensions[:3])), ';',

132 ]

133 self.tmp_out = str(PurePath(fu.create_unique_dir()).joinpath('output.ndx'))

134

135 # Build command

136 self.cmd.extend([

137 self.binary_path, "membranes",

138 "-n", self.tmp_ndx,

139 "-c", self.cfg,

140 "--output-index", self.tmp_out,

141 "--cutoff", str(self.cutoff),

142 "--begin-frame", str(self.begin_frame),

143 "--end-frame", str(self.end_frame)

144 ])

145

146 # Run Biobb block

147 self.run_biobb()

148 # Fatslim ignore H atoms so we add them manually

149 if self.return_hydrogen:

150 # Parse the atoms indices of the membrane without Hs

151 leaflet_groups = parse_index(self.tmp_out[:-4]+'_0000.ndx')

152 with mda.selections.gromacs.SelectionWriter(self.stage_io_dict["out"]["output_ndx_path"], mode='w') as ndx:

153 for key, value in leaflet_groups.items():

154 # Select the residues using atom indexes

155 res_sele = set(u.atoms[np.array(value)-1].residues.resindices)

156 # Use the rexindex to select all the atoms of the residue

157 sele = f"resindex {' '.join(map(str, res_sele))}"

158 ndx.write(u.select_atoms(sele), name=key)

159 else:

160 shutil.move(self.tmp_out[:-4]+'_0000.ndx', self.stage_io_dict["out"]["output_ndx_path"])

161 # Copy files to host

162 self.copy_to_host()

163

164 # Remove temporary files

165 self.tmp_files.extend([

166 self.stage_io_dict.get("unique_dir"),

167 PurePath(self.tmp_ndx).parent,

168 PurePath(self.tmp_out).parent

169 ])

170 self.remove_tmp_files()

171

172 self.check_arguments(output_files_created=True, raise_exception=False)

173

174 return self.return_code

175

176

177def fatslim_membranes(input_top_path: str, output_ndx_path: str, input_traj_path: str = None, input_ndx_path: str = None, properties: dict = None, **kwargs) -> int:

178 """Execute the :class:`FatslimMembranes <fatslim.fatslim_membranes.FatslimMembranes>` class and

179 execute the :meth:`launch() <fatslim.fatslim_membranes.FatslimMembranes.launch>` method."""

180

181 return FatslimMembranes(input_top_path=input_top_path,

182 input_traj_path=input_traj_path,

183 input_ndx_path=input_ndx_path,

184 output_ndx_path=output_ndx_path,

185 properties=properties, **kwargs).launch()

186

187

188def main():

189 """Command line execution of this building block. Please check the command line documentation."""

190 parser = argparse.ArgumentParser(description="Calculates the density along an axis of a given cpptraj compatible trajectory.", formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999))

191 parser.add_argument('--config', required=False, help='Configuration file')

192

193 # Specific args of each building block

194 required_args = parser.add_argument_group('required arguments')

195 required_args.add_argument('--input_top_path', required=True, help='Path to the input structure or topology file. Accepted formats: ent, gro, pdb, tpr.')

196 required_args.add_argument('--output_ndx_path', required=True, help='Path to the GROMACS index file. Accepted formats: ndx')

197 parser.add_argument('--input_traj_path', required=False, help='Path to the input trajectory to be processed. Accepted formats: gro, pdb, tng, trr, xtc.')

198 parser.add_argument('--input_ndx_path', required=False, help='Path to the input lipid headgroups index NDX file. Accepted formats: ndx.')

199

200 args = parser.parse_args()

201 args.config = args.config or "{}"

202 properties = settings.ConfReader(config=args.config).get_prop_dic()

203

204 # Specific call of each building block

205 fatslim_membranes(input_top_path=args.input_top_path,

206 output_ndx_path=args.output_ndx_path,

207 input_traj_path=args.input_traj_path,

208 input_ndx_path=args.input_ndx_path,

209 properties=properties)

210

211

212def parse_index(ndx):

213 """

214 Parses a GROMACS index file (.ndx) to extract leaflet groups.

215

216 Args:

217 ndx (str): Path to the GROMACS index file (.ndx).

218 Returns:

219 dict: A dictionary where keys are group names for each leaflet in format "membrane_1_leaflet_1" and values are lists of integers representing atom indices starting from 1.

220 """

221

222 # Read the leaflet.ndx file

223 with open(ndx, 'r') as file:

224 leaflet_data = file.readlines()

225

226 # Initialize dictionaries to store leaflet groups

227 leaflet_groups = {}

228 current_group = None

229

230 # Parse the leaflet.ndx file

231 for line in leaflet_data:

232 line = line.strip()

233 if line.startswith('[') and line.endswith(']'):

234 current_group = line[1:-1].strip()

235 leaflet_groups[current_group] = []

236 elif current_group is not None:

237 leaflet_groups[current_group].extend(map(int, line.split()))

238 return leaflet_groups

239

240

241def display_fatslim(input_top_path: str, lipid_sel: str, input_traj_path: str = None, output_ndx_path="leaflets.ndx", leaflets=True,

242 colors=['blue', 'cyan', 'yellow', 'orange', 'purple', 'magenta'], non_mem_color='red'):

243 """

244 Visualize the leaflets of a membrane using NGLView. The lipids in the membrane are colored according to their leaflet. The ones not in the membrane are colored in red.

245

246 Args:

247 input_top_path (str): Path to the input topology file.

248 input_traj_path (str, optional): Path to the input trajectory file. Default is None.

249 output_ndx_path (str, optional): Path to the output index file containing leaflet information. Default is "leaflets.ndx".

250 leaflets (bool, optional): If True, visualize individual leaflets. If False, visualize entire membranes. Default is True.

251 colors (list of str, optional): List of colors to use for visualizing the leaflets or membranes. Default is ['blue', 'cyan', 'yellow', 'orange', 'purple', 'magenta'].

252 non_mem_color (str, optional): Color to use for visualizing lipids not in the membrane. Default is 'red'.

253 Returns:

254 nglview.NGLWidget: An NGLView widget displaying the membrane leaflets.

255 """

256 try:

257 import nglview as nv

258 except ImportError:

259 raise ImportError('Please install the nglview package to visualize the membrane/s.')

260

261 u = mda.Universe(topology=input_top_path,

262 coordinates=input_traj_path)

263 # Visualize the system with NGLView

264 view = nv.show_mdanalysis(u)

265 view.clear_representations()

266

267 leaflet_groups = parse_index(output_ndx_path)

268 n_mems = len(leaflet_groups.keys())//2

269

270 non_mem_resn = set(u.select_atoms(lipid_sel).residues.resnums)

271 for n in range(n_mems):

272 # Convert atoms list to resnums (nglview uses cannot use resindex)

273 top_resn = u.atoms[np.array(leaflet_groups[f'membrane_{n+1}_leaflet_1'])-1].residues.resnums

274 bot_resn = u.atoms[np.array(leaflet_groups[f'membrane_{n+1}_leaflet_2'])-1].residues.resnums

275 non_mem_resn -= set(top_resn)

276 non_mem_resn -= set(bot_resn)

277 if leaflets:

278 view.add_point(selection=", ".join(map(str, top_resn)), color=colors[n*2]) # lipids in top leaflet

279 view.add_point(selection=", ".join(map(str, bot_resn)), color=colors[n*2+1]) # lipids in bot leaflet

280 else:

281 mem_resn = np.concatenate((top_resn, bot_resn))

282 view.add_point(selection=", ".join(map(str, mem_resn)), color=colors[n*2]) # lipids in membrane

283 if len(non_mem_resn) > 0:

284 view.add_point(selection=", ".join(map(str, non_mem_resn)), color=non_mem_color) # lipids without membrane

285 return view

286

287

288if __name__ == '__main__':

289 main()

Coverage for biobb_mem/fatslim/fatslim_membranes.py: 50%

116 statements