Coverage for biobb_amber/leap/leap_build_linear_structure.py: 68%

97 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-01-28 08:28 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the LeapBuildLinearStructure class and the command line interface.""" 

4 

5import os 

6import argparse 

7from pathlib import PurePath 

8from typing import List, Optional 

9 

10from biobb_common.configuration import settings 

11from biobb_common.generic.biobb_object import BiobbObject 

12from biobb_common.tools import file_utils as fu 

13from biobb_common.tools.file_utils import launchlogger 

14 

15from biobb_amber.leap.common import _from_string_to_list, check_output_path 

16 

17 

class LeapBuildLinearStructure(BiobbObject):
    """
    | biobb_amber.leap.leap_build_linear_structure LeapBuildLinearStructure
    | Wrapper of the `AmberTools (AMBER MD Package) leap tool <https://ambermd.org/AmberTools.php>`_ module.
    | Builds a linear (unfolded) 3D structure from an AA sequence using tLeap tool from the AmberTools MD package.

    Args:
        output_pdb_path (str): Linear (unfolded) 3D structure PDB file. File type: output. `Sample file <https://github.com/bioexcel/biobb_amber/raw/master/biobb_amber/test/reference/leap/structure.pdb>`_. Accepted formats: pdb (edam:format_1476).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **sequence** (*str*) - ("ALA GLY SER PRO ARG ALA PRO GLY") Aminoacid sequence to convert to a linear 3D structure. Aminoacids should be written in 3-letter code, with a blank space between them.
            * **forcefield** (*list*) - (["protein.ff14SB","DNA.bsc1","gaff"]) Forcefields to be used for the structure generation. Each item should be either a path to a leaprc file or a string with the leaprc file name if the force field is included with Amber (e.g. "/path/to/leaprc.protein.ff14SB" or "protein.ff14SB"). Default values: ["protein.ff14SB","DNA.bsc1","gaff"].
            * **build_library** (*bool*) - (False) Generate AMBER lib file for the structure.
            * **binary_path** (*str*) - ("tleap") Path to the tleap executable binary.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
            * **container_path** (*str*) - (None) Container path definition.
            * **container_image** (*str*) - ('afandiadib/ambertools:serial') Container image definition.
            * **container_volume_path** (*str*) - ('/tmp') Container volume path definition.
            * **container_working_dir** (*str*) - (None) Container working directory definition.
            * **container_user_id** (*str*) - (None) Container user_id definition.
            * **container_shell_path** (*str*) - ('/bin/bash') Path to default shell inside the container.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_amber.leap.leap_build_linear_structure import leap_build_linear_structure
            prop = {
                'sequence' : 'ALA PRO SER ARG LYS ASP GLU GLY GLY ALA',
                'build_library': False,
                'forcefield': ['protein.ff14SB']
            }
            leap_build_linear_structure(output_pdb_path='/path/to/newStructure.pdb',
                                        properties=prop)


    Info:
        * wrapped_software:
            * name: AmberTools tLeap
            * version: >20.9
            * license: LGPL 2.1
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(self, output_pdb_path, properties, **kwargs):
        """Store the output path and read the block-specific properties.

        Args:
            output_pdb_path (str): Path of the PDB file to be written.
            properties (dict): Tool parameters; a falsy value is replaced by
                an empty dictionary.
            **kwargs: Not read directly here; only captured in
                ``locals_var_dict``.

        Raises:
            ValueError: If any force field cannot be resolved to an existing
                leaprc file (see :meth:`find_leaprc_paths`).
        """
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot taken before any further local is defined, so it only holds
        # the constructor arguments.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {"in": {}, "out": {"output_pdb_path": output_pdb_path}}

        # Default force fields. AMBERHOME falls back to "" (as in
        # find_leaprc_paths) so that an unset variable does not crash with a
        # TypeError in os.path.join when the caller supplies valid paths.
        leaprc_dir = os.path.join(os.getenv("AMBERHOME", ""), "dat", "leap", "cmd")
        default_forcefields = [
            os.path.join(leaprc_dir, "leaprc.protein.ff14SB"),
            os.path.join(leaprc_dir, "leaprc.DNA.bsc1"),
            os.path.join(leaprc_dir, "leaprc.gaff"),
        ]

        # Properties specific for BB
        self.properties = properties
        self.sequence = properties.get("sequence", "ALA GLY SER PRO ARG ALA PRO GLY")
        # Find the paths of the leaprc files if only the force field names are provided
        self.forcefield = self.find_leaprc_paths(
            _from_string_to_list(properties.get("forcefield", default_forcefields))
        )
        # NOTE(review): build_library is stored but never read anywhere else
        # in this class -- confirm whether lib generation is still pending.
        self.build_library = properties.get("build_library", False)
        self.binary_path = properties.get("binary_path", "tleap")

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    def check_data_params(self, out_log, err_log):
        """Checks input/output paths correctness.

        Replaces the stored ``output_pdb_path`` with the value returned by
        ``check_output_path``. ``err_log`` is accepted but not used here.
        """

        # Check output(s)
        self.io_dict["out"]["output_pdb_path"] = check_output_path(
            self.io_dict["out"]["output_pdb_path"],
            "output_pdb_path",
            False,
            out_log,
            self.__class__.__name__,
        )

    def find_leaprc_paths(self, forcefields: List[str]) -> List[str]:
        """
        Find the leaprc paths for the force fields provided.

        For each item in the forcefields list, the first existing candidate is
        kept, trying in this order:

        1. the item itself, taken as a path;
        2. ``$AMBERHOME/dat/leap/cmd/leaprc.<item>``;
        3. ``$AMBERHOME/dat/leap/cmd/oldff/leaprc.<item>``;
        4. ``$AMBERHOME/dat/leap/cmd/<item>``;
        5. ``$AMBERHOME/dat/leap/cmd/oldff/<item>``.

        An unset ``AMBERHOME`` is treated as an empty string.

        Args:
            forcefields (List[str]): List of force fields to find the leaprc files for.

        Returns:
            List[str]: List of leaprc file paths, in the order of the input.

        Raises:
            ValueError: If none of the candidates exists for an item.
        """

        # Loop-invariant: resolve the Amber leaprc directory only once.
        leaprc_dir = os.path.join(os.environ.get("AMBERHOME", ""), "dat", "leap", "cmd")

        leaprc_paths = []
        for forcefield in forcefields:
            candidates = (
                forcefield,
                os.path.join(leaprc_dir, f"leaprc.{forcefield}"),
                os.path.join(leaprc_dir, "oldff", f"leaprc.{forcefield}"),
                os.path.join(leaprc_dir, forcefield),
                os.path.join(leaprc_dir, "oldff", forcefield),
            )
            found = next((path for path in candidates if os.path.exists(path)), None)
            if found is None:
                raise ValueError(f"Force field {forcefield} not found. Check the $AMBERHOME/dat/leap/cmd/ directory for available force fields or provide the path to an existing leaprc file.")
            leaprc_paths.append(found)

        return leaprc_paths

    @launchlogger
    def launch(self):
        """Launches the execution of the LeapBuildLinearStructure module.

        Writes a leap instructions file, runs ``binary_path -f <file>`` through
        ``run_biobb`` and returns ``self.return_code``. Returns 0 without
        running anything when ``check_restart()`` is truthy.
        """

        # check input/output paths and parameters
        self.check_data_params(self.out_log, self.err_log)

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # The leap instructions file has this shape:
        #   source <leaprc file>          (one line per force field)
        #   struct = sequence { NASN LEU TYR ... CSER }
        #   savepdb struct <output pdb>
        #   quit

        # Creating temporary folder & Leap configuration (instructions) file
        if self.container_path:
            # The file is written into the staging directory, while the
            # command refers to it through the container volume path.
            instructions_file = str(
                PurePath(self.stage_io_dict["unique_dir"]).joinpath("leap.in")
            )
            instructions_file_path = str(
                PurePath(self.container_volume_path).joinpath("leap.in")
            )
            self.tmp_folder = None
        else:
            self.tmp_folder = fu.create_unique_dir()
            instructions_file = str(PurePath(self.tmp_folder).joinpath("leap.in"))
            fu.log(f"Creating {self.tmp_folder} temporary folder", self.out_log)
            instructions_file_path = instructions_file

        # Forcefields loaded from input forcefield property
        leap_lines = [f"source {leaprc}\n" for leaprc in self.forcefield]
        leap_lines.append("struct = sequence {" + self.sequence + " } \n")
        leap_lines.append(
            "savepdb struct " + self.stage_io_dict["out"]["output_pdb_path"] + "\n"
        )
        leap_lines.append("quit \n")
        with open(instructions_file, "w") as leapin:
            leapin.writelines(leap_lines)

        # Command line
        self.cmd = [self.binary_path, "-f", instructions_file_path]

        # Run Biobb block
        self.run_biobb()

        # Copy files to host
        self.copy_to_host()

        # remove temporary folder(s); in container mode there is no local
        # temporary folder, so do not queue the literal string "None".
        if self.tmp_folder is not None:
            self.tmp_files.append(str(self.tmp_folder))
        self.tmp_files.append("leap.log")
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

228 

229 

def leap_build_linear_structure(
    output_pdb_path: str, properties: Optional[dict] = None, **kwargs
) -> int:
    """Create a :class:`LeapBuildLinearStructure` object and execute its
    :meth:`launch() <LeapBuildLinearStructure.launch>` method.

    Args:
        output_pdb_path (str): Path of the PDB file to be written.
        properties (Optional[dict]): Tool parameters forwarded to the class.
        **kwargs: Accepted but not forwarded.

    Returns:
        int: The value returned by ``launch()``.
    """

    return LeapBuildLinearStructure(
        output_pdb_path=output_pdb_path, properties=properties
    ).launch()


# Expose the class documentation on the function wrapper. This has to run at
# module level: placed after the ``return`` inside the function it is
# unreachable and the docstring is never copied.
leap_build_linear_structure.__doc__ = LeapBuildLinearStructure.__doc__

241 

242 

def main():
    """Command line entry point.

    Parses ``--config`` (optional) and ``--output_pdb_path`` (required), reads
    the properties through ``settings.ConfReader`` and calls
    ``leap_build_linear_structure`` with them.
    """
    cli = argparse.ArgumentParser(
        description="Building a linear (unfolded) 3D structure from an AA sequence.",
        formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999),
    )
    cli.add_argument("--config", required=False, help="Configuration file")

    # Arguments specific to this building block
    required_group = cli.add_argument_group("required arguments")
    required_group.add_argument(
        "--output_pdb_path",
        required=True,
        help="Linear (unfolded) 3D structure PDB file. Accepted formats: pdb.",
    )

    parsed = cli.parse_args()
    # A missing or empty --config value is passed on as None.
    config_path = parsed.config or None
    block_properties = settings.ConfReader(config=config_path).get_prop_dic()

    # Run the building block
    leap_build_linear_structure(
        output_pdb_path=parsed.output_pdb_path, properties=block_properties
    )


if __name__ == "__main__":
    main()