Coverage for biobb_amber / leap / leap_build_linear_structure.py: 75%

87 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-15 15:57 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the LeapBuildLinearStructure class and the command line interface.""" 

4 

5import os 

6from pathlib import PurePath 

7from typing import List, Optional 

8 

9from biobb_common.generic.biobb_object import BiobbObject 

10from biobb_common.tools import file_utils as fu 

11from biobb_common.tools.file_utils import launchlogger 

12 

13from biobb_amber.leap.common import _from_string_to_list, check_output_path 

14 

15 

class LeapBuildLinearStructure(BiobbObject):
    """
    | biobb_amber.leap.leap_build_linear_structure LeapBuildLinearStructure
    | Wrapper of the `AmberTools (AMBER MD Package) leap tool <https://ambermd.org/AmberTools.php>`_ module.
    | Builds a linear (unfolded) 3D structure from an AA sequence using tLeap tool from the AmberTools MD package.

    Args:
        output_pdb_path (str): Linear (unfolded) 3D structure PDB file. File type: output. `Sample file <https://github.com/bioexcel/biobb_amber/raw/master/biobb_amber/test/reference/leap/structure.pdb>`_. Accepted formats: pdb (edam:format_1476).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **sequence** (*str*) - ("ALA GLY SER PRO ARG ALA PRO GLY") Aminoacid sequence to convert to a linear 3D structure. Aminoacids should be written in 3-letter code, with a blank space between them.
            * **forcefield** (*list*) - (["protein.ff14SB","DNA.bsc1","gaff"]) Forcefields to be used for the structure generation. Each item should be either a path to a leaprc file or a string with the leaprc file name if the force field is included with Amber (e.g. "/path/to/leaprc.protein.ff14SB" or "protein.ff14SB"). Default values: ["protein.ff14SB","DNA.bsc1","gaff"].
            * **build_library** (*bool*) - (False) Generate AMBER lib file for the structure.
            * **binary_path** (*str*) - ("tleap") Path to the tleap executable binary.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
            * **container_path** (*str*) - (None) Container path definition.
            * **container_image** (*str*) - ('afandiadib/ambertools:serial') Container image definition.
            * **container_volume_path** (*str*) - ('/tmp') Container volume path definition.
            * **container_working_dir** (*str*) - (None) Container working directory definition.
            * **container_user_id** (*str*) - (None) Container user_id definition.
            * **container_shell_path** (*str*) - ('/bin/bash') Path to default shell inside the container.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_amber.leap.leap_build_linear_structure import leap_build_linear_structure
            prop = {
                'sequence' : 'ALA PRO SER ARG LYS ASP GLU GLY GLY ALA',
                'build_library': False,
                'forcefield': ['protein.ff14SB']
            }
            leap_build_linear_structure(output_pdb_path='/path/to/newStructure.pdb',
                                        properties=prop)


    Info:
        * wrapped_software:
            * name: AmberTools tLeap
            * version: >20.9
            * license: LGPL 2.1
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(self, output_pdb_path, properties, **kwargs):
        """Store the output path and read the block-specific properties.

        Args:
            output_pdb_path (str): Path of the PDB file to be written.
            properties (dict): Tool parameters; ``None`` (or any falsy value)
                is treated as an empty dictionary.
            **kwargs: Extra keyword arguments; only captured in
                ``locals_var_dict`` by this constructor.

        Raises:
            ValueError: If any requested (or default) force field cannot be
                resolved to a leaprc file (see :meth:`find_leaprc_paths`).
        """
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor's local names; taken before any other
        # local variable is created so that its content does not change.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {"in": {}, "out": {"output_pdb_path": output_pdb_path}}

        # Set default forcefields.
        # AMBERHOME is read with an empty-string fallback (the same way
        # find_leaprc_paths does) so that an unset variable no longer makes
        # os.path.join fail with a TypeError before the user-provided
        # force fields are even looked at.
        leaprc_dir = os.path.join(
            os.environ.get("AMBERHOME", ""), "dat", "leap", "cmd"
        )
        default_forcefields = [
            os.path.join(leaprc_dir, f"leaprc.{name}")
            for name in ("protein.ff14SB", "DNA.bsc1", "gaff")
        ]

        # Properties specific for BB
        self.properties = properties
        self.sequence = properties.get("sequence", "ALA GLY SER PRO ARG ALA PRO GLY")
        self.forcefield = _from_string_to_list(
            properties.get("forcefield", default_forcefields)
        )
        # Find the paths of the leaprc files if only the force field names are provided
        self.forcefield = self.find_leaprc_paths(self.forcefield)
        # NOTE(review): build_library is stored here but is not read by
        # launch() in this class - confirm whether it is used elsewhere.
        self.build_library = properties.get("build_library", False)
        self.binary_path = properties.get("binary_path", "tleap")

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    def check_data_params(self, out_log, err_log):
        """Checks input/output paths correctness.

        The output PDB path stored in ``io_dict`` is replaced by the value
        returned by ``check_output_path``.

        Args:
            out_log: Logger handed over to ``check_output_path``.
            err_log: Error logger; not used by this method.
        """

        # Check output(s)
        self.io_dict["out"]["output_pdb_path"] = check_output_path(
            self.io_dict["out"]["output_pdb_path"],
            "output_pdb_path",
            False,
            out_log,
            self.__class__.__name__,
        )

    def find_leaprc_paths(self, forcefields: List[str]) -> List[str]:
        """
        Find the leaprc paths for the force fields provided.

        For each item in the forcefields list, the function checks if the str is a path to an existing file.
        If not, it tries to find the file in the $AMBERHOME/dat/leap/cmd/ directory or the $AMBERHOME/dat/leap/cmd/oldff/
        directory with and without the leaprc prefix. The first existing candidate wins.

        Args:
            forcefields (List[str]): List of force fields to find the leaprc files for.

        Returns:
            List[str]: List of leaprc file paths, in the same order as the input.

        Raises:
            ValueError: If none of the candidate locations of a force field is an existing file.
        """

        # An unset AMBERHOME yields relative candidate paths instead of an error.
        leaprc_dir = os.path.join(
            os.environ.get("AMBERHOME", ""), "dat", "leap", "cmd"
        )

        leaprc_paths = []

        for forcefield in forcefields:

            # Candidate locations, in lookup order
            candidates = (
                # The force field as given (path to a leaprc file)
                forcefield,
                # leaprc directory, with the leaprc prefix
                os.path.join(leaprc_dir, f"leaprc.{forcefield}"),
                # oldff directory, with the leaprc prefix
                os.path.join(leaprc_dir, "oldff", f"leaprc.{forcefield}"),
                # leaprc directory, without the leaprc prefix
                os.path.join(leaprc_dir, forcefield),
                # oldff directory, without the leaprc prefix
                os.path.join(leaprc_dir, "oldff", forcefield),
            )

            # isfile (rather than exists) so that a directory - e.g. the cmd/
            # directory itself when the force field is an empty string - is
            # never accepted as a leaprc file.
            leaprc_path = next(
                (path for path in candidates if os.path.isfile(path)), None
            )

            if leaprc_path is None:
                raise ValueError(f"Force field {forcefield} not found. Check the $AMBERHOME/dat/leap/cmd/ directory for available force fields or provide the path to an existing leaprc file.")

            leaprc_paths.append(leaprc_path)

        return leaprc_paths

    @launchlogger
    def launch(self) -> int:
        """Launches the execution of the LeapBuildLinearStructure module.

        Returns:
            int: ``0`` when ``check_restart()`` is true (nothing is executed),
            otherwise ``self.return_code`` after running the command.
        """

        # check input/output paths and parameters
        self.check_data_params(self.out_log, self.err_log)

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # create .in file
        # TC5b = sequence { NASN LEU TYR ILE GLN TRP LEU LYS ASP GLY GLY PRO SER SER GLY ARG PRO PRO PRO CSER }
        # savepdb TC5b TC5b_linear.pdb
        # quit

        # Creating temporary folder & Leap configuration (instructions) file
        if self.container_path:
            # The file is written into the staging directory, while the
            # command line refers to it through the container volume path.
            instructions_file = str(
                PurePath(self.stage_io_dict["unique_dir"]).joinpath("leap.in")
            )
            instructions_file_path = str(
                PurePath(self.container_volume_path).joinpath("leap.in")
            )
            self.tmp_folder = None
        else:
            self.tmp_folder = fu.create_unique_dir()
            instructions_file = str(PurePath(self.tmp_folder).joinpath("leap.in"))
            fu.log("Creating %s temporary folder" % self.tmp_folder, self.out_log)
            instructions_file_path = instructions_file

        with open(instructions_file, "w") as leapin:
            # Forcefields loaded from input forcefield property
            for leaprc in self.forcefield:
                leapin.write("source {}\n".format(leaprc))

            leapin.write("struct = sequence {" + self.sequence + " } \n")
            leapin.write(
                "savepdb struct " + self.stage_io_dict["out"]["output_pdb_path"] + "\n"
            )
            leapin.write("quit \n")

        # Command line
        self.cmd = [self.binary_path, "-f", instructions_file_path]

        # Run Biobb block
        self.run_biobb()

        # Copy files to host
        self.copy_to_host()

        # remove temporary folder(s)
        # In the container branch no temporary folder is created; skip it so
        # the literal path "None" is never queued for removal.
        if self.tmp_folder is not None:
            self.tmp_files.append(str(self.tmp_folder))
        self.tmp_files.append("leap.log")
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

223 

224 

def leap_build_linear_structure(
    output_pdb_path: str, properties: Optional[dict] = None, **kwargs
) -> int:
    """Functional entry point: build a :class:`LeapBuildLinearStructure <leap.leap_build_linear_structure.LeapBuildLinearStructure>`
    object and run its :meth:`launch() <leap.leap_build_linear_structure.LeapBuildLinearStructure.launch>` method,
    returning whatever ``launch()`` returns."""
    # The extra keyword arguments are handed over as one ``kwargs`` keyword
    # holding the dictionary (not unpacked).
    builder = LeapBuildLinearStructure(
        output_pdb_path=output_pdb_path,
        properties=properties,
        kwargs=kwargs,
    )
    return builder.launch()

231 

232 

# The functional wrapper exposes the same documentation as the class.
leap_build_linear_structure.__doc__ = LeapBuildLinearStructure.__doc__

# Command line entry point, obtained from the class' get_main() helper.
main = LeapBuildLinearStructure.get_main(
    leap_build_linear_structure,
    "Building a linear (unfolded) 3D structure from an AA sequence.",
)

if __name__ == "__main__":
    main()