Coverage for biobb_dna/curvesplus/biobb_curves.py: 77%

99 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-07 09:06 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the Curves class and the command line interface.""" 

4import os 

5import zipfile 

6import argparse 

7import shutil 

8from pathlib import Path 

9from biobb_common.generic.biobb_object import BiobbObject 

10from biobb_common.configuration import settings 

11from biobb_common.tools import file_utils as fu 

12from biobb_common.tools.file_utils import launchlogger 

13 

14 

class Curves(BiobbObject):
    """
    | biobb_dna Curves
    | Wrapper for the Cur+ executable that is part of the Curves+ software suite.

    Args:
        input_struc_path (str): Trajectory or PDB input file. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/curvesplus/structure.stripped.trj>`_. Accepted formats: trj (edam:format_3910), pdb (edam:format_1476), netcdf (edam:format_3650), nc (edam:format_3650).
        input_top_path (str) (Optional): Topology file, needed along with .trj file (optional). File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/curvesplus/structure.stripped.top>`_. Accepted formats: top (edam:format_3881).
        output_cda_path (str): Filename for Curves+ output .cda file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/curves_trj_output.cda>`_. Accepted formats: cda (edam:format_2330).
        output_lis_path (str): Filename for Curves+ output .lis file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/curves_trj_output.lis>`_. Accepted formats: lis (edam:format_2330).
        output_zip_path (str) (Optional): Filename for .zip files containing Curves+ output that is not .cda or .lis files. File type: output. Accepted formats: zip (edam:format_3987).
        properties (dict):
            * **s1range** (*str*) - (None) Range of first strand. Must be specified in the form "start:end".
            * **s2range** (*str*) - (None) Range of second strand. Must be specified in the form "start:end".
            * **stdlib_path** (*str*) - ('standard') Path to Curves' standard library files for nucleotides. If not specified will look for 'standard' files in current directory.
            * **itst** (*int*) - (0) Iteration start index.
            * **itnd** (*int*) - (0) Iteration end index.
            * **itdel** (*int*) - (1) Iteration delimiter.
            * **ions** (*bool*) - (False) If True, helicoidal analysis of ions (or solvent molecules) around solute is carried out.
            * **test** (*bool*) - (False) If True, provide additional output in .lis file on fitting and axis generation.
            * **line** (*bool*) - (False) if True, find the best linear helical axis.
            * **fit** (*bool*) - (True) if True, fit a standard bases to the input coordinates (important for MD snapshots to avoid base distortions leading to noisy helical parameters).
            * **axfrm** (*bool*) - (False) if True, generates closely spaced helical axis frames as input for Canal and Canion.
            * **binary_path** (*str*) - (Cur+) Path to Curves+ executable, otherwise the program will look for Cur+ executable in the binaries folder.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_dna.curvesplus.biobb_curves import biobb_curves
            prop = {
                's1range': '1:12',
                's2range': '24:13',
            }
            biobb_curves(
                input_struc_path='/path/to/structure/file.trj',
                input_top_path='/path/to/topology/file.top',
                output_cda_path='/path/to/output/file.cda',
                output_lis_path='/path/to/output/file.lis',
                properties=prop)
    Info:
        * wrapped_software:
            * name: Curves
            * version: >=2.6
            * license: BSD 3-Clause
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl
    """

    def __init__(
            self, input_struc_path, output_lis_path,
            output_cda_path, output_zip_path=None,
            input_top_path=None, properties=None, **kwargs) -> None:
        """Store the input/output paths and the Curves+ specific properties."""
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; must be taken before any
        # additional local name is created in this method.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {
                'input_struc_path': input_struc_path,
                'input_top_path': input_top_path
            },
            'out': {
                'output_lis_path': output_lis_path,
                'output_cda_path': output_cda_path,
                'output_zip_path': output_zip_path
            }
        }

        # Properties specific for BB
        self.s1range = properties.get('s1range', None)
        self.binary_path = properties.get('binary_path', 'Cur+')
        self.stdlib_path = properties.get('stdlib_path', None)
        self.s2range = properties.get('s2range', None)
        self.itst = properties.get('itst', 0)
        self.itnd = properties.get('itnd', 0)
        self.itdel = properties.get('itdel', 1)
        # Logical flags default to their Fortran namelist spelling; Python
        # booleans supplied by the user are converted when the command is built.
        self.ions = properties.get('ions', '.f.')
        self.test = properties.get('test', '.f.')
        self.line = properties.get('line', '.f.')
        self.fit = properties.get('fit', '.t.')
        self.axfrm = properties.get('axfrm', '.f.')
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @staticmethod
    def _fortran_logical(value):
        """Return *value* spelled as a Fortran namelist logical.

        Python booleans become ``.t.`` / ``.f.``; any other value (for
        instance an already formatted ``'.t.'`` string) is returned unchanged.
        """
        if isinstance(value, bool):
            return ".t." if value else ".f."
        return value

    @staticmethod
    def _default_stdlib_path():
        """Locate the Curves+ standard library prefix when none was provided.

        Returns:
            Path: ``$CONDA_PREFIX/.curvesplus/standard`` when ``CONDA_PREFIX``
            is set, otherwise ``<current working directory>/standard``.

        Raises:
            FileNotFoundError: if ``CONDA_PREFIX`` is set but the
                ``.curvesplus`` directory is missing or does not contain
                exactly three ``standard_*.lib`` files.
        """
        conda_prefix = os.getenv("CONDA_PREFIX")
        if not conda_prefix:
            # CONDA_PREFIX undefined
            return Path.cwd() / "standard"

        curves_aux_path = Path(conda_prefix) / ".curvesplus"
        # check if .curvesplus directory is in $CONDA_PREFIX
        if not curves_aux_path.exists():
            raise FileNotFoundError(
                ".curvesplus directory not found in "
                f"{conda_prefix} !"
                "Please indicate where standard_*.lib files are "
                "located with the stdlib_path property.")
        if len(list(curves_aux_path.glob("standard_*.lib"))) != 3:
            raise FileNotFoundError(
                "One or all standard library files "
                f"missing from {curves_aux_path}! "
                "Check files standard_b.lib, "
                "standard_s.lib and standard_i.lib exist.")
        return curves_aux_path / "standard"

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`Curves <biobb_dna.curvesplus.biobb_curves.Curves>` object.

        Returns:
            int: 0 when the restart check short-circuits execution, otherwise
            the return code stored in ``self.return_code``.

        Raises:
            ValueError: if the ``s1range`` property was not provided.
            FileNotFoundError: if the standard library files cannot be located.
        """

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        if self.s1range is None:
            raise ValueError("property 's1range' must be specified!")
        if self.s2range is None:
            # compute s2range if not provided: the second strand is taken to
            # follow the first one and is listed in reverse order
            range1_end = int(self.s1range.split(":")[1])
            s2start = range1_end + 1
            s2end = 2 * range1_end
            self.s2range = f"{s2end}:{s2start}"

        # check standard library files location if not provided
        if self.stdlib_path is None:
            self.stdlib_path = self._default_stdlib_path()

        # Creating temporary folder
        self.tmp_folder = fu.create_unique_dir(prefix="curves_")
        fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log)

        # copy input files to temporary folder
        struc_path = self.io_dict['in']['input_struc_path']
        top_path = self.io_dict['in']['input_top_path']
        shutil.copy(struc_path, self.tmp_folder)
        tmp_struc_input = Path(struc_path).name
        tmp_top_input = None
        if top_path is not None:
            shutil.copy(top_path, self.tmp_folder)
            tmp_top_input = Path(top_path).name

        # change directory to temporary folder; the finally clause guarantees
        # the caller's working directory is restored even if execution fails
        original_directory = os.getcwd()
        os.chdir(self.tmp_folder)
        try:
            # create instructions
            instructions = [
                f"{self.binary_path} <<! ",
                "&inp",
                f"  file={tmp_struc_input},"]
            if tmp_top_input is not None:
                # add topology file if needed
                fu.log('Appending provided topology to command',
                       self.out_log, self.global_log)
                instructions.append(
                    f"  ftop={tmp_top_input},")

            instructions.extend([
                "  lis='curves_output',",
                f"  lib={self.stdlib_path},",
                f"  ions={self._fortran_logical(self.ions)},",
                f"  test={self._fortran_logical(self.test)},",
                f"  line={self._fortran_logical(self.line)},",
                f"  fit={self._fortran_logical(self.fit)},",
                f"  axfrm={self._fortran_logical(self.axfrm)},",
                f"  itst={self.itst},itnd={self.itnd},itdel={self.itdel},",
                "&end",
                "2 1 -1 0 0",
                f"{self.s1range}",
                f"{self.s2range}",
                "!"
            ])
            self.cmd = ["\n".join(instructions)]
            fu.log('Creating command line with instructions and required arguments',
                   self.out_log, self.global_log)

            # Run Biobb block
            self.run_biobb()
        finally:
            # change back to original directory
            os.chdir(original_directory)

        # create zipfile and write output inside
        output_zip_path = self.io_dict["out"]["output_zip_path"]
        if output_zip_path is not None:
            # context manager closes the archive even if a write fails
            with zipfile.ZipFile(Path(output_zip_path), "w") as zf:
                for curves_outfile in Path(self.tmp_folder).glob("curves_output*"):
                    if curves_outfile.suffix not in (".cda", ".lis"):
                        zf.write(
                            curves_outfile,
                            arcname=curves_outfile.name)

        # rename cda and lis files
        (Path(self.tmp_folder) / "curves_output.cda").rename(
            self.io_dict["out"]["output_cda_path"])
        (Path(self.tmp_folder) / "curves_output.lis").rename(
            self.io_dict["out"]["output_lis_path"])

        # Remove temporary file(s)
        self.tmp_files.extend([
            self.stage_io_dict.get("unique_dir"),
            self.tmp_folder
        ])
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

230 

231 

def biobb_curves(
        input_struc_path: str, output_lis_path: str, output_cda_path: str,
        input_top_path: str = None, output_zip_path: str = None,
        properties: dict = None, **kwargs) -> int:
    """Create :class:`Curves <biobb_dna.curvesplus.biobb_curves.Curves>` class and
    execute the :meth:`launch() <biobb_dna.curvesplus.biobb_curves.Curves.launch>` method.

    All arguments are forwarded unchanged to the ``Curves`` constructor; the
    value returned by ``launch()`` is returned to the caller.
    """

    # Gather the file arguments first so the constructor call stays compact.
    io_paths = {
        'input_struc_path': input_struc_path,
        'input_top_path': input_top_path,
        'output_lis_path': output_lis_path,
        'output_cda_path': output_cda_path,
        'output_zip_path': output_zip_path,
    }
    building_block = Curves(properties=properties, **io_paths, **kwargs)
    return building_block.launch()

246 

247 

def main():
    """Command line execution of this building block. Please check the command line documentation.

    Parses the command line, reads the optional configuration file into a
    properties dictionary and forwards everything to ``biobb_curves``.
    """
    parser = argparse.ArgumentParser(description='Execute Cur+ from the Curves+ software suite.',
                                     formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999))
    parser.add_argument('--config', required=False, help='Configuration file')

    # Help texts list the same accepted formats as the Curves class docstring.
    required_args = parser.add_argument_group('required arguments')
    required_args.add_argument('--input_struc_path', required=True,
                               help='Trajectory or PDB input file. Accepted formats: trj, pdb, netcdf, nc.')
    required_args.add_argument('--output_cda_path', required=True,
                               help='Filename to give to output .cda file. Accepted formats: cda.')
    required_args.add_argument('--output_lis_path', required=True,
                               help='Filename to give to output .lis file. Accepted formats: lis.')
    parser.add_argument('--input_top_path', required=False,
                        help='Topology file, needed along with .trj file (optional). Accepted formats: top.')
    parser.add_argument('--output_zip_path', required=False,
                        help='Filename to give to output files (except .cda and .lis files). Accepted formats: zip.')

    args = parser.parse_args()
    # An absent --config is replaced by an empty JSON object string.
    args.config = args.config or "{}"
    properties = settings.ConfReader(config=args.config).get_prop_dic()

    biobb_curves(
        input_struc_path=args.input_struc_path,
        input_top_path=args.input_top_path,
        output_cda_path=args.output_cda_path,
        output_lis_path=args.output_lis_path,
        output_zip_path=args.output_zip_path,
        properties=properties)

277 

278 

# Run the command line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()