Coverage for biobb_dna/curvesplus/biobb_canion.py: 79%

81 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-01-28 10:36 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the Canion class and the command line interface.""" 

4import os 

5import zipfile 

6import argparse 

7from typing import Optional 

8from pathlib import Path 

9from biobb_common.generic.biobb_object import BiobbObject 

10from biobb_common.configuration import settings 

11from biobb_common.tools import file_utils as fu 

12from biobb_common.tools.file_utils import launchlogger 

13 

14 

class Canion(BiobbObject):
    """
    | biobb_dna Canion
    | Wrapper for the Canion executable that is part of the Curves+ software suite.
    | Analyzes the trajectory of ions around a DNA molecule.

    Args:
        input_cdi_path (str): Trajectory input file. File type: input. `Sample file <https://mmb.irbbarcelona.org/biobb-dev/biobb-api/public/samples/THGA_K.cdi>`_. Accepted formats: cdi (edam:format_2330).
        input_afr_path (str): Helical axis frames corresponding to the input conformation to be analyzed. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/curvesplus/THGA.afr>`_. Accepted formats: afr (edam:format_2330).
        input_avg_struc_path (str): Average DNA conformation. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/curvesplus/THGA_avg.pdb>`_. Accepted formats: pdb (edam:format_1476).
        output_zip_path (str): Filename for .zip files containing Canion output files. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/canion_output.zip>`_. Accepted formats: zip (edam:format_3987).
        properties (dict):
            * **bases** (*str*) - (None) Sequence of bases to be analyzed (default is blank, meaning no specified sequence).
            * **type** (*str*) - ('*') Ions (or atoms) to be analyzed. Options are 'Na+', 'K', 'K+', 'Cl', 'Cl-', 'CL', 'P', 'C1*', 'NH1', 'NH2', 'NZ', '1' for all cations, '-1' for all anions, '0' for neutral species or '*' for all available data.
            * **dlow** (*float*) - (0) Select starting segment of the oligomer to analyze. If both dhig and dlow are 0, entire oligomer is analyzed.
            * **dhig** (*float*) - (0) Select ending segment of the oligomer to analyze, being the maximum value the total number of base pairs in the oligomer. If both dhig and dlow are 0, entire oligomer is analyzed.
            * **rlow** (*float*) - (0) Minimal distances from the helical axis taken into account in the analysis.
            * **rhig** (*float*) - (0) Maximal distances from the helical axis taken into account in the analysis.
            * **alow** (*float*) - (0) Minimal angle range to analyze.
            * **ahig** (*float*) - (360) Maximal angle range to analyze.
            * **itst** (*int*) - (0) Number of first snapshot to be analyzed.
            * **itnd** (*int*) - (0) Number of last snapshot to be analyzed.
            * **itdel** (*int*) - (1) Spacing between analyzed snapshots.
            * **rmsf** (*bool*) - (False) If set to True uses the combination of the helical ion parameters and an average helical axis to map the ions into Cartesian space and then calculates their average position (pdb output) and their root mean square fluctuation values (rmsf output). A single pass rmsf algorithm to make this calculation possible with a single read of the trajectory file. This option is generally used for solute atoms and not for solvent molecules or ions.
            * **circ** (*bool*) - (False) If set to True, minicircles are analyzed.
            * **binary_path** (*str*) - (Canion) Path to Canion executable, otherwise the program will look for Canion executable in the binaries folder.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_dna.curvesplus.biobb_canion import biobb_canion
            prop = {
                'type': 'K+',
                'bases': 'G'
            }
            biobb_canion(
                input_cdi_path='/path/to/input.cdi',
                input_afr_path='/path/to/input.afr',
                input_avg_struc_path='/path/to/input.pdb',
                output_zip_path='/path/to/output.zip',
                properties=prop)
    Info:
        * wrapped_software:
            * name: Canion
            * version: >=2.6
            * license: BSD 3-Clause
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl
    """

    def __init__(
            self, input_cdi_path, input_afr_path, input_avg_struc_path,
            output_zip_path=None, properties=None, **kwargs) -> None:
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; must be taken before any
        # other local name is created so that it only contains the arguments.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {
                'input_cdi_path': input_cdi_path,
                'input_afr_path': input_afr_path,
                'input_avg_struc_path': input_avg_struc_path,
            },
            'out': {
                'output_zip_path': output_zip_path
            }
        }

        # Properties specific for BB
        self.binary_path = properties.get('binary_path', 'Canion')
        self.bases = properties.get('bases', None)
        self.type = properties.get('type', '*')
        self.dlow = properties.get('dlow', 0)
        self.dhig = properties.get('dhig', 0)
        self.rlow = properties.get('rlow', 0)
        self.rhig = properties.get('rhig', 0)
        self.alow = properties.get('alow', 0)
        self.ahig = properties.get('ahig', 360)
        self.itst = properties.get('itst', 0)
        self.itnd = properties.get('itnd', 0)
        self.itdel = properties.get('itdel', 1)
        # Booleans are stored as the '.t.' / '.f.' literals that are written
        # verbatim into the Canion input block built in launch().
        self.rmsf = ".t." if properties.get('rmsf', False) else ".f."
        self.circ = ".t." if properties.get('circ', False) else ".f."
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`Canion <biobb_dna.curvesplus.biobb_canion.Canion>` object.

        Returns:
            int: 0 when ``check_restart()`` is true (nothing is executed), otherwise ``self.return_code``.

        Raises:
            ValueError: If the ``type`` property is not one of the supported options.
        """

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        ion_type_options = [
            'Na+',
            'K',
            'K+',
            'Cl',
            'Cl-',
            'CL',
            'P',
            'C1*',
            'NH1',
            'NH2',
            'NZ',
            '1',
            '-1',
            '0',
            '*'
        ]
        if self.type not in ion_type_options:
            raise ValueError(("Invalid value for property type! "
                              f"Option include: {ion_type_options}"))

        # Root names (file name without extension) of the staged inputs.
        # Path.stem is used instead of slicing off four characters so the
        # result does not depend on the extension being three letters long.
        input_cdi_root = Path(self.stage_io_dict['in']['input_cdi_path']).stem
        input_afr_root = Path(self.stage_io_dict['in']['input_afr_path']).stem
        input_avg_struc_root = Path(self.stage_io_dict['in']['input_avg_struc_path']).stem

        # change directory to temporary folder
        original_directory = os.getcwd()
        os.chdir(self.stage_io_dict.get("unique_dir", ""))
        try:
            # create instructions: the input block is fed to the executable
            # through a shell here-document delimited by '!'
            instructions = [
                f"{self.binary_path} <<! ",
                "&inp",
                "  lis=canion_output,",
                f"  dat={input_cdi_root},",
                f"  axfrm={input_afr_root},",
                f"  solute={input_avg_struc_root},",
                f"  type={self.type},",
                f"  dlow={self.dlow},",
                f"  dhig={self.dhig},",
                f"  rlow={self.rlow},",
                f"  rhig={self.rhig},",
                f"  alow={self.alow},",
                f"  ahig={self.ahig},",
                f"  itst={self.itst},",
                f"  itnd={self.itnd},",
                f"  itdel={self.itdel},",
                f"  rmsf={self.rmsf},",
                f"  circ={self.circ},"]
            if self.bases is not None:
                # restrict the analysis to the requested base sequence
                fu.log('Appending sequence of bases to be searched to command',
                       self.out_log, self.global_log)
                instructions.append(f"  seq={self.bases},")
            instructions.append("&end")
            instructions.append("!")
            self.cmd = ["\n".join(instructions)]

            fu.log('Creating command line with instructions and required arguments',
                   self.out_log, self.global_log)
            # Run Biobb block
            self.run_biobb()
        finally:
            # change back to original directory, even if the run failed,
            # so the caller's working directory is never left modified
            os.chdir(original_directory)

        # create zipfile and write output inside; the context manager closes
        # the archive even if adding one of the files raises
        with zipfile.ZipFile(
                Path(self.stage_io_dict["out"]["output_zip_path"]),
                "w") as zf:
            for curves_outfile in Path(self.stage_io_dict.get("unique_dir", "")).glob("canion_output*"):
                # Skip zip archives (e.g. the output archive itself when it
                # matches the glob). An exact comparison is required here:
                # `suffix in (".zip")` is a substring test on a plain string
                # and would also drop files with an empty suffix.
                if curves_outfile.suffix != ".zip":
                    zf.write(curves_outfile, arcname=curves_outfile.name)

        # Copy files to host
        self.copy_to_host()

        # Remove temporary file(s)
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

207 

208 

def biobb_canion(
        input_cdi_path: str, input_afr_path: str, input_avg_struc_path: str,
        output_zip_path: Optional[str] = None, properties: Optional[dict] = None, **kwargs) -> int:
    """Create :class:`Canion <biobb_dna.curvesplus.biobb_canion.Canion>` class and
    execute the :meth:`launch() <biobb_dna.curvesplus.biobb_canion.Canion.launch>` method."""

    return Canion(
        input_cdi_path=input_cdi_path,
        input_afr_path=input_afr_path,
        input_avg_struc_path=input_avg_struc_path,
        output_zip_path=output_zip_path,
        properties=properties, **kwargs).launch()


# Expose the full Canion documentation on the functional wrapper.
# This assignment has to live at module level: when indented inside the
# function, after its ``return``, it is unreachable and never executes.
biobb_canion.__doc__ = Canion.__doc__

223 

224 

def main():
    """Command line execution of this building block. Please check the command line documentation.

    Reads the three mandatory input paths, the optional output path and an
    optional ``--config`` value from the command line, turns the
    configuration into a properties dictionary and runs ``biobb_canion``.
    """

    def _wide_formatter(prog):
        # Very large width so argparse never wraps the help text.
        return argparse.RawTextHelpFormatter(prog, width=99999)

    cli = argparse.ArgumentParser(
        description='Execute Canion form the Curves+ software suite.',
        formatter_class=_wide_formatter)
    cli.add_argument('--config', required=False, help='Configuration file')

    # Mandatory inputs are registered in a dedicated group of the help output.
    mandatory = cli.add_argument_group('required arguments')
    mandatory_options = (
        ('--input_cdi_path', 'Ion position data file. Accepted formats: cdi.'),
        ('--input_afr_path', 'Helical axis frames data. Accepted formats: afr.'),
        ('--input_avg_struc_path', 'Average DNA conformation fike file. Accepted formats: pdb.'),
    )
    for flag, help_text in mandatory_options:
        mandatory.add_argument(flag, required=True, help=help_text)

    cli.add_argument(
        '--output_zip_path',
        required=False,
        help='Filename to give to output files. Accepted formats: zip.')

    parsed = cli.parse_args()
    # Fall back to an empty JSON object when no configuration is supplied.
    parsed.config = parsed.config or "{}"
    properties = settings.ConfReader(config=parsed.config).get_prop_dic()

    biobb_canion(
        input_cdi_path=parsed.input_cdi_path,
        input_afr_path=parsed.input_afr_path,
        input_avg_struc_path=parsed.input_avg_struc_path,
        output_zip_path=parsed.output_zip_path,
        properties=properties)

251 

252 

# Run the command line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()