Coverage for biobb_dna/curvesplus/biobb_canion.py: 81%

85 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-07 09:06 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the Canion class and the command line interface.""" 

4import os 

5import zipfile 

6import argparse 

7import shutil 

8from pathlib import Path 

9from biobb_common.generic.biobb_object import BiobbObject 

10from biobb_common.configuration import settings 

11from biobb_common.tools import file_utils as fu 

12from biobb_common.tools.file_utils import launchlogger 

13 

14 

class Canion(BiobbObject):
    """
    | biobb_dna Canion
    | Wrapper for the Canion executable that is part of the Curves+ software suite.

    Args:
        input_cdi_path (str): Trajectory input file. File type: input. `Sample file <https://mmb.irbbarcelona.org/biobb-dev/biobb-api/public/samples/THGA_K.cdi>`_. Accepted formats: cdi (edam:format_2330).
        input_afr_path (str): Helical axis frames corresponding to the input conformation to be analyzed. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/curvesplus/THGA.afr>`_. Accepted formats: afr (edam:format_2330).
        input_avg_struc_path (str): Average DNA conformation. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/curvesplus/THGA_avg.pdb>`_. Accepted formats: pdb (edam:format_1476).
        output_zip_path (str) (Optional): Filename for .zip files containing Canion output files. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/canion_output.zip>`_. Accepted formats: zip (edam:format_3987).
        properties (dict):
            * **bases** (*str*) - (None) Sequence of bases to be analyzed (default is blank, meaning no specified sequence).
            * **type** (*str*) - ('*') Ions (or atoms) to be analyzed. Options are 'Na+', 'K', 'K+', 'Cl', 'Cl-', 'CL', 'P', 'C1*', 'NH1', 'NH2', 'NZ', '1' for all cations, '-1' for all anions, '0' for neutral species or '*' for all available data.
            * **dlow** (*float*) - (0) Select starting segment of the oligomer to analyze. If both dhig and dlow are 0, entire oligomer is analyzed.
            * **dhig** (*float*) - (0) Select ending segment of the oligomer to analyze, being the maximum value the total number of base pairs in the oligomer. If both dhig and dlow are 0, entire oligomer is analyzed.
            * **rlow** (*float*) - (0) Minimal distances from the helical axis taken into account in the analysis.
            * **rhig** (*float*) - (0) Maximal distances from the helical axis taken into account in the analysis.
            * **alow** (*float*) - (0) Minimal angle range to analyze.
            * **ahig** (*float*) - (360) Maximal angle range to analyze.
            * **itst** (*int*) - (None) Number of first snapshot to be analyzed.
            * **itnd** (*int*) - (None) Number of last snapshot to be analyzed.
            * **itdel** (*int*) - (None) Spacing between analyzed snapshots.
            * **rmsf** (*bool*) - (False) If set to True uses the combination of the helical ion parameters and an average helical axis to map the ions into Cartesian space and then calculates their average position (pdb output) and their root mean square fluctuation values (rmsf output). A single pass rmsf algorithm to make this calculation possible with a single read of the trajectory file. This option is generally used for solute atoms and not for solvent molecules or ions.
            * **circ** (*bool*) - (False) If set to True, minicircles are analyzed.
            * **binary_path** (*str*) - (Canion) Path to Canion executable, otherwise the program will look for Canion executable in the binaries folder.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_dna.curvesplus.biobb_canion import biobb_canion
            prop = {
                'type': 'K+',
                'bases': 'G'
            }
            biobb_canion(
                input_cdi_path='/path/to/input.cdi',
                input_afr_path='/path/to/input.afr',
                input_avg_struc_path='/path/to/input.pdb',
                output_zip_path='/path/to/output.zip',
                properties=prop)
    Info:
        * wrapped_software:
            * name: Canion
            * version: >=2.6
            * license: BSD 3-Clause
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl
    """

    def __init__(
            self, input_cdi_path, input_afr_path, input_avg_struc_path,
            output_zip_path=None, properties=None, **kwargs) -> None:
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {
                'input_cdi_path': input_cdi_path,
                'input_afr_path': input_afr_path,
                'input_avg_struc_path': input_avg_struc_path,
            },
            'out': {
                'output_zip_path': output_zip_path
            }
        }

        # Properties specific for BB
        self.binary_path = properties.get('binary_path', 'Canion')
        self.bases = properties.get('bases', None)
        self.type = properties.get('type', '*')
        self.dlow = properties.get('dlow', 0)
        self.dhig = properties.get('dhig', 0)
        self.rlow = properties.get('rlow', 0)
        self.rhig = properties.get('rhig', 0)
        self.alow = properties.get('alow', 0)
        self.ahig = properties.get('ahig', 360)
        self.itst = properties.get('itst', None)
        self.itnd = properties.get('itnd', None)
        self.itdel = properties.get('itdel', None)
        # '.f.' is the Fortran spelling of False; booleans are accepted too
        # (see _is_enabled).
        self.rmsf = properties.get('rmsf', '.f.')
        self.circ = properties.get('circ', '.f.')
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @staticmethod
    def _is_enabled(flag) -> bool:
        """Return True when ``flag`` is a truthy Python value or a Fortran-style true string."""
        if isinstance(flag, str):
            return flag.strip().lower() in ('.t.', '.true.', 't', 'true')
        return bool(flag)

    def _build_instructions(self, cdi_stem, afr_stem, struc_stem) -> list:
        """Build the here-document lines (shell header + ``&inp`` namelist) fed to Canion.

        Args:
            cdi_stem (str): Name of the .cdi input file without its extension.
            afr_stem (str): Name of the .afr input file without its extension.
            struc_stem (str): Name of the average structure file without its extension.

        Returns:
            list: One string per line of the command, ready to be joined with newlines.
        """
        instructions = [
            f"{self.binary_path} <<! ",
            "&inp",
            " lis=canion_output,",
            f" dat={cdi_stem},",
            f" axfrm={afr_stem},",
            f" solute={struc_stem},",
            f" type={self.type},",
            f" dlow={self.dlow},",
            f" dhig={self.dhig},",
            f" rlow={self.rlow},",
            f" rhig={self.rhig},",
            f" alow={self.alow},",
            f" ahig={self.ahig},"]

        # Snapshot selection and boolean switches are only written when the
        # user set them, so the default command is left untouched.
        # NOTE(review): namelist keys are assumed to match the property names
        # documented in the class docstring - confirm against the Curves+ manual.
        for key in ('itst', 'itnd', 'itdel'):
            value = getattr(self, key)
            if value is not None:
                instructions.append(f" {key}={value},")
        for key in ('rmsf', 'circ'):
            if self._is_enabled(getattr(self, key)):
                instructions.append(f" {key}=.t.,")

        if self.bases is not None:
            # restrict the analysis to the requested base sequence
            fu.log('Appending sequence of bases to be searched to command',
                   self.out_log, self.global_log)
            instructions.append(f" seq={self.bases},")
        instructions.append("&end")
        instructions.append("!")
        return instructions

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`Canion <biobb_dna.curvesplus.biobb_canion.Canion>` object."""

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        ion_type_options = [
            'Na+',
            'K',
            'K+',
            'Cl',
            'Cl-',
            'CL',
            'P',
            'C1*',
            'NH1',
            'NH2',
            'NZ',
            '1',
            '-1',
            '0',
            '*'
        ]
        if self.type not in ion_type_options:
            raise ValueError(("Invalid value for property type! "
                              f"Option include: {ion_type_options}"))

        # Creating temporary folder
        self.tmp_folder = fu.create_unique_dir(prefix="canion_")
        fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log)

        # copy input files to temporary folder; the command refers to them
        # by base name without extension
        stems = []
        for key in ('input_cdi_path', 'input_afr_path', 'input_avg_struc_path'):
            source_path = self.io_dict['in'][key]
            shutil.copy(source_path, self.tmp_folder)
            stems.append(Path(source_path).stem)

        # change directory to temporary folder; always return to the original
        # directory, even if building or running the command fails
        original_directory = os.getcwd()
        os.chdir(self.tmp_folder)
        try:
            # create instructions
            self.cmd = ["\n".join(self._build_instructions(*stems))]

            fu.log('Creating command line with instructions and required arguments',
                   self.out_log, self.global_log)
            # Run Biobb block
            self.run_biobb()
        finally:
            # change back to original directory
            os.chdir(original_directory)

        # create zipfile and write output inside; the context manager closes
        # the archive even if adding a member fails
        with zipfile.ZipFile(
                Path(self.io_dict["out"]["output_zip_path"]), "w") as zf:
            for curves_outfile in Path(self.tmp_folder).glob("canion_output*"):
                zf.write(curves_outfile, arcname=curves_outfile.name)

        # Remove temporary file(s)
        self.tmp_files.extend([
            self.stage_io_dict.get("unique_dir"),
            self.tmp_folder
        ])
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

205 

206 

def biobb_canion(
        input_cdi_path: str, input_afr_path: str, input_avg_struc_path: str,
        output_zip_path: str = None, properties: dict = None, **kwargs) -> int:
    """Build a :class:`Canion <biobb_dna.curvesplus.biobb_canion.Canion>` object from the
    given arguments and return the result of its
    :meth:`launch() <biobb_dna.curvesplus.biobb_canion.Canion.launch>` method."""

    # Construct the building block first, then run it.
    canion_block = Canion(
        input_cdi_path=input_cdi_path,
        input_afr_path=input_afr_path,
        input_avg_struc_path=input_avg_struc_path,
        output_zip_path=output_zip_path,
        properties=properties,
        **kwargs)
    return canion_block.launch()

219 

220 

def main():
    """Command line execution of this building block. Please check the command line documentation.

    Parses ``--config`` plus the input/output path options, reads the
    properties from the configuration through ``settings.ConfReader`` and
    calls :func:`biobb_canion` with them.
    """
    parser = argparse.ArgumentParser(
        description='Execute Canion from the Curves+ software suite.',
        formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999))
    parser.add_argument('--config', required=False, help='Configuration file')

    required_args = parser.add_argument_group('required arguments')
    required_args.add_argument('--input_cdi_path', required=True,
                               help='Ion position data file. Accepted formats: cdi.')
    required_args.add_argument('--input_afr_path', required=True,
                               help='Helical axis frames data. Accepted formats: afr.')
    required_args.add_argument('--input_avg_struc_path', required=True,
                               help='Average DNA conformation file. Accepted formats: pdb.')
    parser.add_argument('--output_zip_path', required=False,
                        help='Filename to give to output files. Accepted formats: zip.')

    args = parser.parse_args()
    # An absent --config is replaced by an empty JSON object string.
    args.config = args.config or "{}"
    properties = settings.ConfReader(config=args.config).get_prop_dic()

    biobb_canion(
        input_cdi_path=args.input_cdi_path,
        input_afr_path=args.input_afr_path,
        input_avg_struc_path=args.input_avg_struc_path,
        output_zip_path=args.output_zip_path,
        properties=properties)

247 

248 

# Run the command line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()