Coverage for biobb_dna / curvesplus / biobb_canion.py: 92%

72 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-22 14:18 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the Canion class and the command line interface.""" 

4import os 

5import zipfile 

6from typing import Optional 

7from pathlib import Path 

8from biobb_common.generic.biobb_object import BiobbObject 

9from biobb_common.tools import file_utils as fu 

10from biobb_common.tools.file_utils import launchlogger 

11 

12 

class Canion(BiobbObject):
    """
    | biobb_dna Canion
    | Wrapper for the Canion executable that is part of the Curves+ software suite.
    | Analyzes the trajectory of ions around a DNA molecule.

    Args:
        input_cdi_path (str): Trajectory input file. File type: input. `Sample file <https://github.com/bioexcel/biobb_dna/releases/download/assets/THGA_K.cdi>`_. Accepted formats: cdi (edam:format_2330).
        input_afr_path (str): Helical axis frames corresponding to the input conformation to be analyzed. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/curvesplus/THGA.afr>`_. Accepted formats: afr (edam:format_2330).
        input_avg_struc_path (str): Average DNA conformation. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/curvesplus/THGA_avg.pdb>`_. Accepted formats: pdb (edam:format_1476).
        output_zip_path (str): Filename for .zip files containing Canion output files. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/canion_output.zip>`_. Accepted formats: zip (edam:format_3987).
        properties (dict):
            * **bases** (*str*) - (None) Sequence of bases to be analyzed (default is blank, meaning no specified sequence).
            * **type** (*str*) - ('*') Ions (or atoms) to be analyzed. Options are 'Na+', 'K', 'K+', 'Cl', 'Cl-', 'CL', 'P', 'C1*', 'NH1', 'NH2', 'NZ', '1' for all cations, '-1' for all anions, '0' for neutral species or '*' for all available data.
            * **dlow** (*float*) - (0) Select starting segment of the oligomer to analyze. If both dhig and dlow are 0, entire oligomer is analyzed.
            * **dhig** (*float*) - (0) Select ending segment of the oligomer to analyze, being the maximum value the total number of base pairs in the oligomer. If both dhig and dlow are 0, entire oligomer is analyzed.
            * **rlow** (*float*) - (0) Minimal distances from the helical axis taken into account in the analysis.
            * **rhig** (*float*) - (0) Maximal distances from the helical axis taken into account in the analysis.
            * **alow** (*float*) - (0) Minimal angle range to analyze.
            * **ahig** (*float*) - (360) Maximal angle range to analyze.
            * **itst** (*int*) - (0) Number of first snapshot to be analyzed.
            * **itnd** (*int*) - (0) Number of last snapshot to be analyzed.
            * **itdel** (*int*) - (1) Spacing between analyzed snapshots.
            * **rmsf** (*bool*) - (False) If set to True uses the combination of the helical ion parameters and an average helical axis to map the ions into Cartesian space and then calculates their average position (pdb output) and their root mean square fluctuation values (rmsf output). A single pass rmsf algorithm to make this calculation possible with a single read of the trajectory file. This option is generally used for solute atoms and not for solvent molecules or ions.
            * **circ** (*bool*) - (False) If set to True, minicircles are analyzed.
            * **binary_path** (*str*) - (Canion) Path to Canion executable, otherwise the program will look for Canion executable in the binaries folder.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
            * **container_path** (*str*) - (None) Path to the binary executable of your container.
            * **container_image** (*str*) - ("cmip/cmip:latest") Container Image identifier.
            * **container_volume_path** (*str*) - ("/data") Path to an internal directory in the container.
            * **container_working_dir** (*str*) - (None) Path to the internal CWD in the container.
            * **container_user_id** (*str*) - (None) User number id to be mapped inside the container.
            * **container_shell_path** (*str*) - ("/bin/bash") Path to the binary executable of the container shell.
    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_dna.curvesplus.biobb_canion import biobb_canion
            prop = {
                'type': 'K+',
                'bases': 'G'
            }
            biobb_canion(
                input_cdi_path='/path/to/input.cdi',
                input_afr_path='/path/to/input.afr',
                input_avg_struc_path='/path/to/input.pdb',
                output_zip_path='/path/to/output.zip',
                properties=prop)
    Info:
        * wrapped_software:
            * name: Canion
            * version: >=2.6
            * license: BSD 3-Clause
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl
    """

    def __init__(
            self, input_cdi_path, input_afr_path, input_avg_struc_path,
            output_zip_path=None, properties=None, **kwargs) -> None:
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor's local names. It must be taken before
        # any further local variable is introduced in this method, otherwise
        # the snapshot would contain more than the call arguments.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {
                'input_cdi_path': input_cdi_path,
                'input_afr_path': input_afr_path,
                'input_avg_struc_path': input_avg_struc_path,
            },
            'out': {
                'output_zip_path': output_zip_path
            }
        }

        # Properties specific for BB
        self.binary_path = properties.get('binary_path', 'Canion')
        self.bases = properties.get('bases', None)
        self.type = properties.get('type', '*')
        self.dlow = properties.get('dlow', 0)
        self.dhig = properties.get('dhig', 0)
        self.rlow = properties.get('rlow', 0)
        self.rhig = properties.get('rhig', 0)
        self.alow = properties.get('alow', 0)
        self.ahig = properties.get('ahig', 360)
        self.itst = properties.get('itst', 0)
        self.itnd = properties.get('itnd', 0)
        self.itdel = properties.get('itdel', 1)
        # Booleans are stored already rendered as the ".t." / ".f." text that
        # launch() writes into the Canion input (presumably Fortran-style
        # logicals -- confirm against the Curves+ manual).
        self.rmsf = ".t." if properties.get('rmsf', False) else ".f."
        self.circ = ".t." if properties.get('circ', False) else ".f."
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`Canion <biobb_dna.curvesplus.biobb_canion.Canion>` object.

        Returns:
            int: ``0`` when ``check_restart()`` reports that execution can be
            skipped, otherwise ``self.return_code`` after the run.

        Raises:
            ValueError: If the ``type`` property is not one of the supported
                ion/atom selectors.
        """

        # Setup Biobb
        if self.check_restart():
            return 0

        # Validate the ion selector before anything is staged, so that an
        # invalid property fails fast without touching the filesystem.
        ion_type_options = [
            'Na+',
            'K',
            'K+',
            'Cl',
            'Cl-',
            'CL',
            'P',
            'C1*',
            'NH1',
            'NH2',
            'NZ',
            '1',
            '-1',
            '0',
            '*'
        ]
        if self.type not in ion_type_options:
            raise ValueError(("Invalid value for property type! "
                              f"Option include: {ion_type_options}"))

        self.stage_files()

        # define temporary file names
        input_cdi_file = Path(self.stage_io_dict['in']['input_cdi_path']).name
        input_afr_file = Path(self.stage_io_dict['in']['input_afr_path']).name
        input_avg_struc = Path(self.stage_io_dict['in']['input_avg_struc_path']).name

        # change directory to temporary folder
        original_directory = os.getcwd()

        if self.container_path:
            os.chdir(self.container_working_dir)
        else:
            os.chdir(self.stage_io_dict.get("unique_dir", ""))

        # From here on the process CWD has been changed; the try/finally
        # guarantees it is restored even when the run raises.
        try:
            # create instructions: the binary is fed its input through a shell
            # here-document delimited by "!". File names are passed with their
            # last four characters (the ".ext" suffix) removed.
            instructions = [
                f"{self.binary_path} <<! ",
                "&inp",
                "  lis=canion_output,",
                f"  dat={input_cdi_file[:-4]},",
                f"  axfrm={input_afr_file[:-4]},",
                f"  solute={input_avg_struc[:-4]},",
                f"  type={self.type},",
                f"  dlow={self.dlow},",
                f"  dhig={self.dhig},",
                f"  rlow={self.rlow},",
                f"  rhig={self.rhig},",
                f"  alow={self.alow},",
                f"  ahig={self.ahig},",
                f"  itst={self.itst},",
                f"  itnd={self.itnd},",
                f"  itdel={self.itdel},",
                f"  rmsf={self.rmsf},",
                f"  circ={self.circ},"]
            if self.bases is not None:
                # restrict the analysis to the requested base sequence
                fu.log('Appending sequence of bases to be searched to command',
                       self.out_log, self.global_log)
                instructions.append(f"  seq={self.bases},")
            instructions.append("&end")
            instructions.append("!")
            self.cmd = ["\n".join(instructions)]

            fu.log('Creating command line with instructions and required arguments',
                   self.out_log, self.global_log)
            # Run Biobb block
            self.run_biobb()
        finally:
            # change back to original directory
            os.chdir(original_directory)

        workdir = self.stage_io_dict.get("unique_dir", "")
        zip_host_path = Path(workdir) / Path(self.io_dict["out"]["output_zip_path"]).name

        # create zipfile and write output inside
        with zipfile.ZipFile(zip_host_path, "w") as zf:
            # sorted() makes the archive member order deterministic
            for curves_outfile in sorted(Path(workdir).glob("canion_output*")):
                # The archive itself lives in workdir and may match the glob
                # (e.g. "canion_output.zip"); never add a zip to the zip.
                if curves_outfile.suffix == ".zip":
                    continue
                fu.log(f"Adding {curves_outfile} to zip file", self.out_log, self.global_log)
                zf.write(
                    curves_outfile,
                    arcname=curves_outfile.name)

        # Copy files to host
        self.copy_to_host()

        # Remove temporary file(s)
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

215 

216 

def biobb_canion(
        input_cdi_path: str, input_afr_path: str, input_avg_struc_path: str,
        output_zip_path: Optional[str] = None, properties: Optional[dict] = None, **kwargs) -> int:
    """Build a :class:`Canion <biobb_dna.curvesplus.biobb_canion.Canion>` instance from the
    given arguments and run its :meth:`launch() <biobb_dna.curvesplus.biobb_canion.Canion.launch>`
    method, returning whatever ``launch()`` returns."""
    canion = Canion(
        input_cdi_path=input_cdi_path,
        input_afr_path=input_afr_path,
        input_avg_struc_path=input_avg_struc_path,
        output_zip_path=output_zip_path,
        properties=properties,
        # Extra keyword arguments are handed over as ONE keyword literally
        # named "kwargs" (not unpacked), so the constructor receives them
        # nested under that key.
        kwargs=kwargs,
    )
    return canion.launch()

223 

224 

# Expose the class documentation on the functional wrapper as well, replacing
# the short docstring written in the function body.
biobb_canion.__doc__ = Canion.__doc__
# Command line entry point obtained from Canion.get_main (presumably inherited
# from BiobbObject -- its implementation is not visible in this file); the
# second argument is the description text passed to it.
main = Canion.get_main(biobb_canion, "Execute Canion from the Curves+ software suite.")

if __name__ == '__main__':
    main()