Coverage for biobb_dna/curvesplus/biobb_curves.py: 82%

100 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-05-28 06:38 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the Curves class and the command line interface.""" 

4import os 

5import zipfile 

6from typing import Optional 

7import shutil 

8from pathlib import Path 

9from biobb_common.generic.biobb_object import BiobbObject 

10from biobb_common.tools import file_utils as fu 

11from biobb_common.tools.file_utils import launchlogger 

12 

13 

class Curves(BiobbObject):
    """
    | biobb_dna Curves
    | Wrapper for the Cur+ executable that is part of the Curves+ software suite.
    | The Cur+ program is used to analyze the structure of nucleic acids and their complexes.

    Args:
        input_struc_path (str): Trajectory or PDB input file. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/curvesplus/structure.stripped.trj>`_. Accepted formats: trj (edam:format_3910), pdb (edam:format_1476), netcdf (edam:format_3650), nc (edam:format_3650).
        input_top_path (str) (Optional): Topology file, needed along with .trj file (optional). File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/curvesplus/structure.stripped.top>`_. Accepted formats: top (edam:format_3881), pdb (edam:format_1476).
        output_cda_path (str): Filename for Curves+ output .cda file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/curves_trj_output.cda>`_. Accepted formats: cda (edam:format_2330).
        output_lis_path (str): Filename for Curves+ output .lis file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/curves_trj_output.lis>`_. Accepted formats: lis (edam:format_2330).
        output_zip_path (str) (Optional): Filename for .zip files containing Curves+ output that is not .cda or .lis files. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/curves_trj_output.zip>`_. Accepted formats: zip (edam:format_3987).
        properties (dict):
            * **s1range** (*str*) - (None) Range of first strand. Must be specified in the form "start:end".
            * **s2range** (*str*) - (None) Range of second strand. Must be specified in the form "start:end".
            * **stdlib_path** (*str*) - ('standard') Path to Curves' standard library files for nucleotides. If not specified will look for 'standard' files in $CONDA_PREFIX/.curvesplus, or in the current directory when CONDA_PREFIX is undefined.
            * **itst** (*int*) - (0) Iteration start index.
            * **itnd** (*int*) - (0) Iteration end index.
            * **itdel** (*int*) - (1) Iteration delimiter.
            * **ions** (*bool*) - (False) If True, helicoidal analysis of ions (or solvent molecules) around solute is carried out.
            * **test** (*bool*) - (False) If True, provide additional output in .lis file on fitting and axis generation.
            * **line** (*bool*) - (False) if True, find the best linear helical axis.
            * **fit** (*bool*) - (True) if True, fit a standard bases to the input coordinates (important for MD snapshots to avoid base distortions leading to noisy helical parameters).
            * **axfrm** (*bool*) - (False) if True, generates closely spaced helical axis frames as input for Canal and Canion.
            * **binary_path** (*str*) - (Cur+) Path to Curves+ executable, otherwise the program will look for Cur+ executable in the binaries folder.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
            * **container_path** (*str*) - (None) Path to the binary executable of your container.
            * **container_image** (*str*) - ("cmip/cmip:latest") Container Image identifier.
            * **container_volume_path** (*str*) - ("/data") Path to an internal directory in the container.
            * **container_working_dir** (*str*) - (None) Path to the internal CWD in the container.
            * **container_user_id** (*str*) - (None) User number id to be mapped inside the container.
            * **container_shell_path** (*str*) - ("/bin/bash") Path to the binary executable of the container shell.
    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_dna.curvesplus.biobb_curves import biobb_curves
            prop = {
                's1range': '1:12',
                's2range': '24:13',
            }
            biobb_curves(
                input_struc_path='/path/to/structure/file.trj',
                input_top_path='/path/to/topology/file.top',
                output_cda_path='/path/to/output/file.cda',
                output_lis_path='/path/to/output/file.lis',
                properties=prop)
    Info:
        * wrapped_software:
            * name: Curves
            * version: >=2.6
            * license: BSD 3-Clause
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl
    """

    def __init__(
            self, input_struc_path, output_lis_path,
            output_cda_path, output_zip_path: Optional[str] = None,
            input_top_path: Optional[str] = None,
            properties: Optional[dict] = None, **kwargs) -> None:
        """Store the I/O paths and the Cur+ specific properties, then validate them."""
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; must be taken before any
        # additional local variable is created in this method.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {
                'input_struc_path': input_struc_path,
                'input_top_path': input_top_path
            },
            'out': {
                'output_lis_path': output_lis_path,
                'output_cda_path': output_cda_path,
                'output_zip_path': output_zip_path
            }
        }

        # Properties specific for BB
        self.s1range = properties.get('s1range', None)
        self.binary_path = properties.get('binary_path', 'Cur+')
        self.stdlib_path = properties.get('stdlib_path', None)
        self.s2range = properties.get('s2range', None)
        self.itst = properties.get('itst', 0)
        self.itnd = properties.get('itnd', 0)
        self.itdel = properties.get('itdel', 1)
        # Boolean switches are stored already rendered as ".t." / ".f.",
        # which is how they are written into the Cur+ input below.
        self.ions = self._fortran_bool(properties.get('ions', False))
        self.test = self._fortran_bool(properties.get('test', False))
        self.line = self._fortran_bool(properties.get('line', False))
        self.fit = self._fortran_bool(properties.get('fit', True))
        self.axfrm = self._fortran_bool(properties.get('axfrm', False))
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @staticmethod
    def _fortran_bool(value) -> str:
        """Return ".t." for a truthy value and ".f." otherwise."""
        return ".t." if value else ".f."

    def _sandbox_lib_dir(self) -> Path:
        """Return the path of the .curvesplus folder inside the staging directory."""
        return Path(self.stage_io_dict.get("unique_dir", "")) / ".curvesplus"

    def create_curvesplus_folder(self):
        """Create .curvesplus folder in the current temporal folder and copy the lib files inside."""
        # Create .curvesplus directory in temporary folder
        dst_dir = self._sandbox_lib_dir()
        dst_dir.mkdir(parents=True, exist_ok=True)
        # Copy every lib file found next to stdlib_path into that directory
        for lib_file in Path(os.path.dirname(self.stdlib_path)).glob("*.lib"):
            shutil.copy(lib_file, dst_dir)

    def _stage_stdlib(self) -> str:
        """Copy the standard library files into the staging directory.

        Returns:
            The library prefix, relative to the staging directory, to be
            written in the ``lib=`` entry of the Cur+ input.

        Raises:
            FileNotFoundError: if CONDA_PREFIX is defined but its .curvesplus
                folder is missing or does not hold exactly three
                standard_*.lib files.
        """
        if self.stdlib_path is not None:
            # User supplied location: copy its sibling *.lib files and keep
            # only the last path component as library prefix.
            self.create_curvesplus_folder()
            return '.curvesplus/' + str(self.stdlib_path).split(os.sep)[-1]

        conda_prefix = os.getenv("CONDA_PREFIX")
        if not conda_prefix:
            # CONDA_PREFIX undefined: fall back to the current working directory
            fu.log('CONDA_PREFIX undefined, please put the standard_b.lib, standard_s.lib and standard_i.lib files in the current working directory', self.out_log)
            self.stdlib_path = Path.cwd() / "standard"
            self.create_curvesplus_folder()
            return '.curvesplus/standard'

        # check if .curvesplus directory is in $CONDA_PREFIX
        curves_aux_path = Path(conda_prefix) / ".curvesplus"
        if not curves_aux_path.exists():
            raise FileNotFoundError(
                ".curvesplus directory not found in "
                f"{conda_prefix}! "
                "Please indicate where standard_*.lib files are "
                "located with the stdlib_path property.")
        if len(list(curves_aux_path.glob("standard_*.lib"))) != 3:
            raise FileNotFoundError(
                "One or all standard library files "
                f"missing from {curves_aux_path}! "
                "Check files standard_b.lib, "
                "standard_s.lib and standard_i.lib exist.")
        self.stdlib_path = curves_aux_path / "standard"
        # copy standard library files to temporary folder
        shutil.copytree(curves_aux_path, self._sandbox_lib_dir())
        return '.curvesplus/standard'

    def _build_instructions(self, relative_lib_path: str) -> str:
        """Return the shell here-document that feeds the namelist input to Cur+."""
        # Only file names are used: the command runs from the staging directory
        struc_name = Path(self.stage_io_dict['in']['input_struc_path']).name
        instructions = [
            f"{self.binary_path} <<! ",
            "&inp",
            f" file={struc_name},"]

        top_path = self.stage_io_dict['in'].get('input_top_path')
        if top_path is not None:
            # add topology file if needed
            fu.log('Appending provided topology to command',
                   self.out_log, self.global_log)
            instructions.append(f" ftop={Path(top_path).name},")

        instructions += [
            " lis='curves_output',",
            f" lib={relative_lib_path},",
            f" ions={self.ions},",
            f" test={self.test},",
            f" line={self.line},",
            f" fit={self.fit},",
            f" axfrm={self.axfrm},",
            f" itst={self.itst},itnd={self.itnd},itdel={self.itdel},",
            "&end",
            "2 1 -1 0 0",
            f"{self.s1range}",
            f"{self.s2range}",
            "!"
        ]
        return "\n".join(instructions)

    def _zip_extra_outputs(self) -> None:
        """Bundle every curves_output* file that is not .cda/.lis into the zip.

        Nothing is done when no ``output_zip_path`` was requested, since that
        argument is optional.
        """
        requested_zip = self.io_dict["out"].get("output_zip_path")
        if not requested_zip:
            return

        workdir = Path(self.stage_io_dict.get("unique_dir", ""))
        # The archive is created inside the staging directory under the
        # requested file name.
        zip_host_path = workdir / Path(requested_zip).name
        with zipfile.ZipFile(zip_host_path, "w") as zf:
            for curves_outfile in workdir.glob("curves_output*"):
                if curves_outfile.suffix in (".cda", ".lis", ".zip"):
                    continue
                if curves_outfile == zip_host_path:
                    # never add the archive to itself
                    continue
                fu.log(f"Adding {curves_outfile} to zip file", self.out_log, self.global_log)
                zf.write(curves_outfile, arcname=curves_outfile.name)

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`Curves <biobb_dna.curvesplus.biobb_curves.Curves>` object.

        Returns:
            0 when the restart check short-circuits the run, otherwise the
            return code stored in ``self.return_code``.

        Raises:
            ValueError: if the 's1range' property was not specified.
            FileNotFoundError: if the standard library files cannot be located.
        """

        # Setup Biobb
        if self.check_restart():
            return 0

        # Validate the strand ranges before staging, so that an invalid
        # configuration does not leave a sandbox directory behind.
        if self.s1range is None:
            raise ValueError("property 's1range' must be specified!")
        if self.s2range is None:
            # compute s2range if not provided
            range1_end = int(self.s1range.split(":")[1])
            s2start = range1_end + 1
            s2end = 2 * range1_end
            self.s2range = f"{s2end}:{s2start}"

        self.stage_files()

        # locate the standard library files and copy them next to the inputs
        relative_lib_path = self._stage_stdlib()

        # change directory to temporary folder
        original_directory = os.getcwd()
        if self.container_path:
            # NOTE(review): container_working_dir is documented with a None
            # default; confirm it is always set together with container_path.
            os.chdir(self.container_working_dir)
        else:
            os.chdir(self.stage_io_dict.get("unique_dir", ""))

        try:
            # create instructions
            self.cmd = [self._build_instructions(relative_lib_path)]
            fu.log('Creating command line with instructions and required arguments',
                   self.out_log, self.global_log)

            # Run Biobb block
            self.run_biobb()
        finally:
            # change back to original directory, even when the run fails
            os.chdir(original_directory)

        # create zipfile and write output inside
        self._zip_extra_outputs()

        # rename cda and lis files
        # In container mode stage_io_dict["out"] paths are container-internal
        # (e.g. /data/file.cda), not host paths. Always rename within unique_dir
        # so that copy_to_host() can find them by filename.
        unique_dir = Path(self.stage_io_dict.get("unique_dir", ""))
        (unique_dir / "curves_output.cda").rename(
            unique_dir / Path(self.stage_io_dict["out"]["output_cda_path"]).name)
        (unique_dir / "curves_output.lis").rename(
            unique_dir / Path(self.stage_io_dict["out"]["output_lis_path"]).name)

        # Copy files to host
        self.copy_to_host()

        # Remove temporary file(s)
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

263 

264 

def biobb_curves(
        input_struc_path: str, output_lis_path: str, output_cda_path: str,
        input_top_path: Optional[str] = None, output_zip_path: Optional[str] = None,
        properties: Optional[dict] = None, **kwargs) -> int:
    """Create :class:`Curves <biobb_dna.curvesplus.biobb_curves.Curves>` class and
    execute the :meth:`launch() <biobb_dna.curvesplus.biobb_curves.Curves.launch>` method."""
    # Every local of this function is forwarded by keyword. That includes the
    # collected extra keyword arguments, which are handed over as a single
    # keyword named "kwargs" rather than being unpacked.
    curves_block = Curves(
        input_struc_path=input_struc_path,
        output_lis_path=output_lis_path,
        output_cda_path=output_cda_path,
        input_top_path=input_top_path,
        output_zip_path=output_zip_path,
        properties=properties,
        kwargs=kwargs)
    return curves_block.launch()

272 

273 

# The functional entry point exposes the same documentation as the class.
biobb_curves.__doc__ = Curves.__doc__
# Command line entry point built from the launcher function and a description.
main = Curves.get_main(biobb_curves, "Execute Cur+ from the Curves+ software suite.")

if __name__ == '__main__':
    main()