Coverage for biobb_dna / curvesplus / biobb_curves.py: 82%

96 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-15 18:49 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the Curves class and the command line interface.""" 

4import os 

5import zipfile 

6from typing import Optional 

7import shutil 

8from pathlib import Path 

9from biobb_common.generic.biobb_object import BiobbObject 

10from biobb_common.tools import file_utils as fu 

11from biobb_common.tools.file_utils import launchlogger 

12 

13 

class Curves(BiobbObject):
    """
    | biobb_dna Curves
    | Wrapper for the Cur+ executable that is part of the Curves+ software suite.
    | The Cur+ program is used to analyze the structure of nucleic acids and their complexes.

    Args:
        input_struc_path (str): Trajectory or PDB input file. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/curvesplus/structure.stripped.trj>`_. Accepted formats: trj (edam:format_3910), pdb (edam:format_1476), netcdf (edam:format_3650), nc (edam:format_3650).
        input_top_path (str) (Optional): Topology file, needed along with .trj file (optional). File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/curvesplus/structure.stripped.top>`_. Accepted formats: top (edam:format_3881), pdb (edam:format_1476).
        output_cda_path (str): Filename for Curves+ output .cda file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/curves_trj_output.cda>`_. Accepted formats: cda (edam:format_2330).
        output_lis_path (str): Filename for Curves+ output .lis file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/curves_trj_output.lis>`_. Accepted formats: lis (edam:format_2330).
        output_zip_path (str) (Optional): Filename for .zip files containing Curves+ output that is not .cda or .lis files. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/curves_trj_output.zip>`_. Accepted formats: zip (edam:format_3987).
        properties (dict):
            * **s1range** (*str*) - (None) Range of first strand. Must be specified in the form "start:end".
            * **s2range** (*str*) - (None) Range of second strand. Must be specified in the form "start:end".
            * **stdlib_path** (*str*) - (None) Path to Curves' standard library files for nucleotides. If not specified will look for the standard_*.lib files in $CONDA_PREFIX/.curvesplus or, when CONDA_PREFIX is undefined, for 'standard' files in the current directory.
            * **itst** (*int*) - (0) Iteration start index.
            * **itnd** (*int*) - (0) Iteration end index.
            * **itdel** (*int*) - (1) Iteration delimiter.
            * **ions** (*bool*) - (False) If True, helicoidal analysis of ions (or solvent molecules) around solute is carried out.
            * **test** (*bool*) - (False) If True, provide additional output in .lis file on fitting and axis generation.
            * **line** (*bool*) - (False) if True, find the best linear helical axis.
            * **fit** (*bool*) - (True) if True, fit a standard bases to the input coordinates (important for MD snapshots to avoid base distortions leading to noisy helical parameters).
            * **axfrm** (*bool*) - (False) if True, generates closely spaced helical axis frames as input for Canal and Canion.
            * **binary_path** (*str*) - (Cur+) Path to Curves+ executable, otherwise the program will look for Cur+ executable in the binaries folder.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_dna.curvesplus.biobb_curves import biobb_curves
            prop = {
                's1range': '1:12',
                's2range': '24:13',
            }
            biobb_curves(
                input_struc_path='/path/to/structure/file.trj',
                input_top_path='/path/to/topology/file.top',
                output_cda_path='/path/to/output/file.cda',
                output_lis_path='/path/to/output/file.lis',
                properties=prop)
    Info:
        * wrapped_software:
            * name: Curves
            * version: >=2.6
            * license: BSD 3-Clause
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl
    """

    def __init__(
            self, input_struc_path: str, output_lis_path: str,
            output_cda_path: str, output_zip_path: Optional[str] = None,
            input_top_path: Optional[str] = None,
            properties: Optional[dict] = None, **kwargs) -> None:
        """Store the I/O paths and the Cur+ options read from ``properties``."""
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; no other local may be
        # created before this line or it would end up in the snapshot.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {
                'input_struc_path': input_struc_path,
                'input_top_path': input_top_path
            },
            'out': {
                'output_lis_path': output_lis_path,
                'output_cda_path': output_cda_path,
                'output_zip_path': output_zip_path
            }
        }

        # Properties specific for BB
        self.s1range = properties.get('s1range', None)
        self.binary_path = properties.get('binary_path', 'Cur+')
        self.stdlib_path = properties.get('stdlib_path', None)
        self.s2range = properties.get('s2range', None)
        self.itst = properties.get('itst', 0)
        self.itnd = properties.get('itnd', 0)
        self.itdel = properties.get('itdel', 1)
        # Boolean switches are stored already rendered as ".t." / ".f.",
        # the form in which they are written into the Cur+ input below.
        self.ions = self._fortran_bool(properties.get('ions', False))
        self.test = self._fortran_bool(properties.get('test', False))
        self.line = self._fortran_bool(properties.get('line', False))
        self.fit = self._fortran_bool(properties.get('fit', True))
        self.axfrm = self._fortran_bool(properties.get('axfrm', False))
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @staticmethod
    def _fortran_bool(flag) -> str:
        """Return ".t." when ``flag`` is truthy and ".f." otherwise."""
        return ".t." if flag else ".f."

    def create_curvesplus_folder(self):
        """Create .curvesplus folder in the current temporal folder and copy the lib files inside.

        Every ``*.lib`` file found in the directory that contains
        ``self.stdlib_path`` is copied; ``self.stdlib_path`` must be set.
        """
        # Path joining (instead of string concatenation) keeps the target
        # relative when "unique_dir" is missing, rather than pointing at the
        # filesystem root.
        dst_dir = Path(self.stage_io_dict.get("unique_dir", "")) / ".curvesplus"
        os.makedirs(dst_dir, exist_ok=True)
        # Library files live next to the "standard" prefix given in stdlib_path
        src_dir = Path(os.path.dirname(self.stdlib_path))
        for lib_file in src_dir.glob("*.lib"):
            shutil.copy(lib_file, dst_dir)

    def _stage_stdlib_files(self) -> str:
        """Copy the standard library files into the sandbox.

        Sets ``self.stdlib_path`` when it was not provided by the user.

        Returns:
            str: Library prefix, relative to the sandbox directory, to be
            written as the ``lib`` entry of the Cur+ input.

        Raises:
            FileNotFoundError: If CONDA_PREFIX is defined but its .curvesplus
                directory is missing or does not hold exactly three
                standard_*.lib files.
        """
        if self.stdlib_path is not None:
            # User-provided location: keep only the last path component as
            # the prefix inside the sandbox copy.
            self.create_curvesplus_folder()
            return '.curvesplus/' + str(self.stdlib_path).split(os.sep)[-1]

        conda_prefix = os.getenv("CONDA_PREFIX")
        if not conda_prefix:
            # CONDA_PREFIX undefined: fall back to the current working directory
            fu.log('CONDA_PREFIX undefined, please put the standard_b.lib, standard_s.lib and standard_i.lib files in the current working directory', self.out_log)
            self.stdlib_path = Path.cwd() / "standard"
            self.create_curvesplus_folder()
            return '.curvesplus/standard'

        curves_aux_path = Path(conda_prefix) / ".curvesplus"
        if not curves_aux_path.exists():
            raise FileNotFoundError(
                ".curvesplus directory not found in "
                f"{conda_prefix}! "
                "Please indicate where standard_*.lib files are "
                "located with the stdlib_path property.")
        if len(list(curves_aux_path.glob("standard_*.lib"))) != 3:
            raise FileNotFoundError(
                "One or all standard library files "
                f"missing from {curves_aux_path}! "
                "Check files standard_b.lib, "
                "standard_s.lib and standard_i.lib exist.")
        self.stdlib_path = curves_aux_path / "standard"
        # copy standard library files to temporary folder
        shutil.copytree(
            curves_aux_path,
            Path(self.stage_io_dict.get("unique_dir", "")) / ".curvesplus")
        return '.curvesplus/standard'

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`Curves <biobb_dna.curvesplus.biobb_curves.Curves>` object.

        Returns:
            int: 0 when the restart check short-circuits the run, otherwise
            ``self.return_code``.

        Raises:
            ValueError: If the ``s1range`` property was not specified.
            FileNotFoundError: If the standard library files cannot be located.
        """

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        if self.s1range is None:
            raise ValueError("property 's1range' must be specified!")
        if self.s2range is None:
            # compute s2range if not provided: only the end index N of
            # s1range is used, giving "2N:N+1"
            # (presumably assumes strand 1 is numbered from 1 - TODO confirm)
            range1_end = int(self.s1range.split(":")[1])
            s2start = range1_end + 1
            s2end = 2 * range1_end
            self.s2range = f"{s2end}:{s2start}"

        # locate the standard library files and copy them into the sandbox
        relative_lib_path = self._stage_stdlib_files()

        unique_dir = self.stage_io_dict.get("unique_dir", "")

        # Cur+ is run from inside the sandbox, so inputs are referenced by
        # file name only.
        tmp_struc_input = Path(self.stage_io_dict['in']['input_struc_path']).name
        staged_top_path = self.stage_io_dict['in']['input_top_path']

        # create instructions: the binary is fed its input through a
        # here-document delimited by "!"
        instructions = [
            f"{self.binary_path} <<! ",
            "&inp",
            f" file={tmp_struc_input},"]
        if staged_top_path is not None:
            # add topology file if needed
            fu.log('Appending provided topology to command',
                   self.out_log, self.global_log)
            instructions.append(
                f" ftop={Path(staged_top_path).name},")

        instructions += [
            " lis='curves_output',",
            f" lib={relative_lib_path},",
            f" ions={self.ions},",
            f" test={self.test},",
            f" line={self.line},",
            f" fit={self.fit},",
            f" axfrm={self.axfrm},",
            f" itst={self.itst},itnd={self.itnd},itdel={self.itdel},",
            "&end",
            "2 1 -1 0 0",
            f"{self.s1range}",
            f"{self.s2range}",
            "!"
        ]
        self.cmd = ["\n".join(instructions)]
        fu.log('Creating command line with instructions and required arguments',
               self.out_log, self.global_log)

        # change directory to temporary folder for the run; always change
        # back, even when the run raises, so the caller's working directory
        # is never left pointing at the sandbox
        original_directory = os.getcwd()
        os.chdir(unique_dir)
        try:
            # Run Biobb block
            self.run_biobb()
        finally:
            os.chdir(original_directory)

        # create zipfile and write output inside
        if self.stage_io_dict.get("out", {}).get("output_zip_path") is not None:
            zip_path = Path(self.stage_io_dict["out"]["output_zip_path"])
            # context manager guarantees the archive is closed on error
            with zipfile.ZipFile(zip_path, "w") as zf:
                for curves_outfile in Path(unique_dir).glob("curves_output*"):
                    if curves_outfile.suffix not in (".cda", ".lis", ".zip"):
                        zf.write(
                            curves_outfile,
                            arcname=curves_outfile.name)

        # rename cda and lis files
        (Path(unique_dir) / "curves_output.cda").rename(
            self.stage_io_dict["out"]["output_cda_path"])
        (Path(unique_dir) / "curves_output.lis").rename(
            self.stage_io_dict["out"]["output_lis_path"])

        # Copy files to host
        self.copy_to_host()

        # Remove temporary file(s)
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

249 

250 

def biobb_curves(
        input_struc_path: str, output_lis_path: str, output_cda_path: str,
        input_top_path: Optional[str] = None, output_zip_path: Optional[str] = None,
        properties: Optional[dict] = None, **kwargs) -> int:
    """Create :class:`Curves <biobb_dna.curvesplus.biobb_curves.Curves>` class and
    execute the :meth:`launch() <biobb_dna.curvesplus.biobb_curves.Curves.launch>` method."""
    # Extra keyword arguments are handed over as one keyword literally named
    # "kwargs" holding the collected dict, not unpacked.
    block = Curves(
        input_struc_path=input_struc_path,
        output_lis_path=output_lis_path,
        output_cda_path=output_cda_path,
        input_top_path=input_top_path,
        output_zip_path=output_zip_path,
        properties=properties,
        kwargs=kwargs,
    )
    return block.launch()

258 

259 

# The functional entry point shares the class documentation.
biobb_curves.__doc__ = Curves.__doc__
# Command-line entry point; get_main is not defined in this file
# (presumably inherited from BiobbObject - TODO confirm).
main = Curves.get_main(biobb_curves, "Execute Cur+ from the Curves+ software suite.")

if __name__ == '__main__':
    main()