Coverage for biobb_dna/curvesplus/biobb_curves.py: 73%
110 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-28 10:36 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-28 10:36 +0000
1#!/usr/bin/env python3
3"""Module containing the Curves class and the command line interface."""
4import os
5import zipfile
6import argparse
7from typing import Optional
8import shutil
9from pathlib import Path
10from biobb_common.generic.biobb_object import BiobbObject
11from biobb_common.configuration import settings
12from biobb_common.tools import file_utils as fu
13from biobb_common.tools.file_utils import launchlogger
class Curves(BiobbObject):
    """
    | biobb_dna Curves
    | Wrapper for the Cur+ executable that is part of the Curves+ software suite.
    | The Cur+ program is used to analyze the structure of nucleic acids and their complexes.

    Args:
        input_struc_path (str): Trajectory or PDB input file. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/curvesplus/structure.stripped.trj>`_. Accepted formats: trj (edam:format_3910), pdb (edam:format_1476), netcdf (edam:format_3650), nc (edam:format_3650).
        input_top_path (str) (Optional): Topology file, needed along with .trj file (optional). File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/curvesplus/structure.stripped.top>`_. Accepted formats: top (edam:format_3881), pdb (edam:format_1476).
        output_cda_path (str): Filename for Curves+ output .cda file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/curves_trj_output.cda>`_. Accepted formats: cda (edam:format_2330).
        output_lis_path (str): Filename for Curves+ output .lis file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/curves_trj_output.lis>`_. Accepted formats: lis (edam:format_2330).
        output_zip_path (str) (Optional): Filename for .zip files containing Curves+ output that is not .cda or .lis files. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/curves_trj_output.zip>`_. Accepted formats: zip (edam:format_3987).
        properties (dict):
            * **s1range** (*str*) - (None) Range of first strand. Must be specified in the form "start:end".
            * **s2range** (*str*) - (None) Range of second strand. Must be specified in the form "start:end".
            * **stdlib_path** (*str*) - (None) Path to Curves' standard library files for nucleotides. If not specified will look for the 'standard' files in $CONDA_PREFIX/.curvesplus or, when CONDA_PREFIX is undefined, in the current directory.
            * **itst** (*int*) - (0) Iteration start index.
            * **itnd** (*int*) - (0) Iteration end index.
            * **itdel** (*int*) - (1) Iteration delimiter.
            * **ions** (*bool*) - (False) If True, helicoidal analysis of ions (or solvent molecules) around solute is carried out.
            * **test** (*bool*) - (False) If True, provide additional output in .lis file on fitting and axis generation.
            * **line** (*bool*) - (False) if True, find the best linear helical axis.
            * **fit** (*bool*) - (True) if True, fit a standard bases to the input coordinates (important for MD snapshots to avoid base distortions leading to noisy helical parameters).
            * **axfrm** (*bool*) - (False) if True, generates closely spaced helical axis frames as input for Canal and Canion.
            * **binary_path** (*str*) - (Cur+) Path to Curves+ executable, otherwise the program will look for Cur+ executable in the binaries folder.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_dna.curvesplus.biobb_curves import biobb_curves
            prop = {
                's1range': '1:12',
                's2range': '24:13',
            }
            biobb_curves(
                input_struc_path='/path/to/structure/file.trj',
                input_top_path='/path/to/topology/file.top',
                output_cda_path='/path/to/output/file.cda',
                output_lis_path='/path/to/output/file.lis',
                properties=prop)
    Info:
        * wrapped_software:
            * name: Curves
            * version: >=2.6
            * license: BSD 3-Clause
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl
    """

    def __init__(
            self, input_struc_path: str, output_lis_path: str,
            output_cda_path: str, output_zip_path: Optional[str] = None,
            input_top_path: Optional[str] = None,
            properties: Optional[dict] = None, **kwargs) -> None:
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; must stay right after the
        # parent constructor call so no extra locals leak into the copy.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {
                'input_struc_path': input_struc_path,
                'input_top_path': input_top_path
            },
            'out': {
                'output_lis_path': output_lis_path,
                'output_cda_path': output_cda_path,
                'output_zip_path': output_zip_path
            }
        }

        # Properties specific for BB
        self.s1range = properties.get('s1range', None)
        self.binary_path = properties.get('binary_path', 'Cur+')
        self.stdlib_path = properties.get('stdlib_path', None)
        self.s2range = properties.get('s2range', None)
        self.itst = properties.get('itst', 0)
        self.itnd = properties.get('itnd', 0)
        self.itdel = properties.get('itdel', 1)
        # Boolean flags are written into the Cur+ input as ".t." / ".f."
        self.ions = ".t." if properties.get('ions', False) else ".f."
        self.test = ".t." if properties.get('test', False) else ".f."
        self.line = ".t." if properties.get('line', False) else ".f."
        self.fit = ".t." if properties.get('fit', True) else ".f."
        self.axfrm = ".t." if properties.get('axfrm', False) else ".f."
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    def create_curvesplus_folder(self):
        """Create .curvesplus folder in the current temporal folder and copy the lib files inside."""
        # Create .curvesplus directory in temporary folder
        dst_dir = self.stage_io_dict.get("unique_dir", "") + '/.curvesplus'
        os.makedirs(dst_dir, exist_ok=True)
        # stdlib_path is a file prefix (".../standard"), so the *.lib files
        # live in its parent directory
        lib_files = list(Path(os.path.dirname(self.stdlib_path)).glob("*.lib"))
        # Copy each lib file to the .curvesplus directory in temporary folder
        for file in lib_files:
            shutil.copy(file, dst_dir)

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`Curves <biobb_dna.curvesplus.biobb_curves.Curves>` object.

        Returns:
            int: ``self.return_code`` after the run, or 0 when ``check_restart()`` is true.

        Raises:
            ValueError: if the 's1range' property was not provided.
            FileNotFoundError: if 'stdlib_path' is unset and the standard library
                files cannot be found under ``$CONDA_PREFIX/.curvesplus``.
        """

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()
        unique_dir = self.stage_io_dict.get("unique_dir", "")

        if self.s1range is None:
            raise ValueError("property 's1range' must be specified!")
        if self.s2range is None:
            # compute s2range if not provided: the strand that follows s1,
            # with the same length, listed in reverse order
            range1_end = int(self.s1range.split(":")[1])
            s2start = range1_end + 1
            s2end = 2 * range1_end
            self.s2range = f"{s2end}:{s2start}"

        # check standard library files location if not provided
        if self.stdlib_path is None:
            if os.getenv("CONDA_PREFIX", False):
                curves_aux_path = Path(
                    os.getenv("CONDA_PREFIX", "")) / ".curvesplus"
                # check if .curvesplus directory is in $CONDA_PREFIX
                if curves_aux_path.exists():
                    if len(list(curves_aux_path.glob("standard_*.lib"))) != 3:
                        raise FileNotFoundError(
                            "One or all standard library files "
                            f"missing from {curves_aux_path}! "
                            "Check files standard_b.lib, "
                            "standard_s.lib and standard_i.lib exist.")
                    self.stdlib_path = curves_aux_path / "standard"
                else:
                    raise FileNotFoundError(
                        ".curvesplus directory not found in "
                        f"{os.getenv('CONDA_PREFIX')} !"
                        "Please indicate where standard_*.lib files are "
                        "located with the stdlib_path property.")
                # copy standard library files to temporary folder
                shutil.copytree(curves_aux_path, unique_dir + '/.curvesplus')
                relative_lib_path = '.curvesplus/standard'
            else:
                # CONDA_PREFIX undefined
                fu.log('CONDA_PREFIX undefined, please put the standard_b.lib, standard_s.lib and standard_i.lib files in the current working directory', self.out_log)
                self.stdlib_path = Path.cwd() / "standard"
                # create .curvesplus folder in the current temporal folder and copy the lib files inside
                self.create_curvesplus_folder()
                # set relative path
                relative_lib_path = '.curvesplus/standard'
        else:
            # create .curvesplus folder in the current temporal folder and copy the lib files inside
            self.create_curvesplus_folder()
            # set relative path: keep only the last component of stdlib_path
            path_parts = str(self.stdlib_path).split(os.sep)
            relative_lib_path = '.curvesplus/' + path_parts[-1]

        # change directory to temporary folder; the instructions below use
        # file names relative to it
        original_directory = os.getcwd()
        os.chdir(unique_dir)
        try:
            # define temporary file names
            tmp_struc_input = Path(self.stage_io_dict['in']['input_struc_path']).name
            staged_top_path = self.stage_io_dict['in']['input_top_path']

            # create instructions (a shell here-document fed to the binary)
            instructions = [
                f"{self.binary_path} <<! ",
                "&inp",
                f"  file={tmp_struc_input},"]
            if staged_top_path is not None:
                # add topology file if needed
                fu.log('Appending provided topology to command',
                       self.out_log, self.global_log)
                instructions.append(
                    f"  ftop={Path(staged_top_path).name},")

            instructions = instructions + [
                "  lis='curves_output',",
                f"  lib={relative_lib_path},",
                f"  ions={self.ions},",
                f"  test={self.test},",
                f"  line={self.line},",
                f"  fit={self.fit},",
                f"  axfrm={self.axfrm},",
                f"  itst={self.itst},itnd={self.itnd},itdel={self.itdel},",
                "&end",
                "2 1 -1 0 0",
                f"{self.s1range}",
                f"{self.s2range}",
                "!"
            ]
            self.cmd = ["\n".join(instructions)]
            fu.log('Creating command line with instructions and required arguments',
                   self.out_log, self.global_log)

            # Run Biobb block
            self.run_biobb()
        finally:
            # always restore the caller's working directory, even when the
            # run raises, so a failure does not leave the process inside the
            # sandbox
            os.chdir(original_directory)

        # create zipfile and write output inside
        zip_path = self.stage_io_dict.get("out", {}).get("output_zip_path")
        if zip_path is not None:
            # context manager closes the archive even if a write fails
            with zipfile.ZipFile(Path(zip_path), "w") as zf:
                for curves_outfile in Path(unique_dir).glob("curves_output*"):
                    # .cda and .lis are delivered separately below
                    if curves_outfile.suffix not in (".cda", ".lis", ".zip"):
                        zf.write(
                            curves_outfile,
                            arcname=curves_outfile.name)

        # rename cda and lis files
        (Path(unique_dir) / "curves_output.cda").rename(
            self.stage_io_dict["out"]["output_cda_path"])
        (Path(unique_dir) / "curves_output.lis").rename(
            self.stage_io_dict["out"]["output_lis_path"])

        # Copy files to host
        self.copy_to_host()

        # Remove temporary file(s)
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code
def biobb_curves(
        input_struc_path: str, output_lis_path: str, output_cda_path: str,
        input_top_path: Optional[str] = None, output_zip_path: Optional[str] = None,
        properties: Optional[dict] = None, **kwargs) -> int:
    """Create :class:`Curves <biobb_dna.curvesplus.biobb_curves.Curves>` class and
    execute the :meth:`launch() <biobb_dna.curvesplus.biobb_curves.Curves.launch>` method."""

    return Curves(
        input_struc_path=input_struc_path,
        input_top_path=input_top_path,
        output_lis_path=output_lis_path,
        output_cda_path=output_cda_path,
        output_zip_path=output_zip_path,
        properties=properties, **kwargs).launch()


# Expose the full building-block documentation on the function wrapper.
# This must live at module level: placed inside the function after the
# ``return`` it is unreachable and the docstring is never replaced.
biobb_curves.__doc__ = Curves.__doc__
def main():
    """Command line execution of this building block. Please check the command line documentation.

    Parses the command line, loads the properties from the optional
    ``--config`` file (an empty JSON object when omitted) and runs
    ``biobb_curves`` with the given paths.
    """
    parser = argparse.ArgumentParser(description='Execute Cur+ from the Curves+ software suite.',
                                     formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999))
    parser.add_argument('--config', required=False, help='Configuration file')

    # Required arguments get their own group so they are listed separately in --help
    required_args = parser.add_argument_group('required arguments')
    required_args.add_argument('--input_struc_path', required=True,
                               help='Trajectory or PDB input file. Accepted formats: trj, pdb, netcdf, nc.')
    required_args.add_argument('--output_cda_path', required=True,
                               help='Filename to give to output .cda file. Accepted formats: cda.')
    required_args.add_argument('--output_lis_path', required=True,
                               help='Filename to give to output .lis file. Accepted formats: lis.')
    parser.add_argument('--input_top_path', required=False,
                        help='Topology file, needed along with .trj file (optional). Accepted formats: top, pdb.')
    parser.add_argument('--output_zip_path', required=False,
                        help='Filename to give to output files (except .cda and .lis files). Accepted formats: zip.')

    args = parser.parse_args()
    # Fall back to an empty JSON document when no configuration is given
    args.config = args.config or "{}"
    properties = settings.ConfReader(config=args.config).get_prop_dic()

    biobb_curves(
        input_struc_path=args.input_struc_path,
        input_top_path=args.input_top_path,
        output_cda_path=args.output_cda_path,
        output_lis_path=args.output_lis_path,
        output_zip_path=args.output_zip_path,
        properties=properties)
# Run the command line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()