Coverage for biobb_pdb_tools/pdb_tools/biobb_pdb_tofasta.py: 95%
37 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-04 08:26 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-04 08:26 +0000
1#!/usr/bin/env python3
3"""Module containing the Pdbtofasta class and the command line interface."""
5from typing import Optional
6from biobb_common.generic.biobb_object import BiobbObject
7from biobb_common.tools import file_utils as fu
8from biobb_common.tools.file_utils import launchlogger
class Pdbtofasta(BiobbObject):
    """
    | biobb_pdb_tofasta Pdbtofasta
    | Extracts the residue sequence in a PDB file to FASTA format.
    | This tool extracts the residue sequence in a PDB file to FASTA format. It can be used to extract the sequence of a PDB file to FASTA format.

    Args:
        input_file_path (str): Input PDB file. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_pdb_tools/master/biobb_pdb_tools/test/data/pdb_tools/1AKI.pdb>`_. Accepted formats: pdb (edam:format_1476).
        output_file_path (str): FASTA file containing the aminoacids sequence. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_pdb_tools/master/biobb_pdb_tools/test/reference/pdb_tools/ref_pdb_tofasta.pdb>`_. Accepted formats: fasta (edam:format_1929), fa (edam:format_1929).
        properties (dic):
            * **multi** (*bool*) - (True) Splits the different chains into different records in the FASTA file.
            * **binary_path** (*str*) - ("pdb_tofasta") Path to the pdb_tofasta executable binary.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_pdb_tools.pdb_tools.biobb_pdb_tofasta import biobb_pdb_tofasta

            prop = {
                'multi': True
            }
            biobb_pdb_tofasta(input_file_path='/path/to/input.pdb',
                    output_file_path='/path/to/output.fasta',
                    properties=prop)

    Info:
        * wrapped_software:
            * name: pdb_tools
            * version: >=2.5.0
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    # The class docstring above is reused at runtime (this module assigns it
    # to the ``biobb_pdb_tofasta`` launcher's ``__doc__``), so its text is
    # treated as data and kept verbatim.

    def __init__(
        self, input_file_path, output_file_path, properties=None, **kwargs
    ) -> None:
        """Record the I/O paths and tool properties on the instance.

        Args:
            input_file_path: Path stored under ``io_dict["in"]``.
            output_file_path: Path stored under ``io_dict["out"]``.
            properties: Optional dict of settings; ``None`` (or any falsy
                value) is replaced by an empty dict.
            **kwargs: Not read directly here; they are only captured by the
                ``locals()`` snapshot below.
        """
        properties = properties or {}

        # Parent constructor first, then snapshot the constructor arguments.
        # The snapshot must stay right here: binding any extra local name
        # before it would change what gets captured.
        super().__init__(properties)
        self.locals_var_dict = locals().copy()

        # Input and output files, grouped by direction.
        self.io_dict = {
            "in": {"input_file_path": input_file_path},
            "out": {"output_file_path": output_file_path},
        }

        # Tool-specific properties, each with its default.
        self.binary_path = properties.get("binary_path", "pdb_tofasta")
        self.multi = properties.get("multi", True)
        self.properties = properties

        # Let the parent class validate the received properties.
        self.check_init(properties)

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`Pdbtofasta <biobb_pdb_tools.pdb_tools.biobb_pdb_tofasta.Pdbtofasta>` object.

        Returns:
            int: ``0`` when ``check_restart()`` reports that nothing has to
            be done; otherwise the ``return_code`` attribute of the instance
            once the command has been run.
        """
        # Nothing to do when the restart check says so.
        if self.check_restart():
            return 0

        self.stage_files()

        # Optional command-line switches derived from the properties.
        switches = []
        if self.multi:
            switches.append("-multi")
            fu.log(
                "Appending optional boolean property",
                self.out_log,
                self.global_log,
            )

        staged_input = self.stage_io_dict["in"]["input_file_path"]
        # NOTE(review): the input path is taken from ``stage_io_dict`` while
        # the redirect target is taken from ``io_dict`` -- confirm that
        # writing to the non-staged output path is intended.
        redirect_target = self.io_dict["out"]["output_file_path"]

        # The switches are joined into a single element, so the list keeps a
        # fixed length of five (that element is "" when no switch is set).
        self.cmd = [
            self.binary_path,
            " ".join(switches),
            staged_input,
            ">",
            redirect_target,
        ]

        # Log the assembled command, then the informational message, in the
        # same order as before.
        command_line = " ".join(self.cmd)
        fu.log(command_line, self.out_log, self.global_log)
        fu.log(
            "Creating command line with instructions and required arguments",
            self.out_log,
            self.global_log,
        )

        # Run, bring results back, clean up and verify the outputs.
        self.run_biobb()
        self.copy_to_host()
        self.remove_tmp_files()
        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code
def biobb_pdb_tofasta(
    input_file_path: str,
    output_file_path: str,
    properties: Optional[dict] = None,
    **kwargs,
) -> int:
    """Create :class:`Pdbtofasta <biobb_pdb_tools.pdb_tools.biobb_pdb_tofasta.Pdbtofasta>` class and
    execute the :meth:`launch() <biobb_pdb_tools.pdb_tools.biobb_pdb_tofasta.Pdbtofasta.launch>` method."""
    # The extra keyword arguments are deliberately forwarded as ONE keyword
    # named ``kwargs`` holding the dict (not unpacked with ``**``): that is
    # exactly what expanding the function's local namespace produced.
    building_block = Pdbtofasta(
        input_file_path=input_file_path,
        output_file_path=output_file_path,
        properties=properties,
        kwargs=kwargs,
    )
    return building_block.launch()
# Give the function-style launcher the same documentation as the class.
biobb_pdb_tofasta.__doc__ = Pdbtofasta.__doc__

# Command-line entry point, obtained from the class-level ``get_main`` helper
# with the launcher function and a one-line description.
main = Pdbtofasta.get_main(
    biobb_pdb_tofasta,
    "Extracts the residue sequence in a PDB file to FASTA format.",
)

if __name__ == "__main__":
    main()