Coverage for biobb_pdb_tools/pdb_tools/biobb_pdb_tofasta.py: 78%

50 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-05-06 12:37 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the Pdbtofasta class and the command line interface.""" 

4 

5import argparse 

6from typing import Optional 

7 

8from biobb_common.configuration import settings 

9from biobb_common.generic.biobb_object import BiobbObject 

10from biobb_common.tools import file_utils as fu 

11from biobb_common.tools.file_utils import launchlogger 

12 

13 

class Pdbtofasta(BiobbObject):
    """
    | biobb_pdb_tofasta Pdbtofasta
    | Extracts the residue sequence in a PDB file to FASTA format.
    | This tool extracts the residue sequence in a PDB file to FASTA format. It can be used to extract the sequence of a PDB file to FASTA format.

    Args:
        input_file_path (str): PDB file. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_pdb_tools/master/biobb_pdb_tools/test/data/pdb_tools/1AKI.pdb>`_. Accepted formats: pdb (edam:format_1476).
        output_file_path (str): FASTA file containing the aminoacids sequence. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_pdb_tools/master/biobb_pdb_tools/test/reference/pdb_tools/ref_pdb_tofasta.pdb>`_. Accepted formats: fasta (edam:format_1929), fa (edam:format_1929).
        properties (dic):
            * **multi** (*bool*) - (True) Splits the different chains into different records in the FASTA file.
            * **binary_path** (*str*) - ("pdb_tofasta") Path to the pdb_tofasta executable binary.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_pdb_tools.pdb_tools.biobb_pdb_tofasta import biobb_pdb_tofasta

            prop = {
                'multi': True
            }
            biobb_pdb_tofasta(input_file_path='/path/to/input.pdb',
                            output_file_path='/path/to/output.fasta',
                            properties=prop)

    Info:
        * wrapped_software:
            * name: pdb_tools
            * version: >=2.5.0
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(
        self, input_file_path, output_file_path, properties=None, **kwargs
    ) -> None:
        """Store the I/O paths and the tool properties, then validate them.

        Args:
            input_file_path: Path of the PDB file to read.
            output_file_path: Path of the FASTA file to write.
            properties: Optional dictionary of properties; ``None`` is
                treated as an empty dictionary.
            **kwargs: Accepted and only recorded through ``locals()``.
        """
        properties = properties or {}

        super().__init__(properties)
        # Snapshot of the constructor's local namespace (the call arguments).
        # Keep this right after super().__init__ and do not bind new local
        # names above it, otherwise they would end up in the snapshot.
        self.locals_var_dict = locals().copy()

        # Input and output files, keyed by argument name.
        self.io_dict = {
            "in": {"input_file_path": input_file_path},
            "out": {"output_file_path": output_file_path},
        }

        # Tool-specific properties with their defaults.
        self.binary_path = properties.get("binary_path", "pdb_tofasta")
        self.multi = properties.get("multi", True)
        self.properties = properties

        # Validation helpers; presumably inherited from BiobbObject, since
        # they are not defined in this class.
        self.check_properties(properties)
        self.check_arguments()

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`Pdbtofasta <biobb_pdb_tools.pdb_tools.biobb_pdb_tofasta.Pdbtofasta>` object.

        Returns:
            int: ``0`` when ``check_restart()`` reports that execution can be
            skipped, otherwise ``self.return_code`` after the command has run.
        """

        if self.check_restart():
            return 0
        self.stage_files()

        # Optional command line flags.
        instructions = []
        if self.multi:
            instructions.append("-multi")
            fu.log("Appending optional boolean property", self.out_log, self.global_log)

        # Unpack the flags as individual arguments so that no empty-string
        # argument is inserted when there are none (multi=False).
        # The ">" element is a shell-style redirection, so the list is
        # presumably joined and run through a shell by run_biobb() - confirm.
        # NOTE(review): the input path is taken from stage_io_dict while the
        # output path is taken from io_dict - confirm this is intended.
        self.cmd = [
            self.binary_path,
            *instructions,
            self.stage_io_dict["in"]["input_file_path"],
            ">",
            self.io_dict["out"]["output_file_path"],
        ]

        fu.log(" ".join(self.cmd), self.out_log, self.global_log)

        fu.log(
            "Creating command line with instructions and required arguments",
            self.out_log,
            self.global_log,
        )

        self.run_biobb()
        self.copy_to_host()

        # Register the staging directory (if any) for removal.
        self.tmp_files.extend([self.stage_io_dict.get("unique_dir", "")])
        self.remove_tmp_files()
        # Check the outputs without raising an exception.
        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

109 

110 

def biobb_pdb_tofasta(
    input_file_path: str,
    output_file_path: str,
    properties: Optional[dict] = None,
    **kwargs,
) -> int:
    """Build a :class:`Pdbtofasta` from the given arguments and launch it.

    Args:
        input_file_path: Forwarded to ``Pdbtofasta`` as ``input_file_path``.
        output_file_path: Forwarded to ``Pdbtofasta`` as ``output_file_path``.
        properties: Forwarded to ``Pdbtofasta`` as ``properties``.
        **kwargs: Forwarded unchanged to ``Pdbtofasta``.

    Returns:
        int: The value returned by ``Pdbtofasta.launch()``.
    """
    building_block = Pdbtofasta(
        input_file_path=input_file_path,
        output_file_path=output_file_path,
        properties=properties,
        **kwargs,
    )
    return building_block.launch()


# The function docstring is replaced at import time by the class docstring.
biobb_pdb_tofasta.__doc__ = Pdbtofasta.__doc__

128 

129 

def main():
    """Command line execution of this building block. Please check the command line documentation.

    Reads ``--config``, ``--input_file_path`` and ``--output_file_path`` from
    the command line, builds the properties dictionary from the configuration
    and calls :func:`biobb_pdb_tofasta`.
    """
    parser = argparse.ArgumentParser(
        description="Extracts the residue sequence in a PDB file to FASTA format.",
        formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999),
    )
    # The configuration is optional: when it is omitted or empty, the "{}"
    # fallback below is used instead. Declaring the flag as required made
    # that fallback unreachable for a missing flag.
    parser.add_argument("--config", required=False, help="Configuration file")

    required_args = parser.add_argument_group("required arguments")
    required_args.add_argument(
        "--input_file_path",
        required=True,
        help="Description for the first input file path. Accepted formats: pdb.",
    )
    required_args.add_argument(
        "--output_file_path",
        required=True,
        help="Description for the output file path. Accepted formats: fasta.",
    )

    args = parser.parse_args()
    # Presumably ConfReader reads the literal "{}" as an empty JSON
    # configuration - confirm against biobb_common.
    args.config = args.config or "{}"
    properties = settings.ConfReader(config=args.config).get_prop_dic()

    biobb_pdb_tofasta(
        input_file_path=args.input_file_path,
        output_file_path=args.output_file_path,
        properties=properties,
    )


if __name__ == "__main__":
    main()