Coverage for biobb_structure_utils / utils / cat_pdb.py: 96%

47 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-22 13:23 +0000

1"""Module containing the CatPDB class and the command line interface.""" 

2from typing import Optional 

3from biobb_common.generic.biobb_object import BiobbObject 

4from biobb_common.tools.file_utils import launchlogger 

5 

6from biobb_structure_utils.utils.common import check_input_path, check_output_path 

7 

8 

class CatPDB(BiobbObject):
    """
    | biobb_structure_utils CatPDB
    | Class to concat two PDB structures in a single PDB file.
    | Class to concat two PDB structures in a single PDB file.

    Args:
        input_structure1 (str): Input structure 1 file path. File type: input. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/data/utils/cat_protein.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        input_structure2 (str): Input structure 2 file path. File type: input. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/data/utils/cat_ligand.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        output_structure_path (str): Output protein file path. File type: output. `Sample file <https://github.com/bioexcel/biobb_structure_utils/raw/master/biobb_structure_utils/test/reference/utils/ref_cat_pdb.pdb>`_. Accepted formats: pdb (edam:format_1476), pdbqt (edam:format_1476).
        properties (dic - Python dictionary object containing the tool parameters, not input/output files):
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_structure_utils.utils.cat_pdb import cat_pdb
            prop = { }
            cat_pdb(input_structure1='/path/to/myInputStr1.pdb',
                    input_structure2='/path/to/myInputStr2.pdb',
                    output_structure_path='/path/to/newStructure.pdb',
                    properties=prop)

    Info:
        * wrapped_software:
            * name: In house
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(
        self,
        input_structure1: str,
        input_structure2: str,
        output_structure_path: str,
        properties: Optional[dict] = None,
        **kwargs,
    ) -> None:
        # A missing/None properties argument is treated as "no properties".
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments (taken before any other local
        # is created, so it holds exactly the call parameters plus self).
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            "in": {
                "input_structure1": input_structure1,
                "input_structure2": input_structure2,
            },
            "out": {"output_structure_path": output_structure_path},
        }

        # Properties specific for BB
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    @staticmethod
    def _needs_trailing_newline(path) -> bool:
        """Tell whether the file at ``path`` lacks a final line feed.

        Only the last byte is inspected, in binary mode, so no decoding is
        involved. An empty file yields ``False``: it contributes no content
        to the output, hence there is no unterminated line to close.
        """
        with open(path, "rb") as fh:
            # seek() returns the new absolute position, i.e. the file size.
            size = fh.seek(0, 2)
            if size == 0:
                return False
            fh.seek(-1, 2)
            return fh.read(1) != b"\n"

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`CatPDB <utils.cat_pdb.CatPDB>` utils.cat_pdb.CatPDB object."""

        # Validate the three paths; each helper returns the path to use.
        self.io_dict["in"]["input_structure1"] = check_input_path(
            self.io_dict["in"]["input_structure1"],
            self.out_log,
            self.__class__.__name__,
        )
        self.io_dict["in"]["input_structure2"] = check_input_path(
            self.io_dict["in"]["input_structure2"],
            self.out_log,
            self.__class__.__name__,
        )
        self.io_dict["out"]["output_structure_path"] = check_output_path(
            self.io_dict["out"]["output_structure_path"],
            self.out_log,
            self.__class__.__name__,
        )

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Business code
        filenames = [
            self.io_dict["in"]["input_structure1"],
            self.io_dict["in"]["input_structure2"],
        ]
        # For each input, decide up front whether a line feed must be appended
        # after its content so the next file starts on a fresh line.
        # (Previously this used seek(-2, 2), which raises OSError on inputs
        # shorter than two bytes.)
        add_newline = [self._needs_trailing_newline(fname) for fname in filenames]

        # Concat both input files and save them into the output file,
        # dropping every line that starts with "END" (END / ENDMDL records).
        with open(self.io_dict["out"]["output_structure_path"], "w") as outfile:
            for fname, missing_newline in zip(filenames, add_newline):
                with open(fname) as infile:
                    for line in infile:
                        if not line.startswith("END"):
                            outfile.write(line)
                # if the input did not end in a newline, add it
                if missing_newline:
                    outfile.write("\n")
        self.return_code = 0

        # Copy files to host
        self.copy_to_host()

        # Remove temporal files
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

133 

134 

def cat_pdb(
    input_structure1: str,
    input_structure2: str,
    output_structure_path: str,
    properties: Optional[dict] = None,
    **kwargs,
) -> int:
    """Build a :class:`CatPDB <utils.cat_pdb.CatPDB>` from the given arguments,
    run its :meth:`launch() <utils.cat_pdb.CatPDB.launch>` method and return
    the resulting exit code."""
    # The extra keyword arguments are handed over as one dict under the
    # literal keyword ``kwargs`` (not unpacked), exactly as expanding this
    # function's locals() would do.
    block = CatPDB(
        input_structure1=input_structure1,
        input_structure2=input_structure2,
        output_structure_path=output_structure_path,
        properties=properties,
        kwargs=kwargs,
    )
    return block.launch()

145 

146 

# The functional wrapper exposes the same documentation as the class.
cat_pdb.__doc__ = CatPDB.__doc__

# Entry point obtained from CatPDB.get_main; presumably the command line
# interface for this building block (get_main is defined outside this file).
main = CatPDB.get_main(
    cat_pdb,
    "Concat two PDB structures in a single PDB file.",
)

if __name__ == "__main__":
    main()