Coverage for biobb_flexserv/pcasuite/pcz_bfactor.py: 76%

55 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-06-19 15:08 +0000

1#!/usr/bin/env python3 

2 

3"""Module containing the PCZbfactor class and the command line interface.""" 

4import argparse 

5from typing import Optional 

6import shutil 

7from pathlib import PurePath 

8from biobb_common.tools import file_utils as fu 

9from biobb_common.generic.biobb_object import BiobbObject 

10from biobb_common.configuration import settings 

11from biobb_common.tools.file_utils import launchlogger 

12 

13 

class PCZbfactor(BiobbObject):
    """
    | biobb_flexserv PCZbfactor
    | Extract residue bfactors x PCA mode from a compressed PCZ file.
    | Wrapper of the pczdump tool from the PCAsuite FlexServ module.

    Args:
        input_pcz_path (str): Input compressed trajectory file. File type: input. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/data/pcasuite/pcazip.pcz>`_. Accepted formats: pcz (edam:format_3874).
        output_dat_path (str): Output Bfactor x residue x PCA mode file. File type: output. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/reference/pcasuite/bfactors.dat>`_. Accepted formats: dat (edam:format_1637), txt (edam:format_2330), csv (edam:format_3752).
        output_pdb_path (str) (Optional): Output PDB with Bfactor x residue x PCA mode file. File type: output. `Sample file <https://github.com/bioexcel/biobb_flexserv/raw/master/biobb_flexserv/test/reference/pcasuite/bfactors.pdb>`_. Accepted formats: pdb (edam:format_1476).
        properties (dict - Python dictionary object containing the tool parameters, not input/output files):
            * **binary_path** (*str*) - ("pczdump") pczdump binary path to be used.
            * **eigenvector** (*int*) - (1) PCA mode (eigenvector) from which to extract bfactor values per residue (0 means average over all modes).
            * **pdb** (*bool*) - (False) Generate a PDB file with the computed bfactors (to be easily represented with colour scale)
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_flexserv.pcasuite.pcz_bfactor import pcz_bfactor
            prop = {
                'eigenvector': 1,
                'pdb': True
            }
            pcz_bfactor( input_pcz_path='/path/to/pcazip_input.pcz',
                         output_dat_path='/path/to/bfactors_mode1.dat',
                         output_pdb_path='/path/to/bfactors_mode1.pdb',
                         properties=prop)

    Info:
        * wrapped_software:
            * name: FlexServ PCAsuite
            * version: >=1.0
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(self, input_pcz_path: str, output_dat_path: str,
                 output_pdb_path: Optional[str] = None,
                 properties: Optional[dict] = None, **kwargs) -> None:
        """Store the I/O paths and tool properties, then validate them.

        ``output_pdb_path`` defaults to ``None`` because it is only needed
        when the ``pdb`` property is enabled (the command line interface
        also passes ``None`` when the flag is omitted).
        """
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments; must be taken before any
        # additional local variable is created in this method.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {'input_pcz_path': input_pcz_path},
            'out': {'output_dat_path': output_dat_path,
                    'output_pdb_path': output_pdb_path}
        }

        # Properties specific for BB
        self.properties = properties
        self.binary_path = properties.get('binary_path', 'pczdump')
        # NOTE(review): the effective default is 1; the class docstring
        # historically advertised 0 ("average over all modes") -- confirm
        # which default is intended upstream.
        self.eigenvector = properties.get('eigenvector', 1)
        self.pdb = properties.get('pdb', False)

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    def _pczdump_cmd(self, output_path: str, *extra_flags: str) -> list:
        """Build the command list that runs pczdump inside the sandbox folder.

        Only file *names* (not full paths) are handed to the binary, since
        the command first changes into ``self.tmp_folder``.
        """
        return ['cd', self.tmp_folder, ';',
                self.binary_path,
                '-i', PurePath(self.io_dict["in"]["input_pcz_path"]).name,
                '-o', PurePath(output_path).name,
                "--bfactor",
                "--fluc={}".format(self.eigenvector),
                *extra_flags]

    def _export_from_sandbox(self, output_path: str) -> None:
        """Copy a file generated in the sandbox folder to its final output path."""
        generated = PurePath(self.tmp_folder).joinpath(PurePath(output_path).name)
        shutil.copy2(generated, PurePath(output_path))

    @launchlogger
    def launch(self) -> int:
        """Launches the execution of the FlexServ pcz_bfactor module.

        Returns:
            int: 0 when the restart check short-circuits the execution,
            otherwise the value of ``self.return_code`` after the run.
        """

        # Setup Biobb
        if self.check_restart():
            return 0

        output_dat_path = self.io_dict["out"]["output_dat_path"]
        output_pdb_path = self.io_dict["out"]["output_pdb_path"]

        # The PDB output can only be produced when a destination was given;
        # building a path from None would otherwise raise a TypeError after
        # the first pczdump run.
        write_pdb = bool(self.pdb)
        if write_pdb and not output_pdb_path:
            fu.log('WARNING: pdb property is set but no output_pdb_path was given, skipping PDB generation', self.out_log)
            write_pdb = False

        # Manually creating a Sandbox to avoid issues with input parameters buffer overflow:
        #   Long strings defining a file path makes Fortran or C compiled programs crash if the string
        #   declared is shorter than the input parameter path (string) length.
        #   Generating a temporary folder and working inside this folder (sandbox) fixes this problem.
        #   The problem was found in Galaxy executions, launching Singularity containers (May 2023).

        # Creating temporary folder
        self.tmp_folder = fu.create_unique_dir()
        fu.log('Creating %s temporary folder' % self.tmp_folder, self.out_log)

        shutil.copy2(self.io_dict["in"]["input_pcz_path"], self.tmp_folder)

        # Command line (1: dat file)
        # pczdump -i structure.ca.std.pcz --fluc=1 -o bfactor_1.dat
        self.cmd = self._pczdump_cmd(output_dat_path)

        # Run Biobb block
        self.run_biobb()

        if write_pdb:
            # Command line (2: pdb file)
            # pczdump -i structure.ca.std.pcz --fluc=1 --pdb -o bfactor_1.pdb
            self.cmd = self._pczdump_cmd(output_pdb_path, "--pdb")

            # Run Biobb block
            self.run_biobb()

        # Copy outputs from temporary folder to output path
        self._export_from_sandbox(output_dat_path)

        if write_pdb:
            self._export_from_sandbox(output_pdb_path)

        # remove temporary folder(s)
        self.tmp_files.extend([
            self.tmp_folder
        ])
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code

177 

178 

def pcz_bfactor(input_pcz_path: str, output_dat_path: str,
                output_pdb_path: Optional[str] = None,
                properties: Optional[dict] = None, **kwargs) -> int:
    """Create a :class:`PCZbfactor` instance and execute its ``launch()`` method.

    Args:
        input_pcz_path: Input compressed trajectory (PCZ) file.
        output_dat_path: Destination of the bfactor data file.
        output_pdb_path: Destination of the optional PDB file.
        properties: Dictionary with the tool parameters.
        **kwargs: Extra keyword arguments forwarded to the constructor.

    Returns:
        The value returned by ``PCZbfactor.launch()``.
    """

    return PCZbfactor(input_pcz_path=input_pcz_path,
                      output_dat_path=output_dat_path,
                      output_pdb_path=output_pdb_path,
                      properties=properties, **kwargs).launch()


# Expose the full class documentation through the function wrapper. This
# assignment has to live at module level: placed after the ``return`` inside
# the function body it would never be executed.
pcz_bfactor.__doc__ = PCZbfactor.__doc__

190 

191 

def main():
    """Command line entry point.

    Parses the command line, loads the properties through
    ``settings.ConfReader`` (using the literal ``"{}"`` when no ``--config``
    is supplied) and calls :func:`pcz_bfactor` with the parsed paths.
    """
    parser = argparse.ArgumentParser(description='Extract residue bfactors x PCA mode from a compressed PCZ file.', formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999))
    parser.add_argument('--config', required=False, help='Configuration file')
    # Optional output: registered with the regular options so that --help
    # does not list it under "required arguments".
    parser.add_argument('--output_pdb_path', required=False, help='Output PDB with Bfactor x residue x PCA mode file. Accepted formats: pdb.')

    # Specific args
    required_args = parser.add_argument_group('required arguments')
    required_args.add_argument('--input_pcz_path', required=True, help='Input compressed trajectory file. Accepted formats: pcz.')
    required_args.add_argument('--output_dat_path', required=True, help='Output Bfactor x residue x PCA mode file. Accepted formats: dat, txt, csv.')

    args = parser.parse_args()
    args.config = args.config or "{}"
    properties = settings.ConfReader(config=args.config).get_prop_dic()

    # Specific call
    pcz_bfactor(input_pcz_path=args.input_pcz_path,
                output_dat_path=args.output_dat_path,
                output_pdb_path=args.output_pdb_path,
                properties=properties)


if __name__ == '__main__':
    main()