Coverage for biobb_structure_utils/utils/common.py: 80%

144 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-01-28 11:54 +0000

1"""Common functions and constants for package biobb_structure_utils.utils""" 

2 

3from collections.abc import Mapping 

4from pathlib import Path, PurePath 

5from typing import Optional, Union 

6 

7from biobb_common.tools import file_utils as fu 

8 

# PDB record names grouped by purpose (grouping inferred from the constant
# names -- confirm against the callers that consume them).
PDB_COORD_RECORDS = [
    "MODEL",
    "ANISOU",
    "HETATM",
    "ATOM",
    "TER",
    "ENDMDL",
]
PDB_SERIAL_RECORDS = [
    "ANISOU",
    "HETATM",
    "ATOM",
    "TER",
]
# Presumably the residue names under which water molecules may appear.
PDB_WATERS = [
    "SOL",
    "HOH",
    "WAT",
    "T3P",
]

12 

13 

def check_input_path(path, out_log, classname):
    """Validate an input structure path and return it anchored to the cwd.

    Args:
        path: Location of the input file.
        out_log: Log handle forwarded to ``fu.log``.
        classname: Name prepended to every log / error message.

    Returns:
        ``path`` unchanged when it is already absolute, otherwise ``path``
        joined onto the current working directory (as a string).

    Raises:
        SystemExit: If the file does not exist, or if its extension is
            accepted by neither ``is_valid_pdb`` nor ``is_valid_pdbqt``.
    """
    if not Path(path).exists():
        fu.log(classname + ": Unexisting input file, exiting", out_log)
        raise SystemExit(classname + ": Unexisting input file")

    # Extension without the leading dot.
    extension = PurePath(path).suffix[1:]
    if not (is_valid_pdb(extension) or is_valid_pdbqt(extension)):
        message = classname + ": Format %s in input file is not compatible" % extension
        fu.log(message, out_log)
        raise SystemExit(message)

    # A bare file name or relative path gets the cwd prepended because the
    # execution is launched on a tmp folder.
    pure = PurePath(path)
    if pure.name == path or not pure.is_absolute():
        return str(PurePath(Path.cwd()).joinpath(path))
    return path

32 

33 

def check_output_path(path, out_log, classname):
    """Validate an output structure path and return it unchanged.

    Args:
        path: Location where the output file will be written.
        out_log: Log handle forwarded to ``fu.log``.
        classname: Name prepended to every log / error message.

    Returns:
        The ``path`` argument, untouched.

    Raises:
        SystemExit: If the parent folder does not exist, or if the extension
            is accepted by neither ``is_valid_pdb`` nor ``is_valid_pdbqt``.
    """
    folder = PurePath(path).parent
    if folder and not Path(folder).exists():
        fu.log(classname + ": Unexisting output folder, exiting", out_log)
        raise SystemExit(classname + ": Unexisting output folder")

    # Extension without the leading dot.
    extension = PurePath(path).suffix[1:]
    if not (is_valid_pdb(extension) or is_valid_pdbqt(extension)):
        message = classname + ": Format %s in output file is not compatible" % extension
        fu.log(message, out_log)
        raise SystemExit(message)

    return path

49 

50 

def check_output_path_pdbqt(path, out_log, classname):
    """Validate an output path whose extension must pass ``is_valid_pdbqt``.

    Args:
        path: Location where the output file will be written.
        out_log: Log handle forwarded to ``fu.log``.
        classname: Name prepended to every log / error message.

    Returns:
        The ``path`` argument, untouched.

    Raises:
        SystemExit: If the parent folder does not exist, or if the extension
            is rejected by ``is_valid_pdbqt``.
    """
    folder = PurePath(path).parent
    if folder and not Path(folder).exists():
        fu.log(classname + ": Unexisting output folder, exiting", out_log)
        raise SystemExit(classname + ": Unexisting output folder")

    # Extension without the leading dot.
    extension = PurePath(path).suffix[1:]
    if is_valid_pdbqt(extension):
        return path

    message = classname + ": Format %s in output file is not compatible" % extension
    fu.log(message, out_log)
    raise SystemExit(message)

66 

67 

def check_output_path_json(path, out_log, classname):
    """Validate an output path whose extension must pass ``is_valid_json``.

    Args:
        path: Location where the output file will be written.
        out_log: Log handle forwarded to ``fu.log``.
        classname: Name prepended to every log / error message.

    Returns:
        The ``path`` argument, untouched.

    Raises:
        SystemExit: If the parent folder does not exist, or if the extension
            is rejected by ``is_valid_json``.
    """
    folder = PurePath(path).parent
    if folder and not Path(folder).exists():
        fu.log(classname + ": Unexisting output folder, exiting", out_log)
        raise SystemExit(classname + ": Unexisting output folder")

    # Extension without the leading dot.
    extension = PurePath(path).suffix[1:]
    if is_valid_json(extension):
        return path

    message = classname + ": Format %s in output file is not compatible" % extension
    fu.log(message, out_log)
    raise SystemExit(message)

83 

84 

def is_valid_pdb(ext):
    """Tell whether ``ext`` (an extension without the dot) is ``pdb``.

    The comparison is case-sensitive.
    """
    return ext in ("pdb",)

89 

90 

def is_valid_pdbqt(ext):
    """Tell whether ``ext`` (an extension without the dot) is ``pdb`` or ``pdbqt``.

    The comparison is case-sensitive.
    """
    return ext in ("pdb", "pdbqt")

95 

96 

def is_valid_json(ext):
    """Tell whether ``ext`` (an extension without the dot) is ``json``.

    The comparison is case-sensitive.
    """
    return ext in ("json",)

101 

102 

def check_output_end(structure, out_log):
    """Rewrite ``structure`` in place without its bare ``END`` lines.

    Every line whose stripped content is exactly ``END`` is dropped (one log
    message is emitted per dropped line); records such as ``ENDMDL`` are
    kept. The file is always rewritten, each kept line terminated by a
    single newline.

    Args:
        structure: Path of the file to clean.
        out_log: Log handle forwarded to ``fu.log``.
    """
    with open(structure, "r") as handle:
        content = handle.read().splitlines()

    kept = []
    for row in content:
        if row.strip() == "END":
            fu.log("%s file ends with END, cleaning" % structure, out_log)
            continue
        kept.append(row)

    with open(structure, "w") as handle:
        handle.writelines("%s\n" % row for row in kept)

118 

119 

def create_output_file(type, input, residues, output, out_log):
    """Copy to ``output`` the atom records of ``input`` that belong to ``residues``.

    The input file is read line by line. ``MODEL`` records are re-emitted
    (with ``ENDMDL`` separators between consecutive models) and every
    ATOM/HETATM record whose residue name, chain, residue id and model match
    an entry of ``residues`` is kept.

    Args:
        type: Index selecting which records are candidates:
            0 = ATOM and HETATM, 1 = HETATM only, 2 = ATOM only.
        input: Path of the PDB file to read.
        residues: Iterable of mappings with the string keys ``res_id``,
            ``name``, ``chain`` and ``model``.
        output: Path of the file to write.
        out_log: Log handle forwarded to ``fu.log``.

    Raises:
        ValueError: If a ``MODEL`` record carries no integer serial number.
    """
    # Record prefixes accepted for each value of ``type``.
    record_prefixes = (("ATOM", "HETATM"), ("HETATM",), ("ATOM",))[type]

    new_file_lines = []
    model_number = 0  # 0 means "no MODEL record seen so far"
    model = "1"  # records outside any MODEL block are treated as model 1

    with open(input) as infile:
        for line in infile:
            if line.startswith("MODEL "):
                fields = line.split()
                if len(fields) < 2:
                    raise ValueError("MODEL record without serial number: %r" % line)
                # Parse the whole serial field; reading only the last
                # character would turn model 10 into model 0.
                model_number = int(fields[1])
                model = str(model_number)
                if model_number > 1:
                    # Close the previous model before opening the next one.
                    new_file_lines.append("ENDMDL\n")
                new_file_lines.append("MODEL " + "{:>4}".format(model_number) + "\n")
                continue

            if not line.startswith(record_prefixes):
                continue

            # Fixed-column fields of an ATOM/HETATM record.
            name = line[17:20].strip()
            chain = line[21:22].strip() or " "  # blank chain id is kept as " "
            res_id = line[22:27].strip()

            # ``any`` keeps the line once even if ``residues`` lists the same
            # residue more than once.
            if any(
                nstr["res_id"] == res_id
                and nstr["name"] == name
                and nstr["chain"] == chain
                and nstr["model"] == model
                for nstr in residues
            ):
                new_file_lines.append(line)

    if model_number > 0:
        # Close the last model opened above.
        new_file_lines.append("ENDMDL\n")

    fu.log("Writting pdb to: %s" % (output), out_log)

    with open(output, "w") as outfile:
        outfile.writelines(new_file_lines)

164 

165 

def create_biopython_residue(residue):
    """Describe ``residue`` as a dict with ``model``, ``chain``, ``name`` and ``res_id``.

    ``residue`` must expose ``get_parent()``, ``get_resname()`` and
    ``get_id()`` (presumably a Biopython ``Residue`` -- confirm with callers).
    The model value is the grandparent's id plus one, as a string; ``res_id``
    is the second element of ``residue.get_id()``, as a string.
    """
    chain = residue.get_parent()
    model = chain.get_parent()
    return dict(
        model=str(model.get_id() + 1),
        chain=chain.get_id(),
        name=residue.get_resname(),
        res_id=str(residue.get_id()[1]),
    )

173 

174 

def create_residues_list(residues, out_log):
    """Normalise a residue selection into a list of dicts.

    Each mapping entry is copied and gains a ``code`` key listing which of
    ``name``, ``res_id``, ``chain`` and ``model`` it defines (in that order).
    Any non-mapping entry is treated as a bare residue id and becomes
    ``{"res_id": str(entry), "code": ["res_id"]}``.

    Args:
        residues: Iterable of mappings and/or bare residue ids.
        out_log: Unused; kept for interface compatibility.

    Returns:
        The list of normalised dicts, or ``None`` when ``residues`` is empty
        or ``None``.
    """
    if not residues:
        return None

    known_keys = ("name", "res_id", "chain", "model")
    list_residues = []

    for residue in residues:
        if isinstance(residue, Mapping):
            # Copy so the caller's mapping is not modified (and so read-only
            # mappings are accepted).
            entry = dict(residue)
            entry["code"] = [key for key in known_keys if key in residue]
        else:
            entry = {"res_id": str(residue), "code": ["res_id"]}
        list_residues.append(entry)

    return list_residues

202 

203 

def check_format_heteroatoms(hets, out_log):
    """Normalise a heteroatom selection into a list of dicts.

    Each entry is copied and gains a ``code`` key listing which of ``name``,
    ``res_id``, ``chain`` and ``model`` it defines (in that order).

    Args:
        hets: Iterable of mappings describing heteroatoms.
        out_log: Unused; kept for interface compatibility.

    Returns:
        The list of normalised dicts, or ``0`` when ``hets`` is empty or
        ``None``.
    """
    if not hets:
        return 0

    known_keys = ("name", "res_id", "chain", "model")
    listh = []

    for het in hets:
        # Copy so the caller's dict is not modified.
        entry = dict(het)
        entry["code"] = [key for key in known_keys if key in het]
        listh.append(entry)

    return listh

227 

228 

229# TODO: Move this function to biobb_common.tools.file_utils 

230def _from_string_to_list(input_data: Optional[Union[str, list[str]]]) -> list[str]: 

231 """ 

232 Converts a string to a list, splitting by commas or spaces. If the input is already a list, returns it as is. 

233 Returns an empty list if input_data is None. 

234 

235 Parameters: 

236 input_data (str, list, or None): The string, list, or None value to convert. 

237 

238 Returns: 

239 list: A list of string elements or an empty list if input_data is None. 

240 """ 

241 if input_data is None: 

242 return [] 

243 

244 if isinstance(input_data, list): 

245 # If input is already a list, return it 

246 return input_data 

247 

248 # If input is a string, determine the delimiter based on presence of commas 

249 delimiter = "," if "," in input_data else " " 

250 items = input_data.split(delimiter) 

251 

252 # Remove whitespace from each item and ignore empty strings 

253 processed_items = [item.strip() for item in items if item.strip()] 

254 

255 return processed_items