Coverage for biobb_structure_utils/utils/common.py: 82%

134 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-10-03 15:30 +0000

1""" Common functions and constants for package biobb_structure_utils.utils """ 

2from pathlib import Path, PurePath 

3from collections.abc import Mapping 

4from biobb_common.tools import file_utils as fu 

5 

# PDB record names grouped as coordinate records.
PDB_COORD_RECORDS = ["MODEL", "ANISOU", "HETATM", "ATOM", "TER", "ENDMDL"]
# PDB record names grouped as serial-numbered records.
PDB_SERIAL_RECORDS = ["ANISOU", "HETATM", "ATOM", "TER"]
# Residue names treated as water molecules.
PDB_WATERS = ["SOL", "HOH", "WAT", "T3P"]

9 

10 

def check_input_path(path, out_log, classname):
    """Validate an input structure path and return a usable version of it.

    The problem is logged through ``fu.log`` and ``SystemExit`` is raised when
    the file does not exist, or when its extension (without the leading dot)
    is accepted by neither ``is_valid_pdb`` nor ``is_valid_pdbqt``.

    Args:
        path: Path of the input file.
        out_log: Logger handed to ``fu.log``.
        classname: Name prepended to every log / error message.

    Returns:
        ``path`` unchanged when it is absolute, otherwise ``path`` joined to
        the current working directory, as a string.
    """
    if not Path(path).exists():
        fu.log(classname + ': Unexisting input file, exiting', out_log)
        raise SystemExit(classname + ': Unexisting input file')

    extension = PurePath(path).suffix[1:]
    if not (is_valid_pdb(extension) or is_valid_pdbqt(extension)):
        message = classname + ': Format %s in input file is not compatible' % extension
        fu.log(message, out_log)
        raise SystemExit(message)

    # A bare file name or relative path is anchored to the cwd because the
    # execution is launched on a tmp folder.
    pure_path = PurePath(path)
    if pure_path.name == path or not pure_path.is_absolute():
        return str(PurePath(Path.cwd()).joinpath(path))
    return path

24 

25 

def check_output_path(path, out_log, classname):
    """Validate an output structure path and return it unchanged.

    The problem is logged through ``fu.log`` and ``SystemExit`` is raised when
    the parent folder of ``path`` does not exist, or when the extension
    (without the leading dot) is accepted by neither ``is_valid_pdb`` nor
    ``is_valid_pdbqt``.

    Args:
        path: Path of the output file.
        out_log: Logger handed to ``fu.log``.
        classname: Name prepended to every log / error message.

    Returns:
        The ``path`` argument, untouched.
    """
    folder = PurePath(path).parent
    if folder and not Path(folder).exists():
        fu.log(classname + ': Unexisting output folder, exiting', out_log)
        raise SystemExit(classname + ': Unexisting output folder')

    extension = PurePath(path).suffix[1:]
    if is_valid_pdb(extension) or is_valid_pdbqt(extension):
        return path

    message = classname + ': Format %s in output file is not compatible' % extension
    fu.log(message, out_log)
    raise SystemExit(message)

36 

37 

def check_output_path_pdbqt(path, out_log, classname):
    """Validate an output path whose extension must satisfy ``is_valid_pdbqt``.

    The problem is logged through ``fu.log`` and ``SystemExit`` is raised when
    the parent folder of ``path`` does not exist, or when the extension
    (without the leading dot) is rejected by ``is_valid_pdbqt``.

    Args:
        path: Path of the output file.
        out_log: Logger handed to ``fu.log``.
        classname: Name prepended to every log / error message.

    Returns:
        The ``path`` argument, untouched.
    """
    target = PurePath(path)

    parent_dir = target.parent
    if parent_dir and not Path(parent_dir).exists():
        fu.log(classname + ': Unexisting output folder, exiting', out_log)
        raise SystemExit(classname + ': Unexisting output folder')

    suffix_name = target.suffix[1:]
    if not is_valid_pdbqt(suffix_name):
        error_text = classname + ': Format %s in output file is not compatible' % suffix_name
        fu.log(error_text, out_log)
        raise SystemExit(error_text)

    return path

48 

49 

def check_output_path_json(path, out_log, classname):
    """Validate an output path whose extension must satisfy ``is_valid_json``.

    The problem is logged through ``fu.log`` and ``SystemExit`` is raised when
    the parent folder of ``path`` does not exist, or when the extension
    (without the leading dot) is rejected by ``is_valid_json``.

    Args:
        path: Path of the output file.
        out_log: Logger handed to ``fu.log``.
        classname: Name prepended to every log / error message.

    Returns:
        The ``path`` argument, untouched.
    """
    destination = PurePath(path)

    out_dir = destination.parent
    if out_dir and not Path(out_dir).exists():
        fu.log(classname + ': Unexisting output folder, exiting', out_log)
        raise SystemExit(classname + ': Unexisting output folder')

    fmt = destination.suffix[1:]
    if is_valid_json(fmt):
        return path

    failure = classname + ': Format %s in output file is not compatible' % fmt
    fu.log(failure, out_log)
    raise SystemExit(failure)

60 

61 

def is_valid_pdb(ext):
    """Tell whether ``ext`` (an extension without its dot) denotes a PDB file.

    The comparison is case-sensitive: only ``'pdb'`` is accepted.
    """
    return ext in ('pdb',)

66 

67 

def is_valid_pdbqt(ext):
    """Tell whether ``ext`` (an extension without its dot) is PDB or PDBQT.

    The comparison is case-sensitive: only ``'pdb'`` and ``'pdbqt'`` are
    accepted.
    """
    return ext in ('pdb', 'pdbqt')

72 

73 

def is_valid_json(ext):
    """Tell whether ``ext`` (an extension without its dot) denotes a JSON file.

    The comparison is case-sensitive: only ``'json'`` is accepted.
    """
    return ext in ('json',)

78 

79 

def check_output_end(structure, out_log):
    """Rewrite the file ``structure`` in place without its bare ``END`` lines.

    Every line whose stripped content is exactly ``END`` is dropped (wherever
    it sits in the file) and one message per dropped line is sent to
    ``fu.log``. All remaining lines are written back, each terminated by a
    newline.

    Args:
        structure: Path of the file to clean; it is overwritten.
        out_log: Logger handed to ``fu.log``.
    """
    with open(structure, 'r') as handle:
        original_lines = handle.read().splitlines()

    kept_lines = []
    for text in original_lines:
        if text.strip() == 'END':
            fu.log('%s file ends with END, cleaning' % structure, out_log)
            continue
        kept_lines.append(text)

    with open(structure, 'w') as handle:
        handle.writelines("%s\n" % text for text in kept_lines)

95 

96 

def _read_model_serial(line):
    """Return the integer serial number carried by a PDB ``MODEL`` record."""
    fields = line.split()
    if len(fields) < 2:
        raise ValueError('MODEL record without a serial number: %r' % line)
    return int(fields[1])


def create_output_file(type, input, residues, output, out_log):
    """Copy to ``output`` the atom records of ``input`` that belong to ``residues``.

    The input PDB file is read line by line. ``MODEL`` records are re-emitted
    (with an ``ENDMDL`` closing the previous model) and every selected atom
    record whose residue matches an entry of ``residues`` is kept verbatim.

    Args:
        type: Index selecting which records are considered:
            ``0`` -> ``ATOM`` and ``HETATM``, ``1`` -> ``HETATM`` only,
            ``2`` -> ``ATOM`` only. Any other index raises ``IndexError``
            (now before the file is read rather than on its first line).
        input: Path of the PDB file to read.
        residues: Iterable of dicts with the string keys ``'res_id'``,
            ``'name'``, ``'chain'`` and ``'model'``.
        output: Path of the PDB file to write.
        out_log: Logger handed to ``fu.log``.

    Raises:
        ValueError: If a ``MODEL`` record carries no integer serial number.
    """
    # Record-name prefixes accepted for each value of ``type``.
    prefixes = (("ATOM", "HETATM"), ("HETATM",), ("ATOM",))[type]

    new_file_lines = []
    # Serial of the MODEL currently being read; None until one is seen.
    curr_model = None
    with open(input) as infile:
        for line in infile:
            if line.startswith("MODEL "):
                # Read the whole serial number: looking only at the last
                # character of the line breaks every model from 10 onwards.
                curr_model = _read_model_serial(line)
                if curr_model > 1:
                    new_file_lines.append('ENDMDL\n')
                new_file_lines.append('MODEL ' + "{:>4}".format(curr_model) + '\n')

            if not line.startswith(prefixes):
                continue

            # Fixed-column fields of an ATOM/HETATM record.
            name = line[17:20].strip()
            chain = line[21:22].strip()
            res_id = line[22:27].strip()
            # Files without MODEL records are treated as a single model "1".
            model = "1" if curr_model is None else str(curr_model)
            if chain == "":
                chain = " "

            # ``any`` keeps each line at most once, even when ``residues``
            # holds the same residue several times.
            if any(nstr['res_id'] == res_id and nstr['name'] == name
                   and nstr['chain'] == chain and nstr['model'] == model
                   for nstr in residues):
                new_file_lines.append(line)

    if curr_model is not None and curr_model > 0:
        new_file_lines.append('ENDMDL\n')

    fu.log("Writting pdb to: %s" % (output), out_log)

    # save new file with the selected records
    with open(output, 'w') as outfile:
        outfile.writelines(new_file_lines)

135 

136 

def create_biopython_residue(residue):
    """Summarise a residue object as a plain dict.

    ``residue`` is presumably a Biopython ``Bio.PDB`` residue (TODO confirm);
    only ``get_parent()``, ``get_id()`` and ``get_resname()`` are used.

    Returns:
        A dict with the keys ``'model'`` (grandparent id plus one, as a
        string), ``'chain'`` (parent id), ``'name'`` (residue name) and
        ``'res_id'`` (second element of the residue id, as a string).
    """
    chain = residue.get_parent()
    model = chain.get_parent()
    return {
        'model': str(model.get_id() + 1),
        'chain': chain.get_id(),
        'name': residue.get_resname(),
        'res_id': str(residue.get_id()[1]),
    }

142 

143 

def create_residues_list(residues, out_log):
    """Normalise a list of residue selectors into a list of dicts.

    Each element of ``residues`` is either a mapping (any subset of the keys
    ``'name'``, ``'res_id'``, ``'chain'``, ``'model'``) or a bare value taken
    as a residue id. Every returned dict gets a ``'code'`` entry listing, in
    that fixed order, which of those four keys it provides.

    Mappings are copied, so the objects passed in by the caller are never
    modified and read-only mappings are accepted.

    Args:
        residues: Iterable of mappings and/or bare residue ids.
        out_log: Unused; kept for interface compatibility.

    Returns:
        The list of normalised dicts, or ``None`` when ``residues`` is empty
        or ``None``.
    """
    if not residues:
        return None

    known_keys = ('name', 'res_id', 'chain', 'model')
    list_residues = []

    for residue in residues:
        if isinstance(residue, Mapping):
            # Work on a copy: adding 'code' must not leak into the caller's data.
            entry = dict(residue)
            code = [key for key in known_keys if key in residue]
        else:
            entry = {'res_id': str(residue)}
            code = ['res_id']

        entry['code'] = code
        list_residues.append(entry)

    return list_residues

171 

172 

def check_format_heteroatoms(hets, out_log):
    """Normalise a list of heteroatom selectors into a list of dicts.

    Each element of ``hets`` is a mapping holding any subset of the keys
    ``'name'``, ``'res_id'``, ``'chain'`` and ``'model'``. Every returned dict
    gets a ``'code'`` entry listing, in that fixed order, which of those four
    keys it provides.

    Each element is copied, so the dicts passed in by the caller are never
    modified.

    Args:
        hets: Iterable of heteroatom mappings.
        out_log: Unused; kept for interface compatibility.

    Returns:
        The list of normalised dicts, or ``0`` when ``hets`` is empty or
        ``None``.
    """
    if not hets:
        return 0

    known_keys = ('name', 'res_id', 'chain', 'model')
    listh = []

    for het in hets:
        # Work on a copy: adding 'code' must not leak into the caller's data.
        entry = dict(het)
        entry['code'] = [key for key in known_keys if key in het]
        listh.append(entry)

    return listh