Coverage for biobb_structure_utils/utils/common.py: 82%

1""" Common functions and constants for package biobb_structure_utils.utils """

2from pathlib import Path, PurePath

3from collections.abc import Mapping

4from biobb_common.tools import file_utils as fu

# PDB record names treated as coordinate records.
PDB_COORD_RECORDS = [
    'MODEL',
    'ANISOU',
    'HETATM',
    'ATOM',
    'TER',
    'ENDMDL',
]

# NOTE(review): presumably the records that carry an atom serial number;
# confirm against the callers.
PDB_SERIAL_RECORDS = [
    'ANISOU',
    'HETATM',
    'ATOM',
    'TER',
]

# Residue names treated as water molecules.
PDB_WATERS = [
    'SOL',
    'HOH',
    'WAT',
    'T3P',
]

def check_input_path(path, out_log, classname):
    """Validate an input structure path and return it in absolute form.

    Args:
        path: location of the input file.
        out_log: logger handed to ``fu.log``.
        classname: name of the calling class, used as message prefix.

    Returns:
        ``path`` unchanged when it is already absolute, otherwise the path
        joined to the current working directory, as a string.

    Raises:
        SystemExit: if the file does not exist or its extension is accepted
            by neither ``is_valid_pdb`` nor ``is_valid_pdbqt``.
    """
    if not Path(path).exists():
        fu.log(classname + ': Unexisting input file, exiting', out_log)
        raise SystemExit(classname + ': Unexisting input file')

    pure_path = PurePath(path)
    extension = pure_path.suffix[1:]  # suffix without the leading dot
    if not (is_valid_pdb(extension) or is_valid_pdbqt(extension)):
        fu.log(classname + ': Format %s in input file is not compatible' % extension, out_log)
        raise SystemExit(classname + ': Format %s in input file is not compatible' % extension)

    # A bare file name (or any other relative path) is anchored to the current
    # working directory, because execution is launched on a tmp folder.
    if pure_path.name == path or not pure_path.is_absolute():
        return str(PurePath(Path.cwd()).joinpath(path))
    return path

def check_output_path(path, out_log, classname):
    """Validate an output path whose format must be PDB or PDBQT.

    Args:
        path: location of the output file.
        out_log: logger handed to ``fu.log``.
        classname: name of the calling class, used as message prefix.

    Returns:
        ``path``, unchanged.

    Raises:
        SystemExit: if the parent folder does not exist or the extension is
            accepted by neither ``is_valid_pdb`` nor ``is_valid_pdbqt``.
    """
    target = PurePath(path)
    folder = target.parent
    if folder and not Path(folder).exists():
        fu.log(classname + ': Unexisting output folder, exiting', out_log)
        raise SystemExit(classname + ': Unexisting output folder')

    extension = target.suffix[1:]  # suffix without the leading dot
    if not (is_valid_pdb(extension) or is_valid_pdbqt(extension)):
        fu.log(classname + ': Format %s in output file is not compatible' % extension, out_log)
        raise SystemExit(classname + ': Format %s in output file is not compatible' % extension)

    return path

def check_output_path_pdbqt(path, out_log, classname):
    """Validate an output path whose format must pass ``is_valid_pdbqt``.

    Args:
        path: location of the output file.
        out_log: logger handed to ``fu.log``.
        classname: name of the calling class, used as message prefix.

    Returns:
        ``path``, unchanged.

    Raises:
        SystemExit: if the parent folder does not exist or the extension is
            rejected by ``is_valid_pdbqt``.
    """
    target = PurePath(path)
    folder = target.parent
    if folder and not Path(folder).exists():
        fu.log(classname + ': Unexisting output folder, exiting', out_log)
        raise SystemExit(classname + ': Unexisting output folder')

    extension = target.suffix[1:]  # suffix without the leading dot
    if is_valid_pdbqt(extension):
        return path

    fu.log(classname + ': Format %s in output file is not compatible' % extension, out_log)
    raise SystemExit(classname + ': Format %s in output file is not compatible' % extension)

def check_output_path_json(path, out_log, classname):
    """Validate an output path whose format must pass ``is_valid_json``.

    Args:
        path: location of the output file.
        out_log: logger handed to ``fu.log``.
        classname: name of the calling class, used as message prefix.

    Returns:
        ``path``, unchanged.

    Raises:
        SystemExit: if the parent folder does not exist or the extension is
            rejected by ``is_valid_json``.
    """
    target = PurePath(path)
    folder = target.parent
    if folder and not Path(folder).exists():
        fu.log(classname + ': Unexisting output folder, exiting', out_log)
        raise SystemExit(classname + ': Unexisting output folder')

    extension = target.suffix[1:]  # suffix without the leading dot
    if is_valid_json(extension):
        return path

    fu.log(classname + ': Format %s in output file is not compatible' % extension, out_log)
    raise SystemExit(classname + ': Format %s in output file is not compatible' % extension)

def is_valid_pdb(ext):
    """Return True when *ext* (extension without the dot) is 'pdb'.

    The comparison is case-sensitive.
    """
    accepted = ('pdb',)
    return ext in accepted

def is_valid_pdbqt(ext):
    """Return True when *ext* (extension without the dot) is 'pdb' or 'pdbqt'.

    The comparison is case-sensitive.
    """
    accepted = ('pdb', 'pdbqt')
    return ext in accepted

def is_valid_json(ext):
    """Return True when *ext* (extension without the dot) is 'json'.

    The comparison is case-sensitive.
    """
    accepted = ('json',)
    return ext in accepted

def check_output_end(structure, out_log):
    """Rewrite *structure* in place without its bare ``END`` lines.

    Every line whose stripped content is exactly ``END`` is dropped (and
    logged); all other lines, e.g. ``ENDMDL``, are kept. Each kept line is
    written back terminated by a single newline.

    Args:
        structure: path of the file to clean.
        out_log: logger handed to ``fu.log``.
    """
    with open(structure, 'r') as handle:
        original_lines = handle.read().splitlines()

    kept_lines = []
    for entry in original_lines:
        if entry.strip() == 'END':
            fu.log('%s file ends with END, cleaning' % structure, out_log)
            continue
        kept_lines.append(entry)

    with open(structure, 'w') as handle:
        for entry in kept_lines:
            handle.write("%s\n" % entry)

def create_output_file(type, input, residues, output, out_log):
    """Write to *output* the atom records of *input* that match *residues*.

    The input PDB file is read line by line. A coordinate record is kept
    when its residue name (columns 18-20), chain (column 22), residue id
    (columns 23-27) and current model all equal the 'name', 'chain',
    'res_id' and 'model' values of one entry of *residues*. Records read
    before any MODEL line are assigned model "1"; an empty chain is
    compared as a single space. Every MODEL found in the input is written
    to the output wrapped in its own MODEL / ENDMDL pair.

    Args:
        type: index selecting the records considered:
            0 -> ATOM and HETATM, 1 -> HETATM only, 2 -> ATOM only.
        input: path of the PDB file to read.
        residues: iterable of dicts with the keys 'res_id', 'name', 'chain'
            and 'model', compared against strings taken from the file.
        output: path of the PDB file to write.
        out_log: logger handed to ``fu.log``.

    Raises:
        ValueError: if a MODEL record has no integer serial number.
    """
    # NOTE: ``type`` and ``input`` shadow builtins but belong to the public
    # signature, so they are kept for backward compatibility.
    # The selector does not depend on the line, so it is resolved only once.
    record_prefixes = (("ATOM", "HETATM"), ("HETATM",), ("ATOM",))[type]

    new_file_lines = []
    curr_model = None  # serial of the MODEL being read; None before any MODEL

    with open(input) as infile:
        for line in infile:
            if line.startswith("MODEL "):
                fields = line.split()
                if len(fields) < 2:
                    raise ValueError("MODEL record without serial number: %r" % line)
                # Close the previous model, whatever its serial number was.
                if curr_model is not None:
                    new_file_lines.append('ENDMDL\n')
                # Read the whole serial (not just its last digit) so that
                # models >= 10 are handled; int() also normalises e.g. "01".
                curr_model = str(int(fields[1]))
                new_file_lines.append('MODEL ' + "{:>4}".format(curr_model) + '\n')
                continue

            if not line.startswith(record_prefixes):
                continue

            name = line[17:20].strip()
            chain = line[21:22].strip() or " "
            res_id = line[22:27].strip()
            model = curr_model if curr_model is not None else "1"

            # any() keeps the line once even if the same residue is listed
            # several times in `residues`.
            if any(nstr['res_id'] == res_id and nstr['name'] == name
                   and nstr['chain'] == chain and nstr['model'] == model
                   for nstr in residues):
                new_file_lines.append(line)

    # Close the last model, if the input had any MODEL record.
    if curr_model is not None:
        new_file_lines.append('ENDMDL\n')

    fu.log("Writting pdb to: %s" % (output), out_log)

    # save new file with the selected records
    with open(output, 'w') as outfile:
        outfile.writelines(new_file_lines)

135

136

def create_biopython_residue(residue):
    """Describe *residue* as a dict with 'model', 'chain', 'name' and 'res_id'.

    Args:
        residue: object exposing ``get_parent()``, ``get_id()`` and
            ``get_resname()`` (presumably a Bio.PDB residue; confirm
            against the callers).

    Returns:
        dict with:
            'model': id of the residue's grandparent plus one, as a string;
            'chain': id of the residue's parent;
            'name': the residue name;
            'res_id': second item of the residue id, as a string.
    """
    chain = residue.get_parent()
    model = chain.get_parent()
    model_number = model.get_id() + 1
    sequence_number = residue.get_id()[1]
    return dict(
        model=str(model_number),
        chain=chain.get_id(),
        name=residue.get_resname(),
        res_id=str(sequence_number),
    )

142

143

def create_residues_list(residues, out_log):
    """Normalise a list of residue selectors.

    Each item is either a mapping (any of the keys 'name', 'res_id', 'chain'
    and 'model' is recognised) or a bare value, which is taken as a residue
    id and converted to ``{'res_id': str(value)}``. Every returned dict
    gets an extra 'code' key listing, in the order name, res_id, chain,
    model, which of those keys the selector defines.

    The caller's mappings are copied, never modified.

    Args:
        residues: iterable of selectors, or a falsy value.
        out_log: unused; kept for interface compatibility.

    Returns:
        list of dicts, or None when *residues* is empty or None.
    """
    if not residues:
        return None

    selector_keys = ('name', 'res_id', 'chain', 'model')
    list_residues = []

    for residue in residues:
        if isinstance(residue, Mapping):
            # Work on a copy so the caller's input is left untouched (and
            # read-only mappings are accepted).
            entry = dict(residue)
            code = [key for key in selector_keys if key in residue]
        else:
            entry = {'res_id': str(residue)}
            code = ['res_id']

        entry['code'] = code
        list_residues.append(entry)

    return list_residues

171

172

def check_format_heteroatoms(hets, out_log):
    """Normalise a list of heteroatom selectors.

    Each item is a mapping in which any of the keys 'name', 'res_id',
    'chain' and 'model' is recognised. Every returned dict is a copy of the
    item plus a 'code' key listing, in the order name, res_id, chain, model,
    which of those keys the item defines.

    The caller's mappings are copied, never modified.

    Args:
        hets: iterable of mappings, or a falsy value.
        out_log: unused; kept for interface compatibility.

    Returns:
        list of dicts, or 0 when *hets* is empty or None.
    """
    if not hets:
        return 0

    selector_keys = ('name', 'res_id', 'chain', 'model')
    listh = []

    for het in hets:
        # Work on a copy so the caller's input is left untouched.
        entry = dict(het)
        entry['code'] = [key for key in selector_keys if key in het]
        listh.append(entry)

    return listh