Coverage for biobb_structure_utils/utils/common.py: 82%
134 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-10-03 15:30 +0000
« prev ^ index » next coverage.py v7.6.1, created at 2024-10-03 15:30 +0000
1""" Common functions and constants for package biobb_structure_utils.utils """
2from pathlib import Path, PurePath
3from collections.abc import Mapping
4from biobb_common.tools import file_utils as fu
# PDB record names grouped by role (roles inferred from the constant names;
# the code that consumes them is outside this view).
PDB_COORD_RECORDS = ["MODEL", "ANISOU", "HETATM", "ATOM", "TER", "ENDMDL"]
PDB_SERIAL_RECORDS = ["ANISOU", "HETATM", "ATOM", "TER"]
# Residue names listed as water molecules.
PDB_WATERS = ["SOL", "HOH", "WAT", "T3P"]
def check_input_path(path, out_log, classname):
    """Validate an input file path and return it anchored to the current directory.

    Args:
        path: input file path to validate.
        out_log: log handle forwarded to ``fu.log``.
        classname: name prepended to every log / error message.

    Returns:
        ``path`` unchanged when it is absolute, otherwise ``path`` joined to
        the current working directory (as a string).

    Raises:
        SystemExit: if the file does not exist, or if its extension is
            accepted by neither ``is_valid_pdb`` nor ``is_valid_pdbqt``.
    """
    if not Path(path).exists():
        fu.log(classname + ': Unexisting input file, exiting', out_log)
        raise SystemExit(classname + ': Unexisting input file')

    # Extension without the leading dot.
    extension = PurePath(path).suffix[1:]
    if not (is_valid_pdb(extension) or is_valid_pdbqt(extension)):
        message = classname + ': Format %s in input file is not compatible' % extension
        fu.log(message, out_log)
        raise SystemExit(message)

    # A bare file name or any relative path is prefixed with the cwd, because
    # the execution is launched from a temporary folder.
    pure = PurePath(path)
    if pure.name == path or not pure.is_absolute():
        path = str(PurePath(Path.cwd()).joinpath(path))
    return path
def check_output_path(path, out_log, classname):
    """Validate an output file path and return it unchanged.

    Args:
        path: output file path to validate.
        out_log: log handle forwarded to ``fu.log``.
        classname: name prepended to every log / error message.

    Returns:
        The ``path`` argument, untouched.

    Raises:
        SystemExit: if the parent folder does not exist, or if the extension
            is accepted by neither ``is_valid_pdb`` nor ``is_valid_pdbqt``.
    """
    parent_dir = PurePath(path).parent
    if parent_dir and not Path(parent_dir).exists():
        fu.log(classname + ': Unexisting output folder, exiting', out_log)
        raise SystemExit(classname + ': Unexisting output folder')

    # Extension without the leading dot.
    extension = PurePath(path).suffix[1:]
    if not (is_valid_pdb(extension) or is_valid_pdbqt(extension)):
        message = classname + ': Format %s in output file is not compatible' % extension
        fu.log(message, out_log)
        raise SystemExit(message)
    return path
def check_output_path_pdbqt(path, out_log, classname):
    """Validate an output file path whose extension must pass ``is_valid_pdbqt``.

    Args:
        path: output file path to validate.
        out_log: log handle forwarded to ``fu.log``.
        classname: name prepended to every log / error message.

    Returns:
        The ``path`` argument, untouched.

    Raises:
        SystemExit: if the parent folder does not exist, or if the extension
            is rejected by ``is_valid_pdbqt``.
    """
    parent_dir = PurePath(path).parent
    if parent_dir and not Path(parent_dir).exists():
        fu.log(classname + ': Unexisting output folder, exiting', out_log)
        raise SystemExit(classname + ': Unexisting output folder')

    # Extension without the leading dot.
    extension = PurePath(path).suffix[1:]
    if is_valid_pdbqt(extension):
        return path

    message = classname + ': Format %s in output file is not compatible' % extension
    fu.log(message, out_log)
    raise SystemExit(message)
def check_output_path_json(path, out_log, classname):
    """Validate an output file path whose extension must pass ``is_valid_json``.

    Args:
        path: output file path to validate.
        out_log: log handle forwarded to ``fu.log``.
        classname: name prepended to every log / error message.

    Returns:
        The ``path`` argument, untouched.

    Raises:
        SystemExit: if the parent folder does not exist, or if the extension
            is rejected by ``is_valid_json``.
    """
    parent_dir = PurePath(path).parent
    if parent_dir and not Path(parent_dir).exists():
        fu.log(classname + ': Unexisting output folder, exiting', out_log)
        raise SystemExit(classname + ': Unexisting output folder')

    # Extension without the leading dot.
    extension = PurePath(path).suffix[1:]
    if is_valid_json(extension):
        return path

    message = classname + ': Format %s in output file is not compatible' % extension
    fu.log(message, out_log)
    raise SystemExit(message)
def is_valid_pdb(ext):
    """Return True when ``ext`` (extension without the dot) is exactly 'pdb'.

    The comparison is case-sensitive.
    """
    accepted = ('pdb',)
    return ext in accepted
def is_valid_pdbqt(ext):
    """Return True when ``ext`` (extension without the dot) is 'pdb' or 'pdbqt'.

    The comparison is case-sensitive.
    """
    accepted = ('pdb', 'pdbqt')
    return ext in accepted
def is_valid_json(ext):
    """Return True when ``ext`` (extension without the dot) is exactly 'json'.

    The comparison is case-sensitive.
    """
    accepted = ('json',)
    return ext in accepted
def check_output_end(structure, out_log):
    """Rewrite ``structure`` in place without the lines that consist only of 'END'.

    Every line whose stripped content equals 'END' is dropped (wherever it
    appears in the file) and one log message is emitted per dropped line.
    All remaining lines are written back, each terminated by a newline.

    Args:
        structure: path of the file to clean.
        out_log: log handle forwarded to ``fu.log``.
    """
    with open(structure, 'r') as handle:
        rows = handle.read().splitlines()

    kept = []
    for row in rows:
        # Only an exact 'END' record is removed; 'ENDMDL' and the like stay.
        if row.strip() == 'END':
            fu.log('%s file ends with END, cleaning' % structure, out_log)
            continue
        kept.append(row)

    with open(structure, 'w') as handle:
        for row in kept:
            handle.write("%s\n" % row)
def create_output_file(type, input, residues, output, out_log):
    """Copy to ``output`` the atom lines of ``input`` that belong to ``residues``.

    MODEL / ENDMDL records are re-emitted around the selected lines whenever
    the input contains MODEL records.

    Args:
        type: index selecting which records are inspected: 0 for both ATOM
            and HETATM, 1 for HETATM only, 2 for ATOM only.
        input: path of the PDB file to read.
        residues: collection of dicts with the keys 'res_id', 'name', 'chain'
            and 'model'; all values are compared against strings.
        output: path of the file to write.
        out_log: log handle forwarded to ``fu.log``.

    Raises:
        IndexError: if ``type`` is not a valid index for the three selectors.
        ValueError: if a MODEL record does not carry an integer serial.
    """
    # ``type`` and ``input`` shadow builtins but are part of the public
    # signature, so they are kept as they are.
    record_prefixes = (("ATOM", "HETATM"), ("HETATM",), ("ATOM",))[type]

    new_file_lines = []
    curr_model = None  # stays None until the first MODEL record is seen
    with open(input) as infile:
        for line in infile:
            if line.startswith("MODEL "):
                # Parse the whole serial, not just the last character of the
                # line, so that models >= 10 are identified correctly.
                curr_model = int(line[5:])
                if curr_model > 1:
                    new_file_lines.append('ENDMDL\n')
                # NOTE(review): the wwPDB format puts the model serial in
                # columns 11-14; the literal below is kept exactly as found
                # in this view of the file -- confirm the spacing.
                new_file_lines.append('MODEL ' + "{:>4}".format(curr_model) + '\n')

            if not line.startswith(record_prefixes):
                continue

            # Fixed-column fields of an ATOM / HETATM record.
            name = line[17:20].strip()
            chain = line[21:22].strip() or " "
            res_id = line[22:27].strip()
            # Files without MODEL records are treated as model "1".
            model = "1" if curr_model is None else str(curr_model)

            # Append the line at most once, even if ``residues`` holds
            # duplicated entries for the same residue.
            if any(nstr['res_id'] == res_id
                   and nstr['name'] == name
                   and nstr['chain'] == chain
                   and nstr['model'] == model
                   for nstr in residues):
                new_file_lines.append(line)

    if curr_model is not None and curr_model > 0:
        new_file_lines.append('ENDMDL\n')

    fu.log("Writting pdb to: %s" % (output), out_log)

    # save new file with the selected lines
    with open(output, 'w') as outfile:
        outfile.writelines(new_file_lines)
def create_biopython_residue(residue):
    """Describe ``residue`` as a dict of model, chain, name and residue id.

    ``residue`` must expose ``get_parent()``, ``get_resname()`` and
    ``get_id()``; its parent and grandparent must expose ``get_id()``.

    Returns:
        A dict with the keys 'model' (grandparent id plus one, as a string),
        'chain' (parent id), 'name' (residue name) and 'res_id' (second
        element of the residue id, as a string).
    """
    model_number = residue.get_parent().get_parent().get_id() + 1
    chain_id = residue.get_parent().get_id()
    residue_name = residue.get_resname()
    sequence_number = residue.get_id()[1]
    return {
        'model': str(model_number),
        'chain': chain_id,
        'name': residue_name,
        'res_id': str(sequence_number),
    }
def create_residues_list(residues, out_log):
    """Normalise a list of residue selectors into a list of dicts.

    Each mapping entry is copied and gains a 'code' key listing which of
    'name', 'res_id', 'chain' and 'model' (in that order) it defines. Any
    other entry is treated as a residue id and becomes
    ``{'res_id': str(entry), 'code': ['res_id']}``.

    The entries of ``residues`` are never modified: the returned dicts are
    new objects, so the caller's data does not silently gain a 'code' key.

    Args:
        residues: iterable of mappings and/or plain residue ids.
        out_log: unused; kept for signature compatibility.

    Returns:
        The list of normalised dicts, or None when ``residues`` is empty
        or None.
    """
    if not residues:
        return None

    known_keys = ('name', 'res_id', 'chain', 'model')
    list_residues = []
    for residue in residues:
        if isinstance(residue, Mapping):
            code = [key for key in known_keys if key in residue]
            # Copy instead of writing 'code' into the caller's mapping.
            entry = {**residue, 'code': code}
        else:
            entry = {'res_id': str(residue), 'code': ['res_id']}
        list_residues.append(entry)

    return list_residues
def check_format_heteroatoms(hets, out_log):
    """Normalise a list of heteroatom selectors into a list of dicts.

    Each entry is copied and gains a 'code' key listing which of 'name',
    'res_id', 'chain' and 'model' (in that order) it defines.

    The entries of ``hets`` are never modified: the returned dicts are new
    objects, so the caller's data does not silently gain a 'code' key.

    Args:
        hets: iterable of mappings describing heteroatoms.
        out_log: unused; kept for signature compatibility.

    Returns:
        The list of normalised dicts, or 0 when ``hets`` is empty or None.
    """
    if not hets:
        return 0

    known_keys = ('name', 'res_id', 'chain', 'model')
    listh = []
    for het in hets:
        code = [key for key in known_keys if key in het]
        # Copy instead of writing 'code' into the caller's mapping.
        listh.append({**het, 'code': code})

    return listh