Coverage for biobb_structure_utils/utils/common.py: 80%
144 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-28 11:54 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-28 11:54 +0000
1"""Common functions and constants for package biobb_structure_utils.utils"""
3from collections.abc import Mapping
4from pathlib import Path, PurePath
5from typing import Optional, Union
7from biobb_common.tools import file_utils as fu
# Record-name tables for PDB files.
# NOTE(review): no function in this part of the file reads these lists, so
# their exact use should be confirmed against the callers.
PDB_COORD_RECORDS = [
    "MODEL",
    "ANISOU",
    "HETATM",
    "ATOM",
    "TER",
    "ENDMDL",
]
PDB_SERIAL_RECORDS = [
    "ANISOU",
    "HETATM",
    "ATOM",
    "TER",
]
# Presumably the residue names treated as water molecules -- TODO confirm.
PDB_WATERS = [
    "SOL",
    "HOH",
    "WAT",
    "T3P",
]
def check_input_path(path, out_log, classname):
    """Validate an input structure path and return it.

    Args:
        path: Path of the input file; it must exist and carry a ``pdb`` or
            ``pdbqt`` extension.
        out_log: Logger handed to ``fu.log``.
        classname: Name prepended to every log and exit message.

    Returns:
        ``path`` unchanged when it is absolute, otherwise the path joined to
        the current working directory, as a string.

    Raises:
        SystemExit: If the file does not exist or its extension is rejected.
    """
    if not Path(path).exists():
        fu.log(classname + ": Unexisting input file, exiting", out_log)
        raise SystemExit(classname + ": Unexisting input file")

    extension = PurePath(path).suffix[1:]
    if not (is_valid_pdb(extension) or is_valid_pdbqt(extension)):
        message = classname + ": Format %s in input file is not compatible" % extension
        fu.log(message, out_log)
        raise SystemExit(message)

    # if file input has no path, add cwd because execution is launched on tmp folder
    pure_path = PurePath(path)
    if pure_path.name == path or not pure_path.is_absolute():
        return str(PurePath(Path.cwd()).joinpath(path))
    return path
def check_output_path(path, out_log, classname):
    """Validate an output structure path and return it unchanged.

    Args:
        path: Path of the output file; its parent folder must exist and its
            extension must be ``pdb`` or ``pdbqt``.
        out_log: Logger handed to ``fu.log``.
        classname: Name prepended to every log and exit message.

    Returns:
        The ``path`` argument as received.

    Raises:
        SystemExit: If the parent folder is missing or the extension is
            rejected.
    """
    parent_dir = PurePath(path).parent
    if parent_dir and not Path(parent_dir).exists():
        fu.log(classname + ": Unexisting output folder, exiting", out_log)
        raise SystemExit(classname + ": Unexisting output folder")

    extension = PurePath(path).suffix[1:]
    if not (is_valid_pdb(extension) or is_valid_pdbqt(extension)):
        message = classname + ": Format %s in output file is not compatible" % extension
        fu.log(message, out_log)
        raise SystemExit(message)

    return path
def check_output_path_pdbqt(path, out_log, classname):
    """Validate an output path against the PDB/PDBQT extensions and return it.

    Args:
        path: Path of the output file; its parent folder must exist and its
            extension must be accepted by ``is_valid_pdbqt``.
        out_log: Logger handed to ``fu.log``.
        classname: Name prepended to every log and exit message.

    Returns:
        The ``path`` argument as received.

    Raises:
        SystemExit: If the parent folder is missing or the extension is
            rejected.
    """
    parent_dir = PurePath(path).parent
    if parent_dir and not Path(parent_dir).exists():
        fu.log(classname + ": Unexisting output folder, exiting", out_log)
        raise SystemExit(classname + ": Unexisting output folder")

    extension = PurePath(path).suffix[1:]
    if is_valid_pdbqt(extension):
        return path

    message = classname + ": Format %s in output file is not compatible" % extension
    fu.log(message, out_log)
    raise SystemExit(message)
def check_output_path_json(path, out_log, classname):
    """Validate a JSON output path and return it unchanged.

    Args:
        path: Path of the output file; its parent folder must exist and its
            extension must be accepted by ``is_valid_json``.
        out_log: Logger handed to ``fu.log``.
        classname: Name prepended to every log and exit message.

    Returns:
        The ``path`` argument as received.

    Raises:
        SystemExit: If the parent folder is missing or the extension is
            rejected.
    """
    parent_dir = PurePath(path).parent
    if parent_dir and not Path(parent_dir).exists():
        fu.log(classname + ": Unexisting output folder, exiting", out_log)
        raise SystemExit(classname + ": Unexisting output folder")

    extension = PurePath(path).suffix[1:]
    if is_valid_json(extension):
        return path

    message = classname + ": Format %s in output file is not compatible" % extension
    fu.log(message, out_log)
    raise SystemExit(message)
def is_valid_pdb(ext):
    """Return True when *ext* (an extension without its dot) is ``pdb``.

    The comparison is case-sensitive.
    """
    return ext in ("pdb",)
def is_valid_pdbqt(ext):
    """Return True when *ext* (an extension without its dot) is ``pdb`` or ``pdbqt``.

    The comparison is case-sensitive.
    """
    return ext in ("pdb", "pdbqt")
def is_valid_json(ext):
    """Return True when *ext* (an extension without its dot) is ``json``.

    The comparison is case-sensitive.
    """
    return ext in ("json",)
def check_output_end(structure, out_log):
    """Rewrite the file at *structure* without its bare ``END`` lines.

    Every line whose stripped content is exactly ``END`` is dropped, and one
    message is logged per dropped line. The remaining lines are written back
    to the same path, each terminated by a newline.

    Args:
        structure: Path of the file to clean in place.
        out_log: Logger handed to ``fu.log``.
    """
    with open(structure, "r") as handle:
        original_lines = handle.read().splitlines()

    kept_lines = []
    for entry in original_lines:
        if entry.strip() == "END":
            fu.log("%s file ends with END, cleaning" % structure, out_log)
            continue
        kept_lines.append(entry)

    with open(structure, "w") as handle:
        handle.writelines("%s\n" % entry for entry in kept_lines)
def create_output_file(type, input, residues, output, out_log):
    """Write the atom records of the selected residues to a new PDB file.

    The input file is read line by line. MODEL records are re-emitted (with an
    ENDMDL closing the previous model), and an atom record is kept when its
    residue name, chain, residue id and current model all match one entry of
    *residues*.

    Args:
        type: Index choosing which records are considered: 0 for ATOM and
            HETATM, 1 for HETATM only, 2 for ATOM only.
        input: Path of the PDB file to read.
        residues: Iterable of dicts holding string values under the keys
            "res_id", "name", "chain" and "model".
        output: Path of the PDB file to write.
        out_log: Logger handed to ``fu.log``.

    Raises:
        ValueError: If a MODEL record carries no integer serial number.
    """
    # Resolved once instead of rebuilding the lookup table for every line.
    record_prefixes = (("ATOM", "HETATM"), ("HETATM",), ("ATOM",))[type]

    new_file_lines = []
    curr_model = None  # serial of the MODEL being read; None until one is seen
    with open(input) as infile:
        for line in infile:
            if line.startswith("MODEL "):
                # Read the whole serial field: keeping only the last character
                # would turn model 10 into 0, 11 into 1, and so on.
                serial_field = line[5:].split()
                if not serial_field:
                    raise ValueError("MODEL record without serial number: %r" % line)
                curr_model = int(serial_field[0])
                if curr_model > 1:
                    # close the previous model before opening the next one
                    new_file_lines.append("ENDMDL\n")
                # PDB layout: record name in columns 1-6, serial right-justified
                # in columns 11-14.
                new_file_lines.append("MODEL     {:>4}\n".format(curr_model))
            elif line.startswith(record_prefixes):
                name = line[17:20].strip()
                # a blank chain column is matched as a single space
                chain = line[21:22].strip() or " "
                res_id = line[22:27].strip()
                # files without MODEL records are treated as model "1"
                model = "1" if curr_model is None else str(curr_model)

                # Keep the record once, however many selection entries match it.
                if any(
                    nstr["res_id"] == res_id
                    and nstr["name"] == name
                    and nstr["chain"] == chain
                    and nstr["model"] == model
                    for nstr in residues
                ):
                    new_file_lines.append(line)

    if curr_model is not None and curr_model > 0:
        new_file_lines.append("ENDMDL\n")

    fu.log("Writting pdb to: %s" % (output), out_log)

    # save new file with the selected records
    with open(output, "w") as outfile:
        outfile.writelines(new_file_lines)
def create_biopython_residue(residue):
    """Describe *residue* as a dict of model, chain, name and residue id.

    Args:
        residue: Object exposing ``get_parent()``, ``get_resname()`` and
            ``get_id()``; presumably a Biopython residue whose parent is a
            chain and whose grandparent is a model -- TODO confirm.

    Returns:
        A dict with the keys "model" (grandparent id plus one, as a string),
        "chain" (parent id), "name" (residue name) and "res_id" (second item
        of the residue id, as a string).
    """
    chain = residue.get_parent()
    model = chain.get_parent()
    return dict(
        model=str(model.get_id() + 1),
        chain=chain.get_id(),
        name=residue.get_resname(),
        res_id=str(residue.get_id()[1]),
    )
def create_residues_list(residues, out_log):
    """Normalize a residue selection into a list of annotated dicts.

    Each mapping entry is copied and gains a "code" key listing which of
    "name", "res_id", "chain" and "model" it defines, in that order. Any other
    entry is taken as a bare residue id and becomes
    ``{"res_id": str(entry), "code": ["res_id"]}``.

    The caller's mappings are never modified: the annotation is written to a
    copy, so the input can be reused and may be a read-only mapping.

    Args:
        residues: Iterable of mappings and/or bare residue ids; may be empty
            or None.
        out_log: Unused; kept for interface compatibility.

    Returns:
        The list of annotated dicts, or None when *residues* is empty or None.
    """
    if not residues:
        return None

    known_keys = ("name", "res_id", "chain", "model")
    list_residues = []
    for residue in residues:
        if isinstance(residue, Mapping):
            # copy so that adding "code" does not leak into the caller's data
            entry = dict(residue)
            code = [key for key in known_keys if key in residue]
        else:
            entry = {"res_id": str(residue)}
            code = ["res_id"]
        entry["code"] = code
        list_residues.append(entry)

    return list_residues
def check_format_heteroatoms(hets, out_log):
    """Normalize a heteroatom selection into a list of annotated dicts.

    Each entry is copied and gains a "code" key listing which of "name",
    "res_id", "chain" and "model" it defines, in that order. The caller's
    mappings are never modified.

    Args:
        hets: Iterable of mappings describing heteroatoms; may be empty or
            None.
        out_log: Unused; kept for interface compatibility.

    Returns:
        The list of annotated dicts, or 0 when *hets* is empty or None.

    Raises:
        TypeError: If an entry is not a mapping.
    """
    if not hets:
        return 0

    known_keys = ("name", "res_id", "chain", "model")
    listh = []
    for het in hets:
        if not isinstance(het, Mapping):
            # Strings, numbers and sequences cannot carry the "code" key;
            # fail with the same exception type, but a readable message.
            raise TypeError(
                "heteroatom entries must be mappings, got %s" % type(het).__name__
            )
        # copy so that adding "code" does not leak into the caller's data
        entry = dict(het)
        entry["code"] = [key for key in known_keys if key in het]
        listh.append(entry)

    return listh
229# TODO: Move this function to biobb_common.tools.file_utils
def _from_string_to_list(input_data: Optional[Union[str, list[str]]]) -> list[str]:
    """
    Turn a delimited string into a list of its non-empty, stripped items.

    The string is split on commas when it contains at least one comma and on
    single spaces otherwise. A list is handed back untouched (the same
    object), and None yields an empty list.

    Parameters:
        input_data (str, list, or None): The value to convert.

    Returns:
        list: The resulting items, or an empty list if input_data is None.
    """
    if input_data is None:
        return []
    if isinstance(input_data, list):
        # already a list: nothing to split
        return input_data

    # commas take precedence over spaces as the separator
    separator = " " if "," not in input_data else ","

    tokens = []
    for raw in input_data.split(separator):
        cleaned = raw.strip()
        if cleaned:
            tokens.append(cleaned)
    return tokens