Coverage for biobb_vs/utils/common.py: 81%
132 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-20 11:08 +0000
« prev ^ index » next coverage.py v7.9.1, created at 2025-06-20 11:08 +0000
1"""Common functions for package biobb_vs.utils"""
3import warnings
4from pathlib import Path, PurePath
5from typing import Optional, Union
7from Bio import BiopythonDeprecationWarning
8from biobb_common.tools import file_utils as fu
10with warnings.catch_warnings():
11 warnings.simplefilter("ignore", BiopythonDeprecationWarning)
12 import Bio.pairwise2
13 import Bio.PDB
15 try:
16 import Bio.SubsMat.MatrixInfo # type: ignore
17 except ImportError:
18 import Bio.Align.substitution_matrices
19 # from Bio.Data.SCOPData import protein_letters_3to1 as prot_one_letter
20 from Bio.Data.PDBData import protein_letters_3to1 as prot_one_letter
23# CHECK PARAMETERS
def check_input_path(path, argument, out_log, classname):
    """Validate an input file path and return it unchanged.

    Args:
        path: Location of the input file.
        argument: Name of the argument the path was supplied for; it selects
            the accepted extensions in ``is_valid_file``.
        out_log: Logger handed to ``fu.log`` for the error message.
        classname: Name of the caller, used as prefix of every message.

    Returns:
        The received ``path``.

    Raises:
        SystemExit: If the path is empty, the file does not exist, or its
            extension is not accepted for ``argument``.
    """
    # Path("") resolves to the current directory and Path(None) raises
    # TypeError, so a missing path is rejected explicitly here.
    if not path or not Path(path).exists():
        message = classname + ": Unexisting %s file" % argument
        fu.log(message + ", exiting", out_log)
        raise SystemExit(message)

    # Extension without the leading dot, as expected by is_valid_file.
    extension = PurePath(path).suffix[1:]
    if not is_valid_file(extension, argument):
        message = classname + ": Format %s in %s file is not compatible" % (
            extension,
            argument,
        )
        fu.log(message, out_log)
        raise SystemExit(message)
    return path
def check_output_path(path, argument, optional, out_log, classname):
    """Validate an output file path and return it unchanged.

    Args:
        path: Location where the output file will be written.
        argument: Name of the argument the path was supplied for; it selects
            the accepted extensions in ``is_valid_file``.
        optional: When true, an empty ``path`` is accepted and ``None`` is
            returned.
        out_log: Logger handed to ``fu.log`` for the error message.
        classname: Name of the caller, used as prefix of every message.

    Returns:
        The received ``path``, or ``None`` for an omitted optional output.

    Raises:
        SystemExit: If the parent folder does not exist or the extension is
            not accepted for ``argument``.
    """
    if optional and not path:
        return None

    target = PurePath(path)
    # A PurePath is always truthy, so only the existence check is meaningful.
    if not Path(target.parent).exists():
        message = classname + ": Unexisting %s folder" % argument
        fu.log(message + ", exiting", out_log)
        raise SystemExit(message)

    # Extension without the leading dot, as expected by is_valid_file.
    extension = target.suffix[1:]
    if not is_valid_file(extension, argument):
        message = classname + ": Format %s in %s file is not compatible" % (
            extension,
            argument,
        )
        fu.log(message, out_log)
        raise SystemExit(message)
    return path
def is_valid_file(ext, argument):
    """Tell whether extension ``ext`` is accepted for ``argument``.

    Args:
        ext: File extension without the leading dot (e.g. ``"pdb"``).
        argument: Name of the path argument being validated.

    Returns:
        True when ``ext`` is listed for ``argument``; False otherwise,
        including when ``argument`` is not a known argument name.
    """
    formats = {
        "input_pdb_path": ["pdb", "pqr"],
        "input_clusters_zip": ["zip"],
        "resid_pdb_path": ["pdb"],
        "input_pdbqt_path": ["pdbqt"],
        "output_pdb_path": ["pdb"],
        "output_pdbqt_path": ["pdbqt"],
    }
    # Unknown argument names are reported as "not valid" instead of raising
    # a bare KeyError.
    return ext in formats.get(argument, ())
79# UTILS FUNCTIONS
def get_residue_by_id(structure, res_num):
    """Return the first residue whose id number equals ``res_num``.

    Args:
        structure: Object exposing ``get_residues()``.
        res_num: Value compared against the second element of each
            residue's ``get_id()`` tuple (presumably the residue sequence
            number of Bio.PDB -- confirm against callers).

    Returns:
        The first matching residue, or ``None`` when there is no match.
    """
    return next(
        (
            residue
            for residue in structure.get_residues()
            if residue.get_id()[1] == res_num
        ),
        None,
    )
def get_pdb_sequence(structure):
    """Retrieve the amino-acid sequence from a PDB structure.

    Args:
        structure: Object exposing ``get_residues()``.

    Returns:
        A list of ``(residue number, one-letter code)`` tuples, one per
        residue accepted by ``Bio.PDB.Polypeptide.is_aa``. Residue names
        missing from ``prot_one_letter`` are reported as ``"X"``.
    """
    return [
        (residue.id[1], prot_one_letter.get(residue.resname, "X"))
        for residue in structure.get_residues()
        if Bio.PDB.Polypeptide.is_aa(residue)
    ]
def get_sequence_nucs(structure):
    """Return the items obtained by iterating ``structure`` as a new list.

    Args:
        structure: Any iterable.

    Returns:
        A new list holding the iterated items in order.
    """
    return list(structure)
def align_sequences(
    seqA, seqB, matrix_name="BLOSUM62", gap_open=-10.0, gap_extend=-0.5
):
    """Globally align two sequences and map their residue numbers.

    Performs a global pairwise alignment between two sequences using the
    Needleman-Wunsch algorithm as implemented in Biopython.

    Args:
        seqA: Sequence of ``(residue id, one-letter code)`` pairs.
        seqB: Sequence of ``(residue id, one-letter code)`` pairs.
        matrix_name: Name of the substitution matrix to load.
        gap_open: Gap opening score passed to the aligner.
        gap_extend: Gap extension score passed to the aligner.

    Returns:
        ``((aligned_A, aligned_B), mapping)`` where the aligned strings come
        from the first alignment reported and ``mapping`` associates each
        residue id of ``seqA`` with the residue id of ``seqB`` found in the
        same gap-free alignment column.
    """
    # Collapse the (id, letter) pairs into plain sequence strings.
    sequence_A = "".join(item[1] for item in seqA)
    sequence_B = "".join(item[1] for item in seqB)

    matrix = Bio.Align.substitution_matrices.load(matrix_name)

    # End gaps are not penalized for either sequence.
    alns = Bio.pairwise2.align.globalds(
        sequence_A,
        sequence_B,
        matrix,
        gap_open,
        gap_extend,
        penalize_end_gaps=(False, False),
    )

    # Only the aligned strings of the first alignment are needed.
    best_aln = alns[0]
    aligned_A, aligned_B = best_aln[0], best_aln[1]

    # Equivalent residue numbering, relative to the reference (seqA).
    mapping = {}
    aa_i_A, aa_i_B = 0, 0
    for aa_aln_A, aa_aln_B in zip(aligned_A, aligned_B):
        gap_in_A = aa_aln_A == "-"
        gap_in_B = aa_aln_B == "-"
        if not gap_in_A and not gap_in_B:
            # Internal consistency checks: the cursors must point at the
            # residues that produced this alignment column.
            assert seqA[aa_i_A][1] == aa_aln_A
            assert seqB[aa_i_B][1] == aa_aln_B
            mapping[seqA[aa_i_A][0]] = seqB[aa_i_B][0]
        # Each cursor advances only when its sequence contributed a residue.
        if not gap_in_A:
            aa_i_A += 1
        if not gap_in_B:
            aa_i_B += 1

    return ((aligned_A, aligned_B), mapping)
def calculate_alignment_identity(alignedA, alignedB):
    """Return the percentage of identical characters between two sequences.

    Args:
        alignedA: First aligned sequence (gaps written as ``"-"``).
        alignedB: Second aligned sequence, expected to have the same length.

    Returns:
        ``(seq_id, gap_id)``: the percentage of identical columns relative
        to the length of ``alignedA``, and relative to the number of columns
        without a gap in either sequence. A percentage whose denominator is
        zero (empty or all-gap alignment) is reported as ``0.0``.
    """
    columns = list(zip(alignedA, alignedB))
    identical = sum(1 for char_a, char_b in columns if char_a == char_b)
    gapless_sl = sum(
        1 for char_a, char_b in columns if char_a != "-" and char_b != "-"
    )
    total = len(alignedA)

    # Guard both divisions so degenerate alignments do not raise.
    seq_id = (100 * identical) / total if total else 0.0
    gap_id = (100 * identical) / gapless_sl if gapless_sl else 0.0
    return (seq_id, gap_id)
def get_ligand_residues(
    PDBchain,
    ignore_wats=True,
    ignore_small_molec=True,
    ignore_ions=True,
    ignore_modres=True,
):
    """
    Returns heteroatoms residues.
    Args:
        PDBchain (Bio.PDB.PDBParser chain object): PDB selection of the chain to be scanned
        ignore_wats (boolean): If True, water residues will be skipped, and not returned as ligand residues
        ignore_small_molec (boolean): Accepted for compatibility; the small-molecule filter is currently disabled, so this flag has no effect
        ignore_ions (boolean): If True, ion residues will be skipped, and not returned as ligand residues
        ignore_modres (boolean): If True, modified amino acid residues will be skipped, and not returned as ligand residues
    Returns:
        list: residues of the chain kept as ligands, in iteration order
    """
    # Build the lookup tables once instead of once per residue; an empty
    # tuple disables the corresponding filter.
    ion_names = __ions() if ignore_ions else ()
    modres_names = __modres() if ignore_modres else ()

    ligands = []
    for res in PDBchain.get_residues():
        # First character of the residue id inside the full id.
        res_hetflag = res.get_full_id()[3][0]

        # skip aminoacids
        if res_hetflag == " ":
            continue

        # waters are either kept or dropped as a whole, never filtered further
        if res_hetflag == "W":
            if not ignore_wats:
                ligands.append(res)
            continue

        res_name = res.get_resname().strip()
        # skip ions and modified residues when requested
        if res_name in ion_names or res_name in modres_names:
            continue

        # add as ligand
        ligands.append(res)

    return ligands
def get_box_coordinates(box_center, box_size, pdb_format=True):
    """Compute the eight corners of an axis-aligned box.

    Args:
        box_center: Indexable with the x, y and z coordinates of the center.
        box_size: Indexable with the offset applied on each side of the
            center along x, y and z (corners lie at ``center +/- size``).
        pdb_format: When true, return the corners as PDB ``HETATM`` records;
            otherwise return the raw coordinates.

    Returns:
        With ``pdb_format`` true, a string with one ``HETATM`` line per
        corner (atoms ``ZN1``..``ZN8``, serials starting at 10000, residue
        ``ZN`` 9999 of chain ``Z``). Otherwise a list of eight ``[x, y, z]``
        lists, ordered with x varying slowest and z fastest, low value first.
    """
    # (low, high) bounds per axis.
    bounds = [
        (box_center[axis] - box_size[axis], box_center[axis] + box_size[axis])
        for axis in range(3)
    ]
    coords = [
        [x, y, z] for x in bounds[0] for y in bounds[1] for z in bounds[2]
    ]

    if not pdb_format:
        return coords

    at_num = 10000
    at_nam = "ZN"
    re_nam = "ZN"
    chain = "Z"
    res_num = 9999
    occ = 1
    bfact = 50
    elem = "ZN"
    # Fixed-column PDB record: coordinates start at column 31 and the
    # element symbol sits in columns 77-78, hence the literal space runs.
    record = (
        "HETATM%5d %-4s %3s %s%4d    %8.3f%8.3f%8.3f%6.2f%6.2f          %2s\n"
    )
    return "".join(
        record
        % (
            at_num + i,
            at_nam + str(i + 1),
            re_nam,
            chain,
            res_num,
            coord[0],
            coord[1],
            coord[2],
            occ,
            bfact,
            elem,
        )
        for i, coord in enumerate(coords)
    )
def __ions():
    """Return the residue names that are treated as ions.

    Returns:
        A new set of residue-name strings on every call.
    """
    return {
        "UNX",  # UNKNOWN ATOM OR ION
        "LI",  # LITHIUM ION
        "OH",  # HYDROXIDE ION
        "NH4",  # AMMONIUM ION
        "F",  # FLUORIDE ION
        "ND4",  # AMMONIUM CATION WITH D
        "NA",  # SODIUM ION
        "MG",  # MAGNESIUM ION
        "CYN",  # CYANIDE ION
        "AL",  # ALUMINUM ION
        "2FK",  # SUPEROXO ION
        "PER",  # PEROXIDE ION
        "3P8",  # methylammonium ion
        "CL",  # CHLORIDE ION
        "K",  # POTASSIUM ION
        "CA",  # CALCIUM ION
        "AZI",  # AZIDE ION
        "NO2",  # NITRITE ION
        "4TI",  # TITANIUM ION
        "V",  # VANADIUM ION
        "CR",  # CHROMIUM ION
        "MN",  # MANGANESE (II) ION
        "MN3",  # MANGANESE (III) ION
        "FE",  # FE (III) ION
        "FE2",  # FE (II) ION
        "SCN",  # THIOCYANATE ION
        "3NI",  # NICKEL (III) ION
        "NI",  # NICKEL (II) ION
        "3CO",  # COBALT (III) ION
        "CO",  # COBALT (II) ION
        "ACT",  # ACETATE ION
        "CO3",  # CARBONATE ION
        "BCT",  # BICARBONATE ION
        "NO3",  # NITRATE ION
        "CU",  # COPPER (II) ION
        "CU1",  # COPPER (I) ION
        "CU3",  # COPPER (III) ION
        "ZN",  # ZINC ION
        "BEF",  # BERYLLIUM TRIFLUORIDE ION
        "GA",  # GALLIUM (III) ION
        "MH2",  # MANGANESE ION
        "TMA",  # TETRAMETHYLAMMONIUM ION
        "BO4",  # BORATE ION
        "PO3",  # PHOSPHITE ION
        "BR",  # BROMIDE ION
        "SO3",  # SULFITE ION
        "LCO",  # CHLORATE ION
        "BF4",  # BERYLLIUM TETRAFLUORIDE ION
        "RB",  # RUBIDIUM ION
        "SR",  # STRONTIUM ION
        "OXL",  # OXALATE ION
        "Y1",  # YTTRIUM ION
        "YT3",  # YTTRIUM (III) ION
        "ZR",  # ZIRCONIUM ION
        "PO4",  # PHOSPHATE ION
        "4MO",  # MOLYBDENUM(IV) ION
        "6MO",  # MOLYBDENUM(VI) ION
        "PI",  # HYDROGENPHOSPHATE ION
        "SO4",  # SULFATE ION
        "2HP",  # DIHYDROGENPHOSPHATE ION
        "DMI",  # 2 (description truncated in the source)
        "FPO",  # FLUORO-PHOSPHITE ION
        "VN3",  # VANADATE ION
        "LCP",  # PERCHLORATE ION
        "3MT",  # 3-METHYLTHIAZOLIUM ION
        "HAI",  # CYCLOHEXYLAMMONIUM ION
        "RU",  # RUTHENIUM ION
        "MLI",  # MALONATE ION
        "TEA",  # TRIETHYLAMMONIUM ION
        "RH3",  # RHODIUM(III) ION
        "ALF",  # TETRAFLUOROALUMINATE ION
        "CHT",  # CHOLINE ION
        "SEK",  # SELENOCYANATE ION
        "PD",  # PALLADIUM ION
        "AG",  # SILVER ION
        "CD",  # CADMIUM ION
        "DTI",  # 3 (description truncated in the source)
        "IN",  # INDIUM (III) ION
        "VO4",  # VANADATE ION
        "SB",  # ANTIMONY (III) ION
        "IOD",  # IODIDE ION
        "CON",  # COBALT TETRAAMMINE ION
        "CUA",  # DINUCLEAR COPPER ION
        "BSY",  # BISELENITE ION
        "NET",  # TETRAETHYLAMMONIUM ION
        "OAA",  # OXALOACETATE ION
        "CS",  # CESIUM ION
        "THE",  # THREONATE ION
        "CAC",  # CACODYLATE ION
        "BA",  # BARIUM ION
        "LA",  # LANTHANUM (III) ION
        "CE",  # CERIUM (III) ION
        "PR",  # PRASEODYMIUM ION
        "SE4",  # SELENATE ION
        "MOW",  # Oxo(sulfanyl)molybdenum(IV) ION
        "SM",  # SAMARIUM (III) ION
        "EU",  # EUROPIUM ION
        "EU3",  # EUROPIUM (III) ION
        "GD3",  # GADOLINIUM ION
        "TB",  # TERBIUM(III) ION
        "MOO",  # MOLYBDATE ION
        "SMO",  # DIOXOSULFIDOMOLYBDENUM(VI) ION
        "MOS",  # DIOXOTHIOMOLYBDENUM(VI) ION
        "DY",  # DYSPROSIUM ION
        "TCN",  # TETRACYANONICKELATE ION
        "EDR",  # EDROPHONIUM ION
        "ER3",  # ERBIUM (III) ION
        "TRA",  # ACONITATE ION
        "YB",  # YTTERBIUM (III) ION
        "YB2",  # YTTERBIUM (II) ION
        "LU",  # LUTETIUM (III) ION
        "1AL",  # ALLANTOATE ION
        "W",  # TUNGSTEN ION
        "ATH",  # 4-HYDROXY-ACONITATE ION
        "FLC",  # CITRATE ANION
        "OS",  # OSMIUM ION
        "OS4",  # OSMIUM 4+ ION
        "T1A",  # TETRAETHYLARSONIUM ION
        "IR",  # IRIDIUM ION
        "IR3",  # IRIDIUM (III) ION
        "PT",  # PLATINUM (II) ION
        "PT4",  # PLATINUM (IV) ION
        "AU",  # GOLD ION
        "AU3",  # GOLD 3+ ION
        "HG",  # MERCURY (II) ION
        "NRU",  # RUTHENIUM (III) HEXAAMINE ION
        "TL",  # THALLIUM (I) ION
        "RHD",  # RHODIUM HEXAMINE ION
        "EUD",  # EUDESMANE CATION
        "PB",  # LEAD (II) ION
        "BS3",  # Bismuth(III) ION
        "PDV",  # Divanadate ion
        "MMC",  # METHYL MERCURY ION
        "EMC",  # ETHYL MERCURY ION
        "TH",  # THORIUM ION
        "TBA",  # TETRABUTYLAMMONIUM ION
        "AM",  # AMERICIUM ION
        "4PU",  # PLUTONIUM ION
        "PTN",  # PLATINUM TRIAMINE ION
        "ZCM",  # CURIUM ION
        "AUC",  # GOLD (I) CYANIDE ION
        "DSC",  # DODECANESULFONATE ION
        "CF",  # CALIFORNIUM ION
        "PBM",  # TRIMETHYL LEAD ION
        "DME",  # DECAMETHONIUM ION
        "MAC",  # MERCURY ACETATE ION
        "WO5",  # TUNGSTATE(VI) ION
        "IUM",  # URANYL (VI) ION
        "CUZ",  # (MU-4-SULFIDO)-TETRA-NUCLEAR COPPER ION
        "I3M",  # Tri-iodode Anion
    }
467def __modres():
468 return {
469 "0CS": "ALA", # 0CS ALA 3-[(S)-HYDROPEROXYSULFINYL]-L-ALANINE
470 "1AB": "PRO", # 1AB PRO 1,4-DIDEOXY-1,4-IMINO-D-ARABINITOL
471 "1LU": "LEU", # 1LU LEU 4-METHYL-PENTANOIC ACID-2-OXYL GROUP
472 "1PA": "PHE", # 1PA PHE PHENYLMETHYLACETIC ACID ALANINE
473 "1TQ": "TRP", # 1TQ TRP 6-(FORMYLAMINO)-7-HYDROXY-L-TRYPTOPHAN
474 "1TY": "TYR", # 1TY TYR
475 "23F": "PHE", # 23F PHE (2Z)-2-AMINO-3-PHENYLACRYLIC ACID
476 "23S": "TRP", # 23S TRP MODIFIED TRYPTOPHAN
477 "2BU": "ALA", # 2BU ADE
478 "2ML": "LEU", # 2ML LEU 2-METHYLLEUCINE
479 "2MR": "ARG", # 2MR ARG N3, N4-DIMETHYLARGININE
480 "2MT": "PRO", # 2MT PRO
481 "2OP": "ALA", # 2OP (2S 2-HYDROXYPROPANAL
482 "2TY": "TYR", # 2TY TYR
483 "32S": "TRP", # 32S TRP MODIFIED TRYPTOPHAN
484 "32T": "TRP", # 32T TRP MODIFIED TRYPTOPHAN
485 "3AH": "HIS", # 3AH HIS
486 "3MD": "ASP", # 3MD ASP 2S,3S-3-METHYLASPARTIC ACID
487 "3TY": "TYR", # 3TY TYR MODIFIED TYROSINE
488 "4DP": "TRP", # 4DP TRP
489 "4F3": "ALA", # 4F3 ALA CYCLIZED
490 "4FB": "PRO", # 4FB PRO (4S)-4-FLUORO-L-PROLINE
491 "4FW": "TRP", # 4FW TRP 4-FLUOROTRYPTOPHANE
492 "4HT": "TRP", # 4HT TRP 4-HYDROXYTRYPTOPHAN
493 "4IN": "TRP", # 4IN TRP 4-AMINO-L-TRYPTOPHAN
494 "4PH": "PHE", # 4PH PHE 4-METHYL-L-PHENYLALANINE
495 "5CS": "CYS", # 5CS CYS
496 "6CL": "LYS", # 6CL LYS 6-CARBOXYLYSINE
497 "6CW": "TRP", # 6CW TRP 6-CHLORO-L-TRYPTOPHAN
498 "A0A": "ASP", # A0A ASP ASPARTYL-FORMYL MIXED ANHYDRIDE
499 "AA4": "ALA", # AA4 ALA 2-AMINO-5-HYDROXYPENTANOIC ACID
500 "AAR": "ARG", # AAR ARG ARGININEAMIDE
501 "AB7": "GLU", # AB7 GLU ALPHA-AMINOBUTYRIC ACID
502 "ABA": "ALA", # ABA ALA ALPHA-AMINOBUTYRIC ACID
503 "ACB": "ASP", # ACB ASP 3-METHYL-ASPARTIC ACID
504 "ACL": "ARG", # ACL ARG DEOXY-CHLOROMETHYL-ARGININE
505 "ACY": "GLY", # ACY GLY POST-TRANSLATIONAL MODIFICATION
506 "AEI": "THR", # AEI THR ACYLATED THR
507 "AFA": "ASN", # AFA ASN N-[7-METHYL-OCT-2,4-DIENOYL]ASPARAGINE
508 "AGM": "ARG", # AGM ARG 4-METHYL-ARGININE
509 "AGT": "CYS", # AGT CYS AGMATINE-CYSTEINE ADDUCT
510 "AHB": "ASN", # AHB ASN BETA-HYDROXYASPARAGINE
511 "AHO": "ALA", # AHO ALA N-ACETYL-N-HYDROXY-L-ORNITHINE
512 "AHP": "ALA", # AHP ALA 2-AMINO-HEPTANOIC ACID
513 "AIB": "ALA", # AIB ALA ALPHA-AMINOISOBUTYRIC ACID
514 "AKL": "ASP", # AKL ASP 3-AMINO-5-CHLORO-4-OXOPENTANOIC ACID
515 "ALA": "ALA", # ALA ALA
516 "ALC": "ALA", # ALC ALA 2-AMINO-3-CYCLOHEXYL-PROPIONIC ACID
517 "ALG": "ARG", # ALG ARG GUANIDINOBUTYRYL GROUP
518 "ALM": "ALA", # ALM ALA 1-METHYL-ALANINAL
519 "ALN": "ALA", # ALN ALA NAPHTHALEN-2-YL-3-ALANINE
520 "ALO": "THR", # ALO THR ALLO-THREONINE
521 "ALS": "ALA", # ALS ALA 2-AMINO-3-OXO-4-SULFO-BUTYRIC ACID
522 "ALT": "ALA", # ALT ALA THIOALANINE
523 "ALY": "LYS", # ALY LYS N(6)-ACETYLLYSINE
524 "AME": "MET", # AME MET ACETYLATED METHIONINE
525 "AP7": "ALA", # AP7 ADE
526 "APH": "ALA", # APH ALA P-AMIDINOPHENYL-3-ALANINE
527 "API": "LYS", # API LYS 2,6-DIAMINOPIMELIC ACID
528 "APK": "LYS", # APK LYS
529 "AR2": "ARG", # AR2 ARG ARGINYL-BENZOTHIAZOLE-6-CARBOXYLIC ACID
530 "AR4": "GLU", # AR4 GLU
531 "ARG": "ARG", # ARG ARG
532 "ARM": "ARG", # ARM ARG DEOXY-METHYL-ARGININE
533 "ARO": "ARG", # ARO ARG C-GAMMA-HYDROXY ARGININE
534 "ASA": "ASP", # ASA ASP ASPARTIC ALDEHYDE
535 "ASB": "ASP", # ASB ASP ASPARTIC ACID-4-CARBOXYETHYL ESTER
536 "ASI": "ASP", # ASI ASP L-ISO-ASPARTATE
537 "ASK": "ASP", # ASK ASP DEHYDROXYMETHYLASPARTIC ACID
538 "ASL": "ASP", # ASL ASP ASPARTIC ACID-4-CARBOXYETHYL ESTER
539 "ASN": "ASN", # ASN ASN
540 "ASP": "ASP", # ASP ASP
541 "AYA": "ALA", # AYA ALA N-ACETYLALANINE
542 "AYG": "ALA", # AYG ALA
543 "AZK": "LYS", # AZK LYS (2S)-2-AMINO-6-TRIAZANYLHEXAN-1-OL
544 "B2A": "ALA", # B2A ALA ALANINE BORONIC ACID
545 "B2F": "PHE", # B2F PHE PHENYLALANINE BORONIC ACID
546 "B2I": "ILE", # B2I ILE ISOLEUCINE BORONIC ACID
547 "B2V": "VAL", # B2V VAL VALINE BORONIC ACID
548 "B3A": "ALA", # B3A ALA (3S)-3-AMINOBUTANOIC ACID
549 "B3D": "ASP", # B3D ASP 3-AMINOPENTANEDIOIC ACID
550 "B3E": "GLU", # B3E GLU (3S)-3-AMINOHEXANEDIOIC ACID
551 "B3K": "LYS", # B3K LYS (3S)-3,7-DIAMINOHEPTANOIC ACID
552 "B3S": "SER", # B3S SER (3R)-3-AMINO-4-HYDROXYBUTANOIC ACID
553 "B3X": "ASN", # B3X ASN (3S)-3,5-DIAMINO-5-OXOPENTANOIC ACID
554 "B3Y": "TYR", # B3Y TYR
555 "BAL": "ALA", # BAL ALA BETA-ALANINE
556 "BBC": "CYS", # BBC CYS
557 "BCS": "CYS", # BCS CYS BENZYLCYSTEINE
558 "BCX": "CYS", # BCX CYS BETA-3-CYSTEINE
559 "BFD": "ASP", # BFD ASP ASPARTATE BERYLLIUM FLUORIDE
560 "BG1": "SER", # BG1 SER
561 "BHD": "ASP", # BHD ASP BETA-HYDROXYASPARTIC ACID
562 "BIF": "PHE", # BIF PHE
563 "BLE": "LEU", # BLE LEU LEUCINE BORONIC ACID
564 "BLY": "LYS", # BLY LYS LYSINE BORONIC ACID
565 "BMT": "THR", # BMT THR
566 "BNN": "ALA", # BNN ALA ACETYL-P-AMIDINOPHENYLALANINE
567 "BOR": "ARG", # BOR ARG
568 "BPE": "CYS", # BPE CYS
569 "BTR": "TRP", # BTR TRP 6-BROMO-TRYPTOPHAN
570 "BUC": "CYS", # BUC CYS S,S-BUTYLTHIOCYSTEINE
571 "BUG": "LEU", # BUG LEU TERT-LEUCYL AMINE
572 "C12": "ALA", # C12 ALA
573 "C1X": "LYS", # C1X LYS MODIFIED LYSINE
574 "C3Y": "CYS", # C3Y CYS MODIFIED CYSTEINE
575 "C5C": "CYS", # C5C CYS S-CYCLOPENTYL THIOCYSTEINE
576 "C6C": "CYS", # C6C CYS S-CYCLOHEXYL THIOCYSTEINE
577 "C99": "ALA", # C99 ALA
578 "CAB": "ALA", # CAB ALA 4-CARBOXY-4-AMINOBUTANAL
579 "CAF": "CYS", # CAF CYS S-DIMETHYLARSINOYL-CYSTEINE
580 "CAS": "CYS", # CAS CYS S-(DIMETHYLARSENIC)CYSTEINE
581 "CCS": "CYS", # CCS CYS CARBOXYMETHYLATED CYSTEINE
582 "CGU": "GLU", # CGU GLU CARBOXYLATION OF THE CG ATOM
583 "CH6": "ALA", # CH6 ALA
584 "CH7": "ALA", # CH7 ALA
585 "CHG": "GLY", # CHG GLY CYCLOHEXYL GLYCINE
586 "CHP": "GLY", # CHP GLY 3-CHLORO-4-HYDROXYPHENYLGLYCINE
587 "CHS": "PHE", # CHS PHE 4-AMINO-5-CYCLOHEXYL-3-HYDROXY-PENTANOIC AC
588 "CIR": "ARG", # CIR ARG CITRULLINE
589 "CLB": "ALA", # CLB ALA
590 "CLD": "ALA", # CLD ALA
591 "CLE": "LEU", # CLE LEU LEUCINE AMIDE
592 "CLG": "LYS", # CLG LYS
593 "CLH": "LYS", # CLH LYS
594 "CLV": "ALA", # CLV ALA
595 "CME": "CYS", # CME CYS MODIFIED CYSTEINE
596 "CML": "CYS", # CML CYS
597 "CMT": "CYS", # CMT CYS O-METHYLCYSTEINE
598 "CQR": "ALA", # CQR ALA
599 "CR2": "ALA", # CR2 ALA POST-TRANSLATIONAL MODIFICATION
600 "CR5": "ALA", # CR5 ALA
601 "CR7": "ALA", # CR7 ALA
602 "CR8": "ALA", # CR8 ALA
603 "CRK": "ALA", # CRK ALA
604 "CRO": "THR", # CRO THR CYCLIZED
605 "CRQ": "TYR", # CRQ TYR
606 "CRW": "ALA", # CRW ALA
607 "CRX": "ALA", # CRX ALA
608 "CS1": "CYS", # CS1 CYS S-(2-ANILINYL-SULFANYL)-CYSTEINE
609 "CS3": "CYS", # CS3 CYS
610 "CS4": "CYS", # CS4 CYS
611 "CSA": "CYS", # CSA CYS S-ACETONYLCYSTEIN
612 "CSB": "CYS", # CSB CYS CYS BOUND TO LEAD ION
613 "CSD": "CYS", # CSD CYS 3-SULFINOALANINE
614 "CSE": "CYS", # CSE CYS SELENOCYSTEINE
615 "CSI": "ALA", # CSI ALA
616 "CSO": "CYS", # CSO CYS INE S-HYDROXYCYSTEINE
617 "CSR": "CYS", # CSR CYS S-ARSONOCYSTEINE
618 "CSS": "CYS", # CSS CYS 1,3-THIAZOLE-4-CARBOXYLIC ACID
619 "CSU": "CYS", # CSU CYS CYSTEINE-S-SULFONIC ACID
620 "CSW": "CYS", # CSW CYS CYSTEINE-S-DIOXIDE
621 "CSX": "CYS", # CSX CYS OXOCYSTEINE
622 "CSY": "ALA", # CSY ALA MODIFIED TYROSINE COMPLEX
623 "CSZ": "CYS", # CSZ CYS S-SELANYL CYSTEINE
624 "CTH": "THR", # CTH THR 4-CHLOROTHREONINE
625 "CWR": "ALA", # CWR ALA
626 "CXM": "MET", # CXM MET N-CARBOXYMETHIONINE
627 "CY0": "CYS", # CY0 CYS MODIFIED CYSTEINE
628 "CY1": "CYS", # CY1 CYS ACETAMIDOMETHYLCYSTEINE
629 "CY3": "CYS", # CY3 CYS 2-AMINO-3-MERCAPTO-PROPIONAMIDE
630 "CY4": "CYS", # CY4 CYS S-BUTYRYL-CYSTEIN
631 "CY7": "CYS", # CY7 CYS MODIFIED CYSTEINE
632 "CYD": "CYS", # CYD CYS
633 "CYF": "CYS", # CYF CYS FLUORESCEIN LABELLED CYS380 (P14)
634 "CYG": "CYS", # CYG CYS
635 "CYJ": "LYS", # CYJ LYS MODIFIED LYSINE
636 "CYQ": "CYS", # CYQ CYS
637 "CYR": "CYS", # CYR CYS
638 "CYS": "CYS", # CYS CYS
639 "CZ2": "CYS", # CZ2 CYS S-(DIHYDROXYARSINO)CYSTEINE
640 "CZZ": "CYS", # CZZ CYS THIARSAHYDROXY-CYSTEINE
641 "DA2": "ARG", # DA2 ARG MODIFIED ARGININE
642 "DAB": "ALA", # DAB ALA 2,4-DIAMINOBUTYRIC ACID
643 "DAH": "PHE", # DAH PHE 3,4-DIHYDROXYDAHNYLALANINE
644 "DAL": "ALA", # DAL ALA D-ALANINE
645 "DAM": "ALA", # DAM ALA N-METHYL-ALPHA-BETA-DEHYDROALANINE
646 "DAR": "ARG", # DAR ARG D-ARGININE
647 "DAS": "ASP", # DAS ASP D-ASPARTIC ACID
648 "DBU": "ALA", # DBU ALA (2E)-2-AMINOBUT-2-ENOIC ACID
649 "DBY": "TYR", # DBY TYR 3,5 DIBROMOTYROSINE
650 "DBZ": "ALA", # DBZ ALA 3-(BENZOYLAMINO)-L-ALANINE
651 "DCL": "LEU", # DCL LEU 2-AMINO-4-METHYL-PENTANYL GROUP
652 "DCY": "CYS", # DCY CYS D-CYSTEINE
653 "DDE": "HIS", # DDE HIS
654 "DGL": "GLU", # DGL GLU D-GLU
655 "DGN": "GLN", # DGN GLN D-GLUTAMINE
656 "DHA": "ALA", # DHA ALA 2-AMINO-ACRYLIC ACID
657 "DHI": "HIS", # DHI HIS D-HISTIDINE
658 "DHL": "SER", # DHL SER POST-TRANSLATIONAL MODIFICATION
659 "DIL": "ILE", # DIL ILE D-ISOLEUCINE
660 "DIV": "VAL", # DIV VAL D-ISOVALINE
661 "DLE": "LEU", # DLE LEU D-LEUCINE
662 "DLS": "LYS", # DLS LYS DI-ACETYL-LYSINE
663 "DLY": "LYS", # DLY LYS D-LYSINE
664 "DMH": "ASN", # DMH ASN N4,N4-DIMETHYL-ASPARAGINE
665 "DMK": "ASP", # DMK ASP DIMETHYL ASPARTIC ACID
666 "DNE": "LEU", # DNE LEU D-NORLEUCINE
667 "DNG": "LEU", # DNG LEU N-FORMYL-D-NORLEUCINE
668 "DNL": "LYS", # DNL LYS 6-AMINO-HEXANAL
669 "DNM": "LEU", # DNM LEU D-N-METHYL NORLEUCINE
670 "DPH": "PHE", # DPH PHE DEAMINO-METHYL-PHENYLALANINE
671 "DPL": "PRO", # DPL PRO 4-OXOPROLINE
672 "DPN": "PHE", # DPN PHE D-CONFIGURATION
673 "DPP": "ALA", # DPP ALA DIAMMINOPROPANOIC ACID
674 "DPQ": "TYR", # DPQ TYR TYROSINE DERIVATIVE
675 "DPR": "PRO", # DPR PRO D-PROLINE
676 "DSE": "SER", # DSE SER D-SERINE N-METHYLATED
677 "DSG": "ASN", # DSG ASN D-ASPARAGINE
678 "DSN": "SER", # DSN SER D-SERINE
679 "DTH": "THR", # DTH THR D-THREONINE
680 "DTR": "TRP", # DTR TRP D-TRYPTOPHAN
681 "DTY": "TYR", # DTY TYR D-TYROSINE
682 "DVA": "VAL", # DVA VAL D-VALINE
683 "DYG": "ALA", # DYG ALA
684 "DYS": "CYS", # DYS CYS
685 "EFC": "CYS", # EFC CYS S,S-(2-FLUOROETHYL)THIOCYSTEINE
686 "ESB": "TYR", # ESB TYR
687 "ESC": "MET", # ESC MET 2-AMINO-4-ETHYL SULFANYL BUTYRIC ACID
688 "FCL": "PHE", # FCL PHE 3-CHLORO-L-PHENYLALANINE
689 "FGL": "ALA", # FGL ALA 2-AMINOPROPANEDIOIC ACID
690 "FGP": "SER", # FGP SER
691 "FHL": "LYS", # FHL LYS MODIFIED LYSINE
692 "FLE": "LEU", # FLE LEU FUROYL-LEUCINE
693 "FLT": "TYR", # FLT TYR FLUOROMALONYL TYROSINE
694 "FME": "MET", # FME MET FORMYL-METHIONINE
695 "FOE": "CYS", # FOE CYS
696 "FOG": "PHE", # FOG PHE PHENYLALANINOYL-[1-HYDROXY]-2-PROPYLENE
697 "FOR": "MET", # FOR MET
698 "FRF": "PHE", # FRF PHE PHE FOLLOWED BY REDUCED PHE
699 "FTR": "TRP", # FTR TRP FLUOROTRYPTOPHANE
700 "FTY": "TYR", # FTY TYR DEOXY-DIFLUOROMETHELENE-PHOSPHOTYROSINE
701 "GHG": "GLN", # GHG GLN GAMMA-HYDROXY-GLUTAMINE
702 "GHP": "GLY", # GHP GLY 4-HYDROXYPHENYLGLYCINE
703 "GL3": "GLY", # GL3 GLY POST-TRANSLATIONAL MODIFICATION
704 "GLH": "GLN", # GLH GLN
705 "GLN": "GLN", # GLN GLN
706 "GLU": "GLU", # GLU GLU
707 "GLY": "GLY", # GLY GLY
708 "GLZ": "GLY", # GLZ GLY AMINO-ACETALDEHYDE
709 "GMA": "GLU", # GMA GLU 1-AMIDO-GLUTAMIC ACID
710 "GMU": "ALA", # GMU 5MU
711 "GPL": "LYS", # GPL LYS LYSINE GUANOSINE-5'-MONOPHOSPHATE
712 "GT9": "CYS", # GT9 CYS SG ALKYLATED
713 "GVL": "SER", # GVL SER SERINE MODIFED WITH PHOSPHOPANTETHEINE
714 "GYC": "CYS", # GYC CYS
715 "GYS": "GLY", # GYS GLY
716 "H5M": "PRO", # H5M PRO TRANS-3-HYDROXY-5-METHYLPROLINE
717 "HHK": "ALA", # HHK ALA (2S)-2,8-DIAMINOOCTANOIC ACID
718 "HIA": "HIS", # HIA HIS L-HISTIDINE AMIDE
719 "HIC": "HIS", # HIC HIS 4-METHYL-HISTIDINE
720 "HIP": "HIS", # HIP HIS ND1-PHOSPHONOHISTIDINE
721 "HIQ": "HIS", # HIQ HIS MODIFIED HISTIDINE
722 "HIS": "HIS", # HIS HIS
723 "HLU": "LEU", # HLU LEU BETA-HYDROXYLEUCINE
724 "HMF": "ALA", # HMF ALA 2-AMINO-4-PHENYL-BUTYRIC ACID
725 "HMR": "ARG", # HMR ARG BETA-HOMOARGININE
726 "HPE": "PHE", # HPE PHE HOMOPHENYLALANINE
727 "HPH": "PHE", # HPH PHE PHENYLALANINOL GROUP
728 "HPQ": "PHE", # HPQ PHE HOMOPHENYLALANINYLMETHANE
729 "HRG": "ARG", # HRG ARG L-HOMOARGININE
730 "HSE": "SER", # HSE SER L-HOMOSERINE
731 "HSL": "SER", # HSL SER HOMOSERINE LACTONE
732 "HSO": "HIS", # HSO HIS HISTIDINOL
733 "HTI": "CYS", # HTI CYS
734 "HTR": "TRP", # HTR TRP BETA-HYDROXYTRYPTOPHANE
735 "HY3": "PRO", # HY3 PRO 3-HYDROXYPROLINE
736 "HYP": "PRO", # HYP PRO 4-HYDROXYPROLINE
737 "IAM": "ALA", # IAM ALA 4-[(ISOPROPYLAMINO)METHYL]PHENYLALANINE
738 "IAS": "ASP", # IAS ASP ASPARTYL GROUP
739 "IGL": "ALA", # IGL ALA ALPHA-AMINO-2-INDANACETIC ACID
740 "IIL": "ILE", # IIL ILE ISO-ISOLEUCINE
741 "ILE": "ILE", # ILE ILE
742 "ILG": "GLU", # ILG GLU GLU LINKED TO NEXT RESIDUE VIA CG
743 "ILX": "ILE", # ILX ILE 4,5-DIHYDROXYISOLEUCINE
744 "IML": "ILE", # IML ILE N-METHYLATED
745 "IPG": "GLY", # IPG GLY N-ISOPROPYL GLYCINE
746 "IT1": "LYS", # IT1 LYS
747 "IYR": "TYR", # IYR TYR 3-IODO-TYROSINE
748 "KCX": "LYS", # KCX LYS CARBAMOYLATED LYSINE
749 "KGC": "LYS", # KGC LYS
750 "KOR": "CYS", # KOR CYS MODIFIED CYSTEINE
751 "KST": "LYS", # KST LYS N~6~-(5-CARBOXY-3-THIENYL)-L-LYSINE
752 "KYN": "ALA", # KYN ALA KYNURENINE
753 "LA2": "LYS", # LA2 LYS
754 "LAL": "ALA", # LAL ALA N,N-DIMETHYL-L-ALANINE
755 "LCK": "LYS", # LCK LYS
756 "LCX": "LYS", # LCX LYS CARBAMYLATED LYSINE
757 "LDH": "LYS", # LDH LYS N~6~-ETHYL-L-LYSINE
758 "LED": "LEU", # LED LEU POST-TRANSLATIONAL MODIFICATION
759 "LEF": "LEU", # LEF LEU 2-5-FLUOROLEUCINE
760 "LET": "LYS", # LET LYS ODIFIED LYSINE
761 "LEU": "LEU", # LEU LEU
762 "LLP": "LYS", # LLP LYS
763 "LLY": "LYS", # LLY LYS NZ-(DICARBOXYMETHYL)LYSINE
764 "LME": "GLU", # LME GLU (3R)-3-METHYL-L-GLUTAMIC ACID
765 "LNT": "LEU", # LNT LEU
766 "LPD": "PRO", # LPD PRO L-PROLINAMIDE
767 "LSO": "LYS", # LSO LYS MODIFIED LYSINE
768 "LYM": "LYS", # LYM LYS DEOXY-METHYL-LYSINE
769 "LYN": "LYS", # LYN LYS 2,6-DIAMINO-HEXANOIC ACID AMIDE
770 "LYP": "LYS", # LYP LYS N~6~-METHYL-N~6~-PROPYL-L-LYSINE
771 "LYR": "LYS", # LYR LYS MODIFIED LYSINE
772 "LYS": "LYS", # LYS LYS
773 "LYX": "LYS", # LYX LYS N''-(2-COENZYME A)-PROPANOYL-LYSINE
774 "LYZ": "LYS", # LYZ LYS 5-HYDROXYLYSINE
775 "M0H": "CYS", # M0H CYS S-(HYDROXYMETHYL)-L-CYSTEINE
776 "M2L": "LYS", # M2L LYS
777 "M3L": "LYS", # M3L LYS N-TRIMETHYLLYSINE
778 "MAA": "ALA", # MAA ALA N-METHYLALANINE
779 "MAI": "ARG", # MAI ARG DEOXO-METHYLARGININE
780 "MBQ": "TYR", # MBQ TYR
781 "MC1": "SER", # MC1 SER METHICILLIN ACYL-SERINE
782 "MCL": "LYS", # MCL LYS NZ-(1-CARBOXYETHYL)-LYSINE
783 "MCS": "CYS", # MCS CYS MALONYLCYSTEINE
784 "MDO": "ALA", # MDO ALA
785 "MEA": "PHE", # MEA PHE N-METHYLPHENYLALANINE
786 "MEG": "GLU", # MEG GLU (2S,3R)-3-METHYL-GLUTAMIC ACID
787 "MEN": "ASN", # MEN ASN GAMMA METHYL ASPARAGINE
788 "MET": "MET", # MET MET
789 "MEU": "GLY", # MEU GLY O-METHYL-GLYCINE
790 "MFC": "ALA", # MFC ALA CYCLIZED
791 "MGG": "ARG", # MGG ARG MODIFIED D-ARGININE
792 "MGN": "GLN", # MGN GLN 2-METHYL-GLUTAMINE
793 "MHL": "LEU", # MHL LEU N-METHYLATED, HYDROXY
794 "MHO": "MET", # MHO MET POST-TRANSLATIONAL MODIFICATION
795 "MHS": "HIS", # MHS HIS 1-N-METHYLHISTIDINE
796 "MIS": "SER", # MIS SER MODIFIED SERINE
797 "MLE": "LEU", # MLE LEU N-METHYLATED
798 "MLL": "LEU", # MLL LEU METHYL L-LEUCINATE
799 "MLY": "LYS", # MLY LYS METHYLATED LYSINE
800 "MLZ": "LYS", # MLZ LYS N-METHYL-LYSINE
801 "MME": "MET", # MME MET N-METHYL METHIONINE
802 "MNL": "LEU", # MNL LEU 4,N-DIMETHYLNORLEUCINE
803 "MNV": "VAL", # MNV VAL N-METHYL-C-AMINO VALINE
804 "MPQ": "GLY", # MPQ GLY N-METHYL-ALPHA-PHENYL-GLYCINE
805 "MSA": "GLY", # MSA GLY (2-S-METHYL) SARCOSINE
806 "MSE": "MET", # MSE MET ELENOMETHIONINE
807 "MSO": "MET", # MSO MET METHIONINE SULFOXIDE
808 "MTY": "PHE", # MTY PHE 3-HYDROXYPHENYLALANINE
809 "MVA": "VAL", # MVA VAL N-METHYLATED
810 "N10": "SER", # N10 SER O-[(HEXYLAMINO)CARBONYL]-L-SERINE
811 "NAL": "ALA", # NAL ALA BETA-(2-NAPHTHYL)-ALANINE
812 "NAM": "ALA", # NAM ALA NAM NAPTHYLAMINOALANINE
813 "NBQ": "TYR", # NBQ TYR
814 "NC1": "SER", # NC1 SER NITROCEFIN ACYL-SERINE
815 "NCB": "ALA", # NCB ALA CHEMICAL MODIFICATION
816 "NEP": "HIS", # NEP HIS N1-PHOSPHONOHISTIDINE
817 "NFA": "PHE", # NFA PHE MODIFIED PHENYLALANINE
818 "NIY": "TYR", # NIY TYR META-NITRO-TYROSINE
819 "NLE": "LEU", # NLE LEU NORLEUCINE
820 "NLN": "LEU", # NLN LEU NORLEUCINE AMIDE
821 "NLO": "LEU", # NLO LEU O-METHYL-L-NORLEUCINE
822 "NMC": "GLY", # NMC GLY N-CYCLOPROPYLMETHYL GLYCINE
823 "NMM": "ARG", # NMM ARG MODIFIED ARGININE
824 "NPH": "CYS", # NPH CYS
825 "NRQ": "ALA", # NRQ ALA
826 "NVA": "VAL", # NVA VAL NORVALINE
827 "NYC": "ALA", # NYC ALA
828 "NYS": "CYS", # NYS CYS
829 "NZH": "HIS", # NZH HIS
830 "OAS": "SER", # OAS SER O-ACETYLSERINE
831 "OBS": "LYS", # OBS LYS MODIFIED LYSINE
832 "OCS": "CYS", # OCS CYS CYSTEINE SULFONIC ACID
833 "OCY": "CYS", # OCY CYS HYDROXYETHYLCYSTEINE
834 "OHI": "HIS", # OHI HIS 3-(2-OXO-2H-IMIDAZOL-4-YL)-L-ALANINE
835 "OHS": "ASP", # OHS ASP O-(CARBOXYSULFANYL)-4-OXO-L-HOMOSERINE
836 "OLT": "THR", # OLT THR O-METHYL-L-THREONINE
837 "OMT": "MET", # OMT MET METHIONINE SULFONE
838 "OPR": "ARG", # OPR ARG C-(3-OXOPROPYL)ARGININE
839 "ORN": "ALA", # ORN ALA ORNITHINE
840 "ORQ": "ARG", # ORQ ARG N~5~-ACETYL-L-ORNITHINE
841 "OSE": "SER", # OSE SER O-SULFO-L-SERINE
842 "OTY": "TYR", # OTY TYR
843 "OXX": "ASP", # OXX ASP OXALYL-ASPARTYL ANHYDRIDE
844 "P1L": "CYS", # P1L CYS S-PALMITOYL CYSTEINE
845 "P2Y": "PRO", # P2Y PRO (2S)-PYRROLIDIN-2-YLMETHYLAMINE
846 "PAQ": "TYR", # PAQ TYR SEE REMARK 999
847 "PAT": "TRP", # PAT TRP ALPHA-PHOSPHONO-TRYPTOPHAN
848 "PBB": "CYS", # PBB CYS S-(4-BROMOBENZYL)CYSTEINE
849 "PBF": "PHE", # PBF PHE PARA-(BENZOYL)-PHENYLALANINE
850 "PCA": "PRO", # PCA PRO 5-OXOPROLINE
851 "PCS": "PHE", # PCS PHE PHENYLALANYLMETHYLCHLORIDE
852 "PEC": "CYS", # PEC CYS S,S-PENTYLTHIOCYSTEINE
853 "PF5": "PHE", # PF5 PHE 2,3,4,5,6-PENTAFLUORO-L-PHENYLALANINE
854 "PFF": "PHE", # PFF PHE 4-FLUORO-L-PHENYLALANINE
855 "PG1": "SER", # PG1 SER BENZYLPENICILLOYL-ACYLATED SERINE
856 "PG9": "GLY", # PG9 GLY D-PHENYLGLYCINE
857 "PHA": "PHE", # PHA PHE PHENYLALANINAL
858 "PHD": "ASP", # PHD ASP 2-AMINO-4-OXO-4-PHOSPHONOOXY-BUTYRIC ACID
859 "PHE": "PHE", # PHE PHE
860 "PHI": "PHE", # PHI PHE IODO-PHENYLALANINE
861 "PHL": "PHE", # PHL PHE L-PHENYLALANINOL
862 "PHM": "PHE", # PHM PHE PHENYLALANYLMETHANE
863 "PIA": "ALA", # PIA ALA FUSION OF ALA 65, TYR 66, GLY 67
864 "PLE": "LEU", # PLE LEU LEUCINE PHOSPHINIC ACID
865 "PM3": "PHE", # PM3 PHE
866 "POM": "PRO", # POM PRO CIS-5-METHYL-4-OXOPROLINE
867 "PPH": "LEU", # PPH LEU PHENYLALANINE PHOSPHINIC ACID
868 "PPN": "PHE", # PPN PHE THE LIGAND IS A PARA-NITRO-PHENYLALANINE
869 "PR3": "CYS", # PR3 CYS INE DTT-CYSTEINE
870 "PRO": "PRO", # PRO PRO
871 "PRQ": "PHE", # PRQ PHE PHENYLALANINE
872 "PRR": "ALA", # PRR ALA 3-(METHYL-PYRIDINIUM)ALANINE
873 "PRS": "PRO", # PRS PRO THIOPROLINE
874 "PSA": "PHE", # PSA PHE
875 "PSH": "HIS", # PSH HIS 1-THIOPHOSPHONO-L-HISTIDINE
876 "PTH": "TYR", # PTH TYR METHYLENE-HYDROXY-PHOSPHOTYROSINE
877 "PTM": "TYR", # PTM TYR ALPHA-METHYL-O-PHOSPHOTYROSINE
878 "PTR": "TYR", # PTR TYR O-PHOSPHOTYROSINE
879 "PYA": "ALA", # PYA ALA 3-(1,10-PHENANTHROL-2-YL)-L-ALANINE
880 "PYC": "ALA", # PYC ALA PYRROLE-2-CARBOXYLATE
881 "PYR": "SER", # PYR SER CHEMICALLY MODIFIED
882 "PYT": "ALA", # PYT ALA MODIFIED ALANINE
883 "PYX": "CYS", # PYX CYS S-[S-THIOPYRIDOXAMINYL]CYSTEINE
884 "R1A": "CYS", # R1A CYS
885 "R1B": "CYS", # R1B CYS
886 "R1F": "CYS", # R1F CYS
887 "R7A": "CYS", # R7A CYS
888 "RC7": "ALA", # RC7 ALA
889 "RCY": "CYS", # RCY CYS
890 "S1H": "SER", # S1H SER 1-HEXADECANOSULFONYL-O-L-SERINE
891 "SAC": "SER", # SAC SER N-ACETYL-SERINE
892 "SAH": "CYS", # SAH CYS S-ADENOSYL-L-HOMOCYSTEINE
893 "SAR": "GLY", # SAR GLY SARCOSINE
894 "SBD": "SER", # SBD SER
895 "SBG": "SER", # SBG SER MODIFIED SERINE
896 "SBL": "SER", # SBL SER
897 "SC2": "CYS", # SC2 CYS N-ACETYL-L-CYSTEINE
898 "SCH": "CYS", # SCH CYS S-METHYL THIOCYSTEINE GROUP
899 "SCS": "CYS", # SCS CYS MODIFIED CYSTEINE
900 "SCY": "CYS", # SCY CYS CETYLATED CYSTEINE
901 "SDP": "SER", # SDP SER
902 "SEB": "SER", # SEB SER O-BENZYLSULFONYL-SERINE
903 "SEC": "ALA", # SEC ALA 2-AMINO-3-SELENINO-PROPIONIC ACID
904 "SEL": "SER", # SEL SER 2-AMINO-1,3-PROPANEDIOL
905 "SEP": "SER", # SEP SER E PHOSPHOSERINE
906 "SER": "SER", # SER SER
907 "SET": "SER", # SET SER AMINOSERINE
908 "SGB": "SER", # SGB SER MODIFIED SERINE
909 "SGR": "SER", # SGR SER MODIFIED SERINE
910 "SHC": "CYS", # SHC CYS S-HEXYLCYSTEINE
911 "SHP": "GLY", # SHP GLY (4-HYDROXYMALTOSEPHENYL)GLYCINE
912 "SIC": "ALA", # SIC ALA
913 "SLZ": "LYS", # SLZ LYS L-THIALYSINE
914 "SMC": "CYS", # SMC CYS POST-TRANSLATIONAL MODIFICATION
915 "SME": "MET", # SME MET METHIONINE SULFOXIDE
916 "SMF": "PHE", # SMF PHE 4-SULFOMETHYL-L-PHENYLALANINE
917 "SNC": "CYS", # SNC CYS S-NITROSO CYSTEINE
918 "SNN": "ASP", # SNN ASP POST-TRANSLATIONAL MODIFICATION
919 "SOC": "CYS", # SOC CYS DIOXYSELENOCYSTEINE
920 "SOY": "SER", # SOY SER OXACILLOYL-ACYLATED SERINE
921 "SUI": "ALA", # SUI ALA
922 "SUN": "SER", # SUN SER TABUN CONJUGATED SERINE
923 "SVA": "SER", # SVA SER SERINE VANADATE
924 "SVV": "SER", # SVV SER MODIFIED SERINE
925 "SVX": "SER", # SVX SER MODIFIED SERINE
926 "SVY": "SER", # SVY SER MODIFIED SERINE
927 "SVZ": "SER", # SVZ SER MODIFIED SERINE
928 "SXE": "SER", # SXE SER MODIFIED SERINE
929 "TBG": "GLY", # TBG GLY T-BUTYL GLYCINE
930 "TBM": "THR", # TBM THR
931 "TCQ": "TYR", # TCQ TYR MODIFIED TYROSINE
932 "TEE": "CYS", # TEE CYS POST-TRANSLATIONAL MODIFICATION
933 "TH5": "THR", # TH5 THR O-ACETYL-L-THREONINE
934 "THC": "THR", # THC THR N-METHYLCARBONYLTHREONINE
935 "THR": "THR", # THR THR
936 "TIH": "ALA", # TIH ALA BETA(2-THIENYL)ALANINE
937 "TMD": "THR", # TMD THR N-METHYLATED, EPSILON C ALKYLATED
938 "TNB": "CYS", # TNB CYS S-(2,3,6-TRINITROPHENYL)CYSTEINE
939 "TOX": "TRP", # TOX TRP
940 "TPL": "TRP", # TPL TRP TRYTOPHANOL
941 "TPO": "THR", # TPO THR HOSPHOTHREONINE
942 "TPQ": "ALA", # TPQ ALA 2,4,5-TRIHYDROXYPHENYLALANINE
943 "TQQ": "TRP", # TQQ TRP
944 "TRF": "TRP", # TRF TRP N1-FORMYL-TRYPTOPHAN
945 "TRN": "TRP", # TRN TRP AZA-TRYPTOPHAN
946 "TRO": "TRP", # TRO TRP 2-HYDROXY-TRYPTOPHAN
947 "TRP": "TRP", # TRP TRP
948 "TRQ": "TRP", # TRQ TRP
949 "TRW": "TRP", # TRW TRP
950 "TRX": "TRP", # TRX TRP 6-HYDROXYTRYPTOPHAN
951 "TTQ": "TRP", # TTQ TRP 6-AMINO-7-HYDROXY-L-TRYPTOPHAN
952 "TTS": "TYR", # TTS TYR
953 "TY2": "TYR", # TY2 TYR 3-AMINO-L-TYROSINE
954 "TY3": "TYR", # TY3 TYR 3-HYDROXY-L-TYROSINE
955 "TYB": "TYR", # TYB TYR TYROSINAL
956 "TYC": "TYR", # TYC TYR L-TYROSINAMIDE
957 "TYI": "TYR", # TYI TYR 3,5-DIIODOTYROSINE
958 "TYN": "TYR", # TYN TYR ADDUCT AT HYDROXY GROUP
959 "TYO": "TYR", # TYO TYR
960 "TYQ": "TYR", # TYQ TYR AMINOQUINOL FORM OF TOPA QUINONONE
961 "TYR": "TYR", # TYR TYR
962 "TYS": "TYR", # TYS TYR INE SULPHONATED TYROSINE
963 "TYT": "TYR", # TYT TYR
964 "TYX": "CYS", # TYX CYS S-(2-ANILINO-2-OXOETHYL)-L-CYSTEINE
965 "TYY": "TYR", # TYY TYR IMINOQUINONE FORM OF TOPA QUINONONE
966 "TYZ": "ARG", # TYZ ARG PARA ACETAMIDO BENZOIC ACID
967 "UMA": "ALA", # UMA ALA
968 "VAD": "VAL", # VAD VAL DEAMINOHYDROXYVALINE
969 "VAF": "VAL", # VAF VAL METHYLVALINE
970 "VAL": "VAL", # VAL VAL
971 "VDL": "VAL", # VDL VAL (2R,3R)-2,3-DIAMINOBUTANOIC ACID
972 "VLL": "VAL", # VLL VAL (2S)-2,3-DIAMINOBUTANOIC ACID
973 "VME": "VAL", # VME VAL O- METHYLVALINE
974 "X9Q": "ALA", # X9Q ALA
975 "XX1": "LYS", # XX1 LYS N~6~-7H-PURIN-6-YL-L-LYSINE
976 "XXY": "ALA", # XXY ALA
977 "XYG": "ALA", # XYG ALA
978 "YCM": "CYS", # YCM CYS S-(2-AMINO-2-OXOETHYL)-L-CYSTEINE
979 "YOF": "TYR",
980 }
983# TODO: Move this function to biobb_common.tools.file_utils
984def _from_string_to_list(input_data: Optional[Union[str, list[str]]]) -> list[str]:
985 """
986 Converts a string to a list, splitting by commas or spaces. If the input is already a list, returns it as is.
987 Returns an empty list if input_data is None.
989 Parameters:
990 input_data (str, list, or None): The string, list, or None value to convert.
992 Returns:
993 list: A list of string elements or an empty list if input_data is None.
994 """
995 if input_data is None:
996 return []
998 if isinstance(input_data, list):
999 # If input is already a list, return it
1000 return input_data
1002 # If input is a string, determine the delimiter based on presence of commas
1003 delimiter = "," if "," in input_data else " "
1004 items = input_data.split(delimiter)
1006 # Remove whitespace from each item and ignore empty strings
1007 processed_items = [item.strip() for item in items if item.strip()]
1009 return processed_items