Coverage for biobb_vs/utils/common.py: 81%

132 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-06-20 11:08 +0000

1"""Common functions for package biobb_vs.utils""" 

2 

3import warnings 

4from pathlib import Path, PurePath 

5from typing import Optional, Union 

6 

7from Bio import BiopythonDeprecationWarning 

8from biobb_common.tools import file_utils as fu 

9 

10with warnings.catch_warnings(): 

11 warnings.simplefilter("ignore", BiopythonDeprecationWarning) 

12 import Bio.pairwise2 

13 import Bio.PDB 

14 

15 try: 

16 import Bio.SubsMat.MatrixInfo # type: ignore 

17 except ImportError: 

18 import Bio.Align.substitution_matrices 

19 # from Bio.Data.SCOPData import protein_letters_3to1 as prot_one_letter 

20 from Bio.Data.PDBData import protein_letters_3to1 as prot_one_letter 

21 

22 

23# CHECK PARAMETERS 

24 

25 

def check_input_path(path, argument, out_log, classname):
    """Validate an input file path and hand it back unchanged.

    Args:
        path: Location of the input file.
        argument: Name of the argument being checked; used in the messages
            and as the lookup key passed to ``is_valid_file``.
        out_log: Logger forwarded to ``fu.log``.
        classname: Prefix prepended to every message.

    Returns:
        ``path``, when the file exists and its extension is accepted.

    Raises:
        SystemExit: If the file does not exist, or if its extension (suffix
            without the leading dot) is rejected by ``is_valid_file``.
    """
    if not Path(path).exists():
        missing = classname + ": Unexisting %s file" % argument
        fu.log(missing + ", exiting", out_log)
        raise SystemExit(missing)

    # Suffix without its leading dot, e.g. ".pdb" -> "pdb".
    extension = PurePath(path).suffix[1:]
    if is_valid_file(extension, argument):
        return path

    incompatible = classname + ": Format %s in %s file is not compatible" % (
        extension,
        argument,
    )
    fu.log(incompatible, out_log)
    raise SystemExit(incompatible)

43 

44 

def check_output_path(path, argument, optional, out_log, classname):
    """Validate an output file path and hand it back unchanged.

    Args:
        path: Location where the output file will be written.
        argument: Name of the argument being checked; used in the messages
            and as the lookup key passed to ``is_valid_file``.
        optional: When true, an empty/missing ``path`` is accepted.
        out_log: Logger forwarded to ``fu.log``.
        classname: Prefix prepended to every message.

    Returns:
        ``None`` for an omitted optional path, otherwise ``path``.

    Raises:
        SystemExit: If the parent folder does not exist, or if the extension
            (suffix without the leading dot) is rejected by ``is_valid_file``.
    """
    if optional and not path:
        return None

    # A PurePath object is always truthy, so only the existence of the
    # parent folder actually decides this check.
    parent_folder = Path(PurePath(path).parent)
    if not parent_folder.exists():
        missing = classname + ": Unexisting %s folder" % argument
        fu.log(missing + ", exiting", out_log)
        raise SystemExit(missing)

    extension = PurePath(path).suffix[1:]
    if is_valid_file(extension, argument):
        return path

    incompatible = classname + ": Format %s in %s file is not compatible" % (
        extension,
        argument,
    )
    fu.log(incompatible, out_log)
    raise SystemExit(incompatible)

64 

65 

def is_valid_file(ext, argument):
    """Tell whether extension ``ext`` is accepted for ``argument``.

    ``ext`` is the file suffix without its leading dot. ``argument`` must be
    one of the keys of the table below; any other value raises ``KeyError``.
    """
    accepted_by_argument = {
        "input_pdb_path": ("pdb", "pqr"),
        "input_clusters_zip": ("zip",),
        "resid_pdb_path": ("pdb",),
        "input_pdbqt_path": ("pdbqt",),
        "output_pdb_path": ("pdb",),
        "output_pdbqt_path": ("pdbqt",),
    }
    accepted = accepted_by_argument[argument]
    return ext in accepted

77 

78 

79# UTILS FUNCTIONS 

80 

81 

def get_residue_by_id(structure, res_num):
    """Return the first residue whose id number equals ``res_num``.

    Residues are taken from ``structure.get_residues()`` in iteration order
    and compared on element ``[1]`` of ``residue.get_id()``. ``None`` is
    returned when no residue matches.
    """
    candidates = (
        residue
        for residue in structure.get_residues()
        if residue.get_id()[1] == res_num
    )
    return next(candidates, None)

88 

89 

def get_pdb_sequence(structure):
    """Extract the amino-acid sequence of a PDB structure.

    Returns a list of ``(residue number, one-letter code)`` tuples, one per
    residue of ``structure.get_residues()`` accepted by
    ``Bio.PDB.Polypeptide.is_aa``. Residue names missing from the
    three-to-one letter table are reported as ``"X"``.
    """
    return [
        (residue.id[1], prot_one_letter.get(residue.resname, "X"))
        for residue in structure.get_residues()
        if Bio.PDB.Polypeptide.is_aa(residue)
    ]

104 

105 

def get_sequence_nucs(structure):
    """Return the items obtained by iterating ``structure`` as a new list."""
    return list(structure)

111 

112 

def align_sequences(
    seqA, seqB, matrix_name="BLOSUM62", gap_open=-10.0, gap_extend=-0.5
):
    """Globally align two residue sequences and map their numbering.

    The alignment is computed with ``Bio.pairwise2.align.globalds`` using the
    substitution matrix loaded by name and without penalising end gaps.

    Args:
        seqA: Sequence of entries whose element ``[0]`` is the residue
            identifier and element ``[1]`` the one-letter code.
        seqB: Same layout as ``seqA``.
        matrix_name: Name handed to ``Bio.Align.substitution_matrices.load``.
        gap_open: Gap opening score.
        gap_extend: Gap extension score.

    Returns:
        ``((aligned_A, aligned_B), mapping)`` where the first item holds the
        two aligned strings of the first alignment returned, and ``mapping``
        associates the identifier of each ``seqA`` entry with the identifier
        of the ``seqB`` entry placed in the same gap-free column.
    """
    # Collapse the (identifier, letter) entries into plain strings.
    sequence_A = "".join([entry[1] for entry in seqA])
    sequence_B = "".join([entry[1] for entry in seqB])

    matrix = Bio.Align.substitution_matrices.load(matrix_name)

    # Pairwise alignment; only the first alignment returned is used.
    alns = Bio.pairwise2.align.globalds(
        sequence_A,
        sequence_B,
        matrix,
        gap_open,
        gap_extend,
        penalize_end_gaps=(False, False),
    )
    aligned_A, aligned_B, _score, _begin, _end = alns[0]

    # Walk the alignment columns, tracking the position reached in each
    # original sequence; only gap-free columns contribute to the mapping.
    mapping = {}
    pos_A = 0
    pos_B = 0
    for col_A, col_B in zip(aligned_A, aligned_B):
        if col_A != "-" and col_B != "-":
            assert seqA[pos_A][1] == col_A
            assert seqB[pos_B][1] == col_B
            mapping[seqA[pos_A][0]] = seqB[pos_B][0]
            pos_A += 1
            pos_B += 1
        elif col_A != "-":
            # Gap in B: only A advances.
            pos_A += 1
        elif col_B != "-":
            # Gap in A: only B advances.
            pos_B += 1

    return ((aligned_A, aligned_B), mapping)

167 

168 

def calculate_alignment_identity(alignedA, alignedB):
    """Compute identity percentages between two aligned sequences.

    Args:
        alignedA: First aligned sequence, with ``"-"`` marking gaps.
        alignedB: Second aligned sequence, expected to have the same length
            as ``alignedA``; columns are compared pairwise.

    Returns:
        A ``(seq_id, gap_id)`` tuple of floats:

        * ``seq_id`` -- identical columns as a percentage of
          ``len(alignedA)``.
        * ``gap_id`` -- identical columns as a percentage of the columns
          where neither sequence has a gap.

        A percentage whose denominator would be zero (empty alignment, or no
        gap-free column) is reported as ``0.0`` instead of raising
        ``ZeroDivisionError``.
    """
    columns = list(zip(alignedA, alignedB))

    # A column where both sequences carry a gap is not an identity; counting
    # it would let gap_id exceed 100%.
    identical = sum(1 for a, b in columns if a == b and a != "-")
    gapless = sum(1 for a, b in columns if a != "-" and b != "-")
    total = len(alignedA)

    seq_id = (100 * identical) / total if total else 0.0
    gap_id = (100 * identical) / gapless if gapless else 0.0
    return (seq_id, gap_id)

181 

182 

def get_ligand_residues(
    PDBchain,
    ignore_wats=True,
    ignore_small_molec=True,
    ignore_ions=True,
    ignore_modres=True,
):
    """
    Returns heteroatom residues.

    Args:
        PDBchain (Bio.PDB.PDBParser chain object): PDB selection of the chain to be scanned
        ignore_wats (boolean): If True, water residues will be skipped, and not returned as ligand residues
        ignore_small_molec (boolean): Currently has no effect; the small-molecule filter is disabled in this function and the parameter is kept for interface compatibility
        ignore_ions (boolean): If True, ion residues will be skipped, and not returned as ligand residues
        ignore_modres (boolean): If True, modified amino acid residues will be skipped, and not returned as ligand residues

    Returns:
        list: The residues of ``PDBchain.get_residues()`` kept as ligands, in iteration order.
    """
    # Build the lookup tables once instead of once per hetero residue.
    ion_names = __ions() if ignore_ions else frozenset()
    modres_names = __modres() if ignore_modres else {}

    ligands = []
    for res in PDBchain.get_residues():
        # First element of the residue id inside the full id: " " is treated
        # as a standard residue and "W" as water by the checks below.
        res_hetflag = res.get_full_id()[3][0]

        # skip amino acids
        if res_hetflag == " ":
            continue

        # waters are either all kept or all skipped
        if res_hetflag == "W":
            if not ignore_wats:
                ligands.append(res)
            continue

        # skip ions and modified residues, when requested
        res_name = res.get_resname().strip()
        if res_name in ion_names or res_name in modres_names:
            continue

        ligands.append(res)

    return ligands

233 

234 

def get_box_coordinates(box_center, box_size, pdb_format=True):
    """Compute the eight corners of an axis-aligned box.

    Args:
        box_center: Indexable with the x, y, z coordinates of the box centre.
        box_size: Indexable with the offset applied on each side of the
            centre along x, y and z (each corner is ``center +/- size``).
        pdb_format: When true, return the corners as PDB ``HETATM`` records;
            otherwise return the raw coordinates.

    Returns:
        With ``pdb_format`` false, a list of eight ``[x, y, z]`` lists ordered
        with x varying slowest and z fastest, low value first. With
        ``pdb_format`` true, a string of eight newline-terminated ``HETATM``
        lines (atoms ``ZN1``..``ZN8``, serials 10000..10007, residue ``ZN``
        9999 of chain ``Z``) in the same corner order.
    """
    lows = [box_center[axis] - box_size[axis] for axis in range(3)]
    highs = [box_center[axis] + box_size[axis] for axis in range(3)]
    coords = [
        [x, y, z]
        for x in (lows[0], highs[0])
        for y in (lows[1], highs[1])
        for z in (lows[2], highs[2])
    ]

    if not pdb_format:
        return coords

    at_num = 10000
    at_nam = "ZN"
    re_nam = "ZN"
    chain = "Z"
    res_num = 9999
    occ = 1
    bfact = 50
    elem = "ZN"

    # Fixed-column PDB layout: four blanks after the residue number put x at
    # columns 31-38, and ten blanks after the B-factor put the element symbol
    # at columns 77-78.
    record = (
        "HETATM%5d %-4s %3s %s%4d    %8.3f%8.3f%8.3f%6.2f%6.2f          %2s\n"
    )
    return "".join(
        record
        % (
            at_num + i,
            at_nam + str(i + 1),
            re_nam,
            chain,
            res_num,
            x,
            y,
            z,
            occ,
            bfact,
            elem,
        )
        for i, (x, y, z) in enumerate(coords)
    )

310 

311 

def __ions():
    """Return a fresh set with the residue names treated as ions.

    Each entry is a residue name as compared against
    ``res.get_resname().strip()`` by the callers in this module; the trailing
    comment gives the description attached to that name.
    """
    ion_residue_names = (
        "UNX",  # UNKNOWN ATOM OR ION
        "LI",  # LITHIUM ION
        "OH",  # HYDROXIDE ION
        "NH4",  # AMMONIUM ION
        "F",  # FLUORIDE ION
        "ND4",  # AMMONIUM CATION WITH D
        "NA",  # SODIUM ION
        "MG",  # MAGNESIUM ION
        "CYN",  # CYANIDE ION
        "AL",  # ALUMINUM ION
        "2FK",  # SUPEROXO ION
        "PER",  # PEROXIDE ION
        "3P8",  # methylammonium ion
        "CL",  # CHLORIDE ION
        "K",  # POTASSIUM ION
        "CA",  # CALCIUM ION
        "AZI",  # AZIDE ION
        "NO2",  # NITRITE ION
        "4TI",  # TITANIUM ION
        "V",  # VANADIUM ION
        "CR",  # CHROMIUM ION
        "MN",  # MANGANESE (II) ION
        "MN3",  # MANGANESE (III) ION
        "FE",  # FE (III) ION
        "FE2",  # FE (II) ION
        "SCN",  # THIOCYANATE ION
        "3NI",  # NICKEL (III) ION
        "NI",  # NICKEL (II) ION
        "3CO",  # COBALT (III) ION
        "CO",  # COBALT (II) ION
        "ACT",  # ACETATE ION
        "CO3",  # CARBONATE ION
        "BCT",  # BICARBONATE ION
        "NO3",  # NITRATE ION
        "CU",  # COPPER (II) ION
        "CU1",  # COPPER (I) ION
        "CU3",  # COPPER (III) ION
        "ZN",  # ZINC ION
        "BEF",  # BERYLLIUM TRIFLUORIDE ION
        "GA",  # GALLIUM (III) ION
        "MH2",  # MANGANESE ION
        "TMA",  # TETRAMETHYLAMMONIUM ION
        "BO4",  # BORATE ION
        "PO3",  # PHOSPHITE ION
        "BR",  # BROMIDE ION
        "SO3",  # SULFITE ION
        "LCO",  # CHLORATE ION
        "BF4",  # BERYLLIUM TETRAFLUORIDE ION
        "RB",  # RUBIDIUM ION
        "SR",  # STRONTIUM ION
        "OXL",  # OXALATE ION
        "Y1",  # YTTRIUM ION
        "YT3",  # YTTRIUM (III) ION
        "ZR",  # ZIRCONIUM ION
        "PO4",  # PHOSPHATE ION
        "4MO",  # MOLYBDENUM(IV) ION
        "6MO",  # MOLYBDENUM(VI) ION
        "PI",  # HYDROGENPHOSPHATE ION
        "SO4",  # SULFATE ION
        "2HP",  # DIHYDROGENPHOSPHATE ION
        "DMI",  # description truncated to "2" in the source table -- TODO confirm full name
        "FPO",  # FLUORO-PHOSPHITE ION
        "VN3",  # VANADATE ION
        "LCP",  # PERCHLORATE ION
        "3MT",  # 3-METHYLTHIAZOLIUM ION
        "HAI",  # CYCLOHEXYLAMMONIUM ION
        "RU",  # RUTHENIUM ION
        "MLI",  # MALONATE ION
        "TEA",  # TRIETHYLAMMONIUM ION
        "RH3",  # RHODIUM(III) ION
        "ALF",  # TETRAFLUOROALUMINATE ION
        "CHT",  # CHOLINE ION
        "SEK",  # SELENOCYANATE ION
        "PD",  # PALLADIUM ION
        "AG",  # SILVER ION
        "CD",  # CADMIUM ION
        "DTI",  # description truncated to "3" in the source table -- TODO confirm full name
        "IN",  # INDIUM (III) ION
        "VO4",  # VANADATE ION
        "SB",  # ANTIMONY (III) ION
        "IOD",  # IODIDE ION
        "CON",  # COBALT TETRAAMMINE ION
        "CUA",  # DINUCLEAR COPPER ION
        "BSY",  # BISELENITE ION
        "NET",  # TETRAETHYLAMMONIUM ION
        "OAA",  # OXALOACETATE ION
        "CS",  # CESIUM ION
        "THE",  # THREONATE ION
        "CAC",  # CACODYLATE ION
        "BA",  # BARIUM ION
        "LA",  # LANTHANUM (III) ION
        "CE",  # CERIUM (III) ION
        "PR",  # PRASEODYMIUM ION
        "SE4",  # SELENATE ION
        "MOW",  # Oxo(sulfanyl)molybdenum(IV) ION
        "SM",  # SAMARIUM (III) ION
        "EU",  # EUROPIUM ION
        "EU3",  # EUROPIUM (III) ION
        "GD3",  # GADOLINIUM ION
        "TB",  # TERBIUM(III) ION
        "MOO",  # MOLYBDATE ION
        "SMO",  # DIOXOSULFIDOMOLYBDENUM(VI) ION
        "MOS",  # DIOXOTHIOMOLYBDENUM(VI) ION
        "DY",  # DYSPROSIUM ION
        "TCN",  # TETRACYANONICKELATE ION
        "EDR",  # EDROPHONIUM ION
        "ER3",  # ERBIUM (III) ION
        "TRA",  # ACONITATE ION
        "YB",  # YTTERBIUM (III) ION
        "YB2",  # YTTERBIUM (II) ION
        "LU",  # LUTETIUM (III) ION
        "1AL",  # ALLANTOATE ION
        "W",  # TUNGSTEN ION
        "ATH",  # 4-HYDROXY-ACONITATE ION
        "FLC",  # CITRATE ANION
        "OS",  # OSMIUM ION
        "OS4",  # OSMIUM 4+ ION
        "T1A",  # TETRAETHYLARSONIUM ION
        "IR",  # IRIDIUM ION
        "IR3",  # IRIDIUM (III) ION
        "PT",  # PLATINUM (II) ION
        "PT4",  # PLATINUM (IV) ION
        "AU",  # GOLD ION
        "AU3",  # GOLD 3+ ION
        "HG",  # MERCURY (II) ION
        "NRU",  # RUTHENIUM (III) HEXAAMINE ION
        "TL",  # THALLIUM (I) ION
        "RHD",  # RHODIUM HEXAMINE ION
        "EUD",  # EUDESMANE CATION
        "PB",  # LEAD (II) ION
        "BS3",  # Bismuth(III) ION
        "PDV",  # Divanadate ion
        "MMC",  # METHYL MERCURY ION
        "EMC",  # ETHYL MERCURY ION
        "TH",  # THORIUM ION
        "TBA",  # TETRABUTYLAMMONIUM ION
        "AM",  # AMERICIUM ION
        "4PU",  # PLUTONIUM ION
        "PTN",  # PLATINUM TRIAMINE ION
        "ZCM",  # CURIUM ION
        "AUC",  # GOLD (I) CYANIDE ION
        "DSC",  # DODECANESULFONATE ION
        "CF",  # CALIFORNIUM ION
        "PBM",  # TRIMETHYL LEAD ION
        "DME",  # DECAMETHONIUM ION
        "MAC",  # MERCURY ACETATE ION
        "WO5",  # TUNGSTATE(VI) ION
        "IUM",  # URANYL (VI) ION
        "CUZ",  # (MU-4-SULFIDO)-TETRA-NUCLEAR COPPER ION
        "I3M",  # Tri-iodode Anion
    )
    return set(ion_residue_names)

465 

466 

467def __modres(): 

468 return { 

469 "0CS": "ALA", # 0CS ALA 3-[(S)-HYDROPEROXYSULFINYL]-L-ALANINE 

470 "1AB": "PRO", # 1AB PRO 1,4-DIDEOXY-1,4-IMINO-D-ARABINITOL 

471 "1LU": "LEU", # 1LU LEU 4-METHYL-PENTANOIC ACID-2-OXYL GROUP 

472 "1PA": "PHE", # 1PA PHE PHENYLMETHYLACETIC ACID ALANINE 

473 "1TQ": "TRP", # 1TQ TRP 6-(FORMYLAMINO)-7-HYDROXY-L-TRYPTOPHAN 

474 "1TY": "TYR", # 1TY TYR 

475 "23F": "PHE", # 23F PHE (2Z)-2-AMINO-3-PHENYLACRYLIC ACID 

476 "23S": "TRP", # 23S TRP MODIFIED TRYPTOPHAN 

477 "2BU": "ALA", # 2BU ADE 

478 "2ML": "LEU", # 2ML LEU 2-METHYLLEUCINE 

479 "2MR": "ARG", # 2MR ARG N3, N4-DIMETHYLARGININE 

480 "2MT": "PRO", # 2MT PRO 

481 "2OP": "ALA", # 2OP (2S 2-HYDROXYPROPANAL 

482 "2TY": "TYR", # 2TY TYR 

483 "32S": "TRP", # 32S TRP MODIFIED TRYPTOPHAN 

484 "32T": "TRP", # 32T TRP MODIFIED TRYPTOPHAN 

485 "3AH": "HIS", # 3AH HIS 

486 "3MD": "ASP", # 3MD ASP 2S,3S-3-METHYLASPARTIC ACID 

487 "3TY": "TYR", # 3TY TYR MODIFIED TYROSINE 

488 "4DP": "TRP", # 4DP TRP 

489 "4F3": "ALA", # 4F3 ALA CYCLIZED 

490 "4FB": "PRO", # 4FB PRO (4S)-4-FLUORO-L-PROLINE 

491 "4FW": "TRP", # 4FW TRP 4-FLUOROTRYPTOPHANE 

492 "4HT": "TRP", # 4HT TRP 4-HYDROXYTRYPTOPHAN 

493 "4IN": "TRP", # 4IN TRP 4-AMINO-L-TRYPTOPHAN 

494 "4PH": "PHE", # 4PH PHE 4-METHYL-L-PHENYLALANINE 

495 "5CS": "CYS", # 5CS CYS 

496 "6CL": "LYS", # 6CL LYS 6-CARBOXYLYSINE 

497 "6CW": "TRP", # 6CW TRP 6-CHLORO-L-TRYPTOPHAN 

498 "A0A": "ASP", # A0A ASP ASPARTYL-FORMYL MIXED ANHYDRIDE 

499 "AA4": "ALA", # AA4 ALA 2-AMINO-5-HYDROXYPENTANOIC ACID 

500 "AAR": "ARG", # AAR ARG ARGININEAMIDE 

501 "AB7": "GLU", # AB7 GLU ALPHA-AMINOBUTYRIC ACID 

502 "ABA": "ALA", # ABA ALA ALPHA-AMINOBUTYRIC ACID 

503 "ACB": "ASP", # ACB ASP 3-METHYL-ASPARTIC ACID 

504 "ACL": "ARG", # ACL ARG DEOXY-CHLOROMETHYL-ARGININE 

505 "ACY": "GLY", # ACY GLY POST-TRANSLATIONAL MODIFICATION 

506 "AEI": "THR", # AEI THR ACYLATED THR 

507 "AFA": "ASN", # AFA ASN N-[7-METHYL-OCT-2,4-DIENOYL]ASPARAGINE 

508 "AGM": "ARG", # AGM ARG 4-METHYL-ARGININE 

509 "AGT": "CYS", # AGT CYS AGMATINE-CYSTEINE ADDUCT 

510 "AHB": "ASN", # AHB ASN BETA-HYDROXYASPARAGINE 

511 "AHO": "ALA", # AHO ALA N-ACETYL-N-HYDROXY-L-ORNITHINE 

512 "AHP": "ALA", # AHP ALA 2-AMINO-HEPTANOIC ACID 

513 "AIB": "ALA", # AIB ALA ALPHA-AMINOISOBUTYRIC ACID 

514 "AKL": "ASP", # AKL ASP 3-AMINO-5-CHLORO-4-OXOPENTANOIC ACID 

515 "ALA": "ALA", # ALA ALA 

516 "ALC": "ALA", # ALC ALA 2-AMINO-3-CYCLOHEXYL-PROPIONIC ACID 

517 "ALG": "ARG", # ALG ARG GUANIDINOBUTYRYL GROUP 

518 "ALM": "ALA", # ALM ALA 1-METHYL-ALANINAL 

519 "ALN": "ALA", # ALN ALA NAPHTHALEN-2-YL-3-ALANINE 

520 "ALO": "THR", # ALO THR ALLO-THREONINE 

521 "ALS": "ALA", # ALS ALA 2-AMINO-3-OXO-4-SULFO-BUTYRIC ACID 

522 "ALT": "ALA", # ALT ALA THIOALANINE 

523 "ALY": "LYS", # ALY LYS N(6)-ACETYLLYSINE 

524 "AME": "MET", # AME MET ACETYLATED METHIONINE 

525 "AP7": "ALA", # AP7 ADE 

526 "APH": "ALA", # APH ALA P-AMIDINOPHENYL-3-ALANINE 

527 "API": "LYS", # API LYS 2,6-DIAMINOPIMELIC ACID 

528 "APK": "LYS", # APK LYS 

529 "AR2": "ARG", # AR2 ARG ARGINYL-BENZOTHIAZOLE-6-CARBOXYLIC ACID 

530 "AR4": "GLU", # AR4 GLU 

531 "ARG": "ARG", # ARG ARG 

532 "ARM": "ARG", # ARM ARG DEOXY-METHYL-ARGININE 

533 "ARO": "ARG", # ARO ARG C-GAMMA-HYDROXY ARGININE 

534 "ASA": "ASP", # ASA ASP ASPARTIC ALDEHYDE 

535 "ASB": "ASP", # ASB ASP ASPARTIC ACID-4-CARBOXYETHYL ESTER 

536 "ASI": "ASP", # ASI ASP L-ISO-ASPARTATE 

537 "ASK": "ASP", # ASK ASP DEHYDROXYMETHYLASPARTIC ACID 

538 "ASL": "ASP", # ASL ASP ASPARTIC ACID-4-CARBOXYETHYL ESTER 

539 "ASN": "ASN", # ASN ASN 

540 "ASP": "ASP", # ASP ASP 

541 "AYA": "ALA", # AYA ALA N-ACETYLALANINE 

542 "AYG": "ALA", # AYG ALA 

543 "AZK": "LYS", # AZK LYS (2S)-2-AMINO-6-TRIAZANYLHEXAN-1-OL 

544 "B2A": "ALA", # B2A ALA ALANINE BORONIC ACID 

545 "B2F": "PHE", # B2F PHE PHENYLALANINE BORONIC ACID 

546 "B2I": "ILE", # B2I ILE ISOLEUCINE BORONIC ACID 

547 "B2V": "VAL", # B2V VAL VALINE BORONIC ACID 

548 "B3A": "ALA", # B3A ALA (3S)-3-AMINOBUTANOIC ACID 

549 "B3D": "ASP", # B3D ASP 3-AMINOPENTANEDIOIC ACID 

550 "B3E": "GLU", # B3E GLU (3S)-3-AMINOHEXANEDIOIC ACID 

551 "B3K": "LYS", # B3K LYS (3S)-3,7-DIAMINOHEPTANOIC ACID 

552 "B3S": "SER", # B3S SER (3R)-3-AMINO-4-HYDROXYBUTANOIC ACID 

553 "B3X": "ASN", # B3X ASN (3S)-3,5-DIAMINO-5-OXOPENTANOIC ACID 

554 "B3Y": "TYR", # B3Y TYR 

555 "BAL": "ALA", # BAL ALA BETA-ALANINE 

556 "BBC": "CYS", # BBC CYS 

557 "BCS": "CYS", # BCS CYS BENZYLCYSTEINE 

558 "BCX": "CYS", # BCX CYS BETA-3-CYSTEINE 

559 "BFD": "ASP", # BFD ASP ASPARTATE BERYLLIUM FLUORIDE 

560 "BG1": "SER", # BG1 SER 

561 "BHD": "ASP", # BHD ASP BETA-HYDROXYASPARTIC ACID 

562 "BIF": "PHE", # BIF PHE 

563 "BLE": "LEU", # BLE LEU LEUCINE BORONIC ACID 

564 "BLY": "LYS", # BLY LYS LYSINE BORONIC ACID 

565 "BMT": "THR", # BMT THR 

566 "BNN": "ALA", # BNN ALA ACETYL-P-AMIDINOPHENYLALANINE 

567 "BOR": "ARG", # BOR ARG 

568 "BPE": "CYS", # BPE CYS 

569 "BTR": "TRP", # BTR TRP 6-BROMO-TRYPTOPHAN 

570 "BUC": "CYS", # BUC CYS S,S-BUTYLTHIOCYSTEINE 

571 "BUG": "LEU", # BUG LEU TERT-LEUCYL AMINE 

572 "C12": "ALA", # C12 ALA 

573 "C1X": "LYS", # C1X LYS MODIFIED LYSINE 

574 "C3Y": "CYS", # C3Y CYS MODIFIED CYSTEINE 

575 "C5C": "CYS", # C5C CYS S-CYCLOPENTYL THIOCYSTEINE 

576 "C6C": "CYS", # C6C CYS S-CYCLOHEXYL THIOCYSTEINE 

577 "C99": "ALA", # C99 ALA 

578 "CAB": "ALA", # CAB ALA 4-CARBOXY-4-AMINOBUTANAL 

579 "CAF": "CYS", # CAF CYS S-DIMETHYLARSINOYL-CYSTEINE 

580 "CAS": "CYS", # CAS CYS S-(DIMETHYLARSENIC)CYSTEINE 

581 "CCS": "CYS", # CCS CYS CARBOXYMETHYLATED CYSTEINE 

582 "CGU": "GLU", # CGU GLU CARBOXYLATION OF THE CG ATOM 

583 "CH6": "ALA", # CH6 ALA 

584 "CH7": "ALA", # CH7 ALA 

585 "CHG": "GLY", # CHG GLY CYCLOHEXYL GLYCINE 

586 "CHP": "GLY", # CHP GLY 3-CHLORO-4-HYDROXYPHENYLGLYCINE 

587 "CHS": "PHE", # CHS PHE 4-AMINO-5-CYCLOHEXYL-3-HYDROXY-PENTANOIC AC 

588 "CIR": "ARG", # CIR ARG CITRULLINE 

589 "CLB": "ALA", # CLB ALA 

590 "CLD": "ALA", # CLD ALA 

591 "CLE": "LEU", # CLE LEU LEUCINE AMIDE 

592 "CLG": "LYS", # CLG LYS 

593 "CLH": "LYS", # CLH LYS 

594 "CLV": "ALA", # CLV ALA 

595 "CME": "CYS", # CME CYS MODIFIED CYSTEINE 

596 "CML": "CYS", # CML CYS 

597 "CMT": "CYS", # CMT CYS O-METHYLCYSTEINE 

598 "CQR": "ALA", # CQR ALA 

599 "CR2": "ALA", # CR2 ALA POST-TRANSLATIONAL MODIFICATION 

600 "CR5": "ALA", # CR5 ALA 

601 "CR7": "ALA", # CR7 ALA 

602 "CR8": "ALA", # CR8 ALA 

603 "CRK": "ALA", # CRK ALA 

604 "CRO": "THR", # CRO THR CYCLIZED 

605 "CRQ": "TYR", # CRQ TYR 

606 "CRW": "ALA", # CRW ALA 

607 "CRX": "ALA", # CRX ALA 

608 "CS1": "CYS", # CS1 CYS S-(2-ANILINYL-SULFANYL)-CYSTEINE 

609 "CS3": "CYS", # CS3 CYS 

610 "CS4": "CYS", # CS4 CYS 

611 "CSA": "CYS", # CSA CYS S-ACETONYLCYSTEIN 

612 "CSB": "CYS", # CSB CYS CYS BOUND TO LEAD ION 

613 "CSD": "CYS", # CSD CYS 3-SULFINOALANINE 

614 "CSE": "CYS", # CSE CYS SELENOCYSTEINE 

615 "CSI": "ALA", # CSI ALA 

616 "CSO": "CYS", # CSO CYS INE S-HYDROXYCYSTEINE 

617 "CSR": "CYS", # CSR CYS S-ARSONOCYSTEINE 

618 "CSS": "CYS", # CSS CYS 1,3-THIAZOLE-4-CARBOXYLIC ACID 

619 "CSU": "CYS", # CSU CYS CYSTEINE-S-SULFONIC ACID 

620 "CSW": "CYS", # CSW CYS CYSTEINE-S-DIOXIDE 

621 "CSX": "CYS", # CSX CYS OXOCYSTEINE 

622 "CSY": "ALA", # CSY ALA MODIFIED TYROSINE COMPLEX 

623 "CSZ": "CYS", # CSZ CYS S-SELANYL CYSTEINE 

624 "CTH": "THR", # CTH THR 4-CHLOROTHREONINE 

625 "CWR": "ALA", # CWR ALA 

626 "CXM": "MET", # CXM MET N-CARBOXYMETHIONINE 

627 "CY0": "CYS", # CY0 CYS MODIFIED CYSTEINE 

628 "CY1": "CYS", # CY1 CYS ACETAMIDOMETHYLCYSTEINE 

629 "CY3": "CYS", # CY3 CYS 2-AMINO-3-MERCAPTO-PROPIONAMIDE 

630 "CY4": "CYS", # CY4 CYS S-BUTYRYL-CYSTEIN 

631 "CY7": "CYS", # CY7 CYS MODIFIED CYSTEINE 

632 "CYD": "CYS", # CYD CYS 

633 "CYF": "CYS", # CYF CYS FLUORESCEIN LABELLED CYS380 (P14) 

634 "CYG": "CYS", # CYG CYS 

635 "CYJ": "LYS", # CYJ LYS MODIFIED LYSINE 

636 "CYQ": "CYS", # CYQ CYS 

637 "CYR": "CYS", # CYR CYS 

638 "CYS": "CYS", # CYS CYS 

639 "CZ2": "CYS", # CZ2 CYS S-(DIHYDROXYARSINO)CYSTEINE 

640 "CZZ": "CYS", # CZZ CYS THIARSAHYDROXY-CYSTEINE 

641 "DA2": "ARG", # DA2 ARG MODIFIED ARGININE 

642 "DAB": "ALA", # DAB ALA 2,4-DIAMINOBUTYRIC ACID 

643 "DAH": "PHE", # DAH PHE 3,4-DIHYDROXYDAHNYLALANINE 

644 "DAL": "ALA", # DAL ALA D-ALANINE 

645 "DAM": "ALA", # DAM ALA N-METHYL-ALPHA-BETA-DEHYDROALANINE 

646 "DAR": "ARG", # DAR ARG D-ARGININE 

647 "DAS": "ASP", # DAS ASP D-ASPARTIC ACID 

648 "DBU": "ALA", # DBU ALA (2E)-2-AMINOBUT-2-ENOIC ACID 

649 "DBY": "TYR", # DBY TYR 3,5 DIBROMOTYROSINE 

650 "DBZ": "ALA", # DBZ ALA 3-(BENZOYLAMINO)-L-ALANINE 

651 "DCL": "LEU", # DCL LEU 2-AMINO-4-METHYL-PENTANYL GROUP 

652 "DCY": "CYS", # DCY CYS D-CYSTEINE 

653 "DDE": "HIS", # DDE HIS 

654 "DGL": "GLU", # DGL GLU D-GLU 

655 "DGN": "GLN", # DGN GLN D-GLUTAMINE 

656 "DHA": "ALA", # DHA ALA 2-AMINO-ACRYLIC ACID 

657 "DHI": "HIS", # DHI HIS D-HISTIDINE 

658 "DHL": "SER", # DHL SER POST-TRANSLATIONAL MODIFICATION 

659 "DIL": "ILE", # DIL ILE D-ISOLEUCINE 

660 "DIV": "VAL", # DIV VAL D-ISOVALINE 

661 "DLE": "LEU", # DLE LEU D-LEUCINE 

662 "DLS": "LYS", # DLS LYS DI-ACETYL-LYSINE 

663 "DLY": "LYS", # DLY LYS D-LYSINE 

664 "DMH": "ASN", # DMH ASN N4,N4-DIMETHYL-ASPARAGINE 

665 "DMK": "ASP", # DMK ASP DIMETHYL ASPARTIC ACID 

666 "DNE": "LEU", # DNE LEU D-NORLEUCINE 

667 "DNG": "LEU", # DNG LEU N-FORMYL-D-NORLEUCINE 

668 "DNL": "LYS", # DNL LYS 6-AMINO-HEXANAL 

669 "DNM": "LEU", # DNM LEU D-N-METHYL NORLEUCINE 

670 "DPH": "PHE", # DPH PHE DEAMINO-METHYL-PHENYLALANINE 

671 "DPL": "PRO", # DPL PRO 4-OXOPROLINE 

672 "DPN": "PHE", # DPN PHE D-CONFIGURATION 

673 "DPP": "ALA", # DPP ALA DIAMMINOPROPANOIC ACID 

674 "DPQ": "TYR", # DPQ TYR TYROSINE DERIVATIVE 

675 "DPR": "PRO", # DPR PRO D-PROLINE 

676 "DSE": "SER", # DSE SER D-SERINE N-METHYLATED 

677 "DSG": "ASN", # DSG ASN D-ASPARAGINE 

678 "DSN": "SER", # DSN SER D-SERINE 

679 "DTH": "THR", # DTH THR D-THREONINE 

680 "DTR": "TRP", # DTR TRP D-TRYPTOPHAN 

681 "DTY": "TYR", # DTY TYR D-TYROSINE 

682 "DVA": "VAL", # DVA VAL D-VALINE 

683 "DYG": "ALA", # DYG ALA 

684 "DYS": "CYS", # DYS CYS 

685 "EFC": "CYS", # EFC CYS S,S-(2-FLUOROETHYL)THIOCYSTEINE 

686 "ESB": "TYR", # ESB TYR 

687 "ESC": "MET", # ESC MET 2-AMINO-4-ETHYL SULFANYL BUTYRIC ACID 

688 "FCL": "PHE", # FCL PHE 3-CHLORO-L-PHENYLALANINE 

689 "FGL": "ALA", # FGL ALA 2-AMINOPROPANEDIOIC ACID 

690 "FGP": "SER", # FGP SER 

691 "FHL": "LYS", # FHL LYS MODIFIED LYSINE 

692 "FLE": "LEU", # FLE LEU FUROYL-LEUCINE 

693 "FLT": "TYR", # FLT TYR FLUOROMALONYL TYROSINE 

694 "FME": "MET", # FME MET FORMYL-METHIONINE 

695 "FOE": "CYS", # FOE CYS 

696 "FOG": "PHE", # FOG PHE PHENYLALANINOYL-[1-HYDROXY]-2-PROPYLENE 

697 "FOR": "MET", # FOR MET 

698 "FRF": "PHE", # FRF PHE PHE FOLLOWED BY REDUCED PHE 

699 "FTR": "TRP", # FTR TRP FLUOROTRYPTOPHANE 

700 "FTY": "TYR", # FTY TYR DEOXY-DIFLUOROMETHELENE-PHOSPHOTYROSINE 

701 "GHG": "GLN", # GHG GLN GAMMA-HYDROXY-GLUTAMINE 

702 "GHP": "GLY", # GHP GLY 4-HYDROXYPHENYLGLYCINE 

703 "GL3": "GLY", # GL3 GLY POST-TRANSLATIONAL MODIFICATION 

704 "GLH": "GLN", # GLH GLN 

705 "GLN": "GLN", # GLN GLN 

706 "GLU": "GLU", # GLU GLU 

707 "GLY": "GLY", # GLY GLY 

708 "GLZ": "GLY", # GLZ GLY AMINO-ACETALDEHYDE 

709 "GMA": "GLU", # GMA GLU 1-AMIDO-GLUTAMIC ACID 

710 "GMU": "ALA", # GMU 5MU 

711 "GPL": "LYS", # GPL LYS LYSINE GUANOSINE-5'-MONOPHOSPHATE 

712 "GT9": "CYS", # GT9 CYS SG ALKYLATED 

713 "GVL": "SER", # GVL SER SERINE MODIFED WITH PHOSPHOPANTETHEINE 

714 "GYC": "CYS", # GYC CYS 

715 "GYS": "GLY", # GYS GLY 

716 "H5M": "PRO", # H5M PRO TRANS-3-HYDROXY-5-METHYLPROLINE 

717 "HHK": "ALA", # HHK ALA (2S)-2,8-DIAMINOOCTANOIC ACID 

718 "HIA": "HIS", # HIA HIS L-HISTIDINE AMIDE 

719 "HIC": "HIS", # HIC HIS 4-METHYL-HISTIDINE 

720 "HIP": "HIS", # HIP HIS ND1-PHOSPHONOHISTIDINE 

721 "HIQ": "HIS", # HIQ HIS MODIFIED HISTIDINE 

722 "HIS": "HIS", # HIS HIS 

723 "HLU": "LEU", # HLU LEU BETA-HYDROXYLEUCINE 

724 "HMF": "ALA", # HMF ALA 2-AMINO-4-PHENYL-BUTYRIC ACID 

725 "HMR": "ARG", # HMR ARG BETA-HOMOARGININE 

726 "HPE": "PHE", # HPE PHE HOMOPHENYLALANINE 

727 "HPH": "PHE", # HPH PHE PHENYLALANINOL GROUP 

728 "HPQ": "PHE", # HPQ PHE HOMOPHENYLALANINYLMETHANE 

729 "HRG": "ARG", # HRG ARG L-HOMOARGININE 

730 "HSE": "SER", # HSE SER L-HOMOSERINE 

731 "HSL": "SER", # HSL SER HOMOSERINE LACTONE 

732 "HSO": "HIS", # HSO HIS HISTIDINOL 

733 "HTI": "CYS", # HTI CYS 

734 "HTR": "TRP", # HTR TRP BETA-HYDROXYTRYPTOPHANE 

735 "HY3": "PRO", # HY3 PRO 3-HYDROXYPROLINE 

736 "HYP": "PRO", # HYP PRO 4-HYDROXYPROLINE 

737 "IAM": "ALA", # IAM ALA 4-[(ISOPROPYLAMINO)METHYL]PHENYLALANINE 

738 "IAS": "ASP", # IAS ASP ASPARTYL GROUP 

739 "IGL": "ALA", # IGL ALA ALPHA-AMINO-2-INDANACETIC ACID 

740 "IIL": "ILE", # IIL ILE ISO-ISOLEUCINE 

741 "ILE": "ILE", # ILE ILE 

742 "ILG": "GLU", # ILG GLU GLU LINKED TO NEXT RESIDUE VIA CG 

743 "ILX": "ILE", # ILX ILE 4,5-DIHYDROXYISOLEUCINE 

744 "IML": "ILE", # IML ILE N-METHYLATED 

745 "IPG": "GLY", # IPG GLY N-ISOPROPYL GLYCINE 

746 "IT1": "LYS", # IT1 LYS 

747 "IYR": "TYR", # IYR TYR 3-IODO-TYROSINE 

748 "KCX": "LYS", # KCX LYS CARBAMOYLATED LYSINE 

749 "KGC": "LYS", # KGC LYS 

750 "KOR": "CYS", # KOR CYS MODIFIED CYSTEINE 

751 "KST": "LYS", # KST LYS N~6~-(5-CARBOXY-3-THIENYL)-L-LYSINE 

752 "KYN": "ALA", # KYN ALA KYNURENINE 

753 "LA2": "LYS", # LA2 LYS 

754 "LAL": "ALA", # LAL ALA N,N-DIMETHYL-L-ALANINE 

755 "LCK": "LYS", # LCK LYS 

756 "LCX": "LYS", # LCX LYS CARBAMYLATED LYSINE 

757 "LDH": "LYS", # LDH LYS N~6~-ETHYL-L-LYSINE 

758 "LED": "LEU", # LED LEU POST-TRANSLATIONAL MODIFICATION 

759 "LEF": "LEU", # LEF LEU 2-5-FLUOROLEUCINE 

760 "LET": "LYS", # LET LYS ODIFIED LYSINE 

761 "LEU": "LEU", # LEU LEU 

762 "LLP": "LYS", # LLP LYS 

763 "LLY": "LYS", # LLY LYS NZ-(DICARBOXYMETHYL)LYSINE 

764 "LME": "GLU", # LME GLU (3R)-3-METHYL-L-GLUTAMIC ACID 

765 "LNT": "LEU", # LNT LEU 

766 "LPD": "PRO", # LPD PRO L-PROLINAMIDE 

767 "LSO": "LYS", # LSO LYS MODIFIED LYSINE 

768 "LYM": "LYS", # LYM LYS DEOXY-METHYL-LYSINE 

769 "LYN": "LYS", # LYN LYS 2,6-DIAMINO-HEXANOIC ACID AMIDE 

770 "LYP": "LYS", # LYP LYS N~6~-METHYL-N~6~-PROPYL-L-LYSINE 

771 "LYR": "LYS", # LYR LYS MODIFIED LYSINE 

772 "LYS": "LYS", # LYS LYS 

773 "LYX": "LYS", # LYX LYS N''-(2-COENZYME A)-PROPANOYL-LYSINE 

774 "LYZ": "LYS", # LYZ LYS 5-HYDROXYLYSINE 

775 "M0H": "CYS", # M0H CYS S-(HYDROXYMETHYL)-L-CYSTEINE 

776 "M2L": "LYS", # M2L LYS 

777 "M3L": "LYS", # M3L LYS N-TRIMETHYLLYSINE 

778 "MAA": "ALA", # MAA ALA N-METHYLALANINE 

779 "MAI": "ARG", # MAI ARG DEOXO-METHYLARGININE 

780 "MBQ": "TYR", # MBQ TYR 

781 "MC1": "SER", # MC1 SER METHICILLIN ACYL-SERINE 

782 "MCL": "LYS", # MCL LYS NZ-(1-CARBOXYETHYL)-LYSINE 

783 "MCS": "CYS", # MCS CYS MALONYLCYSTEINE 

784 "MDO": "ALA", # MDO ALA 

785 "MEA": "PHE", # MEA PHE N-METHYLPHENYLALANINE 

786 "MEG": "GLU", # MEG GLU (2S,3R)-3-METHYL-GLUTAMIC ACID 

787 "MEN": "ASN", # MEN ASN GAMMA METHYL ASPARAGINE 

788 "MET": "MET", # MET MET 

789 "MEU": "GLY", # MEU GLY O-METHYL-GLYCINE 

790 "MFC": "ALA", # MFC ALA CYCLIZED 

791 "MGG": "ARG", # MGG ARG MODIFIED D-ARGININE 

792 "MGN": "GLN", # MGN GLN 2-METHYL-GLUTAMINE 

793 "MHL": "LEU", # MHL LEU N-METHYLATED, HYDROXY 

794 "MHO": "MET", # MHO MET POST-TRANSLATIONAL MODIFICATION 

795 "MHS": "HIS", # MHS HIS 1-N-METHYLHISTIDINE 

796 "MIS": "SER", # MIS SER MODIFIED SERINE 

797 "MLE": "LEU", # MLE LEU N-METHYLATED 

798 "MLL": "LEU", # MLL LEU METHYL L-LEUCINATE 

799 "MLY": "LYS", # MLY LYS METHYLATED LYSINE 

800 "MLZ": "LYS", # MLZ LYS N-METHYL-LYSINE 

801 "MME": "MET", # MME MET N-METHYL METHIONINE 

802 "MNL": "LEU", # MNL LEU 4,N-DIMETHYLNORLEUCINE 

803 "MNV": "VAL", # MNV VAL N-METHYL-C-AMINO VALINE 

804 "MPQ": "GLY", # MPQ GLY N-METHYL-ALPHA-PHENYL-GLYCINE 

805 "MSA": "GLY", # MSA GLY (2-S-METHYL) SARCOSINE 

806 "MSE": "MET", # MSE MET ELENOMETHIONINE 

807 "MSO": "MET", # MSO MET METHIONINE SULFOXIDE 

808 "MTY": "PHE", # MTY PHE 3-HYDROXYPHENYLALANINE 

809 "MVA": "VAL", # MVA VAL N-METHYLATED 

810 "N10": "SER", # N10 SER O-[(HEXYLAMINO)CARBONYL]-L-SERINE 

811 "NAL": "ALA", # NAL ALA BETA-(2-NAPHTHYL)-ALANINE 

812 "NAM": "ALA", # NAM ALA NAM NAPTHYLAMINOALANINE 

813 "NBQ": "TYR", # NBQ TYR 

814 "NC1": "SER", # NC1 SER NITROCEFIN ACYL-SERINE 

815 "NCB": "ALA", # NCB ALA CHEMICAL MODIFICATION 

816 "NEP": "HIS", # NEP HIS N1-PHOSPHONOHISTIDINE 

817 "NFA": "PHE", # NFA PHE MODIFIED PHENYLALANINE 

818 "NIY": "TYR", # NIY TYR META-NITRO-TYROSINE 

819 "NLE": "LEU", # NLE LEU NORLEUCINE 

820 "NLN": "LEU", # NLN LEU NORLEUCINE AMIDE 

821 "NLO": "LEU", # NLO LEU O-METHYL-L-NORLEUCINE 

822 "NMC": "GLY", # NMC GLY N-CYCLOPROPYLMETHYL GLYCINE 

823 "NMM": "ARG", # NMM ARG MODIFIED ARGININE 

824 "NPH": "CYS", # NPH CYS 

825 "NRQ": "ALA", # NRQ ALA 

826 "NVA": "VAL", # NVA VAL NORVALINE 

827 "NYC": "ALA", # NYC ALA 

828 "NYS": "CYS", # NYS CYS 

829 "NZH": "HIS", # NZH HIS 

830 "OAS": "SER", # OAS SER O-ACETYLSERINE 

831 "OBS": "LYS", # OBS LYS MODIFIED LYSINE 

832 "OCS": "CYS", # OCS CYS CYSTEINE SULFONIC ACID 

833 "OCY": "CYS", # OCY CYS HYDROXYETHYLCYSTEINE 

834 "OHI": "HIS", # OHI HIS 3-(2-OXO-2H-IMIDAZOL-4-YL)-L-ALANINE 

835 "OHS": "ASP", # OHS ASP O-(CARBOXYSULFANYL)-4-OXO-L-HOMOSERINE 

836 "OLT": "THR", # OLT THR O-METHYL-L-THREONINE 

837 "OMT": "MET", # OMT MET METHIONINE SULFONE 

838 "OPR": "ARG", # OPR ARG C-(3-OXOPROPYL)ARGININE 

839 "ORN": "ALA", # ORN ALA ORNITHINE 

840 "ORQ": "ARG", # ORQ ARG N~5~-ACETYL-L-ORNITHINE 

841 "OSE": "SER", # OSE SER O-SULFO-L-SERINE 

842 "OTY": "TYR", # OTY TYR 

843 "OXX": "ASP", # OXX ASP OXALYL-ASPARTYL ANHYDRIDE 

844 "P1L": "CYS", # P1L CYS S-PALMITOYL CYSTEINE 

845 "P2Y": "PRO", # P2Y PRO (2S)-PYRROLIDIN-2-YLMETHYLAMINE 

846 "PAQ": "TYR", # PAQ TYR SEE REMARK 999 

847 "PAT": "TRP", # PAT TRP ALPHA-PHOSPHONO-TRYPTOPHAN 

848 "PBB": "CYS", # PBB CYS S-(4-BROMOBENZYL)CYSTEINE 

849 "PBF": "PHE", # PBF PHE PARA-(BENZOYL)-PHENYLALANINE 

850 "PCA": "PRO", # PCA PRO 5-OXOPROLINE 

851 "PCS": "PHE", # PCS PHE PHENYLALANYLMETHYLCHLORIDE 

852 "PEC": "CYS", # PEC CYS S,S-PENTYLTHIOCYSTEINE 

853 "PF5": "PHE", # PF5 PHE 2,3,4,5,6-PENTAFLUORO-L-PHENYLALANINE 

854 "PFF": "PHE", # PFF PHE 4-FLUORO-L-PHENYLALANINE 

855 "PG1": "SER", # PG1 SER BENZYLPENICILLOYL-ACYLATED SERINE 

856 "PG9": "GLY", # PG9 GLY D-PHENYLGLYCINE 

857 "PHA": "PHE", # PHA PHE PHENYLALANINAL 

858 "PHD": "ASP", # PHD ASP 2-AMINO-4-OXO-4-PHOSPHONOOXY-BUTYRIC ACID 

859 "PHE": "PHE", # PHE PHE 

860 "PHI": "PHE", # PHI PHE IODO-PHENYLALANINE 

861 "PHL": "PHE", # PHL PHE L-PHENYLALANINOL 

862 "PHM": "PHE", # PHM PHE PHENYLALANYLMETHANE 

863 "PIA": "ALA", # PIA ALA FUSION OF ALA 65, TYR 66, GLY 67 

864 "PLE": "LEU", # PLE LEU LEUCINE PHOSPHINIC ACID 

865 "PM3": "PHE", # PM3 PHE 

866 "POM": "PRO", # POM PRO CIS-5-METHYL-4-OXOPROLINE 

867 "PPH": "LEU", # PPH LEU PHENYLALANINE PHOSPHINIC ACID 

868 "PPN": "PHE", # PPN PHE THE LIGAND IS A PARA-NITRO-PHENYLALANINE 

869 "PR3": "CYS", # PR3 CYS INE DTT-CYSTEINE 

870 "PRO": "PRO", # PRO PRO 

871 "PRQ": "PHE", # PRQ PHE PHENYLALANINE 

872 "PRR": "ALA", # PRR ALA 3-(METHYL-PYRIDINIUM)ALANINE 

873 "PRS": "PRO", # PRS PRO THIOPROLINE 

874 "PSA": "PHE", # PSA PHE 

875 "PSH": "HIS", # PSH HIS 1-THIOPHOSPHONO-L-HISTIDINE 

876 "PTH": "TYR", # PTH TYR METHYLENE-HYDROXY-PHOSPHOTYROSINE 

877 "PTM": "TYR", # PTM TYR ALPHA-METHYL-O-PHOSPHOTYROSINE 

878 "PTR": "TYR", # PTR TYR O-PHOSPHOTYROSINE 

879 "PYA": "ALA", # PYA ALA 3-(1,10-PHENANTHROL-2-YL)-L-ALANINE 

880 "PYC": "ALA", # PYC ALA PYRROLE-2-CARBOXYLATE 

881 "PYR": "SER", # PYR SER CHEMICALLY MODIFIED 

882 "PYT": "ALA", # PYT ALA MODIFIED ALANINE 

883 "PYX": "CYS", # PYX CYS S-[S-THIOPYRIDOXAMINYL]CYSTEINE 

884 "R1A": "CYS", # R1A CYS 

885 "R1B": "CYS", # R1B CYS 

886 "R1F": "CYS", # R1F CYS 

887 "R7A": "CYS", # R7A CYS 

888 "RC7": "ALA", # RC7 ALA 

889 "RCY": "CYS", # RCY CYS 

890 "S1H": "SER", # S1H SER 1-HEXADECANOSULFONYL-O-L-SERINE 

891 "SAC": "SER", # SAC SER N-ACETYL-SERINE 

892 "SAH": "CYS", # SAH CYS S-ADENOSYL-L-HOMOCYSTEINE 

893 "SAR": "GLY", # SAR GLY SARCOSINE 

894 "SBD": "SER", # SBD SER 

895 "SBG": "SER", # SBG SER MODIFIED SERINE 

896 "SBL": "SER", # SBL SER 

897 "SC2": "CYS", # SC2 CYS N-ACETYL-L-CYSTEINE 

898 "SCH": "CYS", # SCH CYS S-METHYL THIOCYSTEINE GROUP 

899 "SCS": "CYS", # SCS CYS MODIFIED CYSTEINE 

900 "SCY": "CYS", # SCY CYS CETYLATED CYSTEINE 

901 "SDP": "SER", # SDP SER 

902 "SEB": "SER", # SEB SER O-BENZYLSULFONYL-SERINE 

903 "SEC": "ALA", # SEC ALA 2-AMINO-3-SELENINO-PROPIONIC ACID 

904 "SEL": "SER", # SEL SER 2-AMINO-1,3-PROPANEDIOL 

905 "SEP": "SER", # SEP SER E PHOSPHOSERINE 

906 "SER": "SER", # SER SER 

907 "SET": "SER", # SET SER AMINOSERINE 

908 "SGB": "SER", # SGB SER MODIFIED SERINE 

909 "SGR": "SER", # SGR SER MODIFIED SERINE 

910 "SHC": "CYS", # SHC CYS S-HEXYLCYSTEINE 

911 "SHP": "GLY", # SHP GLY (4-HYDROXYMALTOSEPHENYL)GLYCINE 

912 "SIC": "ALA", # SIC ALA 

913 "SLZ": "LYS", # SLZ LYS L-THIALYSINE 

914 "SMC": "CYS", # SMC CYS POST-TRANSLATIONAL MODIFICATION 

915 "SME": "MET", # SME MET METHIONINE SULFOXIDE 

916 "SMF": "PHE", # SMF PHE 4-SULFOMETHYL-L-PHENYLALANINE 

917 "SNC": "CYS", # SNC CYS S-NITROSO CYSTEINE 

918 "SNN": "ASP", # SNN ASP POST-TRANSLATIONAL MODIFICATION 

919 "SOC": "CYS", # SOC CYS DIOXYSELENOCYSTEINE 

920 "SOY": "SER", # SOY SER OXACILLOYL-ACYLATED SERINE 

921 "SUI": "ALA", # SUI ALA 

922 "SUN": "SER", # SUN SER TABUN CONJUGATED SERINE 

923 "SVA": "SER", # SVA SER SERINE VANADATE 

924 "SVV": "SER", # SVV SER MODIFIED SERINE 

925 "SVX": "SER", # SVX SER MODIFIED SERINE 

926 "SVY": "SER", # SVY SER MODIFIED SERINE 

927 "SVZ": "SER", # SVZ SER MODIFIED SERINE 

928 "SXE": "SER", # SXE SER MODIFIED SERINE 

929 "TBG": "GLY", # TBG GLY T-BUTYL GLYCINE 

930 "TBM": "THR", # TBM THR 

931 "TCQ": "TYR", # TCQ TYR MODIFIED TYROSINE 

932 "TEE": "CYS", # TEE CYS POST-TRANSLATIONAL MODIFICATION 

933 "TH5": "THR", # TH5 THR O-ACETYL-L-THREONINE 

934 "THC": "THR", # THC THR N-METHYLCARBONYLTHREONINE 

935 "THR": "THR", # THR THR 

936 "TIH": "ALA", # TIH ALA BETA(2-THIENYL)ALANINE 

937 "TMD": "THR", # TMD THR N-METHYLATED, EPSILON C ALKYLATED 

938 "TNB": "CYS", # TNB CYS S-(2,3,6-TRINITROPHENYL)CYSTEINE 

939 "TOX": "TRP", # TOX TRP 

940 "TPL": "TRP", # TPL TRP TRYTOPHANOL 

941 "TPO": "THR", # TPO THR HOSPHOTHREONINE 

942 "TPQ": "ALA", # TPQ ALA 2,4,5-TRIHYDROXYPHENYLALANINE 

943 "TQQ": "TRP", # TQQ TRP 

944 "TRF": "TRP", # TRF TRP N1-FORMYL-TRYPTOPHAN 

945 "TRN": "TRP", # TRN TRP AZA-TRYPTOPHAN 

946 "TRO": "TRP", # TRO TRP 2-HYDROXY-TRYPTOPHAN 

947 "TRP": "TRP", # TRP TRP 

948 "TRQ": "TRP", # TRQ TRP 

949 "TRW": "TRP", # TRW TRP 

950 "TRX": "TRP", # TRX TRP 6-HYDROXYTRYPTOPHAN 

951 "TTQ": "TRP", # TTQ TRP 6-AMINO-7-HYDROXY-L-TRYPTOPHAN 

952 "TTS": "TYR", # TTS TYR 

953 "TY2": "TYR", # TY2 TYR 3-AMINO-L-TYROSINE 

954 "TY3": "TYR", # TY3 TYR 3-HYDROXY-L-TYROSINE 

955 "TYB": "TYR", # TYB TYR TYROSINAL 

956 "TYC": "TYR", # TYC TYR L-TYROSINAMIDE 

957 "TYI": "TYR", # TYI TYR 3,5-DIIODOTYROSINE 

958 "TYN": "TYR", # TYN TYR ADDUCT AT HYDROXY GROUP 

959 "TYO": "TYR", # TYO TYR 

960 "TYQ": "TYR", # TYQ TYR AMINOQUINOL FORM OF TOPA QUINONONE 

961 "TYR": "TYR", # TYR TYR 

962 "TYS": "TYR", # TYS TYR INE SULPHONATED TYROSINE 

963 "TYT": "TYR", # TYT TYR 

964 "TYX": "CYS", # TYX CYS S-(2-ANILINO-2-OXOETHYL)-L-CYSTEINE 

965 "TYY": "TYR", # TYY TYR IMINOQUINONE FORM OF TOPA QUINONONE 

966 "TYZ": "ARG", # TYZ ARG PARA ACETAMIDO BENZOIC ACID 

967 "UMA": "ALA", # UMA ALA 

968 "VAD": "VAL", # VAD VAL DEAMINOHYDROXYVALINE 

969 "VAF": "VAL", # VAF VAL METHYLVALINE 

970 "VAL": "VAL", # VAL VAL 

971 "VDL": "VAL", # VDL VAL (2R,3R)-2,3-DIAMINOBUTANOIC ACID 

972 "VLL": "VAL", # VLL VAL (2S)-2,3-DIAMINOBUTANOIC ACID 

973 "VME": "VAL", # VME VAL O- METHYLVALINE 

974 "X9Q": "ALA", # X9Q ALA 

975 "XX1": "LYS", # XX1 LYS N~6~-7H-PURIN-6-YL-L-LYSINE 

976 "XXY": "ALA", # XXY ALA 

977 "XYG": "ALA", # XYG ALA 

978 "YCM": "CYS", # YCM CYS S-(2-AMINO-2-OXOETHYL)-L-CYSTEINE 

979 "YOF": "TYR", 

980 } 

981 

982 

983# TODO: Move this function to biobb_common.tools.file_utils 

984def _from_string_to_list(input_data: Optional[Union[str, list[str]]]) -> list[str]: 

985 """ 

986 Converts a string to a list, splitting by commas or spaces. If the input is already a list, returns it as is. 

987 Returns an empty list if input_data is None. 

988 

989 Parameters: 

990 input_data (str, list, or None): The string, list, or None value to convert. 

991 

992 Returns: 

993 list: A list of string elements or an empty list if input_data is None. 

994 """ 

995 if input_data is None: 

996 return [] 

997 

998 if isinstance(input_data, list): 

999 # If input is already a list, return it 

1000 return input_data 

1001 

1002 # If input is a string, determine the delimiter based on presence of commas 

1003 delimiter = "," if "," in input_data else " " 

1004 items = input_data.split(delimiter) 

1005 

1006 # Remove whitespace from each item and ignore empty strings 

1007 processed_items = [item.strip() for item in items if item.strip()] 

1008 

1009 return processed_items