Sequence exploration for the protein-ligand complex¶
This notebook isolates the bioinformatics checks (sequence, UniProt, PDB similarity) so the main introduction notebook can stay focused on visualization and parameter creation.
Data sources¶
We reuse the protein and ligand files from data/complex/. The molecule parsing and sequence queries below demonstrate how to go from a PDB to UniProt accessions and PDB codes.
Notebook settings¶
Define limits and environment-wide constants here so every downstream cell can reuse them.
# how many results to return from each database
UNIPROT_LIMIT = 20
PDB_LIMIT = 20
Sequence parsing¶
Extract the protein sequence from the input PDB and keep it in SEQ for downstream queries.
import os
from pathlib import Path
from Bio.PDB import PDBParser
COURSE_DIR = Path(os.environ.get("COURSE_DIR", str(Path.home() / "Concepcion26"))).expanduser()
PROTEIN_PDB = COURSE_DIR / "data" / "complex" / "protein.pdb"
parser = PDBParser(QUIET=True)
structure = parser.get_structure("protein", str(PROTEIN_PDB))
three_to_one = {
"ALA": "A", "ARG": "R", "ASN": "N", "ASP": "D", "CYS": "C",
"GLN": "Q", "GLU": "E", "GLY": "G", "HIS": "H", "ILE": "I",
"LEU": "L", "LYS": "K", "MET": "M", "PHE": "F", "PRO": "P",
"SER": "S", "THR": "T", "TRP": "W", "TYR": "Y", "VAL": "V",
"HID": "H", "HIE": "H", "HIP": "H",
}
sequences = {}
for model in structure:
for chain in model:
seq = []
for residue in chain:
if residue.id[0] != " ":
continue
seq.append(three_to_one.get(residue.resname, "X"))
if seq:
sequences[chain.id] = "".join(seq)
break
print("Protein PDB:", PROTEIN_PDB)
if sequences:
for chain_id, seq in sequences.items():
print(f"Chain {chain_id}: {len(seq)} residues")
print(seq)
else:
print("No chains parsed from the structure.")
SEQ = next(iter(sequences.values()), "")
Protein PDB: /home/jordivilla/Concepcion26/data/complex/protein.pdb Chain A: 304 residues SGFRKMAFPSGKVEGCMVQVTCGTTTLNGLWLDDVVYCPRHVICTSEDMLNPNYEDLLIRKSNHNFLVQAGNVQLRVIGHSMQNCVLKLKVDTANPKTPKYKFVRIQPGQTFSVLACYNGSPSGVYQCAMRPNFTIKGSFLNGSCGSVGFNIDYDCVSFCYMHHMELPTGVHAGTDLEGNFYGPFVDRQTAQAAGTDTTITVNVLAWLYAAVINGDRWFLNRFTTTLNDFNLVAMKYNYEPLTQDHVDILGPLSAQTGIAVLDMCASLKELLQNGMNGRTILGSALLEDEFTPFDVVRQCSGVT
UniProt BLAST¶
Get some information from the pdbfile. Run BLASTP against UniProt (via SwissProt) to collect candidate accessions.
from Bio.Blast import NCBIWWW, NCBIXML
sequence = globals().get("SEQ", "")
def run_uniprot_blast(seq, max_hits=UNIPROT_LIMIT):
trimmed = seq if len(seq) <= 500 else seq[:500]
if not trimmed:
return []
try:
handle = NCBIWWW.qblast("blastp", "swissprot", trimmed, hitlist_size=max_hits, format_type="XML")
except Exception as exc:
print("UniProt BLAST request failed:", exc)
return []
try:
record = NCBIXML.read(handle)
except Exception as exc:
print("Could not parse UniProt BLAST output:", exc)
return []
hits = []
for alignment in record.alignments:
desc = alignment.hit_def
identity = alignment.hsps[0].identities if alignment.hsps else 0
accessions = []
for token in desc.split():
if token.count("|") >= 2:
accessions.append(token.split("|")[1])
hits.append((alignment.accession, accessions, identity))
return hits
if not sequence:
print("Sequence missing; rerun the parsing cell.")
else:
UNIPROT_HITS = run_uniprot_blast(sequence)
globals()["UNIPROT_HITS"] = UNIPROT_HITS
globals()["TOP_UNIPROT_ACCESSION"] = UNIPROT_HITS[0][0] if UNIPROT_HITS else None
if UNIPROT_HITS:
print("Top UniProt hits (primary accession / parsed ids / identities):")
for accession, parsed, identity in UNIPROT_HITS:
print(f" {accession} | {parsed or ['(no parsed ids)']} | identities={identity}")
else:
print("No UniProt hits returned.")
Top UniProt hits (primary accession / parsed ids / identities): P0DTC1 | ['(no parsed ids)'] | identities=304 P0DTD1 | ['(no parsed ids)'] | identities=304 P0C6F5 | ['(no parsed ids)'] | identities=292 P0C6U8 | ['(no parsed ids)'] | identities=292 P0C6V9 | ['(no parsed ids)'] | identities=292 P0C6T7 | ['(no parsed ids)'] | identities=291 P0C6X7 | ['(no parsed ids)'] | identities=292 P0C6F8 | ['(no parsed ids)'] | identities=290 P0C6W2 | ['(no parsed ids)'] | identities=290 P0C6W6 | ['(no parsed ids)'] | identities=291 P0C6T6 | ['(no parsed ids)'] | identities=158 P0C6W5 | ['(no parsed ids)'] | identities=158 K9N638 | ['(no parsed ids)'] | identities=156 K9N7C7 | ['(no parsed ids)'] | identities=156 P0C6T5 | ['(no parsed ids)'] | identities=154 P0C6W4 | ['(no parsed ids)'] | identities=154 P0C6U9 | ['(no parsed ids)'] | identities=154 P0C6X8 | ['(no parsed ids)'] | identities=154 P0C6T4 | ['(no parsed ids)'] | identities=153 P0C6F7 | ['(no parsed ids)'] | identities=153
import requests
hits = globals().get("UNIPROT_HITS") or []
def fetch_entry(accession):
url = f"https://rest.uniprot.org/uniprotkb/{accession}.json"
resp = requests.get(url, params={"format": "json"}, timeout=15)
resp.raise_for_status()
return resp.json()
def summarize(entry):
name = entry.get("proteinDescription", {}).get("recommendedName", {}).get("fullName", {}).get("value")
if not name:
name = entry.get("proteinName", {}).get("value") or entry.get("entryType", "<entry>")
organism = entry.get("organism", {}).get("scientificName")
length = entry.get("sequence", {}).get("length") or entry.get("length")
function = next((c.get("texts", [])[0].get("value") for c in entry.get("comments", []) if c.get("commentType", c.get("type")) in ("FUNCTION", "function") and c.get("texts")), None)
return name, organism, length, function
if not hits:
print("No UniProt BLAST hits yet; rerun that cell first.")
else:
print("Fetching UniProt entry details for BLAST hits...")
for accession, parsed, identity in hits[:UNIPROT_LIMIT]:
try:
entry = fetch_entry(accession)
except requests.HTTPError as exc:
print("Failed to fetch", accession, exc)
continue
name, organism, length, function = summarize(entry)
print(f"{accession} (identity {identity}) -> {name} | {organism or '<organism?>'} | length={length or '?'}")
if function:
print(f" Function: {function}")
Fetching UniProt entry details for BLAST hits... P0DTC1 (identity 304) -> Replicase polyprotein 1a | Severe acute respiratory syndrome coronavirus 2 | length=4405 Function: Multifunctional protein involved in the transcription and replication of viral RNAs. Contains the proteinases responsible for the cleavages of the polyprotein P0DTD1 (identity 304) -> Replicase polyprotein 1ab | Severe acute respiratory syndrome coronavirus 2 | length=7096 Function: Multifunctional protein involved in the transcription and replication of viral RNAs. Contains the proteinases responsible for the cleavages of the polyprotein P0C6F5 (identity 292) -> Replicase polyprotein 1a | Bat coronavirus 279/2005 | length=4388 Function: The papain-like proteinase (PL-PRO) is responsible for the cleavages located at the N-terminus of replicase polyprotein. In addition, PL-PRO possesses a deubiquitinating/deISGylating activity and processes both 'Lys-48'- and 'Lys-63'-linked polyubiquitin chains from cellular substrates. Antagonizes innate immune induction of type I interferon by blocking the phosphorylation, dimerization and subsequent nuclear translocation of host IRF-3 (By similarity) P0C6U8 (identity 292) -> Replicase polyprotein 1a | Severe acute respiratory syndrome coronavirus | length=4382 Function: Multifunctional protein involved in the transcription and replication of viral RNAs. Contains the proteinases responsible for the cleavages of the polyprotein P0C6V9 (identity 292) -> Replicase polyprotein 1ab | Bat coronavirus 279/2005 | length=7079 Function: The replicase polyprotein of coronaviruses is a multifunctional protein: it contains the activities necessary for the transcription of negative stranded RNA, leader RNA, subgenomic mRNAs and progeny virion RNA as well as proteinases responsible for the cleavage of the polyprotein into functional products P0C6T7 (identity 291) -> Replicase polyprotein 1a | Bat coronavirus Rp3/2004 | length=4380 Function: The papain-like proteinase (PL-PRO) is responsible for the cleavages located at the N-terminus of replicase polyprotein. In addition, PL-PRO possesses a deubiquitinating/deISGylating activity and processes both 'Lys-48'- and 'Lys-63'-linked polyubiquitin chains from cellular substrates. Antagonizes innate immune induction of type I interferon by blocking the phosphorylation, dimerization and subsequent nuclear translocation of host IRF-3 (By similarity) P0C6X7 (identity 292) -> Replicase polyprotein 1ab | Severe acute respiratory syndrome coronavirus | length=7073 Function: Multifunctional protein involved in the transcription and replication of viral RNAs. Contains the proteinases responsible for the cleavages of the polyprotein P0C6F8 (identity 290) -> Replicase polyprotein 1a | Bat coronavirus HKU3 | length=4376 Function: The papain-like proteinase (PL-PRO) is responsible for the cleavages located at the N-terminus of replicase polyprotein. In addition, PL-PRO possesses a deubiquitinating/deISGylating activity and processes both 'Lys-48'- and 'Lys-63'-linked polyubiquitin chains from cellular substrates. Antagonizes innate immune induction of type I interferon by blocking the phosphorylation, dimerization and subsequent nuclear translocation of host IRF-3 (By similarity) P0C6W2 (identity 290) -> Replicase polyprotein 1ab | Bat coronavirus HKU3 | length=7067 Function: The replicase polyprotein of coronaviruses is a multifunctional protein: it contains the activities necessary for the transcription of negative stranded RNA, leader RNA, subgenomic mRNAs and progeny virion RNA as well as proteinases responsible for the cleavage of the polyprotein into functional products P0C6W6 (identity 291) -> Replicase polyprotein 1ab | Bat coronavirus Rp3/2004 | length=7071 Function: The replicase polyprotein of coronaviruses is a multifunctional protein: it contains the activities necessary for the transcription of negative stranded RNA, leader RNA, subgenomic mRNAs and progeny virion RNA as well as proteinases responsible for the cleavage of the polyprotein into functional products P0C6T6 (identity 158) -> Replicase polyprotein 1a | Bat coronavirus HKU9 | length=4248 Function: The papain-like proteinase (PL-PRO) is responsible for the cleavages located at the N-terminus of replicase polyprotein. In addition, PL-PRO possesses a deubiquitinating/deISGylating activity and processes both 'Lys-48'- and 'Lys-63'-linked polyubiquitin chains from cellular substrates. Antagonizes innate immune induction of type I interferon by blocking the phosphorylation, dimerization and subsequent nuclear translocation of host IRF-3 (By similarity) P0C6W5 (identity 158) -> Replicase polyprotein 1ab | Bat coronavirus HKU9 | length=6930 Function: The replicase polyprotein of coronaviruses is a multifunctional protein: it contains the activities necessary for the transcription of negative stranded RNA, leader RNA, subgenomic mRNAs and progeny virion RNA as well as proteinases responsible for the cleavage of the polyprotein into functional products K9N638 (identity 156) -> Replicase polyprotein 1a | Middle East respiratory syndrome-related coronavirus (isolate United Kingdom/H123990006/2012) | length=4391 Function: The replicase polyprotein of coronaviruses is a multifunctional protein: it contains the activities necessary for the transcription of negative stranded RNA, leader RNA, subgenomic mRNAs and progeny virion RNA as well as proteinases responsible for the cleavage of the polyprotein into functional products K9N7C7 (identity 156) -> Replicase polyprotein 1ab | Middle East respiratory syndrome-related coronavirus (isolate United Kingdom/H123990006/2012) | length=7078 Function: The replicase polyprotein of coronaviruses is a multifunctional protein: it contains the activities necessary for the transcription of negative stranded RNA, leader RNA, subgenomic mRNAs and progeny virion RNA as well as proteinases responsible for the cleavage of the polyprotein into functional products P0C6T5 (identity 154) -> Replicase polyprotein 1a | Bat coronavirus HKU5 | length=4481 Function: The papain-like proteinase (PL-PRO) is responsible for the cleavages located at the N-terminus of replicase polyprotein. In addition, PL-PRO possesses a deubiquitinating/deISGylating activity and processes both 'Lys-48'- and 'Lys-63'-linked polyubiquitin chains from cellular substrates. Antagonizes innate immune induction of type I interferon by blocking the phosphorylation, dimerization and subsequent nuclear translocation of host IRF-3 (By similarity) P0C6W4 (identity 154) -> Replicase polyprotein 1ab | Bat coronavirus HKU5 | length=7182 Function: The replicase polyprotein of coronaviruses is a multifunctional protein: it contains the activities necessary for the transcription of negative stranded RNA, leader RNA, subgenomic mRNAs and progeny virion RNA as well as proteinases responsible for the cleavage of the polyprotein into functional products P0C6U9 (identity 154) -> Replicase polyprotein 1a | Murine coronavirus (strain 2) | length=4416 Function: The papain-like proteinase 1 (PL1-PRO) and papain-like proteinase 2 (PL2-PRO) are responsible for the cleavages located at the N-terminus of the replicase polyprotein. In addition, PLP2 possesses a deubiquitinating/deISGylating activity and processes both 'Lys-48'- and 'Lys-63'-linked polyubiquitin chains from cellular substrates. Antagonizes innate immune induction of type I interferon by blocking the phosphorylation, dimerization and subsequent nuclear translocation of host IRF-3 (By similarity) P0C6X8 (identity 154) -> Replicase polyprotein 1ab | Murine coronavirus (strain 2) | length=7124 Function: The replicase polyprotein of coronaviruses is a multifunctional protein: it contains the activities necessary for the transcription of negative stranded RNA, leader RNA, subgenomic mRNAs and progeny virion RNA as well as proteinases responsible for the cleavage of the polyprotein into functional products P0C6T4 (identity 153) -> Replicase polyprotein 1a | Bat coronavirus HKU4 | length=4434 Function: The papain-like proteinase (PL-PRO) is responsible for the cleavages located at the N-terminus of replicase polyprotein. In addition, PL-PRO possesses a deubiquitinating/deISGylating activity and processes both 'Lys-48'- and 'Lys-63'-linked polyubiquitin chains from cellular substrates. Antagonizes innate immune induction of type I interferon by blocking the phosphorylation, dimerization and subsequent nuclear translocation of host IRF-3 (By similarity) P0C6F7 (identity 153) -> Replicase polyprotein 1a | Bat coronavirus 133/2005 | length=4441 Function: Responsible for the cleavages located at the N-terminus of replicase polyprotein. In addition, PL-PRO possesses a deubiquitinating/deISGylating activity and processes both 'Lys-48'- and 'Lys-63'-linked polyubiquitin chains from cellular substrates. Antagonizes innate immune induction of type I interferon by blocking the phosphorylation, dimerization and subsequent nuclear translocation of host IRF-3
PDB BLAST¶
Search the PDB with BLASTP to find structural relatives to the protein.
from Bio.Blast import NCBIWWW, NCBIXML
from time import perf_counter
import os
from pathlib import Path
import requests
COURSE_DIR = Path(os.environ.get("COURSE_DIR", str(Path.home() / "Concepcion26"))).expanduser()
PDB_OUT = COURSE_DIR / "results" / "01-introduction-sequence-check" / "pdb"
PDB_OUT.mkdir(parents=True, exist_ok=True)
sequence = globals().get("SEQ", "")
rcsb_entry_url = "https://data.rcsb.org/rest/v1/core/entry/{}"
pdb_file_url = "https://files.rcsb.org/download/{}.pdb"
def run_pdb_blast(seq, max_hits=PDB_LIMIT):
print(f"BLAST request: submitting {len(seq)} aa sequence to the PDB (max {max_hits} hits)...")
start = perf_counter()
try:
handle = NCBIWWW.qblast("blastp", "pdb", seq, hitlist_size=max_hits, format_type="XML")
except Exception as exc:
print("PDB BLAST request failed:", exc)
return []
try:
record = NCBIXML.read(handle)
except Exception as exc:
print("Could not parse PDB BLAST response:", exc)
return []
duration = perf_counter() - start
align_count = len(record.alignments)
print(f"PDB BLAST finished in {duration:.1f}s with {align_count} alignments.")
if not align_count:
return []
hits = []
for idx, alignment in enumerate(record.alignments, start=1):
print(f" processing alignment {idx}/{align_count}: {alignment.accession} ({alignment.hit_def.split()[0]})")
accessions = []
for token in alignment.hit_def.split():
if token.count("|") >= 2:
accessions.append(token.split("|")[1])
hits.append((alignment.accession, accessions, alignment.hsps[0].identities if alignment.hsps else 0, alignment.hsps[0].bits if alignment.hsps else 0))
return hits
def fetch_rcsb_summary(pdb_code):
try:
resp = requests.get(rcsb_entry_url.format(pdb_code), timeout=10)
resp.raise_for_status()
data = resp.json()
title = data.get("struct", {}).get("title")
exp = data.get("exptl", [{}])[0].get("method")
resolution = data.get("rcsb_entry_info", {}).get("resolution_combined")
return title, exp, resolution
except Exception as exc:
print(f" Could not fetch RCSB metadata for {pdb_code}: {exc}")
return None, None, None
if not sequence:
print("Sequence missing; rerun the parsing cell.")
else:
PDB_HITS = run_pdb_blast(sequence)
globals()["PDB_HITS"] = PDB_HITS
globals()["TOP_PDB_ACCESSION"] = PDB_HITS[0][0] if PDB_HITS else None
if not PDB_HITS:
print("No PDB hits returned.")
else:
print("Top PDB hits (accession / parsed ids / identity / bits):")
pdb_codes = []
for acc, parsed, identity, bits in PDB_HITS:
pdb_code = acc[:4]
pdb_codes.append(pdb_code.lower())
print(f" {acc} | {parsed or ['(no accession)']} | identities={identity} | bits={bits}")
seen = []
for pdb_code in pdb_codes:
if pdb_code in seen:
continue
seen.append(pdb_code)
title, method, resolution = fetch_rcsb_summary(pdb_code)
print(f"- {pdb_code.upper()}: {title or '<no title>'} | method={method or '<unknown>'} | resolution={resolution or '<n/a>'}")
BLAST request: submitting 304 aa sequence to the PDB (max 20 hits)... PDB BLAST finished in 422.9s with 20 alignments. processing alignment 1/20: 8I4S_A (Chain) processing alignment 2/20: 6XA4_A (Chain) processing alignment 3/20: 9LVR_A (Chain) processing alignment 4/20: 8ZQ8_A (Chain) processing alignment 5/20: 7W9G_A (Chain) processing alignment 6/20: 7VU6_A (Chain) processing alignment 7/20: 7CWC_A (Chain) processing alignment 8/20: 7KFI_A (Chain) processing alignment 9/20: 7VTH_A (Chain) processing alignment 10/20: 9ASV_A (Chain) processing alignment 11/20: 9DTZ_A (Chain) processing alignment 12/20: 6M0K_A (Chain) processing alignment 13/20: 9KGJ_A (Chain) processing alignment 14/20: 7CB7_A (Chain) processing alignment 15/20: 6XMK_A (Chain) processing alignment 16/20: 7BRO_A (Chain) processing alignment 17/20: 5R7Y_A (Chain) processing alignment 18/20: 6YB7_A (Chain) processing alignment 19/20: 9NNG_A (Chain) processing alignment 20/20: 8VQX_A (Chain) Top PDB hits (accession / parsed ids / identity / bits): 8I4S_A | ['(no accession)'] | identities=304 | bits=637.876 6XA4_A | ['6XBG', '6XBG', '6XBH', '6XBI', '6XBI', '6XFN', '7LYI'] | identities=304 | bits=637.106 9LVR_A | ['9LVR'] | identities=304 | bits=636.721 8ZQ8_A | ['(no accession)'] | identities=304 | bits=636.721 7W9G_A | ['7XQ7'] | identities=304 | bits=636.721 7VU6_A | ['7VU6', '9LVT', '9LVT', '9LVV', '9VS1', '9VS1'] | identities=304 | bits=636.721 7CWC_A | ['7CWC'] | identities=304 | bits=636.721 7KFI_A | ['7KFI', '7KVL', '7KVL', '7KVR', '7KVR', '7LDX', '7LDX', '7LFE', '7LFE', '7LFP', '7LFP'] | identities=304 | bits=636.721 7VTH_A | ['7VTH'] | identities=304 | bits=636.336 9ASV_A | ['9ASV', '9ASW', '9ASW', '9ASY', '9ASY', '9ASZ', '9ASZ', '9AT0', '9AT0', '9AT1', '9AT1', '9AT3', '9AT3', '9AT4', '9AT4', '9AT5', '9AT5', '9AT6', '9AT6', '9AT7', '9AT7'] | identities=304 | bits=636.336 9DTZ_A | ['9DTZ', '9DU2', '9DU2', '9DU3', '9DU4', '9DU4'] | identities=304 | bits=636.336 6M0K_A | ['7MLF', '7TGR', '7Z0P', '8AJ1'] | identities=304 | bits=636.336 9KGJ_A | ['9KGJ', '9KGN', '9KGN', '9KGQ', '9KGQ', '9KGR', '9KGR', '9KGS', '9KGS'] | identities=304 | bits=636.336 7CB7_A | ['7CB7'] | identities=304 | bits=636.336 6XMK_A | ['6XMK', '7K0E', '7K0E', '7K0F', '7K0F', '7LKR', '7LKR', '7LKS', '7LKS', '7LKT', '7LKT', '7LKU', '7LKU', '7LKV', '7LKV', '7LKW', '7LKW', '7LKX', '7LKX', '7LZT', '7LZU', '7LZU', '7LZV', '7LZV', '7LZW', '7LZW', '7LZX', '7LZX', '7LZY', '7LZZ', '7M00', '7M00', '7M01', '7M01', '7M02', '7M02', '7M03', '7M03', '7M04', '7M04', '7T42', '7T42', '7T43', '7T43', '7T44', '7T44', '7T45', '7T45', '7T46', '7T46', '7T48', '7T49', '7T49', '7T4A', '7T4A', '7T4B', '7T4B', '7TQ2', '7TQ2', '7TQ3', '7TQ3', '7TQ4', '7TQ5', '7TQ5', '7TQ6', '7TQ6', '8CZW', '8CZW', '8CZX', '8CZX', '8E5X', '8E5X', '8E5Z', '8E5Z', '8E61', '8E61', '8E63', '8E63', '8E64', '8E64', '8E65', '8E68', '8E68', '8E69', '8E69', '8E6A', '8E6A', '8F44', '8F44', '8F45', '8F45', '8F46', '8F46'] | identities=304 | bits=636.336 7BRO_A | ['7BRP', '7BRP', '7CAM', '7CAM', '7D1M', '7D1M', '7DDC', '7JKV', '7JKV', '7RFW', '7URB', '7US4', '7WQ8', '7WQ9', '7WQA', '7WQK'] | identities=304 | bits=636.336 5R7Y_A | ['5R7Z', '5R80', '5R81', '5R82', '5R83', '5R84', '5R8T', '5RE4', '5RE5', '5RE6', '5RE7', '5RE8', '5RE9', '5REA', '5REB', '5REC', '5RED', '5REE', '5REF', '5REG', '5REH', '5REI', '5REJ', '5REK', '5REL', '5REM', '5REN', '5REO', '5REP', '5RER', '5RES', '5RET', '5REU', '5REV', '5REW', '5REX', '5REY', '5REZ', '5RF0', '5RF1', '5RF2', '5RF3', '5RF4', '5RF5', '5RF6', '5RF7', '5RF8', '5RF9', '5RFA', '5RFB', '5RFC', '5RFD', '5RFE', '5RFF', '5RFG', '5RFH', '5RFI', '5RFJ', '5RFK', '5RFL', '5RFM', '5RFN', '5RFO', '5RFP', '5RFQ', '5RFR', '5RFS', '5RFT', '5RFU', '5RFV', '5RFW', '5RFX', '5RFY', '5RFZ', '5RG0', '5RG1', '5RG2', '5RG3', '5RGG', '5RGH', '5RGI', '5RGJ', '5RGK', '5RGL', '5RGM', '5RGN', '5RGO', '5RGP', '5RGQ', '5RGR', '5RGS', '5RGT', '5RGU', '5RGV', '5RGW', '5RGX', '5RGY', '5RGZ', '5RH0', '5RH1', '5RH2', '5RH3', '5RH4', '5RH5', '5RH6', '5RH7', '5RH8', '5RH9', '5RHA', '5RHB', '5RHC', '5RHD', '5RHE', '5RHF', '5RL0', '5RL1', '5RL2', '5RL3', '5RL4', '5RL5', '5SML', '5SMM', '5SMN', '6LU7', '6M03', '6M2N', '6M2N', '6M2N', '6M2N', '6M2Q', '6W63', '6WNP', '6WQF', '6WTJ', '6WTK', '6WTM', '6WTM', '6XB2', '6XCH', '6XHM', '6XHM', '6XHU', '6XHU', '6XQS', '6XQT', '6XQT', '6XQU', '6XR3', '6Y2E', '6Y2F', '6Y84', '6YNQ', '6YVF', '6Z2E', '6ZRT', '6ZRU', '7A1U', '7ABU', '7ADW', '7AF0', '7AGA', '7AHA', '7AK4', '7AKU', '7ALH', '7ALI', '7ALI', '7AMJ', '7ANS', '7AP6', '7APH', '7AQE', '7AQI', '7AQJ', '7AR6', '7AU4', '7AVD', '7AVD', '7AWR', '7AWS', '7AWW', '7AX6', '7AXM', '7AXO', '7AY7', '7B2J', '7B2U', '7B2U', '7B3E', '7B3E', '7B5Z', '7B77', '7BAJ', '7BAK', '7BAL', '7BB2', '7BB2', '7BE7', '7BE7', '7BFB', '7BFB', '7BGP', '7BGP', '7BIJ', '7BQY', '7BUY', '7C6S', '7C6U', '7C7P', '7C7P', '7C8B', '7C8R', '7C8T', '7C8U', '7COM', '7COM', '7CUT', '7CUU', '7CUU', '7CWB', '7CX9', '7D1O', '7D3I', '7D64', '7DAT', '7DAU', '7DAV', '7DGB', '7DGF', '7DGG', '7DGG', '7DGH', '7DGI', '7DGI', '7DHJ', '7DJR', '7DK1', '7DK1', '7DPU', '7DPU', '7DPV', '7DPV', '7DPV', '7DPV', '7E18', '7E19', '7E5X', '7E5X', '7E5X', '7E5X', '7EIN', '7EIN', '7EN8', '7EN8', '7EN9', '7FAY', '7FAZ', '7FAZ', '7GAV', '7GAV', '7GAW', '7GAW', '7GAX', '7GAY', '7GAZ', '7GB0', '7GB1', '7GB2', '7GB3', '7GB4', '7GB5', '7GB6', '7GB7', '7GB8', '7GB9', '7GBA', '7GBB', '7GBC', '7GBD', '7GBE', '7GBF', '7GBG', '7GBH', '7GBI', '7GBJ', '7GBK', '7GBL', '7GBM', '7GBN', '7GBO', '7GBP', '7GBQ', '7GBR', '7GBS', '7GBT', '7GBU', '7GBV', '7GBW', '7GBX', '7GBY', '7GBZ', '7GC0', '7GC1', '7GC2', '7GC3', '7GC4', '7GC5', '7GC6', '7GC7', '7GC8', '7GC9', '7GCA', '7GCB', '7GCC', '7GCD', '7GCE', '7GCF', '7GCG', '7GCI', '7GCJ', '7GCK', '7GCL', '7GCM', '7GCN', '7GCO', '7GCP', '7GCQ', '7GCR', '7GCS', '7GCT', '7GCU', '7GCV', '7GCW', '7GCX', '7GCY', '7GCZ', '7GD0', '7GD1', '7GD2', '7GD3', '7GD4', '7GD5', '7GD6', '7GD7', '7GD8', '7GD9', '7GDA', '7GDB', '7GDC', '7GDD', '7GDE', '7GDF', '7GDG', '7GDH', '7GDI', '7GDJ', '7GDK', '7GDL', '7GDM', '7GDN', '7GDO', '7GDP', '7GDQ', '7GDR', '7GDS', '7GDT', '7GDU', '7GDV', '7GDW', '7GDX', '7GDY', '7GDZ', '7GE0', '7GE1', '7GE2', '7GE3', '7GE4', '7GE5', '7GE6', '7GE7', '7GE8', '7GE9', '7GEA', '7GEB', '7GEC', '7GED', '7GEE', '7GEF', '7GEG', '7GEH', '7GEI', '7GEJ', '7GEK', '7GEL', '7GEM', '7GEN', '7GEO', '7GEQ', '7GER', '7GES', '7GET', '7GEU', '7GEV', '7GEW', '7GEX', '7GEY', '7GEZ', '7GF0', '7GF1', '7GF2', '7GF3', '7GF4', '7GF5', '7GF6', '7GF7', '7GF8', '7GF9', '7GFA', '7GFB', '7GFC', '7GFD', '7GFE', '7GFF', '7GFG', '7GFH', '7GFI', '7GFJ', '7GFK', '7GFL', '7GFM', '7GFN', '7GFO', '7GFP', '7GFQ', '7GFR', '7GFS', '7GFT', '7GFU', '7GFV', '7GFW', '7GFX', '7GFY', '7GFZ', '7GG0', '7GG1', '7GG2', '7GG3', '7GG4', '7GG5', '7GG6', '7GG7', '7GG8', '7GG9', '7GGA', '7GGB', '7GGC', '7GGD', '7GGE', '7GGF', '7GGG', '7GGH', '7GGI', '7GGJ', '7GGK', '7GGL', '7GGM', '7GGN', '7GGO', '7GGP', '7GGQ', '7GGR', '7GGS', '7GGT', '7GGU', '7GGV', '7GGW', '7GGX', '7GGY', '7GGZ', '7GH0', '7GH1', '7GH2', '7GH3', '7GH4', '7GH5', '7GH6', '7GH7', '7GH8', '7GH9', '7GHA', '7GHB', '7GHC', '7GHD', '7GHE', '7GHF', '7GHG', '7GHH', '7GHI', '7GHJ', '7GHK', '7GHL', '7GHM', '7GHM', '7GHN', '7GHN', '7GHO', '7GHO', '7GHP', '7GHP', '7GHQ', '7GHQ', '7GHR', '7GHR', '7GHS', '7GHS', '7GHT', '7GHT', '7GHU', '7GHU', '7GHV', '7GHV', '7GHW', '7GHW', '7GHX', '7GHX', '7GHY', '7GHY', '7GHZ', '7GHZ', '7GI0', '7GI0', '7GI1', '7GI1', '7GI2', '7GI2', '7GI3', '7GI3', '7GI4', '7GI4', '7GI5', '7GI5', '7GI6', '7GI6', '7GI7', '7GI7', '7GI8', '7GI8', '7GI9', '7GI9', '7GIA', '7GIA', '7GIB', '7GIB', '7GIC', '7GIC', '7GID', '7GID', '7GIE', '7GIE', '7GIF', '7GIF', '7GIG', '7GIG', '7GIH', '7GIH', '7GII', '7GII', '7GIJ', '7GIJ', '7GIK', '7GIK', '7GIL', '7GIL', '7GIM', '7GIM', '7GIN', '7GIN', '7GIO', '7GIO', '7GIP', '7GIP', '7GIQ', '7GIQ', '7GIR', '7GIR', '7GIS', '7GIS', '7GIT', '7GIT', '7GIU', '7GIU', '7GIV', '7GIV', '7GIW', '7GIW', '7GIX', '7GIX', '7GIY', '7GIY', '7GIZ', '7GIZ', '7GJ0', '7GJ0', '7GJ1', '7GJ1', '7GJ2', '7GJ2', '7GJ3', '7GJ3', '7GJ4', '7GJ4', '7GJ5', '7GJ5', '7GJ6', '7GJ6', '7GJ7', '7GJ7', '7GJ8', '7GJ8', '7GJ9', '7GJ9', '7GJA', '7GJA', '7GJB', '7GJB', '7GJC', '7GJC', '7GJD', '7GJD', '7GJE', '7GJE', '7GJF', '7GJF', '7GJG', '7GJG', '7GJH', '7GJH', '7GJI', '7GJI', '7GJJ', '7GJJ', '7GJK', '7GJK', '7GJL', '7GJL', '7GJM', '7GJM', '7GJN', '7GJN', '7GJO', '7GJO', '7GJP', '7GJP', '7GJQ', '7GJQ', '7GJR', '7GJR', '7GJS', '7GJS', '7GJT', '7GJT', '7GJU', '7GJU', '7GJV', '7GJV', '7GJW', '7GJW', '7GJX', '7GJX', '7GJY', '7GJY', '7GJZ', '7GJZ', '7GK0', '7GK0', '7GK1', '7GK1', '7GK2', '7GK2', '7GK3', '7GK3', '7GK4', '7GK4', '7GK5', '7GK5', '7GK6', '7GK6', '7GK7', '7GK7', '7GK8', '7GK8', '7GK9', '7GK9', '7GKA', '7GKA', '7GKB', '7GKB', '7GKC', '7GKC', '7GKD', '7GKD', '7GKE', '7GKE', '7GKF', '7GKF', '7GKG', '7GKG', '7GKH', '7GKH', '7GKI', '7GKI', '7GKJ', '7GKJ', '7GKK', '7GKK', '7GKL', '7GKL', '7GKM', '7GKM', '7GKN', '7GKN', '7GKO', '7GKO', '7GKP', '7GKP', '7GKQ', '7GKQ', '7GKR', '7GKR', '7GKS', '7GKS', '7GKT', '7GKT', '7GKU', '7GKU', '7GKV', '7GKV', '7GKW', '7GKW', '7GKX', '7GKX', '7GKY', '7GKY', '7GKZ', '7GKZ', '7GL0', '7GL0', '7GL1', '7GL1', '7GL2', '7GL2', '7GL3', '7GL3', '7GL4', '7GL4', '7GL5', '7GL5', '7GL6', '7GL6', '7GL7', '7GL7', '7GL8', '7GL8', '7GL9', '7GL9', '7GLA', '7GLA', '7GLB', '7GLB', '7GLC', '7GLC', '7GLD', '7GLD', '7GLE', '7GLE', '7GLF', '7GLF', '7GLG', '7GLG', '7GLH', '7GLH', '7GLI', '7GLI', '7GLJ', '7GLJ', '7GLK', '7GLK', '7GLL', '7GLL', '7GLM', '7GLM', '7GLN', '7GLN', '7GLO', '7GLO', '7GLP', '7GLP', '7GLQ', '7GLQ', '7GLR', '7GLR', '7GLS', '7GLS', '7GLT', '7GLT', '7GLU', '7GLU', '7GLV', '7GLV', '7GLW', '7GLW', '7GLX', '7GLX', '7GLY', '7GLY', '7GLZ', '7GLZ', '7GM0', '7GM0', '7GM1', '7GM1', '7GM2', '7GM2', '7GM3', '7GM3', '7GM4', '7GM4', '7GM5', '7GM5', '7GM6', '7GM6', '7GM7', '7GM7', '7GM8', '7GM8', '7GM9', '7GM9', '7GMA', '7GMA', '7GMB', '7GMB', '7GMC', '7GMC', '7GMD', '7GMD', '7GME', '7GME', '7GMF', '7GMF', '7GMG', '7GMG', '7GMH', '7GMH', '7GMI', '7GMI', '7GMJ', '7GMJ', '7GMK', '7GMK', '7GML', '7GML', '7GMM', '7GMM', '7GMN', '7GMN', '7GMO', '7GMO', '7GMP', '7GMP', '7GMQ', '7GMQ', '7GMR', '7GMR', '7GMS', '7GMS', '7GMT', '7GMT', '7GMU', '7GMU', '7GMV', '7GMV', '7GMW', '7GMW', '7GMX', '7GMX', '7GMY', '7GMY', '7GMZ', '7GMZ', '7GN0', '7GN0', '7GN1', '7GN1', '7GN2', '7GN2', '7GN3', '7GN3', '7GN4', '7GN4', '7GN5', '7GN5', '7GN6', '7GN6', '7GN7', '7GN7', '7GN8', '7GN8', '7GN9', '7GN9', '7GNA', '7GNA', '7GNB', '7GNB', '7GNC', '7GNC', '7GND', '7GND', '7GNE', '7GNE', '7GNF', '7GNF', '7GNG', '7GNG', '7GNH', '7GNH', '7GNI', '7GNI', '7GNJ', '7GNJ', '7GNK', '7GNK', '7GNL', '7GNL', '7GNM', '7GNM', '7GNN', '7GNN', '7GNO', '7GNO', '7GNP', '7GNP', '7GNQ', '7GNQ', '7GNR', '7GNR', '7GNS', '7GNS', '7GNT', '7GNT', '7GNU', '7GNU', '7GRE', '7GRE', '7GRF', '7GRF', '7GRG', '7GRG', '7GRH', '7GRH', '7GRI', '7GRI', '7GRJ', '7GRJ', '7GRK', '7GRK', '7GRL', '7GRL', '7GRM', '7GRM', '7GRN', '7GRN', '7GRO', '7GRO', '7GRP', '7GRP', '7GRQ', '7GRQ', '7GRR', '7GRR', '7GRS', '7GRS', '7GRT', '7GRT', '7GRU', '7GRU', '7GRV', '7GRV', '7GRW', '7GRW', '7GRX', '7GRX', '7GRY', '7GRY', '7GRZ', '7GRZ', '7GS0', '7GS0', '7GS1', '7GS1', '7GS2', '7GS2', '7GS3', '7GS3', '7GS4', '7GS4', '7GS5', '7GS5', '7GS6', '7GS6', '7HUC', '7HUD', '7HUE', '7I13', '7I13', '7I14', '7I14', '7I15', '7I15', '7I16', '7I16', '7I17', '7I17', '7I18', '7I18', '7I19', '7I19', '7I1A', '7I1A', '7I1C', '7I1C', '7I1D', '7I1D', '7I1E', '7I1E', '7I1F', '7I1F', '7I1G', '7I1G', '7I1H', '7I1H', '7I1I', '7I1I', '7I1J', '7I1J', '7JFQ', '7JP1', '7JR3', '7JST', '7JSU', '7JT0', '7JT7', '7JU7', '7JUN', '7JVZ', '7JW8', '7JW8', '7JW8', '7JW8', '7JYC', '7K3T', '7K40', '7K6D', '7K6E', '7KHP', '7KHP', '7KPH', '7KX5', '7KYU', '7L0D', '7L10', '7L11', '7L11', '7L12', '7L13', '7L13', '7L14', '7L14', '7L5D', '7L8I', '7L8I', '7L8J', '7LB7', '7LBN', '7LCO', '7LCR', '7LCR', '7LCS', '7LCT', '7LDL', '7LDL', '7LKD', '7LKD', '7LKE', '7LMD', '7LME', '7LME', '7LMF', '7LMF', '7LTJ', '7LTN', '7LYH', '7M8M', '7M8M', '7M8N', '7M8N', '7M8O', '7M8O', '7M8P', '7M8P', '7M8X', '7M8Y', '7M8Z', '7M90', '7M91', '7MAT', '7MAT', '7MAU', '7MAU', '7MAV', '7MAV', '7MAW', '7MAW', '7MAX', '7MAX', '7MAZ', '7MAZ', '7MB0', '7MB0', '7MB1', '7MB1', '7MB2', '7MB2', '7MB3', '7MB3', '7MB3', '7MB3', '7MB3', '7MB3', '7MBG', '7MBG', '7MBI', '7MBI', '7MBI', '7MBI', '7MHF', '7MHG', '7MHH', '7MHI', '7MHJ', '7MHK', '7MHL', '7MHM', '7MHN', '7MHO', '7MHP', '7MHQ', '7MLG', '7MNG', '7MRR', '7N44', '7N8C', '7NBR', '7NBS', '7NBT', '7NBY', '7NBY', '7NEO', '7NEO', '7NEV', '7NF5', '7NG3', '7NG3', '7NG6', '7NG6', '7NIJ', '7NT1', '7NT1', '7NT2', '7NT2', '7NT3', '7NT3', '7NTQ', '7NTT', '7NTT', '7NTV', '7NTV', '7NTW', '7NUK', '7NUK', '7NW2', '7NW2', '7NWX', '7NXH', '7O46', '7P2G', '7P35', '7P35', '7PFL', '7PFM', '7PFM', '7PHZ', '7PHZ', '7PXZ', '7PZQ', '7PZQ', '7Q5E', '7Q5E', '7Q5F', '7Q5F', '7QBB', '7QKA', '7QT8', '7QT8', '7R7H', '7R7H', '7RBZ', '7RC0', '7RFR', '7RFR', '7RFS', '7RFU', '7RLS', '7RM2', '7RMB', '7RME', '7RMT', '7RMZ', '7RN0', '7RN1', '7RN4', '7RNH', '7RNK', '7RNW', '7RNW', '7RNW', '7RNW', '7S3K', '7S3S', '7S4B', '7SET', '7SF1', '7SF3', '7SFB', '7SFH', '7SFI', '7SGH', '7SGH', '7SGH', '7SGH', '7SI9', '7T2T', '7TDU', '7TE0', '7TEH', '7TEK', '7TEL', '7TFR', '7TIA', '7TIU', '7TIV', '7TIW', '7TIX', '7TIY', '7TIZ', '7TJ0', '7TUU', '7U92', '7UKK', '7UR9', '7UR9', '7V1T', '7V7M', '7VFA', '7VFB', '7VH8', '7VIC', '7VJW', '7VJW', '7VJX', '7VJX', '7VJY', '7VJZ', '7VK0', '7VK0', '7VK1', '7VK2', '7VK3', '7VK3', '7VK4', '7VK4', '7VK5', '7VK5', '7VK6', '7VK6', '7VK7', '7VK7', '7VK8', '7VVT', '7VVT', '7WO1', '7WO2', '7WO3', '7WOF', '7WYM', '7WYP', '7WYP', '7X6J', '7X6K', '7XAR', '7XAR', '7Z2K', '7Z2K', '7ZQV', '7ZQV', '7ZV5', '7ZV7', '7ZV8', '7ZV8', '8A4Q', '8ACD', '8ACL', '8AEB', '8AIQ', '8AIQ', '8AIV', '8AIV', '8AIZ', '8B56', '8B56', '8BFQ', '8BFQ', '8BGA', '8BGA', '8BGD', '8CDC', '8CYU', '8CYU', '8CYU', '8CYU', '8CYZ', '8CYZ', '8CYZ', '8CYZ', '8CZ4', '8CZ4', '8CZ4', '8CZ4', '8CZ7', '8CZ7', '8CZ7', '8CZ7', '8D4P', '8DIB', '8DIB', '8DIC', '8DIC', '8DID', '8DID', '8DIE', '8DIE', '8DIF', '8DIF', '8DIG', '8DIG', '8DIH', '8DII', '8DL9', '8DLB', '8DMD', '8DSU', '8DSU', '8DZ0', '8DZ0', '8DZ2', '8DZ2', '8DZB', '8DZC', '8EZV', '8EZZ', '8F02', '8F2C', '8F2D', '8FIV', '8FIW', '8FIW', '8FTC', '8FTL', '8FTL', '8FY6', '8FY7', '8FY7', '8GTV', '8GTV', '8GTW', '8GTW', '8GVD', '8GVY', '8GXG', '8GXH', '8GXI', '8GZB', '8HBK', '8HHT', '8HI9', '8HI9', '8HTV', '8I30', '8IFP', '8IFQ', '8IFR', '8IFS', '8IFS', '8IFT', '8IGN', '8IGN', '8IGO', '8IGX', '8IGY', '8JCJ', '8JCK', '8JCL', '8JCM', '8JCN', '8JCO', '8JOP', '8OKB', '8OKC', '8OKK', '8OKK', '8OKL', '8OKL', '8OKM', '8OKM', '8OKN', '8OKN', '8P54', '8P54', '8P55', '8P55', '8P56', '8P56', '8P57', '8P57', '8P58', '8P58', '8P5A', '8P5A', '8P5B', '8P5B', '8P5C', '8P5C', '8P86', '8P86', '8P87', '8P87', '8PH4', '8PH4', '8Q71', '8Q71', '8Q71', '8Q71', '8QDC', '8QDC', '8R11', '8R11', '8R12', '8R12', '8R14', '8R14', '8R16', '8R16', '8RI4', '8RJV', '8RJY', '8RJZ', '8RJZ', '8S9Z', '8SK4', '8SKH', '8STY', '8STZ', '8SXR', '8SXR', '8T7Y', '8T7Y', '8TBE', '8TBE', '8TPD', '8TQH', '8TQJ', '8TQL', '8TQT', '8TQU', '8TY3', '8TY4', '8TY5', '8U40', '8U40', '8U9H', '8U9K', '8U9M', '8U9N', '8U9T', '8U9U', '8U9V', '8U9W', '8UAB', '8UDJ', '8UDO', '8UDP', '8UDQ', '8UDW', '8UE0', '8UEF', '8UEG', '8UEH', '8UH5', '8UHO', '8UHO', '8UIA', '8UIA', '8UIF', '8UIF', '8UND', '8UPS', '8UPV', '8UPW', '8UR9', '8UR9', '8UTE', '8V4U', '8V4U', '8VD7', '8VDJ', '8VDJ', '8VSG', '8VSG', '8W1T', '8W1T', '8W1U', '8W1U', '8WS3', '8WS3', '8WZ0', '8WZ0', '8Y42', '8Y42', '8Y42', '8Y42', '8Y44', '8Y44', '8YLS', '8YSA', '8Z46', '8ZBP', '9ARQ', '9BBP', '9BBQ', '9BBR', '9BBS', '9BBT', '9BBU', '9BBU', '9BBV', '9BBW', '9BBX', '9BBY', '9BBZ', '9BBZ', '9BC0', '9BC1', '9BC1', '9BPF', '9BPF', '9BQF', '9BQG', '9BQL', '9BQM', '9BQN', '9BQO', '9BQP', '9BQQ', '9BQT', '9BQY', '9BQZ', '9BR0', '9BR1', '9BS7', '9BS8', '9BSA', '9BSE', '9BSF', '9BSG', '9BSI', '9BSO', '9BSP', '9BSQ', '9BSR', '9BST', '9BTE', '9BTF', '9BTK', '9BTR', '9BTT', '9BVW', '9BVX', '9BVZ', '9C80', '9C80', '9C8Q', '9CEC', '9CED', '9CEK', '9CEK', '9CF9', '9CF9', '9CFB', '9CFB', '9DDF', '9DDG', '9E7S', '9EO6', '9EO6', '9EO6', '9EO6', '9EO6', '9EO6', '9EOR', '9EOR', '9EOR', '9EOR', '9EOR', '9EOR', '9EOX', '9EOX', '9EOX', '9EOX', '9EOX', '9EOX', '9F2V', '9F2V', '9F2X', '9F2X', '9FHQ', '9FHQ', '9FQ9', '9FQA', '9FX6', '9FX6', '9FX7', '9FX7', '9G0H', '9G0H', '9G0I', '9G0I', '9GF7', '9GIJ', '9GIL', '9GIL', '9GLV', '9GLV', '9GMQ', '9GMQ', '9GV2', '9GV2', '9H4B', '9HAJ', '9HAJ', '9HAK', '9HAK', '9HBQ', '9HC1', '9HC1', '9HD8', '9HDC', '9HDJ', '9HDJ', '9HDN', '9HDN', '9HDN', '9HDN', '9HFX', '9HFY', '9HFY', '9HJH', '9HJH', '9IK2', '9IR9', '9IZB', '9J8T', '9J8U', '9N6R', '9NSK', '9NSL', '9NU6', '9NU6', '9NWA', '9NWB', '9NWC', '9NWC', '9OIX', '9PJG', '9PJG', '9RHS', '9RHS', '9RHT', '9RHT', '9RHX', '9RHX', '9RI0', '9RI0', '9RI1', '9RI1', '9RI3', '9RI3', '9RI4', '9RI4', '9RI5', '9RI5', '9RI8', '9RI8', '9RID', '9RID', '9RIX', '9RIX', '9RIY', '9RIY', '9RIZ', '9RIZ', '9RJ0', '9RJ0', '9RJ3', '9RJ3', '9RJ5', '9RJ7', '9RJ7', '9RJ8', '9RJ8', '9RJF', '9RJF', '9RJR', '9RJR', '9UOQ', '9ZNL', '9ZO3'] | identities=304 | bits=636.336 6YB7_A | ['7AEG', '7AEH', '7E6K', '7QT5', '7QT6', '7QT7', '7QT9', '7Z59', '8A4T', '8A4T', '8AIU', '8AIU', '8B2T', '8BFO', '8TPB', '8TPB', '8TPC', '8TPE', '8TPE', '8TPF', '8TPG', '8TPH', '8TPI', '8WTS', '9CJU', '9CJU', '9F39', '9F3A', '9F3A', '9H0F', '9H0F', '9KSI', '9PKR', '9PKR', '9WHE'] | identities=304 | bits=636.336 9NNG_A | ['(no accession)'] | identities=303 | bits=635.95 8VQX_A | ['(no accession)'] | identities=304 | bits=635.95 - 8I4S: the complex structure of SARS-CoV-2 Mpro with D8 | method=X-RAY DIFFRACTION | resolution=[2.2] - 6XA4: Crystal structure of the SARS-CoV-2 (COVID-19) main protease in complex with UAW241 | method=X-RAY DIFFRACTION | resolution=[1.65] - 9LVR: Crystal structure of SARS-CoV-2 3CL protease in complex with compound 1 | method=X-RAY DIFFRACTION | resolution=[2.2] - 8ZQ8: SARS-Cov-2 3CL protease in complex with macrocyclic inhibitor CG-1039 | method=X-RAY DIFFRACTION | resolution=[1.77] - 7W9G: Complex structure of Mpro with ebselen-derivative inhibitor | method=X-RAY DIFFRACTION | resolution=[2.5] - 7VU6: The crystal structure of SARS-CoV-2 3CL protease in complex with compound 3 | method=X-RAY DIFFRACTION | resolution=[1.8] - 7CWC: Ambient-Temperature Serial Femtosecond X-ray Crystal structure of SARS-CoV-2 Main Protease at 2.1 A Resolution (P212121) | method=X-RAY DIFFRACTION | resolution=[2.1] - 7KFI: SARS-CoV-2 Main protease immature form - apo structure | method=X-RAY DIFFRACTION | resolution=[1.6] - 7VTH: The crystal structure of SARS-CoV-2 3CL protease in complex with compound 1 | method=X-RAY DIFFRACTION | resolution=[2.0] - 9ASV: Crystal structure of SARS-CoV-2 3CL protease in complex with a benzyl 2-pyrrolidone inhibitor | method=X-RAY DIFFRACTION | resolution=[1.8] - 9DTZ: SARS-CoV-2 Mpro in complex with compound 5 | method=X-RAY DIFFRACTION | resolution=[2.2] - 6M0K: The crystal structure of COVID-19 main protease in complex with an inhibitor 11b | method=X-RAY DIFFRACTION | resolution=[1.504] - 9KGJ: Discovery of an orally bioavailable reversible covalent SARS-CoV-2 Mpro inhibitor with pan-coronavirus activity | method=X-RAY DIFFRACTION | resolution=[1.37] - 7CB7: 1.7A resolution structure of SARS-CoV-2 main protease (Mpro) in complex with broad-spectrum coronavirus protease inhibitor GC376 | method=X-RAY DIFFRACTION | resolution=[1.69] - 6XMK: 1.70 A resolution structure of SARS-CoV-2 3CL protease in complex with inhibitor 7j | method=X-RAY DIFFRACTION | resolution=[1.7] - 7BRO: Crystal structure of the 2019-nCoV main protease | method=X-RAY DIFFRACTION | resolution=[2.0] - 5R7Y: PanDDA analysis group deposition -- Crystal Structure of COVID-19 main protease in complex with Z45617795 | method=X-RAY DIFFRACTION | resolution=[1.65] - 6YB7: SARS-CoV-2 main protease with unliganded active site (2019-nCoV, coronavirus disease 2019, COVID-19). | method=X-RAY DIFFRACTION | resolution=[1.25] - 9NNG: X-ray structure of SARS-CoV-2 main protease V186I covalently bound to inhibitor GRL-051-22 at 1.90 A | method=X-RAY DIFFRACTION | resolution=[1.9] - 8VQX: Structure of SARS-CoV-2 main protease with potent peptide aldehyde inhibitor | method=X-RAY DIFFRACTION | resolution=[1.35]
Explore BindingDB¶
BindingDB collects binding affinity data for small molecules versus protein targets; the cell below fetches the records for the top UniProt and PDB accessions.
import requests
from requests.exceptions import ReadTimeout, RequestException
API_BASE = "https://www.bindingdb.org/rwd/bind/BindingDBRESTfulAPI.jsp"
uniprot_hits = globals().get("UNIPROT_HITS") or []
pdb_hits = [hit[0][:4].upper() for hit in (globals().get("PDB_HITS") or []) if hit]
pdb_hits = list(dict.fromkeys(pdb_hits))[:PDB_LIMIT]
def flatten_bindingdb_response(data):
if isinstance(data, list):
return data
if isinstance(data, dict):
for key in ("records", "ligands", "data", "entries", "hits", "bindEntries", "bindings"):
value = data.get(key)
if isinstance(value, list) and value:
return value
nested = []
for value in data.values():
if isinstance(value, list):
nested.extend(value)
if nested:
return nested
return [data]
return []
def fetch_bindingdb(params):
try:
resp = requests.get(API_BASE, params=params, headers={"Accept": "application/json"}, timeout=15)
resp.raise_for_status()
data = resp.json()
except ReadTimeout:
print("BindingDB call timed out (15s)", params)
return []
except RequestException as exc:
print("BindingDB call failed:", exc)
return []
except ValueError:
text = resp.text.strip() if resp is not None else ""
print("BindingDB returned non-JSON; snippet:", text[:400])
if text:
return [line.strip() for line in text.splitlines() if line.strip()]
return []
return flatten_bindingdb_response(data)
bindingdb_ligands = []
uniprot_records = {}
for accession, *_ in uniprot_hits[:UNIPROT_LIMIT]:
if not accession:
continue
params = {"target": "uniprot", "targetid": accession, "format": "json"}
print("Fetching ligands for UniProt", accession)
ligands = fetch_bindingdb(params)
uniprot_records[accession] = ligands
bindingdb_ligands.extend([{"source": "uniprot", "accession": accession, "record": ligand} for ligand in ligands])
print(f" {len(ligands)} ligand records")
similar_proteins = {}
for code in pdb_hits:
params = {"pdb": code, "format": "json"}
print("Fetching BindingDB records for PDB", code)
entries = fetch_bindingdb(params)
if not entries:
continue
similar_proteins[code] = entries
bindingdb_ligands.extend([{"source": "pdb", "accession": code, "record": entry} for entry in entries])
print(f" {len(entries)} records for PDB {code}")
globals()["BINDINGDB_LIGANDS"] = bindingdb_ligands
globals()["BINDINGDB_SIMILAR"] = similar_proteins
globals()["BINDINGDB_UNIPROT_RECORDS"] = uniprot_records
print("Stored", len(bindingdb_ligands), "records in BINDINGDB_LIGANDS.")
Fetching ligands for UniProt P0DTC1 BindingDB call failed: Expecting value: line 18 column 1 (char 19) 0 ligand records Fetching ligands for UniProt P0DTD1 BindingDB call failed: Expecting value: line 18 column 1 (char 19) 0 ligand records Fetching ligands for UniProt P0C6F5 BindingDB call failed: Expecting value: line 18 column 1 (char 19) 0 ligand records Fetching ligands for UniProt P0C6U8 BindingDB call failed: Expecting value: line 18 column 1 (char 19) 0 ligand records Fetching ligands for UniProt P0C6V9 BindingDB call failed: Expecting value: line 18 column 1 (char 19) 0 ligand records Fetching ligands for UniProt P0C6T7 BindingDB call failed: Expecting value: line 18 column 1 (char 19) 0 ligand records Fetching ligands for UniProt P0C6X7 BindingDB call failed: Expecting value: line 18 column 1 (char 19) 0 ligand records Fetching ligands for UniProt P0C6F8 BindingDB call failed: Expecting value: line 18 column 1 (char 19) 0 ligand records Fetching ligands for UniProt P0C6W2 BindingDB call failed: Expecting value: line 18 column 1 (char 19) 0 ligand records Fetching ligands for UniProt P0C6W6 BindingDB call failed: Expecting value: line 18 column 1 (char 19) 0 ligand records Fetching ligands for UniProt P0C6T6 BindingDB call failed: Expecting value: line 18 column 1 (char 19) 0 ligand records Fetching ligands for UniProt P0C6W5 BindingDB call failed: Expecting value: line 18 column 1 (char 19) 0 ligand records Fetching ligands for UniProt K9N638 BindingDB call failed: Expecting value: line 18 column 1 (char 19) 0 ligand records Fetching ligands for UniProt K9N7C7 BindingDB call failed: Expecting value: line 18 column 1 (char 19) 0 ligand records Fetching ligands for UniProt P0C6T5 BindingDB call failed: Expecting value: line 18 column 1 (char 19) 0 ligand records Fetching ligands for UniProt P0C6W4 BindingDB call failed: Expecting value: line 18 column 1 (char 19) 0 ligand records Fetching ligands for UniProt P0C6U9 BindingDB call failed: Expecting value: line 18 column 1 (char 19) 0 ligand records Fetching ligands for UniProt P0C6X8 BindingDB call failed: Expecting value: line 18 column 1 (char 19) 0 ligand records Fetching ligands for UniProt P0C6T4 BindingDB call failed: Expecting value: line 18 column 1 (char 19) 0 ligand records Fetching ligands for UniProt P0C6F7 BindingDB call failed: Expecting value: line 18 column 1 (char 19) 0 ligand records Fetching BindingDB records for PDB 8I4S BindingDB call failed: Expecting value: line 18 column 1 (char 19) Fetching BindingDB records for PDB 6XA4 BindingDB call failed: Expecting value: line 18 column 1 (char 19) Fetching BindingDB records for PDB 9LVR BindingDB call failed: Expecting value: line 18 column 1 (char 19) Fetching BindingDB records for PDB 8ZQ8 BindingDB call failed: Expecting value: line 18 column 1 (char 19) Fetching BindingDB records for PDB 7W9G BindingDB call failed: Expecting value: line 18 column 1 (char 19) Fetching BindingDB records for PDB 7VU6 BindingDB call failed: Expecting value: line 18 column 1 (char 19) Fetching BindingDB records for PDB 7CWC BindingDB call failed: Expecting value: line 18 column 1 (char 19) Fetching BindingDB records for PDB 7KFI BindingDB call failed: Expecting value: line 18 column 1 (char 19) Fetching BindingDB records for PDB 7VTH BindingDB call failed: Expecting value: line 18 column 1 (char 19) Fetching BindingDB records for PDB 9ASV BindingDB call failed: Expecting value: line 18 column 1 (char 19) Fetching BindingDB records for PDB 9DTZ BindingDB call failed: Expecting value: line 18 column 1 (char 19) Fetching BindingDB records for PDB 6M0K BindingDB call failed: Expecting value: line 18 column 1 (char 19) Fetching BindingDB records for PDB 9KGJ BindingDB call failed: Expecting value: line 18 column 1 (char 19) Fetching BindingDB records for PDB 7CB7 BindingDB call failed: Expecting value: line 18 column 1 (char 19) Fetching BindingDB records for PDB 6XMK BindingDB call failed: Expecting value: line 18 column 1 (char 19) Fetching BindingDB records for PDB 7BRO BindingDB call failed: Expecting value: line 18 column 1 (char 19) Fetching BindingDB records for PDB 5R7Y BindingDB call failed: Expecting value: line 18 column 1 (char 19) Fetching BindingDB records for PDB 6YB7 BindingDB call failed: Expecting value: line 18 column 1 (char 19) Fetching BindingDB records for PDB 9NNG BindingDB call failed: Expecting value: line 18 column 1 (char 19) Fetching BindingDB records for PDB 8VQX BindingDB call failed: Expecting value: line 18 column 1 (char 19) Stored 0 records in BINDINGDB_LIGANDS.
BindingDB ligand dataframe¶
Convert the stored ligand records into a pandas.DataFrame for downstream analysis, extracting ligand names and PubChem IDs.
import pandas as pd
records = globals().get("BINDINGDB_LIGANDS") or []
if not records:
print("No BindingDB ligand records yet; rerun the BindingDB cell after the ligand fetch completes.")
else:
parsed = []
for item in records:
raw = item.get("record")
if isinstance(raw, dict):
entry = dict(raw)
elif isinstance(raw, str):
entry = {
"raw": raw,
"source_string": raw,
}
else:
continue
entry["source_type"] = item.get("source")
entry["source_query"] = item.get("accession")
parsed.append(entry)
if not parsed:
print("No structured entry data available; raw records were returned.")
else:
df = pd.DataFrame(parsed)
def tseries(col):
if col in df.columns:
return df[col]
return pd.Series([None] * len(df), index=df.index)
df = df.assign(
name=tseries("name").fillna(tseries("ligandName")).fillna(tseries("compoundName")).fillna(tseries("LIGAND_NAME")),
pubchem_cid=tseries("pubchem_cid").fillna(tseries("pubChemCompoundID")).fillna(tseries("pubchemCID")).fillna(tseries("cid"))
)
subset = df[[col for col in ("name", "pubchem_cid", "source_type", "source_query") if col in df.columns]]
print("Unique ligands discovered in BindingDB:")
print(subset.drop_duplicates().to_dict("records"))
globals()["BINDINGDB_LIGANDS_DF"] = subset
No BindingDB ligand records yet; rerun the BindingDB cell after the ligand fetch completes.
Ligand clustering¶
Use RDKit fingerprints to group the BindingDB ligands we collected before querying ChEMBL, highlighting the main clusters and their PubChem IDs.
from rdkit import Chem
from rdkit.Chem import AllChem, DataStructs
from sklearn.cluster import AgglomerativeClustering
import numpy as np
ligand_df = globals().get("BINDINGDB_LIGANDS_DF")
if ligand_df is None or ligand_df.empty:
print("No BindingDB ligand dataframe yet; run the ligand-summary cell first.")
else:
smiles_col = next((col for col in ligand_df.columns if "smiles" in col.lower()), None)
if smiles_col is None:
print("No SMILES column found in ligand dataframe (looked for columns containing 'smiles').")
else:
rows = []
fps = []
for idx, row in ligand_df.iterrows():
smi = row.get(smiles_col)
if not isinstance(smi, str) or not smi.strip():
continue
mol = Chem.MolFromSmiles(smi)
if mol is None:
continue
fp = AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=1024)
name = row.get("name") or row.get("ligandName") or row.get("compoundName") or f"ligand_{idx}"
pubchem = row.get("pubchem_cid") or row.get("PubChemCompoundID") or row.get("pubChemCompoundID") or row.get("cid")
rows.append((idx, name, smi, pubchem))
fps.append(fp)
n = len(fps)
if n == 0:
print("No valid ligands with SMILES could be parsed.")
elif n == 1:
print("Only one ligand available; nothing to cluster.")
else:
dist = np.zeros((n, n))
for i in range(n):
for j in range(i + 1, n):
sim = DataStructs.TanimotoSimilarity(fps[i], fps[j])
dist[i, j] = dist[j, i] = 1.0 - sim
n_clusters = min(4, n)
clustering = AgglomerativeClustering(n_clusters=n_clusters, affinity="precomputed", linkage="average")
labels = clustering.fit_predict(dist)
clusters = {i: [] for i in range(n_clusters)}
for label, info in zip(labels, rows):
clusters[label].append(info)
for label, items in clusters.items():
print(f"Cluster {label} ({len(items)} ligands):")
for item in items:
idx, name, smi, pubchem = item
print(f" - {name or '<unnamed>'} | SMILES={smi[:60]}{'...' if len(smi)>60 else ''} | PubChem={pubchem or 'N/A'}")
globals()["BINDINGDB_LIGAND_CLUSTERS"] = clusters
No BindingDB ligand dataframe yet; run the ligand-summary cell first.
Explore ChEMBL¶
ChEMBL provides curated bioactivity tables; this cell searches for targets with the UniProt accession.
import requests
accession = globals().get("TOP_UNIPROT_ACCESSION")
if not accession:
print("No UniProt accession available for ChEMBL search.")
else:
url = "https://www.ebi.ac.uk/chembl/api/data/target/search.json"
params = {"query": accession}
resp = requests.get(url, params=params, timeout=15)
resp.raise_for_status()
hits = resp.json().get("targets", [])
print(f"ChEMBL targets matching {accession}:")
for target in hits[:UNIPROT_LIMIT]:
print(" " + target.get("target_chembl_id", "<none>") + " / " + target.get("pref_name", "<unnamed>"))
--------------------------------------------------------------------------- HTTPError Traceback (most recent call last) Cell In[9], line 10 8 params = {"query": accession} 9 resp = requests.get(url, params=params, timeout=15) ---> 10 resp.raise_for_status() 11 hits = resp.json().get("targets", []) 12 print(f"ChEMBL targets matching {accession}:") File ~/miniconda3/envs/md-openmm/lib/python3.10/site-packages/requests/models.py:1026, in Response.raise_for_status(self) 1021 http_error_msg = ( 1022 f"{self.status_code} Server Error: {reason} for url: {self.url}" 1023 ) 1025 if http_error_msg: -> 1026 raise HTTPError(http_error_msg, response=self) HTTPError: 400 Client Error: Bad Request for url: https://www.ebi.ac.uk/chembl/api/data/target/search.json?query=P0DTC1
Explore PDBe-KB¶
PDBe-KB maps UniProt sequences to PDB entries; the snippet below prints that mapping for the top accession.
import requests
accession = globals().get("TOP_UNIPROT_ACCESSION")
if not accession:
print("No UniProt accession available for PDBe-KB search.")
else:
url = f"https://www.ebi.ac.uk/pdbe/api/mappings/uniprot/{accession}"
resp = requests.get(url, timeout=15)
resp.raise_for_status()
mapping = resp.json().get(accession, {})
print("PDBe-KB mappings for", accession)
print(mapping)
DrugBank and Drugs@FDA¶
These cells open the approved/investigational ligand search pages for the UniProt accession.
import requests
accession = globals().get("TOP_UNIPROT_ACCESSION")
if not accession:
print("No UniProt accession available for drug database searches.")
else:
urls = [
("DrugBank", f"https://go.drugbank.com/unearth/q?searcher=drugs&q={accession}"),
("Drugs@FDA", f"https://www.accessdata.fda.gov/scripts/cder/daf/index.cfm?event=basicSearch.process&ApplNo={accession}"),
]
for label, url in urls:
print("Querying", label, url)
resp = requests.get(url, timeout=15)
print("Status:", resp.status_code)
snippet = resp.text[:500] + ("..." if len(resp.text) > 500 else "")
print(snippet)