RMSF per residue for protein2¶
Uses MDAnalysis to compute the metric for protein2. The workflow mirrors the analysis scripts at https://github.com/JordiVillaFreixa/Pau_TFG_DAO/tree/main/analysis.
Table of contents¶
Step 1¶
In [ ]:
from pathlib import Path
import os
import matplotlib.pyplot as plt
import MDAnalysis as mda
from MDAnalysis.analysis import align
import numpy as np
import pandas as pd
COURSE_DIR = Path(os.environ.get('COURSE_DIR','~/Concepcion26')).expanduser()
topo = COURSE_DIR / 'data' / 'complex' / 'protein2.pdb'
traj = COURSE_DIR / 'results' / '03-simulaciones-clasicas' / 'complex' / 'output_traj.dcd'
if not topo.exists() or not traj.exists():
raise FileNotFoundError(f'Run Episode 3 scripts so {traj!s} exists')
u = mda.Universe(topo, traj)
average = align.AverageStructure(u, u, select='protein and name CA', ref_frame=0).run().results.universe
align.AlignTraj(u, average, select='protein and name CA', in_memory=True).run()
select = u.select_atoms('protein and name CA')
coords = []
for ts in u.trajectory[100:]:
coords.append(select.positions.copy())
coords = np.array(coords)
avg = coords.mean(axis=0)
rmsf = np.sqrt(((coords - avg)**2).sum(axis=2).mean(axis=0))
residues = [res.resid for res in select.residues]
df = pd.DataFrame({'Residue ID': residues, 'RMSF (Å)': rmsf})
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(df['Residue ID'], df['RMSF (Å)'])
ax.set_xlabel('Residue')
ax.set_ylabel('RMSF (Å)')
ax.set_title('Per-residue RMSF (Cα)')
ax.grid(True)
fig.tight_layout()
fig.savefig('rmsf_ca.png')
print(df.head())