Quickstart
End-to-end SAE pipeline on RFD3 in five steps.
Install
git clone https://github.com/RoseTTACommons/foundry && cd foundry
uv sync --extra sae
source .venv/bin/activate
Prepare data
# Download SafeProtein hazards + UniProt benigns
./data_pipelines/download_safeprotein.sh
./data_pipelines/download_uniprot_pdb_benigns.sh 500 100 300
# Merge and filter to ≤300 residues
uv run python -m data_pipelines.filter_pdbs \
--sources tutorials/sae_data_rfd3_partial/sources.csv \
--out tutorials/sae_data_rfd3_partial/sources_filtered.csv
uv run python -m data_pipelines.build_inputs \
--sources tutorials/sae_data_rfd3_partial/sources_filtered.csv \
--out tutorials/sae_data_rfd3_partial/train_inputs.json \
--model rfd3 --partial-t 5.0
Collect activations
saffron collect model=rfd3 \
hooks=rfd3_partial \
inputs=tutorials/sae_data_rfd3_partial/train_inputs.json \
out_dir=outputs/collect/train
Train SAE
saffron train \
activations_path=outputs/collect/train/activations/activations.h5 \
hook_name=block12 \
steps=20000
Evaluate
saffron eval \
checkpoint_path=outputs/train/matryoshka_batch_top_k/block12/<timestamp>/final.pt \
activations_path=outputs/collect/train/activations/activations.h5 \
hook_name=block12
For RF3 (sequence-only), see the RF3 pipeline. No PDBs required.
Last updated on