openpilot is an open source driver assistance system. openpilot performs the functions of Automated Lane Centering and Adaptive Cruise Control for over 200 supported car makes and models.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

57 lines
3.5 KiB

#!/usr/bin/env bash
# adapted from https://github.com/mlcommons/training/blob/4bdf5c8ed218ad76565a2ba1ac27c919ccc6d689/stable_diffusion/README.md
# Strict mode: stop at the first failing command, unset variable or failing
# pipeline stage, so a failed download or checksum mismatch cannot be silently
# ignored by the steps that follow.
set -euo pipefail
# setup dirs
# Dataset root; LAION holds the image-latent shards, COCO the validation files.
DATA=/raid/datasets/stable_diffusion
LAION="${DATA}/laion-400m/webdataset-moments-filtered"
COCO="${DATA}/coco2014"
mkdir -p "${LAION}" "${COCO}"
# Checkpoint root, with one sub-directory per model downloaded below.
CKPT=/raid/weights/stable_diffusion
mkdir -p "${CKPT}/clip" "${CKPT}/sd" "${CKPT}/inception"
# download data
# if rclone isn't installed system-wide / in your PATH, put the executable path in quotes below
#RCLONE=""
RCLONE="rclone"
## VAE-encoded image latents, from 6.1M image subset of laion-400m
## about 1 TB for whole download
# Register the "mlc-training" S3 remote used by the copy commands below.
# NOTE(review): the access keys are hard-coded; they look like the public
# read-only credentials from the upstream README -- confirm before reusing.
"${RCLONE}" config create mlc-training s3 \
  provider=Cloudflare \
  access_key_id=76ea42eadb867e854061a1806220ee1e \
  secret_access_key=a53625c4d45e3ca8ac0df8a353ea3a41ffc3292aa25259addd8b7dc5a6ce2936 \
  endpoint=c2686074cb2caf5cbaf6d134bdba8b47.r2.cloudflarestorage.com \
  || { echo "Failed to configure the mlc-training rclone remote" >&2; exit 1; }
LAION_REMOTE="mlc-training:mlcommons-training-wg-public/stable_diffusion/datasets/laion-400m/moments-webdataset-filtered"
# Fetch the .tar shards, then the checksum list they are verified against.
"${RCLONE}" copy "${LAION_REMOTE}/" "${LAION}" --include="*.tar" -P \
  || { echo "Failed to download Laion moments" >&2; exit 1; }
"${RCLONE}" copy "${LAION_REMOTE}/sha512sums.txt" "${LAION}" -P \
  || { echo "Failed to download Laion moments checksum list" >&2; exit 1; }
# Verify every .tar entry listed in the downloaded sha512sums.txt.
# The cd runs in a subshell so the rest of the script keeps its working
# directory. An explicit if/else replaces `a && b || c`, which would also run
# the failure branch whenever the success branch itself returned non-zero.
if (cd "${LAION}" && grep -E '\.tar$' sha512sums.txt | sha512sum -c --quiet -); then
  echo "All .tar files verified"
else
  echo "Checksum failure when validating downloaded Laion moments" >&2
  exit 1
fi
## prompts and FID statistics from 30k image subset of coco2014
## 33 MB
# Register the "mlc-training" S3 remote. It only needs to be created once for
# both copies below, so the second identical `config create` has been removed.
# NOTE(review): the access keys are hard-coded; they look like the public
# read-only credentials from the upstream README -- confirm before reusing.
"${RCLONE}" config create mlc-training s3 \
  provider=Cloudflare \
  access_key_id=76ea42eadb867e854061a1806220ee1e \
  secret_access_key=a53625c4d45e3ca8ac0df8a353ea3a41ffc3292aa25259addd8b7dc5a6ce2936 \
  endpoint=c2686074cb2caf5cbaf6d134bdba8b47.r2.cloudflarestorage.com \
  || { echo "Failed to configure the mlc-training rclone remote" >&2; exit 1; }
COCO_REMOTE="mlc-training:mlcommons-training-wg-public/stable_diffusion/datasets/coco2014"
# Prompts (.tsv) and the FID statistics (.npz); abort if either copy fails.
"${RCLONE}" copy "${COCO_REMOTE}/val2014_30k.tsv" "${COCO}" -P \
  || { echo "Failed to download coco2014 prompts" >&2; exit 1; }
"${RCLONE}" copy "${COCO_REMOTE}/val2014_30k_stats.npz" "${COCO}" -P \
  || { echo "Failed to download coco2014 FID statistics" >&2; exit 1; }
# download checkpoints
## clip (needed for text and vision encoders for validation)
CLIP_WEIGHTS_URL="https://huggingface.co/laion/CLIP-ViT-H-14-laion2B-s32B-b79K/resolve/main/open_clip_pytorch_model.bin"
CLIP_WEIGHTS_SHA256="9a78ef8e8c73fd0df621682e7a8e8eb36c6916cb3c16b291a082ecd52ab79cc4"
CLIP_CONFIG_URL="https://huggingface.co/laion/CLIP-ViT-H-14-laion2B-s32B-b79K/raw/main/open_clip_config.json"
# -N re-downloads only when the remote copy is newer than the local one;
# -P selects the destination directory.
wget -N -P "${CKPT}/clip" "${CLIP_WEIGHTS_URL}" \
  || { echo "Failed to download CLIP weights" >&2; exit 1; }
wget -N -P "${CKPT}/clip" "${CLIP_CONFIG_URL}" \
  || { echo "Failed to download CLIP config" >&2; exit 1; }
# Checksum line uses the canonical "<hash>  <file>" form (two spaces).
# A mismatch now aborts the script instead of only printing FAILED.
printf '%s  %s\n' "${CLIP_WEIGHTS_SHA256}" "${CKPT}/clip/open_clip_pytorch_model.bin" | sha256sum -c \
  || { echo "Checksum failure when validating CLIP weights" >&2; exit 1; }
## sd (needed for latent->image decoder for validation, also has clip text encoder for training)
SD_WEIGHTS_URL='https://huggingface.co/stabilityai/stable-diffusion-2-base/resolve/main/512-base-ema.ckpt'
SD_WEIGHTS_SHA256="d635794c1fedfdfa261e065370bea59c651fc9bfa65dc6d67ad29e11869a1824"
# -N re-downloads only when the remote copy is newer than the local one.
wget -N -P "${CKPT}/sd" "${SD_WEIGHTS_URL}" \
  || { echo "Failed to download Stable Diffusion weights" >&2; exit 1; }
# Checksum line uses the canonical "<hash>  <file>" form (two spaces).
# A mismatch now aborts the script instead of only printing FAILED.
printf '%s  %s\n' "${SD_WEIGHTS_SHA256}" "${CKPT}/sd/512-base-ema.ckpt" | sha256sum -c \
  || { echo "Checksum failure when validating Stable Diffusion weights" >&2; exit 1; }
## inception (needed for validation)
FID_WEIGHTS_URL='https://github.com/mseitzer/pytorch-fid/releases/download/fid_weights/pt_inception-2015-12-05-6726825d.pth'
FID_WEIGHTS_SHA1="bd836944fd6db519dfd8d924aa457f5b3c8357ff"
# -N re-downloads only when the remote copy is newer than the local one.
wget -N -P "${CKPT}/inception" "${FID_WEIGHTS_URL}" \
  || { echo "Failed to download Inception (FID) weights" >&2; exit 1; }
# Checksum line uses the canonical "<hash>  <file>" form (two spaces).
# A mismatch exits non-zero with an explicit message.
printf '%s  %s\n' "${FID_WEIGHTS_SHA1}" "${CKPT}/inception/pt_inception-2015-12-05-6726825d.pth" | sha1sum -c \
  || { echo "Checksum failure when validating Inception (FID) weights" >&2; exit 1; }