infra/codestorage/hg-repo-sync

Paddy 2015-08-17 Child:cc08c8ed2128

0:d8c86bc750ad Browse Files

First commit. This contains the basic implementation of hg-repo-sync, which is only useful when you want to do a constant two-way sync every few minutes to Google Cloud Storage. It also hardcodes more than it should. But it's running on code.secondbit.org without issue, so it should probably be preserved.

.hgignore Dockerfile hgrc run.sh

     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/.hgignore	Mon Aug 17 18:56:19 2015 -0400
     1.3 @@ -0,0 +1,1 @@
     1.4 +.DS_Store
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/Dockerfile	Mon Aug 17 18:56:19 2015 -0400
     2.3 @@ -0,0 +1,32 @@
     2.4 +FROM ubuntu:14.04
     2.5 +MAINTAINER Paddy <paddy@secondbit.org>
     2.6 +
     2.7 +# Mercurial for the repos; wget and unzip to fetch the Cloud SDK archive.
     2.8 +# The package name is spelled in lowercase, as apt expects.
     2.9 +RUN apt-get update && apt-get install -y mercurial wget unzip
    2.10 +# Dedicated hg user with a fixed uid.
    2.11 +# NOTE(review): run.sh runs "chgrp -R 2000", which is a gid; confirm the group created here also gets id 2000.
    2.12 +RUN adduser --disabled-password --gecos "" --uid 2000 hg
    2.13 +
    2.14 +# /current and /scratch are the two directories run.sh works in.
    2.15 +RUN mkdir -p /scratch
    2.16 +RUN mkdir -p /current
    2.17 +
    2.18 +VOLUME /current
    2.19 +VOLUME /scratch
    2.20 +
    2.21 +ADD run.sh /etc/run.sh
    2.22 +RUN chmod +x /etc/run.sh
    2.23 +
    2.24 +ADD hgrc /etc/mercurial/hgrc
    2.25 +# Fetch and unpack the Google Cloud SDK; run.sh calls gsutil, which is put on PATH below.
    2.26 +RUN wget https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && unzip google-cloud-sdk.zip && rm google-cloud-sdk.zip
    2.27 +
    2.28 +ENV CLOUDSDK_PYTHON_SITEPACKAGES 1
    2.29 +RUN google-cloud-sdk/install.sh --usage-reporting=false --path-update=true --bash-completion=false --rc-path=/.bashrc --disable-installation-options
    2.30 +RUN google-cloud-sdk/bin/gcloud --quiet components update pkg-go pkg-python pkg-java preview alpha beta app
    2.31 +RUN google-cloud-sdk/bin/gcloud --quiet config set component_manager/disable_update_check true
    2.32 +RUN mkdir /.ssh
    2.33 +ENV PATH /google-cloud-sdk/bin:$PATH
    2.34 +ENV HOME /
    2.35 +
    2.36 +ENTRYPOINT ["/etc/run.sh"]
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/hgrc	Mon Aug 17 18:56:19 2015 -0400
     3.3 @@ -0,0 +1,2 @@
     3.4 +[trusted]
     3.5 +groups = committers
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/run.sh	Mon Aug 17 18:56:19 2015 -0400
     4.3 @@ -0,0 +1,72 @@
     4.4 +#!/bin/bash
     4.5 +# Two-way sync between the Mercurial repos under /current and the bundles
     4.6 +# stored in gs://backups.code.secondbit.org. One pass: pull the newest
     4.7 +# bundles into /current, re-bundle every repo, upload, then sleep.
     4.8 +today=$(date +%F)
     4.9 +
    4.10 +# Fallback date, used when nothing has been uploaded yet today.
    4.11 +yesterday=$(date --date yesterday +%F)
    4.12 +
    4.13 +echo "Fixing permissions..."
    4.14 +chgrp -R 2000 /current
    4.15 +echo "Cleaning up..."
    4.16 +rm -rf /scratch/*
    4.17 +echo "Downloading bundles from Google Cloud Storage..."
    4.18 +output=$(gsutil cp -R "gs://backups.code.secondbit.org/$today/*" /scratch 2>&1)
    4.19 +# Quoted so the gsutil output is printed with its line breaks intact.
    4.20 +printf '%s\n' "$output"
    4.21 +
    4.22 +# If the download reports no matches for today, fall back to the bundles
    4.23 +# from yesterday. This could happen if the pod restarts between the last
    4.24 +# upload of day A and the first upload of day B.
    4.25 +if [[ "$output" == *"No URLs matched:"* ]]
    4.26 +then
    4.27 +	echo "Downloading yesterday's bundles from Google Cloud Storage..."
    4.28 +	# Same URL shape as the download above, so the bundles should land at
    4.29 +	# the same /scratch paths and need no renaming.
    4.30 +	gsutil cp -R "gs://backups.code.secondbit.org/$yesterday/*" /scratch
    4.31 +fi
    4.32 +
    4.33 +# Collect the bundles only after any fallback download, one path per line.
    4.34 +mapfile -t bundles < <(find /scratch -name '*.bundle')
    4.35 +
    4.36 +for bundle in "${bundles[@]}"
    4.37 +do
    4.38 +	# /scratch/foo/bar.bundle maps to the repo /current/foo/bar
    4.39 +	target=${bundle#/scratch}
    4.40 +	target=${target%.bundle}
    4.41 +	echo "target=$target"
    4.42 +	if [[ -d "/current${target}" ]]
    4.43 +	then
    4.44 +		echo "Pulling changes from $bundle to /current$target"
    4.45 +		hg --cwd "/current${target}" pull "$bundle"
    4.46 +	else
    4.47 +		echo "Creating /current$target repo from $bundle"
    4.48 +		hg clone "$bundle" "/current${target}"
    4.49 +		chgrp -R 2000 "/current${target}"
    4.50 +	fi
    4.51 +done
    4.52 +echo "Cleaning up..."
    4.53 +rm -rf /scratch/*
    4.54 +
    4.55 +# Bundle every repo under /current into /scratch/DATE/PATH.bundle.
    4.56 +mapfile -t repos < <(find /current -name .hg -type d)
    4.57 +for repo in "${repos[@]}"
    4.58 +do
    4.59 +	dir=$(dirname "$repo")
    4.60 +	echo "dir=$dir"
    4.61 +	target="/scratch/$today${dir#/current}.bundle"
    4.62 +	echo "target=$target"
    4.63 +	mkdir -p "$(dirname "$target")"
    4.64 +	echo "Bundling $dir to $target"
    4.65 +	hg --cwd "$dir" bundle --all "$target"
    4.66 +done
    4.67 +echo "Pushing bundles to Google Cloud Storage..."
    4.68 +gsutil cp -R /scratch/* gs://backups.code.secondbit.org/
    4.69 +echo "Cleaning up..."
    4.70 +rm -rf /scratch/*
    4.71 +# Sleep 5 to 9 minutes, randomized to prevent all our servers running
    4.72 +# this at the same time.
    4.73 +sleepfor="$(( (RANDOM % 5) + 5 ))m"
    4.74 +echo "Sleeping for $sleepfor..."
    4.75 +sleep "$sleepfor"