Commit 14b65a6e authored by Remi  PLANEL's avatar Remi PLANEL
Browse files

Add project build script

parent fd41416e
#!/usr/bin/perl
# Run an InterMine production build and dump the database occasionally.
# The project.xml in the current directory controls which actions to
# run/merge/postprocess.
# The database to dump will be found by reading the properties from $HOME/.intermine/<mine_name>.properties
# where <mine_name> is the name of the directory where the script is run
use strict;
use warnings;
use Getopt::Std;
use Cwd;
use XML::Parser::PerlSAX;
use Text::Glob qw(glob_to_regex);
my $saved_dir = getcwd;
my @ant_command = ("$saved_dir/gradlew --stacktrace --no-daemon");
my $dump_file_prefix;
my $dump_host;
my $project_file = 'project.xml';
my $pgpass_file = "$ENV{HOME}/.pgpass";
my $default_port = '5432';
my $final_dump_name = 'final-dump';
my $final_marker = 'final';
sub read_properties {
my $file = shift;
open(my $in, '<', "$file") or die "cannot open $file: $!\n";
my %properties;
while (<$in>) {
next if /^\s*#/;
chomp;
my ($k, $v) = split(/\s*=\s*/, $_, 2);
next unless $k;
$properties{$k} = $v;
}
close($in) or die "Problem reading from $file: $!\n";
return %properties;
}
sub usage
{
die <<"HELP";
usage:
$0 [-v] [-b] [-l | -r] [-T] [-u | -U] [-V <version>] [-D dest_database_name] [-a <actions>] dump_host dump_prefix
flags:
-v is passed to ant
-l attempt to restart by reading the last dump file
-r attempt to restart just after the last dump point _without_ loading a dump
-b run build-db before starting build and drop any existing backup databases
-n parse files and report actions, but don't execute anything
-V set the release number to pass to ant (as -Drelease=release_number)
-T instead of making backup copies of the database in the
server using the "CREATE DATABASE foo WITH TEMPLATE bar" command,
dump and reload to restart.
-E Set the default database encoding (defaults to SQL_ASCII if not passed)
-D set the destination database for the completed build; the database will be
copied to this name in the same postgres server that the build used
-a set the list of actions to perform - the list must be a subset of the
sources/postprocesses in the project.xml file.
- The -l and -r operate as usual.
- To run all steps starting at <some_action> use a dash after the action
name: -a <some_action>-
- To perform only the final dump use: -a $final_dump_name
- The action names can be patterns: -a 'flymine-static,*dmel*'
- To refer to dump step and skip its corresponding action use the action
name with "-dump" appended
eg -a fly-fish-dump- (starts with the fly-fish dump then continues)
-a fly-fish-dump,flymine-static,create-references,$final_dump_name
-u Build userprofile database if not already populated
-U Build userprofile database even if already populated
dump_host is the host to run pg_dump and pg_restore on
With the -t flag, one dump is made the end of the build with the name
dump_prefix.final
Without the -t flag, a dump is made after integrating each source in the
project.xml that has dump="true" set. A final dump is also made.
example:
$0 prod1 /tmp/production_dump
HELP
}
if (!-f $project_file) {
warn "can't find $project_file in the current directory\n";
usage();
}
my $verbose = 0;
my $load_last = 0;
my $restart = 0;
# if true, don't execute anything just print what will happen
my $dry_run = 0;
# if true don't dump and reload, instead do backup copies in the server using
# the "CREATE DATABASE foo WITH TEMPLATE bar" command
my $server_backup = 1;
my $db_encoding = 'SQL_ASCII';
# if true, run build-db before starting the integration
my $run_build_db = 0;
my $dest_db;
my $release;
my @required_actions_list = ();
my $start_action = undef;
# Process command-line opts
my %opts = ();
if (!getopts('vrlbTnuUE:D:a:V:', \%opts)) {
usage();
}
if ($opts{v}) {
$verbose = 1;
push @ant_command, '--stacktrace';
}
if ($opts{l}) {
$load_last = 1;
}
if ($opts{r}) {
$restart = 1;
}
if ($opts{n}) {
$dry_run = 1;
}
if ($opts{T}) {
$server_backup = 0;
}
if (defined $opts{E}) {
$db_encoding = $opts{E};
}
if ($opts{b}) {
$run_build_db = 1;
}
if ($opts{D}) {
$dest_db = $opts{D};
}
my $build_userprofile_db;
if ($opts{u} or $opts{U}) {
# $build_userprofile_db = 1;
}
my $overwrite_userprofile_ok;
if ($opts{U}) {
# $overwrite_userprofile_ok = 1;
}
if (exists $opts{a}) {
my $arg = $opts{a};
if ($arg =~ /^.*,.*-$/) {
warn "error: can't use commas and '-' in -a <actions> argument: $arg\n";
usage;
}
if ($arg =~ /(.*)-$/) {
$start_action = $1;
} else {
@required_actions_list = split (',', $arg);
}
}
if ($opts{V}) {
$release = $opts{V};
push @ant_command, "-Dorg.gradle.project.release=$release"
}
if (@ARGV == 2) {
$dump_host = $ARGV[0];
$dump_file_prefix = $ARGV[1];
} else {
usage;
}
my $log_file = "pbuild.log";
my $mode = ($run_build_db) ? '>' : '>>';
open(LOG, $mode, $log_file) or die "can't open $log_file: $!\n";
my $old_handle = select(LOG);
$| = 1; # autoflush
select $old_handle;
my $current_directory = (getcwd() =~ m:.*/(.*):)[0];
my $properties_file = "$ENV{HOME}/.intermine/$current_directory.properties";
if (defined $release) {
$properties_file .= ".$release";
}
my @psql_command = 'psql';
my @dump_command = qw[pg_dump -F c -i];
my @load_command = qw[pg_restore -1 -i];
my @dropdb_command = qw[dropdb];
my @createdb_command = (qw[createdb -E ], $db_encoding);
sub log_message
{
my $message = shift;
my $verbose = shift;
if (defined $message) {
print LOG "$message\n";
if (defined $verbose && $verbose) {
print STDERR "$message\n";
}
} else {
print LOG "\n";
}
}
sub log_and_print
{
log_message shift, 1;
}
# run a command and exit the script if it returns a non-zero
sub run_and_check_status
{
my $command_name = $_[0];
log_and_print `date`, "\n\n";
log_and_print "starting command: @_\n";
my $result = 0;
if (!$dry_run) {
open F, "@_ |" or die "can't run @_: $?\n";
while (<F>) {
chomp;
log_message " [$command_name] $_";
}
close F;
$result = $?;
}
log_and_print `date`, "\n\n";
log_and_print "finished\n\n";
if ($result != 0) {
warn "ERROR: $result\n";
}
if (!$dry_run && $result != 0) {
log_and_print "failed with exit code $?: @_\n";
print STDERR "check log: $log_file\n";
exit 1;
}
}
sub spawn
{
my $pass = shift;
my @spawn_args = @_;
if (!defined $dump_host || $dump_host eq 'localhost') {
$ENV{PAGER} = "/bin/cat";
@spawn_args = "sh -c '@spawn_args'";
} else {
unshift @spawn_args, "ssh", $dump_host;
}
if ($dry_run) {
log_and_print "command to run: @spawn_args\n";
} else {
my $pid = open(my $PROCESS, '-|', @spawn_args)
or die "Could not execute @spawn_args\n";
while (<$PROCESS>) {
if (/error/i) {
die "Error returned by @spawn_args\n";
}
}
close $PROCESS;
}
}
sub run_build_db
{
log_and_print `date`, "\n";
log_and_print "\nbuilding db: @ant_command builddb\n";
my $saved_dir = getcwd;
#chdir "$saved_dir/dbmodel" or die "can't change directory into: $saved_dir/dbmodel\n";
run_and_check_status @ant_command, "clean";
run_and_check_status @ant_command, "builddb";
#chdir $saved_dir
# or die "can't return to previous directory ($saved_dir) after $saved_dir/dbmodel\n";
}
sub dropdb_backups
{
my ($db, $user, $pass, $host, $port, $database_names_ref, $actions_ref) = @_;
my @database_names = @$database_names_ref;
my @actions = @$actions_ref;
for my $action (@actions) {
my $action_type = $action->{type};
my @action_args = @{$action->{args}};
if ($action_type eq 'dump') {
my $action_name = $action_args[0];
my $backup_name = "$db:$action_name";
if (grep {$_ eq $backup_name} @database_names) {
my @params = ('-U', $user, '-h', $host, $backup_name);
log_and_print `date`, "\n";
log_and_print "\ndropping old backup database: @dropdb_command @params\n";
spawn($pass, @dropdb_command, @params);
}
}
}
}
sub create_prod_db
{
my ($db, $user, $pass, $host, $port, $suffix) = @_;
my @params = ('-U', $user, '-h', $host, $db);
log_and_print `date`, "\n";
log_and_print "\ncreating database: @createdb_command @params\n";
eval {
# ignore error - it's OK if the database doesn't exist
spawn($pass, @createdb_command, @params);
};
run_build_db();
}
sub copy_db
{
my ($db, $user, $pass, $host, $port, $to_db) = @_;
my @params = ('-U', $user, '-h', $host, '-T', $db, $to_db);
log_and_print `date`, "\n\n";
log_and_print "\nmaking db copy: @createdb_command @params\n";
# retry a few times because sometimes the createdb fails with a "no
# such file or directory" error because some table/index files
# (temporary tables maybe) disappear while being copied
for (my $i = 1; $i <= 25; $i++) {
eval {
spawn($pass, @createdb_command, @params);
};
if ($@) {
my $pause = $i * 5;
warn "failure ($@) - will try again in $pause seconds ...\n";
sleep($pause);
} else {
last;
}
}
die "$@\n" if $@;
}
sub make_server_backup
{
my ($db, $user, $pass, $host, $port, $suffix) = @_;
copy_db($db, $user, $pass, $host, $port, "$db:$suffix");
log_and_print `date`, "\n\n";
log_and_print "finished backup\n\n";
}
sub server_restore
{
my ($db, $user, $pass, $host, $port, $suffix) = @_;
my @params = ('-U', $user, '-h', $host);
if (defined $port) {
unshift @params, "-p", $port;
}
log_and_print `date`, "\n\n";
log_and_print "\nrunning: @dropdb_command @params $db\n";
eval {
# ignore failures
spawn($pass, @dropdb_command, @params, $db);
};
log_and_print `date`, "\n\n";
log_and_print "\nrunning: @createdb_command @params -T $db:$suffix $db\n";
spawn($pass, @createdb_command, @params, '-T', "$db:$suffix", $db);
log_and_print `date`, "\n\n";
log_and_print "finished restore - now analysing\n\n";
my $saved_dir = getcwd;
#chdir "$saved_dir/dbmodel" or die "can't change directory into: $saved_dir/dbmodel\n";
#run_and_check_status @ant_command, "analyse-db-production";
#chdir $saved_dir
# or die "can't return to previous directory ($saved_dir) after $saved_dir/dbmodel\n";
log_and_print `date`, "\n\n";
log_and_print "finished analysing\n\n";
}
sub dump_db
{
my $db = shift;
my $user = shift;
my $pass = shift;
my $host = shift;
my $port = shift;
my $out_file = shift;
my @params = ('-U', $user, '-h', $host, '-f', $out_file, $db);
if (defined $port) {
unshift @params, "-p", $port;
}
log_and_print `date`, "\n\n";
log_and_print "\ndumping: @dump_command @params\n";
my @spawn_args = ($pass, "@dump_command @params");
spawn(@spawn_args);
log_and_print `date`, "\n\n";
log_and_print "finished dump\n\n";
}
sub load_db
{
my $db = shift;
my $user = shift;
my $pass = shift;
my $host = shift;
my $port = shift;
my $in_file = shift;
my @params = ('-U', $user, '-h', $host);
if (defined $port) {
unshift @params, "-p", $port;
}
log_and_print `date`, "\n\n";
log_and_print "\nrunning: @dropdb_command @params $db\n";
eval {
# ignore failures
spawn($pass, @dropdb_command, @params, $db);
};
log_and_print `date`, "\n\n";
log_and_print "\nrunning: @createdb_command @params $db\n";
spawn($pass, @createdb_command, @params, $db);
push @params, '-d', $db, $in_file;
log_and_print `date`, "\n\n";
log_and_print "\nrunning: @load_command @params\n";
spawn($pass, @load_command, @params);
log_and_print `date`, "\n\n";
#log_and_print "finished load - now analysing\n\n";
#my $saved_dir = getcwd;
#chdir "$saved_dir/dbmodel" or die "can't change directory into: $saved_dir/dbmodel\n";
#run_and_check_status @ant_command, "analyse-db-production";
#chdir $saved_dir
# or die "can't return to previous directory ($saved_dir) after $saved_dir/dbmodel\n";
#log_and_print `date`, "\n\n";
#log_and_print "finished analysing\n\n";
}
package ProjectXML::Handler;
use vars qw{ $AUTOLOAD };
sub new {
my $type = shift;
my $self = ( $#_ == 0 ) ? shift : { @_ };
$self->{sources} = [];
return bless $self, $type;
}
sub start_element
{
my $self = shift;
my $args = shift;
my $element_name = $args->{Name};
my $action_name = $args->{Attributes}{name};
my $dump_flag = (exists $args->{Attributes}{dump} and $args->{Attributes}{dump} eq 'true');
my $index_flag = exists $args->{Attributes}{index};
if ($element_name eq 'source') {
push @{$self->{actions}}, {
type => 'integrate',
args => ["integrate -Psource=$action_name"],
name => $action_name
};
} elsif ($element_name eq 'post-process') {
push @{$self->{actions}}, {
type => 'postprocess',
args => ["postprocess -Pprocess=$action_name"],
name => $action_name
};
} elsif ($element_name eq 'property' and $action_name eq 'intermine.properties.file') {
$self->{properties_file} = $args->{Attributes}{value};
} else {
return;
}
if ($dump_flag) {
push @{$self->{actions}}, {
type => 'dump',
args => ["$action_name"],
name => "$action_name-dump"
};
}
if ($index_flag) {
push @{$self->{actions}}, {
type => 'index',
args => ["$action_name"],
name => "$action_name-index"
};
}
}
sub processing_instruction { }
sub ignorable_whitespace { }
# Others
sub AUTOLOAD {
my $self = shift;
my $method = $AUTOLOAD;
$method =~ s/.*:://;
return if $method eq 'DESTROY';
print "UNRECOGNIZED $method\n";
}
1;
package main;
# add missing lines to .pgpass so that createdb and co never ask for a password
sub fix_pgpass
{
my ($db, $user, $pass, $host, $port) = @_;
if (-f $pgpass_file) {
open PGPASS, '<', $pgpass_file or die "can't open $pgpass_file: $!\n";
while (my $line = <PGPASS>) {
chomp $line;
my ($line_host, $line_port, $line_db, $line_user, $line_pass) =
split (/:/, $line);
if ($line_host eq $host &&
($line_port eq $default_port && !defined $port ||
defined $port && $line_port eq $port) &&
($line_db eq '*' || $line_db eq $db) &&
$line_user eq $user &&
$line_pass eq $pass) {
# match
chmod 0600, $pgpass_file or die "can't change mode of $pgpass_file: $!\n";
return;
}
}
close PGPASS;
}
open PGPASS, '>>', $pgpass_file
or die "can't open $pgpass_file for appending: $!\n";
my $out_port;
if (defined $port) {
$out_port = $port;
} else {
$out_port = $default_port;
}
print PGPASS "$host:$out_port:*:$user:$pass\n";
close PGPASS or die "can't close $pgpass_file: $!\n";
chmod 0600, $pgpass_file or die "can't change mode of $pgpass_file: $!\n";
}