| File: | lib/DBIx/SchemaChecksum.pm |
| Coverage: | 73.4% |
| line | stmt | bran | cond | sub | time | code |
|---|---|---|---|---|---|---|
package DBIx::SchemaChecksum;

use 5.010;
use Moose;
use version; our $VERSION = version->new('0.28');

use DBI;
use Digest::SHA1;
use Data::Dumper;
use Path::Class;
use Carp;
use File::Find::Rule;

# Make all public attributes below settable from the command line.
with 'MooseX::Getopt';

# Connected database handle (a DBI::db object). Must be supplied by the
# caller; this class does not build one from a dsn itself.
has 'dbh' => ( is => 'ro', required=>1 );

# Catalog passed through to DBI's table_info/column_info calls.
has 'catalog' => (
    is            => 'ro',
    isa           => 'Str',
    default       => '%',
    documentation => q[might be required by some DBI drivers]
);

# Schemas to include in the checksum; '%' means "all" (DBI wildcard).
has 'schemata' => (
    is            => 'ro',
    isa           => 'ArrayRef[Str]',
    default       => sub { ['%'] },
    documentation => q[List of schematas to include in checksum]
);

# Table type filter handed to $dbh->tables (e.g. 'table', 'view').
has 'tabletype' => (
    is            => 'ro',
    isa           => 'Str',
    default       => 'table',
    documentation => q[Table type according to DBI->table_info]
);

# Directory scanned by _build__update_path for *.sql snippet files.
has 'sqlsnippetdir' => (
    isa           => 'Str',
    is            => 'ro',
    documentation => q[Directory containing sql update files],
);

# mainly needed for scripts
has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'dry_run' => ( is => 'rw', isa => 'Bool', default => 0 );

# internal
# Chained map of snippet files keyed by preSHA1sum; built lazily from
# sqlsnippetdir (see _build__update_path).
has '_update_path' => ( is => 'rw', isa => 'HashRef', lazy_build=>1 );
# Cached Data::Dumper string of the whole schema. Cleared via
# reset_checksum so the next checksum() call re-reads the database.
has '_schemadump' => (
    isa=>'Str',
    is=>'rw',
    lazy_build=>1,
    clearer=>'reset_checksum',
);
| 56 | ||||||
| 57 - 110 | =head1 NAME
DBIx::SchemaChecksum - Generate and compare checksums of database schematas
=head1 SYNOPSIS
my $sc = DBIx::SchemaChecksum->new( dbh => $dbh );
print $sc->checksum;
=head1 DESCRIPTION
When you're dealing with several instances of the same database (e.g.
developer, testing, staging, production), it is crucial to make sure
that all databases use the same schema. This can be quite a
hair-pulling experience, and this module should help you keep your
hair (if you're already bald, it won't make your hair grow back,
sorry...)
DBIx::SchemaChecksum connects to your database, gets schema
information (tables, columns, primary keys, foreign keys) and
generates a SHA1 digest. This digest can then be used to easily verify schema consistency across different databases.
B<Caveat:> The same schema might produce different checksums on
different database versions.
DBIx::SchemaChecksum works with PostgreSQL 8.3 and SQLite (but see
below). I assume that thanks to the abstraction provided by the C<DBI>
it works with most databases. If you try DBIx::SchemaChecksum with
different database systems, I'd love to hear some feedback...
=head2 SQLite and column_info
DBD::SQLite doesn't really implement C<column_info>, which is needed
to generate the checksum. We use the monkey-patch included in
http://rt.cpan.org/Public/Bug/Display.html?id=13631
to make it work
=head2 Scripts
Please take a look at the scripts included in this distribution:
=head3 schema_checksum.pl
Calculates the checksum and prints it to STDOUT
=head3 schema_update.pl
Updates a schema based on the current checksum and SQL snippet files
=head1 METHODS
=head2 Public Methods
=cut | |||||
| 111 | ||||||
=head3 checksum

    my $checksum = $self->checksum;

Returns the SHA1 hex digest of the schema dump, i.e. the checksum
identifying the current schema version.

=cut

sub checksum {
    my $self = shift;

    # Digest::SHA1 is unmaintained; Digest::SHA has been in the Perl
    # core since 5.9.3 (this file requires 5.010) and produces a
    # byte-identical SHA-1 hex digest. Loaded lazily so the rest of
    # the module is untouched.
    require Digest::SHA;
    return Digest::SHA::sha1_hex( $self->_schemadump );
}
| 116 | ||||||
| 117 - 124 | =head3 schemadump
my $schemadump = $self->schemadump;
Returns a string representation of the whole schema (as a Data::Dumper
Dump).
=cut | |||||
| 125 | ||||||
# Builder for the _schemadump attribute.
#
# Walks every schema in $self->schemata, collects per-table metadata
# (primary keys, selected column_info fields, foreign keys, and -- on
# PostgreSQL -- index definitions as unique-key stand-ins) and returns
# the whole structure as a sorted, stable Data::Dumper string, which
# checksum() then hashes.
#
# Fix over the previous version: the sequence-name capture inside the
# nextval cleanup tested $1 without checking whether the match itself
# succeeded. If the match failed, $1 still held a stale capture from an
# earlier successful match, so a wrong sequence name could be recorded.
sub _build__schemadump {
    my $self = shift;

    my $tabletype = $self->tabletype;
    my $catalog   = $self->catalog;

    my $dbh = $self->dbh;

    # Only these column_info fields go into the checksum, to keep it
    # stable across drivers that report extra, volatile fields.
    my @metadata = qw(COLUMN_NAME COLUMN_SIZE NULLABLE TYPE_NAME COLUMN_DEF);

    my %relevants = ();
    foreach my $schema ( @{ $self->schemata } ) {
        foreach
            my $table ( $dbh->tables( $catalog, $schema, '%', $tabletype ) )
        {
            # Drivers may return quoted identifiers; strip the quotes.
            $table =~ s/"//g;
            my %data;

            # remove schema name from table
            my $t = $table;
            $t =~ s/^.*?\.//;

            my @pks = $dbh->primary_key( $catalog, $schema, $t );
            $data{primary_keys} = \@pks if @pks;

            # columns
            my $sth_col = $dbh->column_info( $catalog, $schema, $t, '%' );

            my $column_info = $sth_col->fetchall_hashref('COLUMN_NAME');

            while ( my ( $column, $data ) = each %$column_info ) {
                my $info = { map { $_ => $data->{$_} } @metadata };

                # add postgres enums
                if ( $data->{pg_enum_values} ) {
                    $info->{pg_enum_values} = $data->{pg_enum_values};
                }

                # some cleanup
                if ( my $default = $info->{COLUMN_DEF} ) {
                    if ( $default =~ /nextval/ ) {

                        # Guard on the match itself: testing $1 after an
                        # unchecked match would see a stale capture from
                        # a previous successful match if this one fails.
                        if ( $default =~ m{'([\w\.\-_]+)'} ) {
                            my $new = $1;

                            # Strip a leading schema prefix so the
                            # checksum is schema-name independent.
                            $new =~ s/^\w+\.//;
                            $default = 'nextval:' . $new;
                        }
                    }

                    # Normalize quoting/bracketing differences between
                    # database versions.
                    $default =~ s/["'\(\)\[\]\{\}]//g;
                    $info->{COLUMN_DEF} = $default;
                }

                # Keep only the bare type name (drop any schema prefix).
                $info->{TYPE_NAME} =~ s/^(?:.+\.)?(.+)$/$1/g;

                $data{columns}{$column} = $info;
            }

            # foreign keys
            my $sth_fk
                = $dbh->foreign_key_info( '', '', '', $catalog, $schema, $t );
            if ($sth_fk) {
                $data{foreign_keys} = $sth_fk->fetchall_arrayref( {
                        map { $_ => 1 }
                            qw(FK_NAME UK_NAME UK_COLUMN_NAME FK_TABLE_NAME FK_COLUMN_NAME UPDATE_RULE DELETE_RULE)
                    }
                );

                # Nasty workaround: DEFERRABILITY is reported
                # inconsistently, so pin it to undef for stability.
                foreach my $row ( @{ $data{foreign_keys} } ) {
                    $row->{DEFERRABILITY} = undef;
                }
            }

            # postgres unique constraints
            # very crude hack to see if we're running postgres
            if ( $INC{'DBD/Pg.pm'} ) {
                my @unique;
                my $sth = $dbh->prepare(
                    "select indexdef from pg_indexes where schemaname=? and tablename=?"
                );
                $sth->execute( $schema, $t );
                while ( my ($index) = $sth->fetchrow_array ) {

                    # Make the index definition schema-name independent.
                    $index =~ s/$schema\.//g;
                    push( @unique, $index );
                }

                # Sorted so the dump (and hence the checksum) is stable.
                @unique = sort (@unique);
                $data{unique_keys} = \@unique if @unique;
            }

            $relevants{$table} = \%data;
        }

    }

    # Sorted keys + fixed indent => deterministic dump for hashing.
    my $dumper = Data::Dumper->new( [ \%relevants ] );
    $dumper->Sortkeys(1);
    $dumper->Indent(1);
    my $dump = $dumper->Dump;
    return $dump;
}
| 222 | ||||||
| 223 - 233 | =head3 build_update_path
my $update_info = $self->build_update_path( '/path/to/sql/snippets' )
Builds the datastructure needed by L<apply_sql_update>.
C<build_update_path> reads in all files ending in ".sql" in the
directory passed in (or defaulting to C<< $self->sqlsnippetdir >>). It
builds something like a linked list of files, which are chained by
their C<preSHA1sum> and C<postSHA1sum>.
=cut | |||||
| 234 | ||||||
# Builder for the _update_path attribute.
#
# Scans $self->sqlsnippetdir for *.sql files and builds a hash keyed by
# each file's preSHA1sum. For an ordinary snippet the value is
# [ $file, $postSHA1sum ]; snippets whose pre- and post-sums are equal
# (no-op markers) are collected under a leading 'SAME_CHECKSUM' tag so
# several of them can share one key alongside a real update step.
# Returns the stored hashref, or nothing when no snippets were found.
sub _build__update_path {
    my $self = shift;
    my $dir  = $self->sqlsnippetdir;
    croak("Please specify sqlsnippetdir") unless $dir;
    croak("Cannot find sqlsnippetdir: $dir") unless -d $dir;

    say "Checking directory $dir for checksum_files" if $self->verbose;

    my %update_info;
    my @files = File::Find::Rule->file->name('*.sql')->in($dir);

    # Sorted so files processed in a deterministic order across runs.
    foreach my $file ( sort @files ) {
        my ( $pre, $post ) = $self->get_checksums_from_snippet($file);

        # Files without any checksum comments are not update snippets.
        if ( !$pre && !$post ) {
            say "skipping $file (has no checksums)" if $self->verbose;
            next;
        }

        if ( $pre eq $post ) {

            # No-op snippet: ensure the entry for this sum starts with
            # the 'SAME_CHECKSUM' tag, preserving any non-tag items
            # already stored under the same key.
            if ( $update_info{$pre} ) {
                my @new = ('SAME_CHECKSUM');
                foreach my $item ( @{ $update_info{$pre} } ) {
                    push( @new, $item ) unless $item eq 'SAME_CHECKSUM';
                }
                $update_info{$pre} = \@new;
            }
            else {
                $update_info{$pre} = ['SAME_CHECKSUM'];
            }
        }

        if ( $update_info{$pre}
            && $update_info{$pre}->[0] eq 'SAME_CHECKSUM' )
        {
            if ( $post eq $pre ) {

                # No-op snippets go right after the tag; a real update
                # step (different $post) stays last in the list.
                splice( @{ $update_info{$pre} },
                    1, 0, Path::Class::File->new($file), $post );
            }
            else {
                push( @{ $update_info{$pre} },
                    Path::Class::File->new($file), $post );
            }
        }
        else {
            # NOTE(review): a second real update snippet with the same
            # preSHA1sum silently overwrites the first here — presumably
            # snippets are expected to be unique per checksum; confirm.
            $update_info{$pre} = [ Path::Class::File->new($file), $post ];
        }
    }

    return $self->_update_path( \%update_info ) if %update_info;
    return;
}
| 287 | ||||||
| 288 - 301 | =head3 get_checksums_from_snippet
my ($pre, $post) = $self->get_checksums_from_snippet( $file );
Returns a list of the preSHA1sum and postSHA1sum for the given file.
The file has to contain this info in SQL comments, eg:
-- preSHA1sum: 89049e457886a86886a4fdf1f905b69250a8236c
-- postSHA1sum: d9a02517255045167053ea92dace728e1389f8ca
alter table foo add column bar;
=cut | |||||
| 302 | ||||||
# Extract the preSHA1sum / postSHA1sum declared in SQL-comment lines of
# a snippet file. Returns a two-element list (pre, post); a sum that is
# not present in the file comes back as the empty string. Dies when no
# filename is given, croaks when the file cannot be opened.
sub get_checksums_from_snippet {
    my ( $self, $filename ) = @_;
    die "need a filename" unless $filename;

    my %checksums;

    open( my $fh, "<", $filename ) or croak "Cannot read $filename: $!";
    while ( my $line = <$fh> ) {

        # Lines look like: "-- preSHA1sum: <40+ hex chars>"
        next
            unless $line
            =~ m/^--\s+(pre|post)SHA1sum:?\s+([0-9A-Fa-f]{40,})\s+$/;
        $checksums{$1} = $2;
    }
    close $fh;

    return map { $checksums{$_} || '' } qw(pre post);
}
| 318 | ||||||
| 319 - 362 | =head2 Attributes generated by Moose All of this methods can also be set from the commandline. See MooseX::Getopts. =head3 dbh The database handle (DBH::db). =head3 dsn The dsn. =head3 user The user to use to connect to the DB. =head3 password The password to use to authenticate the user. =head3 catalog The database catalog searched for data. Not implemented by all DBs. See C<DBI::table_info> Default C<%>. =head3 schemata An Arrayref containg names of schematas to include in checksum calculation. See C<DBI::table_info> Default C<%>. =head3 tabletype What kind of tables to include in checksum calculation. See C<DBI::table_info> Default C<table>. =head3 verbose Be verbose or not. Default: 0 =cut | |||||
| 363 | ||||||
# A Perl module must end with a true value; this string serves as one.
q{ Favourite record of the moment: The Dynamics - Version Excursions }
| 365 | ||||||