#!/usr/perl/perl580/bin/perl -w

use strict;
use Algorithm::Cluster qw/kcluster/;

my $file = "../../data/cyano.txt";
my $i = 0;
my (@orfname,@orfdata,@weight,@mask);

open(DATA,"<$file") or die "Can't open file $file: $!";


#------------------
# Read in the data file, and save the data to @orfdata
# We know that the file is intact and has no holes, 
# so just set the mask to 1 for every item.
# We don't check for errors in this case, because the file
# is short and we can spot errors by eye. 
#
my $firstline = <DATA>;  # Skip the title line
while(<DATA>) {

	chomp(my $line = $_);
	my @field     = split /\t/, $line;
	$orfname[$i]  =   $field[0];
	$orfdata[$i]  = [ @field[2..5] ];
	$mask[$i]     = [ 1,1,1,1 ];
	++$i;
}
close(DATA);

#------------------
# Make a reverse-lookup index of the @orfnames hash:
#
my %orfname_by_rowid;
$i=0;
$orfname_by_rowid{$i++} = $_, foreach(@orfname);

@weight = (1.0) x 4;


#------------------
# Define the params we want to pass to kcluster
my %params = (
	nclusters =>         6,
	transpose =>         0,
	npass     =>       100,
	method    =>       'a',
	dist      =>       'e',
	data      =>    \@orfdata,
	mask      =>       \@mask,
	weight    =>     \@weight,
);


#------------------
# Here is where we invoke the library function!
#
my ($clusters, $centroids, $found) = kcluster(%params);
#
#------------------



#------------------
# Create a reverse index of the ORF names,  by cluster ID
#
my %orfname_by_cluster;
$i=0;
foreach(@{$clusters}) {
	push @{$orfname_by_cluster{$_}}, $orfname_by_rowid{$i++};
}


#------------------
# Print out a list of the ORFs, grouped by cluster ID,
# as returned by the kcluster() function.
#
$i=0;
foreach(@{$centroids}) {

	print "------------------\n";
	printf("Cluster %d:  %d ORFs,  centroids: ( %s )\n\n", 
		$i, scalar(@{$orfname_by_cluster{$i} }),
		join ', ', map { sprintf "%.3f",$_ } @{$_}
	);

	print "\t$_\n", foreach( sort { $a cmp $b } @{$orfname_by_cluster{$i} } );
	print "\n";
	++$i;
}



