#!/usr/local/bin/perl -w

package UniProtAccessor;  # this should be the same as your filename!
use FindBin;                # where was script installed?
use lib "$FindBin::Bin";      # use that dir for libs, too
#use lib "/home/osboxes/Documents/CODE/Hackathon2016/FAIR/Accessor/DataFairPort/Libraries/Perl/FAIR/lib";  # remember to delete this later
use lib "/home/linkeddata/public_html/cgi-bin/AccessorTest/FAIR/lib/";
use lib "/home/markw/CODE/DataFairPort/Libraries/Perl/FAIR/lib/";
use strict;
use warnings;
use JSON;
use FAIR::Accessor::Distribution;
use FAIR::Accessor::Container;
use FAIR::Accessor::MetaRecord;


#-----------------------------------------------------------------
# Configuration and Daemon
#-----------------------------------------------------------------

use base 'FAIR::Accessor';

# Service-level configuration handed to UniProtAccessor->new() below.
# The "textual*" entries are human-readable strings; the "mechanized*" entries
# are meant to be URIs of RDF documents (see the per-line comments).
my $config = {
   title => 'UniProt Slice FAIR Accessor - Aspergillus RNA Processing proteins',
   serviceTextualDescription => 'Takes a SPARQL query of the UniProt database specific to proteins and their GO annotations related to RNA Processing proteins in Aspergillus and makes it a FAIR Accessor source',
   textualAccessibilityInfo => "The information from this server requires no authentication; HTTP GET is sufficient",  # this could also be a URI describing the accessibility
   mechanizedAccessibilityInfo => "",  # this must be a URI to an RDF document -- NOTE(review): currently left empty
   textualLicenseInfo => "CC-BY-ND 4.0",  # this could also be a URI to the license info
   mechanizedLicenseInfo =>  "http://purl.org/NET/rdflicense/cc-by-nd4.0", # this must be a URI to an RDF document
   ETAG_Base => "TopLevelMetadata_Accessor_For_UniProtAnidulansRNAProcessing", # this is a unique identification string for the service (required by the LDP specification)
   localNamespaces => {
      pfund => 'http://vocab.ox.ac.uk/projectfunding#term_',
   },  # add a few new namespaces to the list of known namespaces....
   basePATH => '/cgi-bin/AccessorTest/UniProtAccessor', # REQUIRED regexp to match the RESTful PATH part of the URL, before the ID number

};

# SPARQL query that defines the "slice" of UniProt exposed by this Accessor.
# It is executed by Container() and also quoted verbatim in the container
# description built by fillContainerMetadata(), so the text (including its
# whitespace) is part of the published output.
#
# The query selects every up:Protein whose organism is a subclass of
# taxon:162425 and which is classified with GO_0006396 or any of its
# subclasses, and binds ?id to the protein URI with the
# "http://purl.uniprot.org/uniprot/" prefix removed (i.e. the bare accession).
# NOTE(review): per the service title/keywords, taxon:162425 is presumably
# Aspergillus nidulans and GO_0006396 "RNA processing" -- confirm.
my $SPARQL = '
            PREFIX up:<http://purl.uniprot.org/core/>
            PREFIX taxon:<http://purl.uniprot.org/taxonomy/>
            PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
            PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
            SELECT distinct ?id
            
            WHERE
            {
                ?protein a up:Protein .
                ?protein up:organism ?organism .
                ?organism rdfs:subClassOf taxon:162425 .
                ?protein up:classifiedWith ?go .
                ?go rdfs:subClassOf* <http://purl.obolibrary.org/obo/GO_0006396> .
              
                bind(replace(str(?protein), "http://purl.uniprot.org/uniprot/", "", "i") as ?id)
            }
            ';
            
# Build the accessor object from the key/value pairs of the configuration hash.
my $service = UniProtAccessor->new( %{$config} );

# start daemon: hand control over to the accessor's request handling
$service->handle_requests();


#-----------------------------------------------------------------
# Accessor Implementation
#-----------------------------------------------------------------


#------------- Container Resource --------------

# Container: build the top-level (container) resource of this Accessor.
#
# Fills in the container-level metadata, runs the file-level $SPARQL query
# against the UniProt SPARQL endpoint and registers one meta-record URL
# ("<container URL>/<id>") for every ?id binding returned.
#
# Parameters:
#   $self - the accessor object
#   %ARGS - named arguments supplied by the caller (not used here)
# Returns:
#   the populated FAIR::Accessor::Container object
# Dies:
#   with a descriptive message when the SPARQL query could not be executed
sub Container {

   my ($self, %ARGS) = @_;

   my $Container = FAIR::Accessor::Container->new(NS => $self->Configuration->Namespaces);

   $self->fillContainerMetadata($Container);

   # Normalise the request path: a query string/fragment or a trailing slash
   # would otherwise yield malformed record URLs such as ".../Accessor//P12345".
   my $server_name  = defined $ENV{'SERVER_NAME'} ? $ENV{'SERVER_NAME'} : '';
   my $request_path = defined $ENV{'REQUEST_URI'} ? $ENV{'REQUEST_URI'} : '';
   $request_path =~ s/[?#].*\z//s;
   $request_path =~ s{/+\z}{};
   my $BASE_URL = "http://" . $server_name . $request_path;

   use RDF::Query::Client;
   my $query = RDF::Query::Client->new($SPARQL);

   my $iterator = $query->execute('http://sparql.uniprot.org/sparql/');
   unless ($iterator) {
      # execute() yields a false value on failure; report why instead of
      # crashing later with "Can't call method next on an undefined value".
      my $reason = $query->can('error') ? $query->error : undef;
      $reason = 'unknown error' unless defined $reason && length $reason;
      die "UniProt SPARQL query failed: $reason\n";
   }

   my @records;
   while (my $row = $iterator->next) {
      my $binding = $row->{id};
      next unless defined $binding;   # skip rows in which ?id is unbound
      my $ID = $binding->value;
      push @records, "$BASE_URL/$ID";   # URL of the meta-record for this UniProt record
   }

   $Container->addRecords(\@records); # the listref of record URLs

   return $Container;
}

# --------------------MetaRecord Resource ---------------

# MetaRecord: build the meta-record resource for a single UniProt record.
#
# Adds the record-level metadata and four distributions: the UniProt HTML
# page, the UniProt RDF/XML document, and two Triple Pattern Fragment
# projections (protein -> organism, protein -> GO classification).
#
# Parameters:
#   $self - the accessor object
#   %ARGS - named arguments; $ARGS{ID} is the record identifier
# Returns:
#   the populated FAIR::Accessor::MetaRecord object
sub MetaRecord {
   my ($self, %ARGS) = @_;

   my $ID = $ARGS{'ID'};

   my $MetaRecord = FAIR::Accessor::MetaRecord->new(ID => $ID,
                                                    NS => $self->Configuration->Namespaces);
   $self->fillMetaRecordMetadata($MetaRecord);

   $MetaRecord->addDistribution(availableformats => 'text/html',
                                downloadURL => "http://www.uniprot.org/uniprot/$ID.html");
   $MetaRecord->addDistribution(availableformats => 'application/rdf+xml',
                                downloadURL => "http://purl.uniprot.org/uniprot/$ID.rdf");

   my $subject = "http://identifiers.org/uniprot/$ID";
   my $encodedsubject = urlencode($subject);

   # One entry per projected predicate. The subject side is the same UniProt
   # record in every projection, so only predicate and object details vary.
   my @projections = (
      {
         predicate      => "http://purl.uniprot.org/core/organism",
         objecttemplate => "http://identifiers.org/taxon/{TAX}",
         objecttype     => "http://edamontology.org/data_1179",
      },
      {
         predicate      => "http://purl.uniprot.org/core/classifiedWith",
         objecttemplate => "http://purl.obolibrary.org/obo/{GO}",
         objecttype     => "http://edamontology.org/data_1176",
      },
   );

   for my $projection (@projections) {
      my $encodedpredicate = urlencode($projection->{predicate});
      my $TPF = "http://linkeddata.systems:3001/fragments?subject=$encodedsubject&predicate=$encodedpredicate";
      $MetaRecord->addDistribution(availableformats => "application/x-turtle, text/rdf+n3, application/rdf+xml, text/html",
                                   downloadURL => $TPF,
                                   source => $subject,
                                   subjecttemplate =>  "http://identifiers.org/uniprot/{ID}",
                                   # The subject is a UniProt accession in both projections; the
                                   # organism projection used to carry the predicate URI here.
                                   subjecttype => "http://edamontology.org/data_3021",
                                   predicate => $projection->{predicate},
                                   objecttemplate => $projection->{objecttemplate},
                                   objecttype => $projection->{objecttype},
      );
   }

   return $MetaRecord;

}

# fillContainerMetadata: attach the container-level descriptive metadata.
#
# Parameters:
#   $self      - the accessor object (not used here)
#   $Container - object providing addMetadata(\%metadata)
# Returns:
#   whatever $Container->addMetadata returns
#
# The description embeds the file-level $SPARQL query text so that consumers
# can see exactly which slice of UniProt is being exposed.
sub fillContainerMetadata {
   my ($self, $Container) = @_;
   return $Container->addMetadata({
      'dc:title' => "UniProt Slice FAIR Accessor - Aspergillus RNA Processing proteins",
      'dcat:description' => "Takes a SPARQL query of the UniProt database specific to proteins and their GO annotations related to RNA Processing proteins in Aspergillus and makes it a FAIR Accessor source.  The precise query is:\n\n$SPARQL                        ",
      'dcat:identifier' => "http://linkeddata.systems/cgi-bin/Accessors/UniProtAccessor",
      'dcat:keyword' => ["Aspergillus", "Aspergillus nidulans", "RNA Processing", "Proteins"],
      'dcat:landingPage' => 'http://uniprot.org',
      'foaf:page' => ['http://sparql.uniprot.org/sparql','http://uniprot.org/'],
      'dcat:language' => 'http://id.loc.gov/vocabulary/iso639-1/en',
      'dc:language' => 'http://lexvo.org/id/iso639-3/eng',
      'dcat:publisher' => ['http://wilkinsonlab.info'],
      'dcat:theme'  => 'http://linkeddata.systems/ConceptSchemes/RNA_Processing_conceptscheme.rdf',  # this is the URI to a SKOS Concept Scheme
      'pfund:hasPrincipalInvestigator' => ["Dr. Mark Wilkinson"],
      'dc:creator' => 'http://wilkinsonlab.info',
      'pav:authoredBy' => ['http://orcid.org/0000-0002-9699-485X'],
      'dcat:contactPoint' => 'http://biordf.org/DataFairPort/MiscRDF/Wilkinson.rdf',
      'dc:license' => 'http://purl.org/NET/rdflicense/cc-by-nd4.0',
      'rdf:type' => ['prov:Collection', 'dctypes:Dataset'],
   });
}

# fillMetaRecordMetadata: attach the descriptive metadata of one meta-record.
#
# Parameters:
#   $self       - the accessor object (not used here)
#   $MetaRecord - object providing ID() and addMetadata(\%metadata)
# Returns:
#   whatever $MetaRecord->addMetadata returns
#
# Each metadata key appears exactly once: repeated keys in a hash constructor
# silently overwrite each other, which hides mistakes when values diverge.
sub fillMetaRecordMetadata {
  my ($self, $MetaRecord) = @_;
  my $ID = $MetaRecord->ID;
  return $MetaRecord->addMetadata({
      'dcat:description' => "FAIR Accessor Meta-record for UniProt Record $ID",
      'foaf:primaryTopic' => "http://identifiers.org/uniprot/$ID",
      'dc:title' => "UniProt Protein $ID",
      'dcat:identifier' => "http://linkeddata.systems/cgi-bin/Accessors/UniProtAccessor/$ID",
      'dcat:keyword' => ["Aspergillus", "Aspergillus nidulans", "RNA Processing", "Proteins", "Annotation", "Functional Annotation", "Gene Ontology", "GO"],
      'dcat:landingPage' => 'http://uniprot.org',
      'foaf:page' => ['http://sparql.uniprot.org/sparql','http://uniprot.org/'],
      'dcat:language' => 'http://id.loc.gov/vocabulary/iso639-1/en',
      'dc:language' => 'http://lexvo.org/id/iso639-3/eng',
      'dcat:publisher' => ['http://wilkinsonlab.info'],
      'pfund:hasPrincipalInvestigator' => ["Dr. Mark Wilkinson"],
      'dc:creator' => 'http://wilkinsonlab.info',
      'pav:authoredBy' => ['http://orcid.org/0000-0002-9699-485X'],
      'dcat:contactPoint' => 'http://biordf.org/DataFairPort/MiscRDF/Wilkinson.rdf',
      'void:inDataset' => 'http://linkeddata.systems/cgi-bin/Accessors/UniProtAccessor/',
      'dc:license' => 'http://purl.org/NET/rdflicense/cc-by-nd4.0',
  });

}



# urlencode: encode a string for use as a URL query-string value.
#
# Spaces become "+", ASCII letters, digits and "-" pass through, and every
# other byte is written as %XX (upper-case hex).
#
# Parameters:
#   $s - the string to encode
# Returns:
#   the encoded string; undef is passed through unchanged
#
# A literal "+" is encoded as %2B so it cannot be confused with an encoded
# space, and character strings are converted to UTF-8 bytes first so that
# code points above 0xFF produce valid two-digit escapes.
sub urlencode {
    my $s = shift;
    return $s unless defined $s;

    # Only strings carrying Perl's internal UTF-8 flag are re-encoded; plain
    # byte strings are assumed to already be in their wire encoding.
    utf8::encode($s) if utf8::is_utf8($s);

    # Escape first (leaving spaces alone), then turn spaces into "+"; doing it
    # in this order is what keeps an original "+" distinguishable.
    $s =~ s/([^A-Za-z0-9 -])/sprintf("%%%02X", ord($1))/seg;
    $s =~ tr/ /+/;
    return $s;
}


