#! /usr/bin/perl
use strict;

our $VERSION="0.4, 2004-12-07";	#RMH
# Omit GLYPH field on Anchors where gdef has no NAME entry
#our $VERSION="0.3, 2003-11-14";	#RMH
# Display warnings from Font::Scripts::AP;
#our $VERSION="0.2, 2003-11-14";	#RMH
#	Require font for input -- eliminates dependency on GID field of AP database
#	Change to use Font::Scripts::AP -- permits AP by contour point

# 0.1   RMH    14-Aug-02     Import VOLT anchor definitions from XML

use Font::TTF::Font;
use Font::Scripts::AP;
use Getopt::Std;

our ($opt_d, $opt_l, $opt_s);
getopts("dl:s");

unless (@ARGV == 3 || @ARGV == 4)
{
    die <<"EOT";

VOLTImportAnchors [-d] [-s] [-l alist.txt] in.ttf Anchors.xml out.ttf [out.vtp]

Imports anchor definitions from XML (in TTFBuilder syntax) into a VOLT project.

Original VOLT project must be a .TTF file (.VTP files no longer supported)
New anchor data, using TTFBuilder syntax, is read from Anchors.xml. A modified 
VOLT font project will be saved in out.ttf. If out.vtp is supplied, a copy of
the modified VOLT source is saved there.

Options:
    -d  discard existing VOLT anchor definitions that are not updated.

    -s  do not warn about existing VOLT anchor definitions that
        are not updated.

    -l  specifies a file that identifies which anchors to import. Anchors
        are listed on separate lines using the syntax:
            xml-anchor-type-name [, [volt-anchor-name] [, [component-num]]]
        A semicolon at start of line is a comment.
        For example:
            ; anchor list example. 
            ; XML anchor "vabove" will be imported directly
            ; "vbelow" will be renamed "BotCenter"
            ; "vbelow_2" will be renamed "BotCenter" but for component 2
            vabove
            vbelow,BotCenter
            vbelow_2,BotCenter,2
        If -l is not supplied, then all anchors will be imported and will
        be assumed to be for component 1 (no ligatures).
        (Note: the conversion between FontLab's convention of "_name" and
        VOLT's convention of "MARK_name" is automatic and should not be
        specified in a -l file).
        
$VERSION

EOT
}

my ($warningCount);

# Sub to print warning messages to console and log. 1st parm is the message.
# 2nd param, if supplied, is a line number (from the volt source).

sub MyWarn {
	my ($msg, $line) = @_;
	$warningCount++;
	if (defined $line) {
		print LOG "line $line: " . $msg;
		warn "line $line: " . $msg;
	} else {
		print LOG $msg;
		warn $msg;
	}
}


my ($inFont, $inXML, $outFont, $outSrc);	# Command line parameters


my ($Src, $SrcLine);	# Input VOLT source (slurped in as one long string), and line number counter
my $out;				# Output VOLT source

# If it is supplied the list file (-l parm) is read into a hash indexed by XML anchor type name,
# each element of which contains a two-element array consisting of the VOLT anchor name
# and the component number:
my (%XMLAnchorMap);

# NB: The VOLT source parsing is stolen from VOLTFixup.

# For each GDEF record in the VOLT source, a structure
# is created using an anonymous hash. Gdef structures have these elements:
#
#    'NAME'
#    'ID'
#    '@UNICODES'    a referemce to an array of Unicode values
#				NB: for Glyph structures, these are Unicode values for the new font (i.e., from the cmap)
#				and for Gdef structures these are Unicode values from the old font (i.e., from the VOLT source)
#    'UNICODE' or 'UNICODEVALUES'  string values from the GDEF line (sans quote marks)
#    'TYPE'			string value from GDEF line
#    'COMPONENTS'	string value from GDEF line
#    'LINE'       linenumber from source
#
#and, if DEF_ANCHOR records are present:	
# 	 'ANCHORS' 	a hash containing:
#		name		the attachment point name, which yields an array, indexed by:
#			component	the component number (1, 2, etc), which yields a hash containing
#				'SRC'	the source record

#		
#
my (%GdefFromID, %GdefFromName, %GdefFromGdefUnicode);	

my $f;		# TTF font instance
my $numGlyphs;	# Number of glyphs in the TTF.

my $g;		# Reference to glyph structure
my $d;		# Reference to GDEF structure

my ($gid, $gname, $u);	# Glyph ID, Glyph name, and Unicode

my ($xName, $xAnchor);	# XML Anchor name, XML Anchor structure;

my ($vName, $vComponent, $vAnchor); 	# Volt Anchor name, Volt Anchor Component, Volt Anchor structure


my $xx;		# Temp variable

# Pick up command line parms:
($inFont, $inXML, $outFont, $outSrc) = @ARGV;

# Open logfile:
open (LOG, "> VoltImportAnchor.log") or die "Couldn't open 'VoltImportAnchor.log' for writing, stopping at";
print LOG "STARTING VOLTImportAnchors " . ($opt_d ? "-d " : "") . ($opt_s ? "-s " : "") . ($opt_l ? "-l $opt_l " : "") . "$inFont $inXML $outFont $outSrc\n\n";

# Parse -l list file if supplied
if ($opt_l) {
	open (IN, "<$opt_l") or die "Couldn't open '$opt_l' for reading.";
	while (<IN>) {
		chomp;
		next if m/^;/o;
		s/ //go;
		my ($xName, $vName, $vComponent) = split(',');
		next if $xName eq '';
		$vName = $xName unless defined $vName;
		$vComponent = 1 unless defined $vComponent;
		$XMLAnchorMap{$xName} = [ $vName, $vComponent ];
		$XMLAnchorMap{"_$xName"} = [ "MARK_$vName", $vComponent ];
	}
}

# Sub to determine VOLT anchorname from an XML anchor "type" name; 
# Returns a list containing VOLT anchor name and component,
# or undef if we aren't to import this anchor.

sub ConvertXMLAnchorName {
	my ($xName, $xComponent);
	$xName = shift;
	
	if ($opt_l)
	{
		# -l supplied so do look; ignore any names we are to ignore:
		return undef unless exists $XMLAnchorMap{$xName};
		($xName, $xComponent) = @{$XMLAnchorMap{$xName}};
	}
	else
	{
		# -l not supplied: Assume we strip any trailing digits off as the component number:
		($xName, $xComponent) = ($xName =~ m/^(.*?)(\d*)$/);
		$xComponent ||= 1;
	}
	# adjust leading "_" to "MARK_":
	$xName =~ s/^_/MARK_/;
	# done!
	return ($xName, $xComponent);
}


# Read in font and anchor point data:

my $ap = Font::Scripts::AP->read_font($inFont,$inXML) || die "Can't read $inXML";
if (exists $ap->{'WARNINGS'})
{
	print LOG $ap->{'WARNINGS'};
	warn $ap->{'WARNINGS'};
	$warningCount += $ap->{'cWarnings'};
}
	

# Open and slurp VOLT source into $Src from existing font:

$f = $ap->{'font'};
exists $f->{'TSIV'} or die "Cannot find VOLT source table in file '$inFont'.\n";
$f->{'TSIV'}->read_dat;
$Src = $f->{'TSIV'}{' dat'};
$numGlyphs = $f->{'maxp'}{'numGlyphs'};

sub GetSrcLine {
	# Returns one line of text from the source, or undef if nothing left.
	# If the source was extracted from VOLT, the separators will be \r
	# If the source was read from CRLF delimited file, the separator will be \n
	# Need to allow either separator, but we don't return the terminator:
	return undef if $Src eq "";
	$SrcLine++;		# Keep track of line number in source.
	my $res;
	($res, $Src) = split (/\r|\n/, $Src, 2);
	return $res;
}



sub WriteAnchors {
	# Finally, here is the real work!
	#
	foreach $gid (0 .. $numGlyphs-1) {
		$d = $GdefFromID{$gid};

		# For each XML anchor definition, migrate location into GDEF structure, creating Anchor record if needed.
		if (defined $ap->{'glyphs'}[$gid]{'points'})
		{
			unless (defined $d)
			{
				MyWarn("attempt to import anchors on undefined glyph $gid");
				next;
			}
			foreach $xAnchor (values %{$ap->{'glyphs'}[$gid]{'points'}}) {
				$xName = $xAnchor->{'name'};
				($vName, $vComponent) = ConvertXMLAnchorName($xName);
				next unless defined $vName;
				$vAnchor = $d->{'ANCHORS'}{$vName}[$vComponent];
				if (defined $vAnchor) {
					# Edit existing anchor:
					$vAnchor->{'SRC'} =~ s/ POS .*END_POS /" POS DX $xAnchor->{'x'} DY $xAnchor->{'y'} END_POS "/e;
					$vAnchor->{'SRC'} =~ s/ AT / LOCKED AT / unless $vAnchor->{'SRC'} =~ / LOCKED /;
				} else {
					# Create new anchor:
					$d->{'ANCHORS'}{$vName}[$vComponent]{'SRC'} = "DEF_ANCHOR \"$vName\" ON $gid GLYPH "
						. (exists $d->{'NAME'} ? "$d->{'NAME'} " : "glyph$gid " )
						. "COMPONENT $vComponent LOCKED AT  POS DX $xAnchor->{'x'} DY $xAnchor->{'y'} END_POS END_ANCHOR";
					$vAnchor = $d->{'ANCHORS'}{$vName}[$vComponent];
				}
				# In any case, mark "dirty":
				$vAnchor->{'DIRTY'} = 1;
			}
		}
		
		# Now write out anchors for this glyph:
		next unless exists $d->{'ANCHORS'};
		foreach $vName (sort keys %{$d->{'ANCHORS'}}) {
			foreach $vComponent (0 .. @{$d->{'ANCHORS'}{$vName}}-1) {
				$vAnchor = $d->{'ANCHORS'}{$vName}[$vComponent];
				next unless $vAnchor->{'SRC'};
				unless ($vAnchor->{'DIRTY'})
				{
					if ($opt_d)
					{
						MyWarn ("Deleted anchor $vName,$vComponent on glyph $gid ($d->{'NAME'})\n") unless $opt_s;
						next;
					}
					MyWarn ("Didn't modify anchor $vName,$vComponent on glyph $gid ($d->{'NAME'})\n") unless $opt_s;
				}
				$out .= "$vAnchor->{'SRC'}\r";
			}
		}

	}
	
}

SRCLOOP: while (defined ($_ = GetSrcLine)) {

	if (/^DEF_ANCHOR /) {
		# need to modify anchordefs based on data from XML. A typical definition is:

		# DEF_ANCHOR "Below" ON 553 COMPONENT 1 LOCKED AT  POS DX 312 DY -540 END_POS END_ANCHOR

		# Note: Newer versions of VOLT include a glyph name:

		# DEF_ANCHOR "Below" ON 553 GLYPH gname COMPONENT 1 LOCKED AT  POS DX 312 DY -540 END_POS END_ANCHOR

		# The glyph name is not (currently) in quotes, but probably should be. This glyph name and the "ON"
		# glyph ID are mutually redundant with the DEF_GLYPH records, so why both? And what happens if
		# they disagree? Currently, the ON field is required (if it is absent, even if the GLYPH field
		# is present, VOLT resets the Anchor data to empty), so rather than risk an inconsistency I'm 
		# going to strip out the GLYPH field:

		# Anchor information is accumulated until
		# we have it all (so we can sort it)

		# For now, just add record to GDEF structure:
		
		($vName, $gid, $vComponent) = (m/DEF_ANCHOR (\S+).* ON (\d+) .* COMPONENT (\d+)/og);
		# make the quotes optional:
		$vName =~ s/^\"(.*)\"$/$1/;

		$GdefFromID{$gid}{'ANCHORS'}{$vName}[$vComponent]{'SRC'} = $_;
		next SRCLOOP;
	} 

	# PROCESS ALL OTHER KINDS OF LINES:

	if (/^GRID_PPEM/) {
		# Write out the anchor definitions (in sorted order: GID, Anchor name, component)
		WriteAnchors;
	}

	# Throw away any null bytes (typically at the end of the table):
	next SRCLOOP if /^\0+$/;
	
	# All other lines output as is...
	$out .= "$_\r";
	
	if (/^DEF_GLYPH/) {

		# PROCESS GDEF LINE:
		
		# extract important info from GDEF. Note the text lines are essentially in hash form already, e.g.:
		# DEF_GLYPH "U062bU062cIsol" ID 1097 UNICODE 64529 TYPE LIGATURE COMPONENTS 2 END_GLYPH
		# or
		# DEF_GLYPH "middot" ID 167 UNICODEVALUES "U+00B7,U+2219" TYPE BASE END_GLYPH
		# so it is easy to construct a hash:

		($xx = $_) =~ s/ END_GLYPH.*//;	# Remove end line sequence
		$xx =~ s/DEF_GLYPH/NAME/;		# Change DEF_GLYPH to NAME so we get correct structure variables
		$d = {split(' ', $xx)};			# Create GDEF structure
		$d->{'LINE'} = $SrcLine;

		# members of the hash at this point are:
		#	NAME	name of glyph
		#	ID		glyph ID
		#	UNICODE	decimal unicode value (optional), or
		#	UNICODEVALUES comma-separated list of Unicode values in U+nnnn string format
		#	TYPE	one of SIMPLE, MARK, or LIGATURE (optional)
		#	COMPONENTS	number of components in ligature (optional)
		#	LINE		line number from source


		if (not (exists $d->{'NAME'} and exists $d->{'ID'})) {
			MyWarn "Incomprehensible DEF_GLYPH\n", $SrcLine;
			next SRCLOOP;
		}
			
		# Some beta versions of VOLT didn't quote the glyph names, so let's make the quotes optional:
		$d->{'NAME'} =~ s/^\"(.*)\"$/$1/;

		$gname = $d->{'NAME'};
		$gid = $d->{'ID'};

		if (exists $GdefFromID{$gid}) {
			MyWarn "Glyph # $gid defined more than once in source -- second definition ignored\n", $SrcLine; 
			next SRCLOOP;
		};

 		# Coalesce UNICODE or UNICODEVALUES, if present, into @UNICODES array
		if (exists $d->{'UNICODE'}) {
			# Create array with one element:
			$d->{'@UNICODES'} = [ $d->{'UNICODE'} ];		
		} elsif (exists $d->{'UNICODEVALUES'}) {
			# Have to parse comma-separate list such as "U+00AF,U+02C9". But first get rid of quotes:
			$d->{'UNICODEVALUES'} =~ s/^\"(.*)\"$/$1/;
			$d->{'@UNICODES'} = [ map { hex (substr($_,2))} split (",", $d->{'UNICODEVALUES'})]; 
		}

		if ($gname =~ /^glyph\d+$/) {
			# GDEF includes a generic name -- we can only do so much at this point:
			$GdefFromID{$gid} = $d;							# Able to look up by GID
			if (exists $d->{'@UNICODES'}) {					# Able to lookup by Unicode
				foreach $u (@{$d->{'@UNICODES'}}) { $GdefFromGdefUnicode{$u} = $d;}
			}
			delete $d->{'NAME'};		# discard the generic name
			next SRCLOOP;
		}

		if (exists $GdefFromName{$gname}) {
			MyWarn "Glyph '$gname' defined more than once in source -- second definition ignored\n", $SrcLine;
			next SRCLOOP;
		}

		# Finally ... we can save this GDEF information
		$GdefFromID{$gid} = $d;								# Able to look up by GID
		$GdefFromName{$gname} = $d;							# Able to look up non-generic names only
		if (exists $d->{'@UNICODES'}) {						# Able to lookup by Unicode
			foreach (@{$d->{'@UNICODES'}}) { $GdefFromGdefUnicode{$_} = $d;}
		}		
		next SRCLOOP;
	} 

}			

# Write out new font:

# Insert the replacement source into the font
# Create, if it doesn't exist, the VOLT source table we are going to insert
$f->{'TSIV'} = Font::TTF::Table->new (PARENT => $f, NAME => 'TSIV') unless exists $f->{'TSIV'};

# Replace source:
$f->{'TSIV'}->{' dat'} = $out;

# Remove compiled tables if they exist:
for (qw( TSID TSIP TSIS )) { delete $f->{$_} };


$f->out($outFont);

if (defined $outSrc)
{
	# Open output source file:
	open (OUT, ">$outSrc") or die "Couldn't open '$outSrc' for writing.";
	print OUT $out;
	close OUT;
}


$xx = "\nFINISHED. ";
$xx .= ($warningCount > 0 ? $warningCount : "No") . " warning(s) issued. ";
print LOG $xx;
close LOG;

printf "%s%s\n", $xx, ($warningCount > 0) ? " See VoltImportAnchor.log for details." : "";
