/*********************************
***                            ***
***         HTML v1.3          ***
***         ==== ====          ***
***                            ***
*** (c) 24.1.94 by Andreas Ley ***
***                            ***
*** (u) 30.5.94		       ***
***                            ***
*********************************/

#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <sys/param.h>
#include "html.h"

/*
** Attribute name tables. Each array lists the attribute names accepted for
** one group of elements; the elements[] table below stores the array and its
** sizeof() for every element that takes attributes. attrib_type() returns an
** index into one of these arrays, and the same index selects the slot in
** element.attrib[] where the parsed value is stored.
** "Not standard" is the original author's marking.
*/
char	*isindex_attribs[] = { "ACTION" };	/* Not standard */
char	*nextid_attribs[] = { "N" };
char	*link_attribs[] = { "NAME", "HREF", "REL", "REV", "URN", "TITLE", "METHODS" };	/* shared by LINK and A */
char	*base_attribs[] = { "HREF" };
char	*img_attribs[] = { "SRC", "ALIGN",
				"ALT", "ISMAP" };	/* Not standard */
char	*list_attribs[] = { "COMPACT" };	/* shared by DL, UL, OL, DIR and MENU */
char	*form_attribs[] = { "ACTION", "METHOD", "ENCTYPE" };	/* Not standard */
char	*input_attribs[] = { "TYPE", "NAME", "VALUE", "CHECKED", "SIZE", "MAXLENGTH", "SRC" };	/* Not standard */
char	*select_attribs[] = { "NAME", "SIZE", "MULTIPLE" };	/* Not standard */
char	*option_attribs[] = { "SELECTED", "VALUE" };	/* Not standard */
char	*textarea_attribs[] = { "NAME", "ROWS", "COLS" };	/* Not standard */
char	*pre_attribs[] = { "WIDTH" };

/*
** Table of all known elements. The index of an entry is the element's
** "type" as stored in element.type and returned by element_type().
**
** Fields, in order, as they are used in this file:
**   name     element name, compared case-insensitively by element_type()
**   attribs  size IN BYTES of the attribute name array (sizeof, not a count;
**            users divide by sizeof(char *) to get the number of attributes)
**   attrib   the attribute name array, or NULL if the element has none
**   bracket  EMPTY for elements without a closing tag, NONEMPTY otherwise
**   (fifth)  TRUE/FALSE flag that is not read anywhere in this file;
**            presumably marks elements that break the text flow
**            -- TODO confirm against html.h
**
** Order matters: entry 0 must be the text pseudo element and entry 1 the
** comment (see TEXT/COMMENT below), and parsetext() treats every element
** whose index is >= the index of "PRE" as preformatted, so PRE, XMP,
** LISTING, PLAINTEXT and TEXTAREA have to stay at the end of the table.
*/
elements_list elements[] = {
		{ "<text>", 0, NULL, EMPTY, FALSE },
		{ "!--", 0, NULL, EMPTY, FALSE },
		{ "SGML", 0, NULL, NONEMPTY, FALSE },
		{ "HTML", 0, NULL, NONEMPTY, FALSE },
		{ "HEAD", 0, NULL, NONEMPTY, FALSE },
		{ "TITLE", 0, NULL, NONEMPTY, FALSE },
		{ "ISINDEX", sizeof(isindex_attribs), isindex_attribs, EMPTY, FALSE },
		{ "NEXTID", sizeof(nextid_attribs), nextid_attribs, EMPTY, FALSE },
		{ "LINK", sizeof(link_attribs), link_attribs, EMPTY, FALSE },
		{ "BASE", sizeof(base_attribs), base_attribs, EMPTY, FALSE },
		{ "EM", 0, NULL, NONEMPTY, FALSE },
		{ "TT", 0, NULL, NONEMPTY, FALSE },
		{ "STRONG", 0, NULL, NONEMPTY, FALSE },
		{ "B", 0, NULL, NONEMPTY, FALSE },
		{ "I", 0, NULL, NONEMPTY, FALSE },
		{ "U", 0, NULL, NONEMPTY, FALSE },
		{ "CODE", 0, NULL, NONEMPTY, FALSE },
		{ "SAMP", 0, NULL, NONEMPTY, FALSE },
		{ "KBD", 0, NULL, NONEMPTY, FALSE },
		{ "KEY", 0, NULL, NONEMPTY, FALSE },
		{ "VAR", 0, NULL, NONEMPTY, FALSE },
		{ "DFN", 0, NULL, NONEMPTY, FALSE },
		{ "CITE", 0, NULL, NONEMPTY, FALSE },
		{ "BODY", 0, NULL, NONEMPTY, FALSE },
		{ "A", sizeof(link_attribs), link_attribs, NONEMPTY, FALSE },
		{ "IMG", sizeof(img_attribs), img_attribs, EMPTY, FALSE },
		{ "P", 0, NULL, EMPTY, TRUE },
		{ "BR", 0, NULL, EMPTY, TRUE },	/* Not standard */
		{ "HR", 0, NULL, EMPTY, TRUE },	/* Not standard */
		{ "H1", 0, NULL, NONEMPTY, TRUE },
		{ "H2", 0, NULL, NONEMPTY, TRUE },
		{ "H3", 0, NULL, NONEMPTY, TRUE },
		{ "H4", 0, NULL, NONEMPTY, TRUE },
		{ "H5", 0, NULL, NONEMPTY, TRUE },
		{ "H6", 0, NULL, NONEMPTY, TRUE },
		{ "DL", sizeof(list_attribs), list_attribs, NONEMPTY, TRUE },
		{ "DT", 0, NULL, EMPTY, TRUE },
		{ "DD", 0, NULL, EMPTY, TRUE },
		{ "UL", sizeof(list_attribs), list_attribs, NONEMPTY, TRUE },
		{ "OL", sizeof(list_attribs), list_attribs, NONEMPTY, TRUE },
		{ "DIR", sizeof(list_attribs), list_attribs, NONEMPTY, TRUE },
		{ "MENU", sizeof(list_attribs), list_attribs, NONEMPTY, TRUE },
		{ "LI", 0, NULL, EMPTY, TRUE },
		{ "BLOCKQUOTE", 0, NULL, NONEMPTY, TRUE },
		{ "ADDRESS", 0, NULL, NONEMPTY, TRUE },
		{ "FORM", sizeof(form_attribs), form_attribs, NONEMPTY, FALSE },	/* Not standard */
		{ "INPUT", sizeof(input_attribs), input_attribs, EMPTY, FALSE },	/* Not standard */
		{ "SELECT", sizeof(select_attribs), select_attribs, NONEMPTY, FALSE },	/* Not standard */
		{ "OPTION", sizeof(option_attribs), option_attribs, EMPTY, TRUE },	/* Not standard */
		{ "PRE", sizeof(pre_attribs), pre_attribs, NONEMPTY, TRUE },
		{ "XMP", 0, NULL, NONEMPTY, TRUE },
		{ "LISTING", 0, NULL, NONEMPTY, TRUE },
		{ "PLAINTEXT", 0, NULL, EMPTY, TRUE },
		{ "TEXTAREA", sizeof(textarea_attribs), textarea_attribs, NONEMPTY, TRUE },	/* Not standard */
	};

#define	TEXT	0
#define	COMMENT	1
#define	TAGS	1


/*
** Compare two strings ignoring the case of the ASCII letters a-z.
**
** Returns 0 if the strings are equal, otherwise the difference between the
** first pair of (upper-cased) characters that differ, s1 minus s2.
**
** The former one-line version assigned the same variable in both operands
** of '-' without an intervening sequence point, which is undefined
** behaviour; each string now has its own variable.
*/
int stricmp(s1,s2)
register char *s1,*s2;
{
  register int	c1,c2;

  do {
    c1=*s1++;
    if ('a'<=c1&&c1<='z')
      c1-='a'-'A';
    c2=*s2++;
    if ('a'<=c2&&c2<='z')
      c2-='a'-'A';
    /* stop at the first difference or at the common end of both strings */
  } while (c1==c2&&c2);
  return(c1-c2);
}



/*
** Return a freshly malloc()ed, NUL terminated copy of at most len characters
** of src. The copy always occupies len+1 bytes; if src is shorter than len
** the remainder is zero filled by strncpy().
**
** Returns NULL if no memory is available. The caller owns the result and
** releases it with free().
*/
char *strndup(src,len)
char	*src;
size_t	len;
{
	char	*tmp;

	tmp=malloc(len+1);
	if (!tmp)
		return(NULL);
	strncpy(tmp,src,len);
	tmp[len]='\0';	/* strncpy() does not terminate when src has len or more characters */
	return(tmp);
}



/*
** Map an element name to its index in elements[].
** The comparison ignores case. The search starts at TAGS, so the "<text>"
** pseudo element at index 0 can never be matched.
** Returns the index, or -1 if the name is unknown.
*/
int element_type(elem)
char	*elem;
{
	int	idx=TAGS;

	while (idx<sizeof(elements)/sizeof(elements_list)) {
		if (stricmp(elements[idx].name,elem)==0)
			return(idx);
		idx++;
	}
	return(-1);
}


/*
** Map an attribute name to its index in the attribute table of element
** number elem (an index into elements[]). The comparison ignores case.
** Returns the index, or -1 if the element does not know the attribute;
** elements without attributes always yield -1.
*/
int attrib_type(elem,attr)
int	elem;
char	*attr;
{
	int	idx=0;

	/* .attribs holds the table size in bytes, not the number of entries */
	while (idx<elements[elem].attribs/sizeof(char *)) {
		if (stricmp(elements[elem].attrib[idx],attr)==0)
			return(idx);
		idx++;
	}
	return(-1);
}



/*
** Allocate and initialise a parse tree node for elements[type].
**
** The node is not linked to anything (next, parent and content are NULL),
** all attribute slots are NULL, and the node is marked open unless the
** element is declared EMPTY in elements[].
**
** Returns the new node, or NULL if memory is exhausted. The node and its
** attribute array are released by freeparse().
*/
element *new_element(type)
int	type;
{
	element	*new;
	int	cnt;

	new=(element *)malloc(sizeof(element));
	if (!new)
		return(NULL);
	new->next=NULL;
	new->parent=NULL;
	new->type=type;
	new->content=NULL;
	new->open=elements[type].bracket!=EMPTY;

	/*
	** .attribs is the size in bytes of the attribute table. For elements
	** without attributes this is malloc(0), which may legitimately return
	** NULL, so NULL only counts as a failure for a non-zero size.
	*/
	new->attrib=(char **)malloc(elements[type].attribs);
	if (!new->attrib&&elements[type].attribs) {
		free(new);
		return(NULL);
	}
	for (cnt=0;cnt<elements[type].attribs/sizeof(char *);cnt++)
		new->attrib[cnt]=NULL;
	return(new);
}



void parsetext(current,text,ln)
element	**current;
char	*text;
int	ln;
{
	element	*eptr;
	int	pre;
	char	*ptr,*nptr;

	/* Are we in a <PRE> environment (or similar)? */
	pre=element_type("PRE");
	for(eptr=(*current)->open?*current:(*current)->parent;eptr;eptr=eptr->parent) {
		if (eptr->type>=pre)
			break;
	}
	/* No <PRE>, so float text */
	if (!eptr) {
		nptr=ptr=text;
		while (*nptr=*ptr++)
			if (strchr(" \t\r\n",*nptr)) { /* Multiple whitespace gives a single space */
				if (nptr==text||nptr[-1]!=' ')
					*nptr++=' ';
				}
			else
				nptr++;
	}

	if (*text) {
		if ((*current)->open) {
			(*current)->content=new_element(TEXT);	/* for free see parsetext */
			(*current)->content->parent=(*current);
			(*current)=(*current)->content;
		}
		else {
			(*current)->next=new_element(TEXT);	/* for free see parsetext */
			(*current)->next->parent=(*current)->parent;
			(*current)=(*current)->next;
		}
		(*current)->content=(element *)strdup(text);	/* for free see parsetext */
	}
}



/*
** Process the contents of one tag (the text between '<' and '>').
**
** current  in/out: the most recently added node; updated to the node that
**          was added or closed
** tag      NUL terminated tag text, blanks as the only whitespace; it is
**          modified in place (NULs are written behind names and values)
** ln       line number of the tag, used for diagnostics only
**
** Opening tags: the name is looked up in elements[]; unknown names are
** reported on stderr and ignored, comments ("!--") are silently dropped.
** Otherwise a new node is added as first child of *current if that is still
** open, else as its next sibling. Attributes have the form NAME, NAME=value,
** NAME="value" or NAME='value'. Unknown attributes are reported, and of a
** repeated attribute only the first occurrence is kept. Attributes without
** a value are stored as NOVALUE, values are stored strdup()ed.
**
** Closing tags: unknown names and elements declared EMPTY are reported.
** Otherwise the innermost element of that type which is still open is
** closed and becomes *current. If no such element exists the historic
** fallback applies: walk up until an element of that type or the top level
** element is reached and mark that one closed.
**
** Changes against the former version: blanks behind the last attribute no
** longer produce an "attribute  illegal" message for an empty name, a
** closing tag that directly follows a closed element of the same type (as
** in "</UL></UL>") now closes the enclosing element instead of closing the
** inner one again, and running out of memory is reported.
*/
void parseitem(current,tag,ln)
element	**current;
char	*tag;
int	ln;
{
	char	c,*ptr,*nptr,*attr,*val;
	int	type,cnt;
	element	*node,*match;

	/* Split the element name from the attribute list */
	ptr=strchr(tag,' ');
	if (ptr)
		*ptr++='\0';

	if (*tag=='/') {
		/* Closing tag */
		tag++;
		type=element_type(tag);
		if (type<0) {
			(void)fprintf(stderr,"line %d, tag </%s> illegal\n",ln,tag);
			return;
		}
		if (elements[type].bracket==EMPTY) {
			(void)fprintf(stderr,"line %d, closing tag </%s> illegal: tag is empty\n",ln,tag);
			return;
		}
		/* Innermost element of this type that has not been closed yet */
		for (match=(*current);match;match=match->parent)
			if (match->open&&match->type==type)
				break;
		if (match)
			(*current)=match;
		else
			/* Nothing suitable is open: historic behaviour */
			while ((*current)->type!=type&&(*current)->parent)
				(*current)=(*current)->parent;
		(*current)->open=FALSE;
		return;
	}

	/* Opening tag */
	type=element_type(tag);
	if (type<0) {
		(void)fprintf(stderr,"line %d, tag <%s> illegal\n",ln,tag);
		return;
	}
	if (type==COMMENT)
		return;

	node=new_element(type);
	if (!node) {
		(void)fprintf(stderr,"line %d, out of memory\n",ln);
		return;
	}
	if ((*current)->open) {
		(*current)->content=node;
		node->parent=(*current);
	}
	else {
		(*current)->next=node;
		node->parent=(*current)->parent;
	}
	(*current)=node;

	/* Has tag any attributes? */
	if (!ptr)
		return;

	/* Still more attributes? */
	while (*ptr) {
		/* Skip whitespace */
		while (*ptr==' ')
			ptr++;
		/* Only blanks were left: this is not an attribute */
		if (!*ptr)
			break;
		/* Start of attribute */
		attr=ptr;
		/* Look for value */
		while (*ptr&&*ptr!=' '&&*ptr!='=')
			ptr++;
		/* End of attribute name; blanks may precede the '=' */
		nptr=ptr;
		while (*ptr==' ')
			ptr++;
		if (*ptr=='=') {
			*nptr='\0';
			ptr++;
			/* Skip whitespace */
			while (*ptr==' ')
				ptr++;
			/* An unquoted value ends at the next blank, a quoted one at the matching quote */
			c=' ';
			if (*ptr=='\"'||*ptr=='\'')
				c=*ptr++;
			/* Start of value */
			val=ptr;
			/* Look for ending character */
			while (*ptr&&*ptr!=c)
				ptr++;
			/* Found ending character? */
			if (*ptr)
				*ptr++='\0';
			/* Check and store attribute value */
			cnt=attrib_type(type,attr);
			if (cnt<0)
				(void)fprintf(stderr,"line %d, attribute %s illegal for tag <%s>\n",ln,attr,tag);
			else if (!node->attrib[cnt])
				node->attrib[cnt]=strdup(val);	/* released by freeparse() */
		}
		else {
			/* No value */
			*nptr='\0';
			cnt=attrib_type(type,attr);
			if (cnt<0)
				(void)fprintf(stderr,"line %d, attribute %s illegal for tag <%s>\n",ln,attr,tag);
			else if (!node->attrib[cnt])
				node->attrib[cnt]=NOVALUE;
		}
	}
}



element *file2parse(file)
int	file;
{
	element			base,*current;
	char			c,*ptr,*tmp;
	int			ln,cln;
	enum {OPEN,CLOSE}	mode=CLOSE;
	static size_t		linesize=0;
	static char		*line=NULL;

	base.next=NULL;
	base.parent=NULL;
	base.open=FALSE;
	current=&base;

	cln=ln=1;
	ptr=line;
	while (read(file,&c,1)) {
		if (c=='\n')
			ln++;
		if (c=='\r')
			continue;
		if (ptr-line>=linesize) {
			linesize+=1024;
			tmp=malloc(linesize);	/* freed upon next growth */
			if (line) {
				(void)memcpy((void*)tmp,(void*)line,(size_t)(ptr-line));
				ptr=tmp+(ptr-line);
				free((void*)line);
			}
			line=tmp;
		}
		if (c=='<'&&mode==CLOSE) {
			mode=OPEN;
			if (ptr>line) {
				*ptr='\0';
				parsetext(&current,line,cln);	/* for free see file2parse */
			}
			ptr=line;
			cln=ln;
		}
		else if (c=='>'&&mode==OPEN) {
			mode=CLOSE;
			if (ptr>line) {
				*ptr='\0';
				parseitem(&current,line,cln);	/* for free see file2parse */
			}
			ptr=line;
			cln=ln;
		}
		else
			if ((c=='\t'||c=='\n')&&mode==OPEN)
				*ptr++=' ';
			else
				*ptr++=c;
	}
	return(base.next);
}



element *mem2parse(data,len)
char	*data;
int	len;
{
	element			base,*current;
	char			c,*ptr,*tmp;
	int			ln,cln;
	enum {OPEN,CLOSE}	mode=CLOSE;
	static size_t		linesize=0;
	static char		*line=NULL;

	base.next=NULL;
	base.parent=NULL;
	base.open=FALSE;
	current=&base;

	cln=ln=1;
	ptr=line;
	while (len--) {
		c=*data++;
		if (c=='\n')
			ln++;
		if (ptr-line>=linesize) {
			linesize+=1024;
			tmp=malloc(linesize);	/* freed upon next growth */
			if (line) {
				(void)memcpy((void*)tmp,(void*)line,(size_t)(ptr-line));
				ptr=tmp+(ptr-line);
				free((void*)line);
			}
			else
				ptr=tmp;
			line=tmp;
		}
		if (c=='<'&&mode==CLOSE) {
			mode=OPEN;
			if (ptr>line) {
				*ptr='\0';
				parsetext(&current,line,cln);	/* for free see mem2parse */
			}
			ptr=line;
			cln=ln;
		}
		else if (c=='>'&&mode==OPEN) {
			mode=CLOSE;
			if (ptr>line) {
				*ptr='\0';
				parseitem(&current,line,cln);	/* for free see mem2parse */
			}
			ptr=line;
			cln=ln;
		}
		else
			if ((c=='\t'||c=='\n')&&mode==OPEN)
				*ptr++=' ';
			else
				*ptr++=c;
	}

	return(base.next);
}



/*
** Return the default port number used for an URL scheme. The scheme name
** is compared case-sensitively. Note that "file" is treated like "ftp".
**
** Returns 0 for a scheme that is not known. The former version returned
** without a value in that case, although callers use the result.
*/
int defport(scheme)
char	*scheme;
{
	if (!strcmp(scheme,"file")||!strcmp(scheme,"ftp"))
		return(21);
	if (!strcmp(scheme,"telnet"))
		return(23);
	if (!strcmp(scheme,"mailto")||!strcmp(scheme,"smtp"))
		return(25);
	if (!strcmp(scheme,"wais"))
		return(210);
	if (!strcmp(scheme,"gopher"))
		return(70);
	if (!strcmp(scheme,"http"))
		return(80);
	if (!strcmp(scheme,"news")||!strcmp(scheme,"nntp"))
		return(119);
	/* Unknown scheme: no default port */
	return(0);
}



/*
** Parse the (possibly relative) URL text txt, as found in the document with
** the URL src, into dest.
**
** dest  receives the result; scheme, host, path and anchor are allocated
**       with malloc() and released by freeurl(). Previous contents of dest
**       are overwritten without being freed.
** src   URL of the referring document; supplies every part missing in txt
** txt   URL text: [scheme:][//host[:port]][path][#anchor]
**
** - Text up to a ':' that occurs before the first '/' is the scheme. A
**   scheme given in txt resets the default port to defport(scheme).
** - "//" directly behind that introduces the host, optionally followed by
**   ":port"; a run of slashes behind it counts as a single one.
** - Everything behind the first '#' is the anchor; without '#' the anchor
**   is NULL.
** - Except for the scheme "news", the path is built as follows: a missing
**   path behind a scheme or host yields "/", a path starting with '/' is
**   taken as is, anything else replaces the part behind the last '/' of
**   src->path. Afterwards repeated slashes are merged and "dir/../" is
**   removed.
** - For "news" the remaining text is copied to the path unchanged.
**
** dest->path is NULL if no memory is available for it.
**
** Changes against the former version: the path is assembled in a buffer of
** the required size instead of a fixed array of MAXPATHLEN bytes that long
** URLs overflowed, a base path without '/' no longer leads to a NULL pointer
** access, the byte behind the terminating NUL of txt is no longer examined
** for URLs like "http://host", and the DEBUG output uses matching format
** specifiers and does not pass NULL to %s.
*/
void txt2url(dest,src,txt)
url	*dest;
url	*src;
char	*txt;
{
	char	*path,*tptr,*ptr,*nptr;
	int	port;
	size_t	len;

#ifdef DEBUG
(void)fprintf(stderr,"txt2url(%p,\"%s://%s:%d%s#%s\",\"%s\")\n",(void *)dest,src->scheme,src->host,src->port,src->path,src->anchor?src->anchor:"(null)",txt);
#endif

	tptr=txt;
	ptr=tptr;
	port=src->port;	/* Default port may be changed by scheme */
	while (*ptr&&*ptr!=':'&&*ptr!='/')
		ptr++;

	/* : means we've got a scheme in front */
	if (*ptr==':') {
		dest->scheme=strndup(tptr,ptr-tptr);
		port=defport(dest->scheme);
		while (*ptr==':')
			ptr++;
		tptr=ptr;
	}
	else
		dest->scheme=strdup(src->scheme);
#ifdef DEBUG
(void)fprintf(stderr,"dest->scheme=\"%s\"\n",dest->scheme);
#endif

	/* // means host in front */
	if (ptr==tptr&&ptr[0]=='/'&&ptr[1]=='/') {
		ptr+=2;
		while (*ptr&&*ptr!=':'&&*ptr!='/')
			ptr++;
		dest->host=strndup(tptr+2,ptr-tptr-2);
		/* : means we've got a port appended */
		if (*ptr==':') {
			while (*ptr==':')
				ptr++;
			dest->port=atoi(ptr);
		}
		else
			dest->port=port;
		while (*ptr&&*ptr!='/')
			ptr++;
		/* Step to the last slash of a run; never look behind the NUL */
		while (*ptr&&ptr[1]=='/')
			ptr++;
		tptr=ptr;
	}
	else {
		dest->host=strdup(src->host);
		dest->port=port;
	}
#ifdef DEBUG
(void)fprintf(stderr,"dest->host=\"%s\"\n",dest->host);
(void)fprintf(stderr,"dest->port=%d\n",dest->port);
#endif

	ptr=tptr;
	while (*ptr&&*ptr!='#')
		ptr++;

	/* # means we've got an anchor appended */
	if (*ptr=='#')
		dest->anchor=strdup(ptr+1);
	else
		dest->anchor=NULL;
#ifdef DEBUG
(void)fprintf(stderr,"dest->anchor=\"%s\"\n",dest->anchor?dest->anchor:"(null)");
(void)fprintf(stderr,"tptr=\"%s\"\n",tptr);
(void)fprintf(stderr,"ptr=\"%s\"\n",ptr);
#endif

	if (strcmp(dest->scheme,"news")) {
		/* The path text of txt is tptr[0..len-1]; it contains no NUL */
		len=(size_t)(ptr-tptr);

		/* Missing path with scheme or host or port defined gives / */
		if (!*tptr&&tptr>txt) {
			path=malloc(2);
			if (path)
				(void)strcpy(path,"/");
		}
		/* Leading / means full path */
		else if (*tptr=='/') {
			path=malloc(len+1);
			if (path) {
				(void)memcpy(path,tptr,len);
				path[len]='\0';
			}
		}
		/* Relative path: replaces the last component of the base path */
		else {
			path=malloc(strlen(src->path)+len+1);
			if (path) {
				(void)strcpy(path,src->path);
				if (len) {
					nptr=strrchr(path,'/');
					nptr=nptr?nptr+1:path;
					(void)memcpy(nptr,tptr,len);
					nptr[len]='\0';
				}
			}
		}
#ifdef DEBUG
(void)fprintf(stderr,"path=\"%s\"\n",path?path:"(null)");
#endif
		if (path) {
			/* Normalize in place: ptr reads, nptr writes */
			ptr=path;
			nptr=ptr;
			while (*ptr) {
				if (*ptr=='/') {
					/* Second slash of a run: drop it */
					if (nptr>path&&nptr[-1]=='/')
						ptr++;
					else
						/* "/.." just written: drop it and the component before it */
						if (nptr>path+3&&nptr[-3]=='/'&&nptr[-2]=='.'&&nptr[-1]=='.') {
							nptr-=3;
							while (nptr>path&&nptr[-1]!='/')
								nptr--;
							ptr++;
						}
						else
							*nptr++=*ptr++;
				}
				else
					*nptr++=*ptr++;
			}
			*nptr='\0';
		}
		dest->path=path;	/* ownership passes to dest */
	}
	else
		dest->path=strdup(tptr);
#ifdef DEBUG
(void)fprintf(stderr,"dest->path=\"%s\"\n",dest->path?dest->path:"(null)");
(void)fprintf(stderr,"dest=\"%s://%s:%d%s#%s\"\n",dest->scheme,dest->host,dest->port,dest->path?dest->path:"(null)",dest->anchor?dest->anchor:"(null)");
#endif
}



/*
** Write the textual form of the URL src to dest.
**
** dest         receives the NUL terminated text; the caller has to provide
**              a buffer that is large enough, no bounds are checked here
** src          the URL to convert
** anchor_flag  if non-zero and src has an anchor, "#anchor" is appended
**              (not for "news")
**
** The result is "scheme://host[:port]path[#anchor]", where the port is left
** out if it is the default port of the scheme. For the scheme "news" the
** result is "scheme:path", since such URLs carry no host part.
**
** The port buffer used to be 6 bytes, which ":65535" plus NUL does not fit
** into; it now holds any int.
*/
void url2txt(dest,src,anchor_flag)
char	*dest;
url	*src;
int	anchor_flag;
{
	char	port[24];	/* ':' + any int + NUL */

	if (strcmp(src->scheme,"news")) {
		(void)sprintf(port,":%d",src->port);
		(void)sprintf(dest,"%s://%s%s%s",src->scheme,src->host,src->port==defport(src->scheme)?"":port,src->path);
		if (anchor_flag&&src->anchor) {
			strcat(dest,"#");
			strcat(dest,src->anchor);
		}
	}
	else
		(void)sprintf(dest,"%s:%s",src->scheme,src->path);
}



/*
** Write the local file name that belongs to the URL src to dest.
**
** dest  receives the NUL terminated name; the caller has to provide a
**       buffer that is large enough, no bounds are checked here
** src   the URL to convert
**
** The name is "host[:port]path", where the port is left out if it is the
** default port of the scheme. "index.html" is appended to a path that ends
** in '/'. Scheme and anchor do not appear in the name.
**
** Changes against the former version: the port buffer (formerly 6 bytes,
** too small for ":65535" plus NUL) holds any int, and an empty path no
** longer causes a read in front of the string; nothing is appended to it.
*/
void url2fname(dest,src)
char	*dest;
url	*src;
{
	char	port[24];	/* ':' + any int + NUL */
	size_t	len;

	(void)sprintf(port,":%d",src->port);
	len=strlen(src->path);
	(void)sprintf(dest,"%s%s%s%s",src->host,src->port==defport(src->scheme)?"":port,src->path,len>0&&src->path[len-1]=='/'?"index.html":"");
}


/*
** Change a URL to a relative one: coming from the document src and
** following the text written to dest, you'll reach link.
**
** dest         receives the NUL terminated relative reference; the caller
**              has to provide a buffer that is large enough, no bounds are
**              checked here
** src          URL of the document containing the reference
** link         URL the reference points to
** anchor_flag  if non-zero and link has an anchor, "#anchor" is appended,
**              so a specific NAME anchor in the destination document is
**              pointed to
**
** - Different scheme, host or port: one "../" for every '/' in src->path,
**   followed by "host[:port]path" of link (the port is left out if it is
**   the default port of link's scheme).
** - Same server, different path: the leading directories common to both
**   paths are dropped, one "../" is written for every remaining directory
**   of src->path, followed by the rest of link->path.
** - Same document: "#anchor" if requested and present, else "".
**
** Changes against the former version: the common prefix is compared
** including the '/' that ends each directory. Before, "/a/x" was taken to
** share the directory "/a" with "/ab/y", which gave a wrong result and
** could read behind the end of src->path. The port buffer (formerly
** 6 bytes, too small for ":65535" plus NUL) holds any int.
*/
void url2rel(dest,src,link,anchor_flag)
char	*dest;
url	*src;
url	*link;
int	anchor_flag;
{
	char	port[24],*ptr,*sptr,*nptr;	/* port: ':' + any int + NUL */

	(void)sprintf(port,":%d",link->port);

	/*
	** We don't have a filesystem syntax for different schemes yet,
	** so we hope directory trees for different schemes don't mix.
	** Since FTP usually uses /pub and gopher uses /11 and lookalikes,
	** there won't be much trouble. If there is, we'll have to get into
	** the trouble of creating a new toplevel directory specifying the
	** scheme. Since this violates current practice, it is delayed.
	*/

	if (strcmp(src->scheme,link->scheme)||
	strcmp(src->host,link->host)||src->port!=link->port) {
		/* Climb out of every directory of the source path */
		sptr=src->path;
		while ((nptr=strchr(sptr,'/'))!=NULL) {
			strcpy(dest,"../");
			dest+=3;
			sptr=nptr+1;
		}
		strcpy(dest,link->host);
		if (link->port!=defport(link->scheme))
			strcat(dest,port);
		strcat(dest,link->path);
		if (anchor_flag&&link->anchor) {
			strcat(dest,"#");
			strcat(dest,link->anchor);
		}
	}

	else if (strcmp(src->path,link->path)) {
		/* Skip the directories both paths have in common, '/' included */
		ptr=link->path;
		while ((nptr=strchr(ptr,'/'))!=NULL&&
		!strncmp(link->path,src->path,(size_t)(nptr-link->path)+1))
			ptr=nptr+1;

		/* Both paths agree up to here, so this stays inside src->path */
		sptr=src->path+(ptr-link->path);
		while ((nptr=strchr(sptr,'/'))!=NULL) {
			strcpy(dest,"../");
			dest+=3;
			sptr=nptr+1;
		}
		strcpy(dest,ptr);
		if (anchor_flag&&link->anchor) {
			strcat(dest,"#");
			strcat(dest,link->anchor);
		}
	}

	else if (anchor_flag&&link->anchor) {
		strcpy(dest,"#");
		strcat(dest,link->anchor);
	}

	else
		strcpy(dest,"");
}



/*
** Release a parse tree: the node ptr, all siblings following it, and
** everything below them. ptr may be NULL.
**
** Attribute values are freed unless they are the NOVALUE marker. The
** content of a text node (type 0) is a plain string and is freed directly,
** the content of any other node is a subtree and is released recursively.
*/
void freeparse(ptr)
element	*ptr;
{
	element	*node,*following;
	int	idx;

	for (node=ptr;node;node=following) {
		following=node->next;

		if (node->attrib) {
			for (idx=0;idx<elements[node->type].attribs/sizeof(char *);idx++) {
				if (!node->attrib[idx]||node->attrib[idx]==NOVALUE)
					continue;
				free(node->attrib[idx]);
			}
			free(node->attrib);
		}

		if (node->content) {
			if (node->type)
				freeparse(node->content);
			else
				free(node->content);
		}

		free(node);
	}
}



/*
** Release the strings owned by the URL ptr (scheme, host, path, anchor).
** Members that are NULL are skipped, since free(NULL) does nothing. The
** url structure itself is not freed and its members are not reset.
*/
void freeurl(ptr)
url	*ptr;
{
	free(ptr->scheme);
	free(ptr->host);
	free(ptr->path);
	free(ptr->anchor);
}
