/*
 *	Copyright 1989 by Rayan S. Zachariassen, all rights reserved.
 *	This will be free software, but only when it is finished.
 */

/*
 * Filename expansion (globbing) routines.  The expand() function is called
 * just before pushing an argv onto a command descriptor in the interpreter.
 * As a side-effect, multiple buffers are mashed into one, and word separation
 * using IFS characters is also done here.
 */

#include "hostenv.h"
#include <stdio.h>
#include <sys/file.h>
#include <sys/stat.h>

#ifdef HAVE_DIRENT_H
# include <dirent.h>
#else /* not HAVE_DIRENT_H */
# define dirent direct
# ifdef HAVE_SYS_NDIR_H
#  include <sys/ndir.h>
# endif /* HAVE_SYS_NDIR_H */
# ifdef HAVE_SYS_DIR_H
#  include <sys/dir.h>
# endif /* HAVE_SYS_DIR_H */
# ifdef HAVE_NDIR_H
#  include <ndir.h>
# endif /* HAVE_NDIR_H */
#endif /* HAVE_DIRENT_H */

#include "sh.h"
#include "flags.h"
#include "malloc.h"
#include "listutils.h"
#include "io.h"			/* redefines stdio routines */
#include "shconfig.h"

extern int glut();
extern int interrupted, glob_match();
extern void *tmalloc();

/*
 * For speed we look up magic characters (*, ?, and [) to check whether
 * a word requires filename globbing.  The globchars array contains such
 * flags: it is indexed by character value, and a non-zero entry marks a
 * character that makes squish() treat the word as a glob pattern.  The
 * entries are filled in by glob_init().
 */

char globchars[CHARSETSIZE];

/*
 * Initialize the array, called from main(), once.
 */

void
glob_init()
{
	/* flag each filename-generation metacharacter individually */
	globchars['['] = 1;
	globchars['?'] = 1;
	globchars['*'] = 1;
	/* see also sJumpIfMatch in interpreter for globchars['|'] */
}

/*
 * Because the result of filename expansion must be presented in alphabetical
 * order, we have to keep the pathnames somewhere temporarily to do a qsort().
 * Since we don't know how many there will be, we maintain a linked list of
 * (struct sawpath), then turn that into an array that we can sort easily.
 */

/*
 * One stashed pathname.  stash() allocates sizeof (struct sawpath) + len
 * bytes for a name of length len, so the single byte declared for array[]
 * is what leaves room for the terminating NUL; do not shrink it.
 */
struct sawpath {
	struct sawpath *next;	/* next stashed pathname, NULL on the last one */
	u_char	array[1];	/* is really array[strlen(path)+1] */
};

/*
 * Normally we don't want to descend into symlink'ed directories, but in
 * case you do, this is where you change it.  It is INADVISABLE to use
 * stat() since in that case a super-glob can lead to infinite recursion.
 */

/*
 * glut() calls through statfcn to check that a pathname is a directory
 * before trying to open it.
 */
#ifdef	HAVE_LSTAT
extern int lstat();
int (*statfcn)() = lstat;	/* stat if following symlinks, o/w lstat */
#else	/* !HAVE_LSTAT */
extern int stat();
int (*statfcn)() = stat;	/* lstat() not configured, stat() is all we have */
#endif	/* HAVE_LSTAT */

/*
 * Qsort() comparison function to sort pathnames.
 */

int
pathcmp(ap, bp)
     const void *ap, *bp;
{
	/* qsort() hands us pointers to the array slots, i.e. to the strings */
	const u_char *lhs = *(const u_char **)ap;
	const u_char *rhs = *(const u_char **)bp;
	int delta = *lhs - *rhs;

	/* only pay for a full strcmp() when the leading bytes tie */
	if (delta != 0)
		return delta;
	return strcmp((char *)lhs, (char *)rhs);
}

/* note that | is going to be used instead of / in some pathname examples */

/*
 * Super-glob.  This is exactly like glob except that the sequence: |**|
 * in a pathname will match any number of levels of directories.  It's an
 * old idea of mine so here's a wonderful opportunity to express myself within
 * the confines of sh!  The major thing to note is that quoted characters
 * do not qualify for globbing.  How does one know if a character is quoted?
 * Each character is stored in an int, the u_char value is obtained using
 * the BYTE() macro, and the quotedness is determined by the QUOTEBYTE bit
 * in the int.
 */

/* the character stored in a pattern element: its low 8 bits, flags removed */
#define BYTE(X)		((X)&0xff)
/* flag bit or'ed into a pattern element whose character was quoted */
#define	QUOTEBYTE	(1<<15)

STATIC struct conscell *
sglob(ibuf)
	int *ibuf;	/* unglobbed pathname w/ each byte stored as int */
{
	register int i, n;
	register struct conscell *d, *tmp;
	struct sawpath *spp;
	struct conscell cc;
	struct sawpath head;
	u_char	*pwd,		/* points to end of current directory in cwd */
		**base,		/* array of expanded filenames, for sorting */
		cwd[4096];	/* all pathnames are constructed in cwd */
	
	if (BYTE(ibuf[0]) == '/') {
		cwd[0] = '/';
		pwd = cwd+1;
	} else if (BYTE(ibuf[0]) == '.' && BYTE(ibuf[1]) == '/') {
		cwd[0] = '.';
		cwd[1] = '/';
		pwd = cwd+2;
		ibuf += 2;
	} else
		pwd = cwd;

	*pwd = '\0';
	head.next = NULL;
	spp = &head;
	if (BYTE(ibuf[0]) != 0)
		n = glut(cwd, pwd, ibuf, 0, &spp);	/* do expansion */
	else
		return NULL;
	if (n <= 0)
		return NULL;
	base = (u_char **)tmalloc((sizeof (char *))*n);
	i = 0;
	for (spp = head.next; spp != NULL ; spp = spp->next)
		base[i++] = spp->array;
	qsort(base, n, sizeof base[0], pathcmp);
	/* construct a sorted linked list */
	d = &cc;
	for (i = 0; i < n; ++i) {
		cdr(d) = conststring(base[i]);
		d = cdr(d);
		/* printf("saw %s\n", base[i]); */
	}
	cdr(d) = NULL;
	return cdr(&cc);
}

/*
 * Utility function to append a pathname to a linked list for later sorting.
 */

/*
 * Copy the first len bytes of s into a freshly allocated list element,
 * link it after ps, and return the new element (the new end of the list).
 */
STATIC struct sawpath *
stash(s, len, ps)
	u_char *s;		/* the pathname we want to stash away */
	int len;		/* its length */
	struct sawpath *ps;	/* the previous list element */
{
	register struct sawpath *node;

	/* the array[1] member already accounts for the terminating NUL */
	node = (struct sawpath *)tmalloc(sizeof (struct sawpath) + len);
	memcpy(node->array, s, len);
	node->array[len] = 0;
	node->next = NULL;
	ps->next = node;
	return node;
}

/*
 * Pattern glut() switches to when it runs out of pattern while in
 * superglob mode; glut() also compares against this address to recognize
 * that it is working on the substituted pattern.
 */
STATIC int kleene[] = { '*', '/', 0 };	/* foo|**|  becomes foo|**|*| */

/*
 * This routine is the recursive workhorse of the filename globbing.
 */

/*
 * glut() handles one pathname component per call.  It checks that cwd
 * names a directory, isolates the next component of the pattern at bp,
 * and scans the directory: matching entries are either stashed on the
 * *swp list (when the pattern is exhausted) or descended into by a
 * recursive call on the remainder of the pattern.
 *
 * Returns the number of pathnames stashed by this call and the calls it
 * made, 0 if interrupted or if the directory cannot be opened, and -1 if
 * cwd cannot be stat'ed or is not a directory.
 */

/*
 * Size of the pathname buffer passed in as cwd.  This must agree with the
 * cwd[] array declared in sglob(); it is used to refuse directory entries
 * whose name would not fit instead of writing past the end of the buffer.
 */
#define GLUT_CWDSIZE	4096

int
glut(cwd, pwd, bp, recur, swp)
	u_char	*cwd,		/* always the same buffer */
		*pwd;		/* pointer to end of directories/ inside cwd */
	int	*bp,		/* next character in the name to glob */
		recur;		/* flag: superglob mode, deep dir. descend */
	struct sawpath **swp;	/* in/out: last element of the result list */
{
	int	*start,		/* beginning of simple file name to expand */
		*eoname,	/* end of same */
		*ip,
		havepattern,	/* the simple file name contains glob chars */
		flag,		/* remember to put the trailing / on cwd back */
		count,		/* number of expansions */
		namlen,		/* filename length */
		i;
	struct stat stbuf;
	struct dirent *dp;
	DIR *dirp;
	
	if (interrupted)
		return 0;
	/* strip the trailing / so cwd can be handed to stat and opendir */
	if (pwd > cwd+1) {
		*--pwd = '\0';
		flag = 1;
	} else
		flag = 0;
	/* printf("%s:\n", cwd); */
	/*
	 * must do stat since assuming opendir will
	 * fail on files might not be portable.
	 */
	if ((*cwd == '\0' && statfcn(".", &stbuf) < 0)
	    || (*cwd != '\0' && statfcn(cwd, &stbuf) < 0)
	    || (stbuf.st_mode & S_IFMT) != S_IFDIR)
		return -1;
again:
	while (*bp != '\0' && BYTE(*bp) == '/')
		++bp;
	if (*bp == '\0') {
		if (recur)	/* we're at the end of the road of a |**| */
			bp = kleene;
		else {
			*swp = stash(cwd, pwd-cwd, *swp);
			return 1;
		}
	}
	/* unmasked compares: a quoted * carries QUOTEBYTE and won't match */
	if (*bp == '*' && *(bp+1) == '*' && BYTE(*(bp+2)) == '/') {
		recur = 1;	/* superglob mode */
		bp += 2;
		if (*(bp+1))
			goto again;
		else
			++bp;	/* start and eoname will point at 0 */
	}
	start = bp;
	while (*bp != '\0' && BYTE(*bp) != '/')
		++bp;
	eoname = bp;

	/*
	 * Now we have a local name between start and eoname, relative to
	 * the directory we have open. search through the directory for
	 * that name, then do the whole thing again, recursively.
	 */
	
	havepattern = count = 0;
	/* optimization... is it worth globbing in inner loop far below? */
	for (ip = start; ip < eoname; ++ip)
		if (*ip == '*' || *ip == '?' || *ip == '[') {
			havepattern = 1;
			break;
		}
	if ((*cwd == '\0' && (dirp = opendir(".")) == (DIR *)0)
	    || (*cwd != '\0' && (dirp = opendir((char *)cwd)) == (DIR *)0)) {
		perror((char *)cwd);
		return 0;
	}
	if (flag) {
		*pwd++ = '/';
		*pwd = '\0';
	}

	/* major loop */

	for (dp = readdir(dirp); dp != NULL; dp = readdir(dirp)) {

		/* a * won't match .files */
		if (*start == '*' && start == eoname - 1
		    && dp->d_name[0] == '.')
			continue;

		/* if we can't match the simple name, forget it */
		if (!recur && glob_match(start, eoname, dp->d_name) == 0)
			continue;
		namlen = strlen(dp->d_name);
		/*
		 * The name, the / a recursive call puts back after it, and
		 * the terminating NUL must all fit in cwd.  Skip an entry
		 * that would run off the end rather than overflow the buffer.
		 */
		if ((pwd - cwd) + namlen + 2 > GLUT_CWDSIZE)
			continue;
		strcpy((char *)pwd, dp->d_name);
		if (recur && !(dp->d_name[0] == '.' && (dp->d_name[1] == '\0'
			|| (dp->d_name[1] == '.' && dp->d_name[2] == '\0')))) {
			/* in superglob mode... glut returns -1 if is a file */
			i = glut(cwd, pwd+namlen+1, start, recur, swp);
			if (i >= 0) {
				count += i;
				/* the recursion scribbled on cwd, redo the name */
				strcpy((char *)pwd, dp->d_name);
				if (*start != 0 && start != kleene)
					continue;
				++count;
				*swp = stash(cwd, (pwd+namlen)-cwd, *swp);
			}
		}
		if (*eoname == 0) {	/* end of the globbable name */
			/* we aren't interested in descending directories */
			if (!recur || glob_match(start, eoname, dp->d_name))
				++count,
				*swp = stash(cwd, (pwd+namlen)-cwd, *swp);
		} else if (!recur) {
			/* we are only interested in directories */
			i = glut(cwd, pwd+namlen+1, eoname, recur, swp);
			count += (i > 0 ? i : 0);
		}
		/* a literal name can match only once, so stop looking */
		if (recur || havepattern)
			continue;
		else
			break;
	}
#ifdef	BUGGY_CLOSEDIR
	/*
	 * Major serious bug time here;  some closedir()'s
	 * free dirp before referring to dirp->dd_fd. GRRR.
	 * XX: remove this when bug is eradicated from System V's.
	 */
	close(dirp->dd_fd);
#endif
	closedir(dirp);
	return count;
}

/*
 * This is a heavily recursive globbing function, it uses the string (which
 * is actually an int array as mentioned above) as a DFA that it interprets.
 * This routine should be kept in sync with the case-statement globbing
 * in the interpreter.  Unfortunately the requirements are different enough
 * that sharing code would be hard.
 */

/*
 * Match the NUL-terminated string s against a pattern stored one
 * character per int and terminated by a 0 element.  If eopattern is not
 * NULL the pattern is taken to end there (glut() passes the position of
 * the / or 0 that ends the current pathname component).  Elements carrying
 * the QUOTEBYTE flag never act as metacharacters, they only match their
 * own character.  Returns 1 on a match and 0 otherwise.
 *
 * A bracket expression that is not closed before the end of the pattern
 * yields 0, or 1 if it started with [! -- the scan never looks beyond the
 * terminator or beyond eopattern to find the closing ].
 */

/* true once p has reached the end of the pattern: the 0 element or e */
#define GLOB_PATEND(p, e)	(*(p) == '\0' || ((e) != NULL && (p) >= (e)))

int
glob_match(pattern, eopattern, s)
	register int	*pattern, *eopattern;
	register u_char	*s;
{
	register int i, i2, sense;

	while (eopattern == NULL || pattern < eopattern) {
		switch (*pattern) {
		case '*':
			while (*pattern == '*')
				pattern++;
			/* try the rest of the pattern at every suffix of s */
			do {
				if (glob_match(pattern, eopattern, s))
					return 1;
			} while (*s++ != '\0');
			return 0;
		case '[':
			if (*s == '\0')
				return 0;
			sense = (*(pattern+1) != '!');	/* 0 for [!...] */
			if (!sense)
				++pattern;
			while ((*++pattern != ']') && (*pattern != *s)) {
				/* ran out of pattern before finding the ] */
				if (GLOB_PATEND(pattern, eopattern))
					return !sense;
				/* a range needs an upper bound inside the pattern */
				if (*(pattern+1) == '-'
				    && !GLOB_PATEND(pattern+2, eopattern)
				    && (i2 = *(pattern+2)) != ']') {
					i2 = (i2 < 128) ? i2 : 127;
					/* the lower bound was compared above */
					for (i = (*pattern)+1; i <= i2; i++)
						if (i == *s) {
							if (sense)
								goto ok;
							else
								return 0;
						}
					pattern += 2;
				}
			}
			if ((*pattern == ']') == sense)
				return 0;
ok:
			/* skip the remainder of the bracket expression */
			while (*pattern++ != ']')
				if (GLOB_PATEND(pattern, eopattern))
					return 0;
			s++;
			break;
		case '?':
			if (*s == '\0')
				return 0;
			s++;
			pattern++;
			break;
		case '\0':
			return (*s == '\0');
		default:
			/* literal, or a quoted metacharacter (flag set) */
			if (BYTE(*pattern++) != *s++)
				return 0;
		}
	}
	return (*s == '\0');
}

#undef GLOB_PATEND

/* 
 * Mash the linked list of buffers passed into a single buffer on return.
 */

/*
 * Concatenate the strings of the buffer list d into one newly allocated
 * buffer, stored through both bufp and ibufp (they alias the same memory).
 *
 * Returns -1 if the caller should just use d unchanged, 1 if *ibufp holds
 * a glob pattern (one int per character, quoted ones flagged QUOTEBYTE,
 * terminated by a 0 element), and 0 if *bufp holds a plain NUL-terminated
 * string.
 */
int
squish(d, bufp, ibufp)
	struct conscell *d;
	u_char **bufp;
	int **ibufp;
{
	register struct conscell *cell;
	register u_char *src, *out;
	register int *iout;
	register int globbing, quoted, total;
	u_char *buf;
	int *ibuf;

	if ((LIST(d) || ISQUOTED(d)) && cdr(d) == NULL)
		return -1;

	/* how much space will unexpanded concatenation of buffers take? */
	total = 0;
	globbing = 0;
	for (cell = d; cell != NULL; cell = cdr(cell)) {
		if (cell->string == NULL)
			continue;
		for (src = cell->string; *src != '\0'; ++src) {
			if (globbing || globchars[*src] == 0)
				continue;
			/* a first buffer that is exactly "[" doesn't count */
			if (src == d->string
			    && *src == '[' && *(d->string+1) == '\0')
				continue;
			globbing = 1;
		}
		total += src - cell->string;
	}

	/* allocate something large enough to hold integer per char */
	buf = (u_char *)tmalloc((total+1)*(globbing ? sizeof(int) : 1));
	*bufp = buf;
	ibuf = (int *)buf;
	*ibufp = ibuf;

	/* f option disables filename generation */
	globbing = globbing && !isset('f');
	if (!globbing && cdr(d) == NULL)
		return -1;

	out = buf;
	iout = ibuf;
	for (cell = d; cell != NULL; cell = cdr(cell)) {
		if (!cell->string)
			continue;
		src = cell->string;
		if (globbing) {
			/*
			 * Create int array with quoted characters
			 * marked by the QUOTEBYTE bit.
			 * NOTE(review): a backslash is consumed here even
			 * in a quoted buffer, while the non-globbing branch
			 * below copies quoted buffers verbatim -- confirm
			 * the asymmetry is intended.
			 */
			quoted = ISQUOTED(cell) ? QUOTEBYTE : 0;
			while (*src != '\0') {
				if (*src == '\\' && *(src+1) != '\0') {
					++src;
					*iout++ = *src | QUOTEBYTE;
				} else
					*iout++ = *src | quoted;
				++src;
			}
		} else if (ISQUOTED(cell)) {
			while (*src != '\0')
				*out++ = *src++;
		} else {
			/* unquoted text: drop each escaping backslash */
			while (*src != '\0') {
				if (*src == '\\' && *(src+1) != '\0')
					++src;
				*out++ = *src++;
			}
		}
	}
	if (globbing)
		*iout = 0;
	else
		*out = '\0';
	return globbing;
}

/*
 * Given a buffer list, check all non-quoted non-list portions for
 * file globbing characters and if relevant perform filename expansion.
 * The caller relies on the return value never being NULL.
 */

/*
 * Turn the buffer list d into one argv word, or into the sorted list of
 * pathnames it expands to.  When a pattern matches nothing, the pattern
 * text itself (quoting flags stripped) is returned as the word.
 */
STATIC struct conscell *
glob(d)
	struct conscell *d;
{
	register u_char *out;
	register int *in;
	struct conscell *matches;
	u_char *buf;
	int *ibuf;
	int verdict;

	verdict = squish(d, &buf, &ibuf);
	if (verdict == -1)
		return d;		/* squish says: use d as it is */
	if (verdict == 1) {
		matches = sglob(ibuf);
		if (matches != NULL)
			return matches;
		/*
		 * No match.  Narrow the int pattern to a byte string in
		 * place; buf and ibuf are the same memory, and the write
		 * position never gets ahead of the read position.
		 */
		out = buf;
		in = ibuf;
		while (*in != '\0') {
			*out++ = BYTE(*in);
			++in;
		}
		*out = '\0';
	} else if (verdict != 0) {
		abort();
		/* NOTREACHED */
		return 0;
	}
	return newstring(buf);
}


/*
 * This function is called with the unexpanded buffer contents (usually
 * a linked list of strings) just prior to it being added as a command
 * argument.  Expansion involves scanning unquoted strings for whitespace
 * (as defined by IFS) and breaking those apart into multiple argv's,
 * as well as filename globbing of the resulting unquoted strings.
 * The return value is a list of argv's.
 */

struct conscell *
expand(d)
	register struct conscell *d;	/* first buffer of the unexpanded word */
{
	/*
	 * head is the first buffer of the argv word being accumulated (NULL
	 * when none is in progress), next is the saved successor of d (the
	 * list gets cut while we walk it), and pav is where the next piece
	 * of the result list must be linked in.
	 * NOTE(review): d is assumed to be non-NULL; with an empty list,
	 * orig->prev at the bottom of the function dereferences NULL.
	 */
	register struct conscell *tmp, *head, *next, *orig;
	struct conscell *globbed, **pav;
	register char *cp;

	/* grindef("EXP = ", d); */
	orig = d;
	pav = &globbed;
	for (head = d; d != NULL; d = next) {
		if (head == NULL)
			head = d;
		next = cdr(d);
		if (LIST(d) || ISQUOTED(d)) {
			/* never split or scanned, stays part of the word at head */
			continue;
		} else if (ISELEMENT(d)) {
			if (head != d) {
				/*
				 * Flush the word collected so far.
				 * NOTE(review): this cuts the list right after
				 * head, not after the buffer preceding d, so
				 * any buffers in between are dropped -- confirm
				 * that at most one buffer can precede an
				 * element.
				 */
				cdr(head) = NULL;
				*pav = glob(head);
				pav = &cdr(s_last(*pav));
			}
			head = NULL;
			/* the element goes into the result as a copy, flag cleared */
			d = copycell(d);
			d->flags &= ~ELEMENT;
			*pav = d;
			pav = &cdr(d);
			continue;
		}
		/* null strings should be retained */
		/* printf("checking '%s'\n", d->string); */
		cp = (char *)d->string;
		if (head == d) {
			/* skip leading whitespace */
			while (*cp != '\0' && WHITESPACE(*cp))
				++cp;
			d->string = (u_char *)cp;
		}
		while (*cp != '\0') {
			if (WHITESPACE(*cp)) {
				/* can do this because stored data was copied */
				*cp++ = '\0';
				cdr(d) = NULL;
				/* wrap the stuff at head into its own argv */
				/* printf("wrapped '%s'\n", d->string); */
				*pav = glob(head);
				pav = &cdr(s_last(*pav));
				/* now find the continuation */
				while (*cp != '\0' && WHITESPACE(*cp))
					++cp;
				if (*cp == '\0') {
					/* nothing but whitespace was left over */
					head = NULL;
					break;
				} else {
					/* the rest of this buffer starts a new word */
					head = d = conststring((u_char *)cp);
					cdr(head) = next;
				}
			}
			/*
			 * After a split this steps over the first character
			 * of the continuation, which is known not to be
			 * whitespace.
			 */
			++cp;
		}
	}
	if (head != NULL) {
		/* printf("trailing '%s'\n", head->string); */
		/* glob is guaranteed to not return NULL */
		*pav = glob(head);
		pav = &cdr(s_last(*pav));
	}
	*pav = NULL;
	/*
	 * If the result starts with a different cell than the input did,
	 * hand the input's prev and pflags over to the new first cell.
	 */
	if (orig->prev != globbed->prev) {
		s_set_prev(orig->prev, globbed);
		globbed->pflags = orig->pflags;
	}
	return globbed;
}
