; -*-Emacs-Lisp-*-
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; File:         mh-address.el
; RCS:          $Header: /users/darrylo/.repository/mh-e/mh-address.el,v 1.4 1998/07/23 22:31:57 darrylo Exp $
; Description:  Automatic email address harvesting
; Author:       Darryl Okahata
; Created:      Fri May 8 10:00:42 1998
; Modified:     Wed Jul 22 20:58:59 1998 (Darryl Okahata) darrylo@sr.hp.com
; Language:     Emacs-Lisp
; Package:      N/A
; Status:       Experimental
;
; (C) Copyright 1998, Hewlett-Packard, all rights reserved.
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(if (or (not (string-match "XEmacs" emacs-version))
	(not (fboundp 'make-hashtable)))
    (error "Sorry, you must be using XEmacs w/hashtable support!"))

(require 'mh-e)			;; mh-e.el must be loaded before this one


(defvar y-mh-address-book-file (progn
				 (mh-find-path)
				 (concat mh-user-path ".mh-e-address-book"))
  "Filename of the mh-e address book")


(defvar y-mh-address-book-initial-size 4096
  "Initial hash table size for the address book, if hashes are used.
The exact size of this number is not really important, although it's
more efficient if this size is initially larger than the number of
address entries.  If the number of addresses goes beyond this,
it's OK; XEmacs will be a bit more efficient if the numer is less,
though.")


(defvar y-mh-parse-address-function nil
  "Optional function to call to parse \"From:\" addresses.")


(defvar y-mh-scan-address-folders-exclude '(
					    "^[+]drafts"
					    "^[+]junkmail"
					    )
  "List of folders to exclude.
This is a list of regular expressions, which match folder names that
should not be used for email address harvesting.  Basically, address
harvesting is not done in any folder that matches any of these regular
expressions.  This variable is used only if
`y-mh-scan-address-folders-include' is nil.")


(defvar y-mh-scan-address-folders-include nil
  "List of folders to include.
This is a list of regular expressions, which match folder names that
should only be used for email address harvesting.  Basically, address
harvesting is done only on folders whose name matches any of these
regular expressions.  This variable takes precedence over
`y-mh-scan-address-folders-exclude'; if both are non-nil, only this
one is used.  This variable must be nil in order for
`y-mh-scan-address-folders-exclude' to be used.")


(defvar y-mh-suppress-names nil
  "List of names to ignore.
This is a list of regular expressions.  Any harvested name that
matches any of these regular expressions will not be added to the
database.  Note that this is the name, and not the email address (the
name is equivalent to the email alias).")


(defvar y-mh-suppress-address '(
				"^root$"
				"^root@"
				"^mailer-daemon@"
				)
  "List of addresses to ignore.
This is a list of regular expressions.  Any harvested address that
matches any of these regular expressions will not be added to the
database.")


(defvar y-mh-suppress-address-local '(
				      )
  "List of addresses to ignore.
This is like `y-mh-suppress-address', except that this list is
appended to the end of `y-mh-suppress-address' before address
suppression is done.  Basically, the union of `y-mh-suppress-address'
and `y-mh-suppress-address-local' is used for address suppression.")


(defvar y-mh-canonicalize-name-function 'y-mh-canonicalize-name-capitalize
  "Function to \"standardize\" a name.
Generally, the function specified here takes a user's name (alias) and
converts it into some useful standard form (capitalized, or all
lower-case, or all upper-case, etc.).")


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; No user variables past this point.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(defvar y-mh-address-book nil
  "The address book database.")


(defvar y-mh-address-book-modified nil
  "Non-nil if the address book has been modified since it was last restored
or saved.")


(defun y-mh-canonicalize-name-capitalize (name)
  "Return a properly capitalized version of `name'."
  (let ( (case-fold-search nil) )
    (setq name (capitalize name))
    (while (string-match "_\\([a-z]\\)" name)
      (setq letter (upcase (match-string 1 name)))
      (replace-match (concat "_" letter) t t name)
      )
    name
    ))


(defun y-mh-parse-address (addr)
  "Parse the address in the \"From:\" header."
  (let (name address retval)
    (catch 'done
      (cond
       ;; "firstname lastname" <foo@bar.com>
       ( (string-match "\"\\(..*\\)\"[ \t]*<\\(..*\\)>" addr)
	 (setq name (match-string 1 addr)
	       address (match-string 2 addr))
	 )
       ;; somename <foo@bar.com>
       ( (string-match "\\([^ \t].*\\b\\)[ \t]*<\\(..*\\)>" addr)
	 (setq name (match-string 1 addr)
	       address (match-string 2 addr))
	 )
       ;; foo@bar.com (my name)
       ( (string-match "\\([^ \t].*\\b\\)[ \t]*(\\(..*\\))" addr)
	 (setq name (match-string 2 addr)
	       address (match-string 1 addr))
	 )
       ;; Call optional, user-supplied function to get address
       ( (and (boundp 'y-mh-parse-address-function)
	      y-mh-parse-address-function
	      (setq retval (funcall y-mh-parse-address-function addr)))
	 (throw 'done nil)
	 )
       ( t
	 (setq address addr)
	 )
       )

      ;; throw out bad addresses, typically from people who try to hide their
      ;; email address.
      (if (string-match "[][* \t\"()<>]" address)
	  (throw 'done nil))

      (setq retval (cons name address))
      )
    retval
    ))


(defun y-mh-update-address (name address)
  "Update the name/address pair in the database.
This function is the only function allowed to modify the database."
  (let (value)
    (catch 'done
      (progn
	(let ( (case-fold-search t) )

	  (if y-mh-canonicalize-name-function
	      (setq name (funcall y-mh-canonicalize-name-function name)))

	  ;; Don't update names that we want suppressed
	  (mapcar '(lambda (pattern)
		     (if (and (stringp pattern)
			      (string-match pattern name))
			 (throw 'done nil))
		     )
		  y-mh-suppress-names)

	  ;; Don't update addresses that we want suppressed
	  (mapcar '(lambda (pattern)
		     (if (and (stringp pattern)
			      (string-match pattern address))
			 (throw 'done nil))
		     )
		  (append y-mh-suppress-address y-mh-suppress-address-local))

	  )

	(if (setq value (gethash name y-mh-address-book))
	    (progn
	      ;; If the name exists, update the address only if it's different
	      (if (string= (car value) address)
		  (throw 'done nil))
	      )
	  (progn
	    ;; If the name doesn't exist, but the address does, remove the
	    ;; current entry.  This is to handle the case where the name
	    ;; changes.
	    (catch 'foundaddress
	      (maphash '(lambda (itemname itemfoo)
			  (if (string= (car itemfoo) address)
			      (progn
				(remhash itemname y-mh-address-book)
				(throw 'foundaddress nil)
				))
			  )
		       y-mh-address-book)
	      )

;	    (catch 'foundaddress
;	      (maphash '(lambda (itemname itemfoo)
;			  (let ( (address itemfoo) )
;			    (if (string= itemfoo address)
;				(progn
;				  (if (string= name itemname)
;				      (throw 'done nil))
;				  (remhash itemname y-mh-address-book)
;				  (throw 'foundaddress nil)
;				  ))
;			    ))
;		       y-mh-address-book)
;	      )


	    ))
	(puthash name (cons address (current-time)) y-mh-address-book)
	(setq y-mh-address-book-modified t)
	))
    ))


(defun y-mh-scan-address-in-buffer ()
  "Harvest an email address from the current buffer."
  (interactive)
  (let ((begin (point-min))
	(case-fold-search t)
	name
	address
	end)
    (catch 'done
      (save-excursion
	(goto-char begin)
	(if (not (re-search-forward "^[ \t]*$" nil t))
	    (throw 'done nil))
	(setq end (point))
	(goto-char begin)

	;; First, look for "From:"
	(if (re-search-forward "^From:[ \t][ \t]*\\(..*\\)$" end t)
	    (let ( (addr (match-string 1)) item)
	      (setq item (y-mh-parse-address addr))
	      (if (car item)
		  (setq name (car item)))
	      (if (cdr item)
		  (setq address (cdr item)))
	      ))

	;; Next, look for "Reply-To:"
	;; If this is not found, "From:" will take precedence (which was,
	;; hopefully, found).
	(if (re-search-forward "^Reply-To:[ \t][ \t]*\\(..*\\)$" end t)
	    (let (item)
	      (setq item (y-mh-parse-address (match-string 1)))
	      (if (car item)
		  (setq name (car item)))
	      (if (cdr item)
		  (setq address (cdr item)))
	      ))

	;; If a name wasn't found, but an address was, try to extract a name
	;; from the address
	(if (and (not name)
		 address
		 (string-match "^\\([^ \t@]\\{1,\\}\\)@" address))
	    (setq name (match-string 1 address)))

	;; If we have both a name and address, update the database
	(if (and name address)
	    (progn
	      ;; In the name, replace all "funny" characters with an underscore.
	      (while (string-match "\\([^A-Za-z0-9_]\\)" name)
		(setq name (replace-match "_" t t name)))
	      ;; Merge adjacent underscores
	      (while (string-match "\\(_\\{2,\\}\\)" name)
		(setq name (replace-match "_" t t name)))

	      (if y-mh-canonicalize-name-function
		  (setq name (funcall y-mh-canonicalize-name-function name)))

	      ;; Try to update the database.
	      ;; If the name/address is already in the database, the database
	      ;; will not be updated.
	      (if (y-mh-update-address name address)
		  ;; The database was updated, so tell the user about this
		  (message (format "%s: %s" name address)))
	      ))
	))
    ))


(defun y-mh-init-address-book (&optional force-init)
  "Initialize and optionally read in the address book"
  (let (file (tmp-buffer " *y-mh-addr*") )
    ;; If the database hasn't been initialized, or we want to force a
    ;; re-initialization ...
    (if (or (not y-mh-address-book) force-init)
	(progn
	  (setq file (expand-file-name y-mh-address-book-file))
	  (setq y-mh-address-book (make-hashtable y-mh-address-book-initial-size
						  'equal))

	  ;; If a saved database file exists, read it in.
	  (if (file-readable-p file)
	      (save-excursion
		(setq tmp-buffer (set-buffer (get-buffer-create tmp-buffer)))
		(buffer-disable-undo tmp-buffer)
		(erase-buffer)
		(insert-file-contents file)
		(goto-char (point-min))
		(if (condition-case nil
			(let (y-mh-init-address-book-data entry name info
							  addr rest)
			  (eval-buffer)
			  (if (and (boundp 'y-mh-init-address-book-data)
				   y-mh-init-address-book-data)
			      (progn
				(while y-mh-init-address-book-data
				  (setq entry 
					(car y-mh-init-address-book-data))
				  (setq name (car entry))
				  (setq info (cdr entry))
				  (setq addr (car info))
				  (setq rest (car (cdr info)))
				  (setq info (cons addr rest))
				  (puthash name info y-mh-address-book)
				  (setq y-mh-init-address-book-data
					(cdr y-mh-init-address-book-data))
				  )
				))
			  nil
			  )
		      (error t))
		    (progn
		      (message (format "Error occurred while reading \"%s\" (press a key)"
				       file))
		      (ding)
		      (sit-for 60)
		      ))
		(kill-buffer tmp-buffer)
		))
	  ))
    ))


(defun y-mh-dump-address-book-to-buffer (&optional buffer human-readable
						   with-date)
  "Format the address book and write it to a buffer.
If `human-readable' is non-nil, the address book is written in a
human-readable form.  If `with-date' is non-nil, the date associated
with each address (when the address was entered into the database) is
also output."
  (let ()
    (y-mh-init-address-book)
    (if (not buffer)
	(setq buffer (current-buffer)))
    (set-buffer (get-buffer-create buffer))
    (buffer-disable-undo)
    (erase-buffer)
    (if (not human-readable)
	(insert "(setq y-mh-init-address-book-data '(\n"))
    (maphash '(lambda (itemname itemfoo)
		(let (date)
		  (setq date (format-time-string "%D" (cdr itemfoo)))
		  (if human-readable
		      (insert (format "%-32s %s%s\n"
				      itemname (car itemfoo)
				      (if with-date
					  (format "\t(%s)" date)
					"")))
		    (insert (format "  (\"%s\" \"%s\" %s)\n"
				    itemname (car itemfoo) (cdr itemfoo)
				    )))
		  ))
	     y-mh-address-book)
    (if human-readable
	(let ( (sort-fold-case t) )
	  (sort-lines nil (point-min) (point-max)))
      (insert "))\n"))
    ))


;(y-mh-dump-address-book-to-buffer "foo" t t)


(defun y-mh-save-address-book ()
  "Save the MH harvested address book"
  (interactive)
  (let (file (tmp-buffer " *y-mh-addr*"))
    (save-excursion
      (setq tmp-buffer (set-buffer (get-buffer-create tmp-buffer)))
      (buffer-disable-undo tmp-buffer)
      (erase-buffer)
      (y-mh-dump-address-book-to-buffer tmp-buffer nil)
      (setq file (expand-file-name y-mh-address-book-file))
      (condition-case nil
	  (write-file file)
	(progn
	  (message "Unable to save y-mh-e address book -- perhaps the disk is full?")
	  (ding)
	  ))
      (kill-buffer tmp-buffer)
      )
    ))


(defun y-mh-address-book-update-aliases (&optional force-init)
  ""
  (let (l)
    (y-mh-init-address-book force-init)
    (maphash '(lambda (name info)
		(setq l (cons (cons name (car info)) l))
		)
	     y-mh-address-book)
    (setq y-mh-additional-aliases-list l)
    ))


(defun y-mh-scan-address ()
  "Try to harvest an email address from the current message of the current folder."
  (catch 'exit
    (let ( (current-folder mh-current-folder) )

      (if y-mh-scan-address-folders-include
	  (let (found)
	    ;; are we in a folder we want to check?
	    (mapcar '(lambda (folder-regexp)
		       (if (string-match folder-regexp current-folder)
			   (setq found t))
		       )
		    y-mh-scan-address-folders-include)
	    (if (not found)
		(throw 'exit nil))
	    )
	(progn
	  ;; don't update folders that we want to ignore
	  (mapcar '(lambda (folder-regexp)
		     (if (string-match folder-regexp current-folder)
			 (throw 'exit nil))
		     )
		  y-mh-scan-address-folders-exclude)
	  ))

      (y-mh-init-address-book)

      ;; OK, look for an address
      (save-excursion
	(save-window-excursion
	  (set-buffer mh-show-buffer)
	  (y-mh-scan-address-in-buffer)
	  (if y-mh-address-book-modified
	      (progn
		(y-mh-address-book-update-aliases)
		(setq y-mh-address-book-modified nil)
		))
	  ))
      )
    ))


(add-hook 'mh-show-hook 'y-mh-scan-address)
(add-hook 'mh-quit-hook 'y-mh-save-address-book)
(y-mh-address-book-update-aliases t)		;; initialize aliases

(provide 'mh-address)
