;; ;; Put this file in the Extras directory in the directory in ;; which you start Arc. ;; Source: http://www.stat.umn.edu/arc ;; ;;; ;;; Methods to allow for simple cross-validation ;;; June 26, 1999 ;;; Modified (bug-fix) January 12, 2000 ;;; Cleanup: January 31, 2002 ;;; ;;; Tested for GLIMs but should be OK for nonlinear models, too. (defmeth dataset-proto :cross-validation (&key validation-set (fraction .50) remove) "Method args: (&key validation-set (fraction .33) new remove) This method sets up cross validation. A validation set is selected, either according to the subscripts given in the keyword validation-set, or else selected at random with probability fraction if validation-set is not set. All models will be fit based only on those cases not in the validation-set. For cases wtihout missing values in the validation set the deviance or other summaries will be computed and displayed. Calling this method a second time creates a new validation set." (cond (remove (send self :delete-slot 'cross-validation) (send self :toggle-cases) nil) (t (send self :add-slot 'cross-validation t) (send self :add-slot 'validation-set (if validation-set validation-set (select (order (uniform-rand (send self :num-cases))) (iseq (floor (* fraction (send self :num-cases))))))) (let* ((inc (send self :included))) (setf (select inc (slot-value 'validation-set)) nil) (send self :included inc) t)))) (defmeth dataset-proto :cv (&rest args) "Method args: (&rest args) alias for :cross-validation." (apply #'send self :cross-validation args)) ;; ;; override for display methods to add to the standard output ;; This could conflict with other methods also try to add output ;; (defmeth glim-proto :display (&rest args) (apply #'send self :display-estimates args) (when (send self :data) (when (send (send self :data) :has-slot 'cross-validation) (send self :display-cross-validation)))) (defmeth normalreg-proto :display (&rest args) (cond ((eq (send self :link) identity-link) (call-method regression-model-proto :display) (when (send self :data) (when (send (send self :data) :has-slot 'cross-validation) (send self :display-cross-validation)))) (t (call-next-method :anova nil)))) (defmeth nonlin-model-proto :display () (send self :display-estimates :t-value "Wald test" :r-squared nil) (send self :display-anova) (print-termination-reason (slot-value 'termination-reason)) (when (send self :data) (when (send (send self :data) :has-slot 'cross-validation) (send self :display-cross-validation)))) ;; ;; This method computes the cross-validation statsitics that are to be ;; displayed. To get other statistics, simply rewrite this method ;; and the display methods below. ;; (defmeth regression-model-proto :cross-validation-statistics () "Method args: () Computes the deviance and sample size for the fully-observed cases in the validation set for cross validation." (let* ((dataset (send self :data)) (val-set (send dataset :slot-value 'validation-set)) (devs (select (send self :cv-values) val-set)) (obs (find-obs devs))) (if (> (length obs) 0) (list :deviance (sum (select devs obs)) :df (length obs))))) (defmeth glim-proto :cv-values () (send self :set-deviances)) (defmeth nonlin-model-proto :cv-values () (^ (send self :residuals) 2)) (defmeth regression-model-proto :display-cross-validation () "Method args: () Displays the deviance and sample size for the fully-observed cases in the validation set for cross validation." (let* ((ans (send self :cross-validation-statistics))) (when ans (apply #'send self :display-cross-validation1 ans)))) (defmeth glim-proto :display-cross-validation1 (&key deviance df) (format t "~%Cross validation summary:~%") (format t "Deviance:~24t~13a~%" (optfmt deviance)) (format t "Mean Deviance:~24t~13a~%" (optfmt (/ deviance df))) (format t "Number of observations:~24t~8d~%" df)) (defmeth normalreg-proto :display-cross-validation1 (&rest args) (apply #'call-method regression-model-proto :display-cross-validation1 args)) (defmeth regression-model-proto :display-cross-validation1 (&key deviance df) (format t "~%Cross validation summary of cases not used to get estimates:~%") (format t "Sum of squared deviations:~30t~13a~%" (optfmt deviance)) (format t "Mean squared deviation:~30t~13a~%" (optfmt (/ deviance df))) (format t "Sqrt(mean squared deviation):~30t~13a~%" (optfmt (sqrt (/ deviance df)))) (format t "Number of observations:~30t~8d~%" df)) (defmeth regression-model-proto :press () "Method args: () Computes the Predicted Residual Sum of Squares for a regression model." (sum (^ (select (/ (send self :residuals) (- 1 (send self :leverages))) (which (send self :included))) 2))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; interface stuff ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (defun cv-dialog (&optional (frac .5)) "Gets necessary information to setup cross validation." (let* ((title (send text-item-proto :new (format nil "Setup cross-validation... "))) (fraction (send text-item-proto :new "Fraction of cases in the validation sample")) (whichcases (send text-item-proto :new "List of cases in the validation sample (overrides fraction)")) (get-fraction (send edit-text-item-proto :new (format nil "~a" frac) :text-length (apply #'ms *text-width-short*))) (get-cases (send edit-text-item-proto :new "" :text-length (apply #'ms *text-width-long*))) (remove (send toggle-item-proto :new "Remove cross-validation" :value nil)) (cancel (send modal-button-proto :new "Cancel")) (get-answer #'(lambda () (let ((cases (eval (send get-cases :value)))) (setf cases (if (listp cases) cases nil)) (list :fraction (send get-fraction :value) :validation-set cases :remove (send remove :value))))) (ok (send modal-button-proto :new "OK" :action #'(lambda () (funcall get-answer)))) (help (send modal-button-proto :new "Help" :action #'(lambda () (cv-dialog-help frac)))) (dialog (send modal-dialog-proto :new (list (list title) (list fraction) (list get-fraction) (list whichcases) (list get-cases) (list remove) (list ok cancel help))))) (send dialog :default-button ok) (send dialog :modal-dialog))) (defun cv-dialog-help (fraction) (big-message-dialog "Cross-validation is used to divide the data into to parts. Estimates are computed using one part called the construction set, and fit statistics are computed for both the construction set and the remaining validation set. In this dialog, set the fraction equal to the fraction of the data to be put into the validation set (selected at random). If you wish, you can specify the validation set expicitly by typing a list of case indices like '(1 2 3 4 5) or (iseq 0 99) or if c evaluates to a list of indices, you can type c." "If you check remove, then cross-validation is ended and all cases are restored. Choosing an explicit validation set has priority over choosing a fraction, and choosing remove has priority over the other two. You can select this item repeatedly, each time getting a different cross-validation sample." ) (cv-dialog fraction)) (rc-menu-item 'cv-item "Cross-validation..." :start-cv) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; The next method does not work because cv is set up as a regression ;;;; menu item, but it should be a dataset menu item. (defun cv-menu-item () (let* ((in (position 'cv-item *arc-dataset-menu-items*))) (unless in (defparameter *arc-dataset-menu-items* (combine *arc-dataset-menu-items* 'cv-item))))) (cv-menu-item) (defmeth dataset-proto :start-cv () (let* ((ans (cv-dialog))) (when ans (apply #'send self :cv ans))))