## code to accompany Lecture 3 of ## Statistical computing with R by ## Robert B. Gramacy ## so the code will wrap appropriately in the slides options(width=45) ## curly braces optional for single expressions ## in the body of a function f <- function(x,y) x + y f <- function(x,y) { x + y } ## default arguments f(1,2) g <- function(x, y=10) { x + y } g(1) ## missing argument f(1) ## overriding the default value g(1,2) ## optional arguments h <- function(x, y) { args <- as.list(match.call()) if(is.null(args\$y)) { y <- 10 } x + y } h(1) h(1,2) ## try/not in slides ## ellipses v <- sqrt(1:100) f <- function(x, ...) { print(x) summary(...) } f("Summarising v:", v, digits=2) ## reading variable-length arguments addemup <- function(x, ...) { args <- list(...) for(a in args) x <- x + a x } addemup(1, 2, 3, 4, 5) ## using return f <- function(a, b) { if(a < b) return(a) b } f(1,2) ## sapply: functions as arguments a <- 1:7 sapply(a, sqrt) ## this is a little silly since sqrt is already ## vectorized sqrt(a) ## try/not in code ## a less silly example mylist <- list(a=1:7, b=21:25, c=101:111) sapply(mylist, length) length(mylist) ## anonymous functions apply.to.three <- function(f) { f(3) } apply.to.three(function(x) { x * 7 }) ## sapply with anonymous functions v <- 1:20 w <- NULL for(i in 1:length(v)) { w[i] <- v[i]^2 } w ## try/not in slides w <- sapply(v, function(i) { i^2 }) w ## try/not in slides ## try/not in slides v <- 1:1e+6 w <- apply(v, function(i) { i^2 })) w <- v^2 ## way faster, more later ## function information and manipilation f <- function(x, y=1, z=2) { x + y + z } ff <- formals(f) ff ## try/not in code ff\$y <- 3 formals(f) <- ff f ## try/not in code body(f) <- expression({ x * y * z }) f ## function argument referral addTheLog <- function(first, second) { first + log(second) } addTheLog(1, exp(4)) addTheLog(second=exp(4), first=1) addTheLog(s=exp(4), f=1) ## side effects with <<- x doesnt.assign.x <- function(i) x <- i doesnt.assign.x(4) x assigns.x <- function(i) x <<- i assigns.x(4) x ## Recursion fib1 <- function(n, verb=0) { if(verb > 0) cat("called fact1(", n, ")\n", sep="") if(n == 1 || n == 2) return(1) else return(fib1(n-1, verb) + fib1(n-2, verb)) } fib1(5) fib1(5, verb=1) ## prime searching with Sieve of Eratosthenes ## siev: sorted vector of sieved numbers ## unsiev: sorted vector of unsieved numbers ## v: verbosity level primesieve <- function(siev, unsiev, v=0) { if(v > 0) { cat("sieved:", siev, "\n") cat("unsieved:", unsiev, "\n"); } p <- unsiev[1] n <- unsiev[length(unsiev)] if(p^2 > n) return(c(siev, unsiev)) else { unsiev <- unsiev[unsiev %% p != 0] siev <- c(siev, p) return(primesieve(siev, unsiev, v)) } } primesieve(c(), 2:200) primesieve(c(), 2:200, 1) ## try/not in code ## density of primes n <- 10000 ps <- primesieve(c(), 2:n) primes <- rep(0, n-1) primes[ps-1] <- 1 density <- cumsum(primes)/(2:n) ## plot on next slide plot(2:n, density, type="l", main="prime density", xlab="n", ylab="density", lwd=2) lines(2:n, 1/log(2:n), col="red") plot(2:n, density*log(2:n), type="l", xlab="n", ylab="", main="prime density * log(n)", lwd=2) ## try with a much larger n to see convergence, ## which is slow ## preallocation ## slow way seq1 <- function(n) { F <- c(1,1) for(i in 3:n) F[i] <- F[i-1]/F[i-2] + F[i-2] F } system.time(s1 <- seq1(100000)) ## much faster seq2 <- function(n) { F <- rep(NA, n) F[1:2] <- 1 for(i in 3:n) F[i] <- F[i-1]/F[i-2] + F[i-2] F } system.time(s2 <- seq2(100000)) ## vectorization s2 <- function(n) { S <- 0 for(i in 1:n) S <- S + i^2 S } system.time(s2(1000000)) system.time(sum((1:1000000)^2)) ## of course, we can always use n(n+1)(2n+1)/6 :) ## big matrix for column sums bigM <- matrix(runif(1e+07), nrow=1000) ## with a double-for loop csfor <- function(M) { cs <- rep(NA, ncol(M)) for(i in 1:ncol(M)) { s <- 0 for(j in 1:nrow(M)) s <- s + M[j, i] cs[i] <- s } } system.time(cs1 <- csfor(bigM)) ## with apply or a single loop system.time(cs2 <- apply(bigM, 2, sum)) system.time({ cs3 <- rep(NA, ncol(bigM)) for(i in 1:ncol(bigM)) cs3[i] <- sum(bigM[,i]) }) ## with colSums system.time(cs4 <- colSums(bigM)) ## matrix/vector products on the cars data X <- cbind(1, as.matrix(trees[,1:2])) y <- trees[,3] XtX <- t(X) %*% X XtXi <- solve(XtX) XtXiXt <- XtXi %*% t(X) XtXiXt %*% y ## or beta.hat <- solve(t(X) %*% X) %*% t(X) %*% y beta.hat ## double-checking against LM coef(lm(y~X[,1]+X[,2])) ## new S3 class myobj <- list(v=sample((1:20)^2), m="squares") class(myobj) <- "myclass" myobj ## new S3 method print.myclass <- function(x, ...) { cat("This is a myclass object containing\n") cat("a", class(x\$v), "vector ") cat("with", length(x\$v), x\$m, "\n") } myobj ## how does it work print ## the formals must match names(formals(print)) ## you can still inspect the contents names(myobj) myobj\$v ## plot generic method plot.myclass <- function(x, sf=TRUE, ...) { if(sf) plot(sort(x\$v), ylab=x\$m, ...) else plot(x\$v, ylab=x\$m, ...) } plot(myobj, main="Parabola!") ## summary generic method summary.myclass <- function(x, ...) { cat("Your", x\$m, "are summarized as\n") summary(x\$v) } summary(myobj) ## view methods methods(plot) methods(class="myclass") ## our own new generic method penultimate <- function(x) UseMethod("penultimate") penultimate.myclass <- function(x) { sort(x\$v)[length(x\$v)-1] } penultimate(myobj) penultimate(myobj\$v) ## now for numeric verctors, leveraging coersion penultimate.numeric <- function(x) { sort(x)[length(x)-1] } penultimate(myobj\$v) M <- matrix(1:10, nrow=2) penultimate(M) ## moved bisection code to bisection.R source("bisection.R") ## test it on a function f <- function(x) log(x) - exp(-x) fr <- bisection(f, 1, 2) fr <- bisection(f, 1, 2, verb=1) ## try/not in slides fr ## now make a printing method print.bisection <- function(x, ...) { cat("Root of:\n") print(x\$f) cat("in (", x\$prog\$xl[1], ", ", x\$prog\$xr[1], ") found after ", nrow(x\$prog), " iterations: ", x\$ans, "\n", "to a tolerance of ", x\$tol, "\n", sep="") } ## try it fr ## and a summary method summary.bisection <- function(object, ...) { print(object, ...) cat("\nProgress is as follows\n") print(object\$prog) } ## try it summary(fr) ## and a plotting method; gridlen specifies the grid on which ## the function is plotted for comparison, and after provides ## the ability to zoom in plot.bisection <- function(object, gridlen=1000, after=NULL, ...) { ## check if there is something to do if(nrow(object\$prog) == 0) { cat("nothing to plot\n") invisible(NULL) } ## check after if(!is.null(after)) { if(after < 0 || after >= nrow(object\$prog)) stop("after must be in 1:(nrow(objext\$prog)-1)") object\$prog <- object\$prog[after:nrow(object\$prog),] } else after <- 0 ## plots the true function xgrid <- seq(object\$prog\$xl[1], object\$prog\$xr[1], length=gridlen) xgrid <- sort(union(xgrid, c(object\$prog\$xl, object\$prog\$xr))) y <- object\$f(xgrid) ## assumes a vectorized function plot(xgrid, y, type="l", col="gray", ...) ## indicates (only) the changes in xl and xr n <- nrow(object\$prog) dl <- duplicated(object\$prog\$xl) text(object\$prog\$xl[!dl], object\$prog\$fl[!dl], ((1:n)+after)[!dl]) dr <- duplicated(object\$prog\$xr) text(object\$prog\$xr[!dr], object\$prog\$fr[!dr], ((1:n)+after)[!dr], col=2, ) ## adds a reference line abline(h=0, col=3, lty=3) } ## try it plot(fr, main="bisection progress", ylab="f(x)", xlab="x") plot(fr, main="bisection progress [zoom]", after=20, ylab="f(x)", xlab="x") plot(fr, main="bisection progress [zoom]", after=25, ylab="f(x)", xlab="x")