# alcune funzioni richiamate sono nel package rmf # che va quindi caricato come tale o come workspace di R library(rmf) # La media campionaria (pag. 187) set.seed(654321) (x <- sample(c(0:9),size=2,replace=TRUE)) mean(x) # pag. 188 set.seed(654321) xx <- sample(c(0:9),size=2*10000,replace=TRUE) X <- matrix(xx, ncol=2) xm <- rowMeans(X) frq(xm) # x <- c(0:9) tbl <- outer(x,x,"+") (tbl <- tbl/2) f <- prop.table(table(tbl)) x <- dimnames(f); x <- as.numeric(unlist(x)) rbind(x,f) # Figura 4.1 (pag. 189) # ------------------------------------------------------------------------------- # (a) ff <- prop.table(table(xm)) xx <- dimnames(ff); xx <- as.numeric(unlist(xx)) plot(xx,as.numeric(ff),type="h",ylab="Frequenza relativa",xlab="Media campionaria") # (b) plot(x,as.numeric(f),type="h",ylab="Probabilità",xlab="Media campionaria") # ------------------------------------------------------------------------------- # pag. 190 set.seed(654321) x <- sample(c(0:9),size=10*10000,replace=TRUE) X <- matrix(x, ncol=10) xm <- rowMeans(X) c(min(xm),max(xm)) set.seed(654321) x <- sample(c(0:9),size=100*10000,replace=TRUE) X <- matrix(x, ncol=100) xm <- rowMeans(X) c(min(xm),max(xm)) set.seed(654321) x <- sample(c(0:9),size=1000*10000,replace=TRUE) X <- matrix(x, ncol=1000) xm <- rowMeans(X) c(min(xm),max(xm)) # pag. 191 set.seed(123456) out <- matrix(nrow=128,ncol=3) for (n in 1:128) { x <- sample(c(0:9),size=n*10000,replace=TRUE) X <- matrix(x, ncol=n) xm <- rowMeans(X) out[n,1] <- n out[n,2] <- mean(xm) out[n,3] <- var(xm) } n <- c(1,2,4,8,16,32,64,128) colnames(out) <- c("n","Media","Varianza") out[n,] k <- out[,1]*out[,3] round(k[n],3) # Figura 4.2 (pag. 192) # ------------------------------------------------------------------------------- par(mfrow=c(2,2)) x <- out[,1]; y <- out[,2] plot(c(0,130),c(4.4,4.6),type="n",ylab="media delle medie campionarie",xlab="dimensione del campione",main="(a)") points(x,y) abline(h=4.5) x <- out[,1]; y <- out[,3] plot(c(0,130),c(0,8.5),type="n",ylab="varianza delle medie campionarie",xlab="dimensione del campione",main="(b)") points(x,y) x <- out[,1]; y <- out[,3] plot(c(20,130),c(0,0.6),type="n",ylab="varianza delle medie campionarie",xlab="dimensione del campione",main="(c)") points(x,y) x <- out[,1]; y <- out[,3]*x plot(c(0,130),c(6.5,10),type="n",ylab="varianza x n",xlab="dimensione del campione",main="(d)") points(x,y) abline(h=8.25) par(mfrow=c(1,1)) # ------------------------------------------------------------------------------- # Figura 4.3 (pag. 194) # ------------------------------------------------------------------------------- plotm <- function(x,p,n,mn,mx,seed=123456,nrep=1000000,col="darkgray") { set.seed(seed) xx <- sample(x,size=n*nrep,prob=p,replace=TRUE) xx <- matrix(xx,ncol=n) xx <- rowMeans(xx) xx <- as.vector(xx) tbl <- table(xx) f <- prop.table(tbl) plot(f, xlim=c(mn,mx), type="h", col=col, axes=FALSE, xlab="", ylab="") } # uniforme (colonna a) x <- c(1:201) p <- rep(1,length(x)) p <- p/sum(p) # plot(x,p,xlim=c(1,201),type="h",col="darkgray", axes=FALSE, xlab="", ylab=""); box(); title("(a)") plotm(x,p,2,1,201) plotm(x,p,5,1,201) plotm(x,p,20,1,201) # bimodale (colonna b) x <- c(1:201) p <- c(100:0,1:100) p <- p/sum(p) # plot(x,p,xlim=c(1,201),type="h",col="darkgray", axes=FALSE, xlab="", ylab=""); box(); title("(b)") plotm(x,p,2,1,201) plotm(x,p,5,1,201) plotm(x,p,20,1,201) # discendente (colonna c) x <- c(1:201) p <- c(201:1) p <- p/sum(p) # plot(x,p,xlim=c(1,201),type="h",col="darkgray", axes=FALSE, xlab="", ylab=""); box(); title("(c)") plotm(x,p,2,1,201) plotm(x,p,5,1,201) plotm(x,p,20,1,201) # ------------------------------------------------------------------------------- # Alcune simulazioni sulla bontà del teorema centrale (pag. 195) simtc <- function(n, nrep=10000) { x <- runif(n*nrep) X <- matrix(x,ncol=n) xm <- apply(X,1,mean) qqn(xm) } # Figura 4.4 (pag. 196) # ------------------------------------------------------------------------------- set.seed(654321) simtc(2) # a simtc(10) # b # ------------------------------------------------------------------------------- # Figura 4.5 (pag. 196) # ------------------------------------------------------------------------------- data(bmi) hist(bmi$bmi); box() # a qqn(bmi$bmi) # b # ------------------------------------------------------------------------------- # Figura 4.6 (pag. 197) # ------------------------------------------------------------------------------- simtc <- function(n, y, nrep=10000) { x <- sample(y,n*nrep,replace=TRUE) X <- matrix(x,ncol=n) xm <- apply(X,1,mean) qqn(xm) } set.seed(654321) simtc(10, bmi$bmi) # a simtc(30, bmi$bmi) # b # ------------------------------------------------------------------------------- # Figura 4.7 (pag. 198) # ------------------------------------------------------------------------------- set.seed(123456) x <- rlnorm(1000) qqn(x) # a qqn(log(x)) # b # ------------------------------------------------------------------------------- # Figura 4.8 (pag. 198) # ------------------------------------------------------------------------------- simtc <- function(n, nrep=10000) { x <- rlnorm(n*nrep) X <- matrix(x,ncol=n) xm <- apply(X,1,mean) qqn(xm) } set.seed(654321) simtc(100) # a simtc(1000) # b # ------------------------------------------------------------------------------- # Figura 4.9 (pag. 199) # ------------------------------------------------------------------------------- set.seed(123456) x <- rexp(1000) qqn(x) # a qqn(log(x)) # b # ------------------------------------------------------------------------------- # Figura 4.10 (pag. 200) # ------------------------------------------------------------------------------- # a simtc <- function(n, nrep=10000) { x <- rexp(n*nrep) X <- matrix(x,ncol=n) xm <- apply(X,1,mean) qqn(xm) } set.seed(654321) simtc(100) # b simtc <- function(n, nrep=10000) { x <- rexp(n*nrep) X <- matrix(log(x),ncol=n) xm <- apply(X,1,mean) qqn(xm) } set.seed(654321) simtc(100) # ------------------------------------------------------------------------------- # Errore stndard e dimensione del campione (pag. 204) n <- c(1:1000) a <- 2*1.96*sqrt(8.25/n) k <- first(2, a, type="LE") cbind(n[(k-1):k],a[(k-1):k]) set.seed(123456) x <- sample(c(0:9),size=32*10000,replace=TRUE) X <- matrix(x, ncol=32) xm <- rowMeans(X) aa <- xm < 3.5; sum(aa) bb <- xm > 5.5; sum(bb) length(xm)-sum(aa)-sum(bb) ProbNorm(da=3.5,a=5.5,mu=4.5,sigma=sqrt(8.25/32)) # La proporzione campionaria (pag. 205) simpc <- function(n,p, nrep=10000) { x <- rbinom(nrep,n,p) xm <- x/n z <- qnorm(0.05, lower.tail=FALSE) # 10% me <- z * sqrt(p*(1-p)/n) print(cbind(p-me, p+me)) print(quantile(xm,probs=c(0.05,0.95),type=2)) z <- qnorm(0.025, lower.tail=FALSE) me <- z * sqrt(p*(1-p)/n) print(cbind(p-me, p+me)) print(quantile(xm,probs=c(0.025,0.975),type=2)) } n <- 10; p <- 0.5 set.seed(654321) simpc(n,p) n <- 50; p <- 0.5 set.seed(654123) simpc(n,p) n <- 100; p <- 0.5 set.seed(456321) simpc(n,p) n <- 50; p <- 0.1 set.seed(654321) simpc(n,p) n <- 100; p <- 0.1 set.seed(654123) simpc(n,p) n <- 1000; p <- 0.01 set.seed(123321) simpc(n,p) # La varianza campionaria (pag. 209) X <- c(0:9) n <- 3 nrep <- 1000000 set.seed(654321) x <- sample(X,n*nrep,replace=TRUE) x <- matrix(x,ncol=3) job <- function(x) ((x[1]-4.5)^2+(x[2]-4.5)^2+(x[3]-4.5)^2)/3 out <- apply(x,1,job) mean(out) X <- c(0:9) n <- 3 nrep <- 1000000 set.seed(654321) x <- sample(X,n*nrep,replace=TRUE) x <- matrix(x,ncol=3) job <- function(x) ((x[1]-mean(x))^2+(x[2]-mean(x))^2+(x[3]-mean(x))^2)/3 out <- apply(x,1,job) mean(out) mean(out)*3/2 # pag. 211 simvcnorm <- function (n,m,s2,nrep){ x <- rnorm(n*nrep,m,sqrt(s2)) X <- matrix(x,ncol=n) y <- apply(X,1,var) u <- y*(n-1)/s2 print(c(mean(y),s2)) print(c(mean(u),var(u))) out <- list(y=y,u=u) } set.seed(456456) ris <- simvcnorm(5,170,100,10000) ris <- simvcnorm(10,170,100,10000) ris <- simvcnorm(50,170,100,10000) ris <- simvcnorm(100,170,100,10000) # Figura 4.12 (pag. 213) # ------------------------------------------------------------------------------- qqchisq <- function (z,df,q=c(1:99)/100){ thq <- qchisq(q,df) obq <- quantile(z,q) plot(thq,obq,xlab="Theoretical Quantiles",ylab="Sample Quantiles") abline(0,1) } set.seed(456456) par(mfrow=c(2,2)) ris <- simvcnorm(5,170,100,10000); qqchisq(ris$u,4) ris <- simvcnorm(10,170,100,10000); qqchisq(ris$u,9) ris <- simvcnorm(50,170,100,10000); qqchisq(ris$u,49) ris <- simvcnorm(100,170,100,10000); qqchisq(ris$u,99) par(mfrow=c(1,1)) # ------------------------------------------------------------------------------- # La mediana campionaria (pag. 215) mc <- function(n,mu,sigma,nrep=100000,seed=123456) { set.seed(seed) x <- rnorm(n*nrep,mu,sigma) X <- matrix(x,ncol=n) y <- apply(X,1,median) c(mean(y),var(y),sigma^2/n) } mc(11,170,10) mc(101,170,10) # pag. 216 mc <- function(n,mu,nrep=100000,seed=123456) { set.seed(seed) x <- rpois(n*nrep,mu) X <- matrix(x,ncol=n) y <- apply(X,1,median) y } z <- mc(11,-log(0.5)) table(z) z <- mc(101,-log(0.5)) table(z) g <- function(l) exp(-l) + l*exp(-l) - 0.5 (mu <- uniroot(g,c(1,2))$root) ppois(1,mu) z <- mc(101,mu) table(z) g <- function(l) ppois(10,l) - 0.5 (mu <- uniroot(g,c(10,11))$root) ppois(10,mu) z <- mc(101,mu) table(z) # L'odds ratio campionario (pag. 217) # Figura 4.13 (pag. 218) # ------------------------------------------------------------------------------- # a n <- 100; pa <- 0.5; pb <- 0.5 nrep <- 100000 set.seed(123456) xa <- rbinom(nrep,n,pa) xb <- rbinom(nrep,n,pb) p1 <- xa/n p2 <- xb/n y <- p1 - p2 c(mean(y),var(y)) y1 <- y xlb <- expression(hat(delta)) hist(y1,breaks=50,xlim=c(-0.3,0.3),prob=TRUE,axes=FALSE,ylab="",xlab=xlb,main="") axis(1) box() curve(dnorm(x,0,sqrt(0.005)),lwd=2,add=TRUE) # b n <- 100; pa <- 0.5; pb <- 0.25 nrep <- 100000 set.seed(123456) xa <- rbinom(nrep,n,pa) xb <- rbinom(nrep,n,pb) p1 <- xa/n p2 <- xb/n y <- p1 - p2 c(mean(y),var(y)) y2 <- y xlb <- expression(hat(delta)) hist(y,breaks=50,xlim=c(0,0.5),prob=TRUE,axes=FALSE,ylab="",xlab=xlb,main="") axis(1) box() curve(dnorm(x,(pa-pb),sqrt(0.004375)),lwd=2,add=TRUE) # ------------------------------------------------------------------------------- # Figura 4.14 (pag. 218) # ------------------------------------------------------------------------------- n <- 100; pa <- 0.5; pb <- 0.5 nrep <- 100000 set.seed(123456) xa <- rbinom(nrep,n,pa) xb <- rbinom(nrep,n,pb) x <- cbind(xa,xb) nok <- which((xa == 0) | (xa == n)) if (length(nok) > 0) x <- x[-nok,] xb <- x[,2] nok <- which((xb == 0) | (xb == n)) if (length(nok) > 0) x <- x[-nok,] nrow(x) p1 <- x[,1]/n p2 <- x[,2]/n num <- p1/(1 - p1) den <- p2/(1 - p2) y <- num/den table(sign(y - 1)) y1 <- y c(min(y1),max(y1)) # a xlb <- expression(hat(psi)) hist(y1,breaks=18,xlim=c(0.25,3),prob=TRUE,axes=FALSE,ylab="",xlab=xlb,main="") axis(1) box() # b ly1 <- log(y) c(min(ly1),max(ly1)) xlb <- expression(log(hat(psi))) hist(ly1,breaks=18,xlim=c(-1.3,1.4),ylim=c(0,1.4),prob=TRUE,axes=FALSE,ylab="",xlab=xlb,main="") box() axis(1) curve(dnorm(x,0,sqrt(0.0817)),lwd=2,add=TRUE) # ------------------------------------------------------------------------------- # pag. 221 ly <- log(y) c(mean(ly),exp(mean(ly))) var(ly) ya <- n - x[,1] yb <- n - x[,2] X <- cbind(x,ya,yb) v <- 1/X[,1] + 1/X[,2] + 1/X[,3] + 1/X[,4] mean(v) # pag. 222 n <- 100; pa <- 0.5; pb <- 0.25 nrep <- 100000 set.seed(123456) xa <- rbinom(nrep,n,pa) xb <- rbinom(nrep,n,pb) x <- cbind(xa,xb) nok <- which((xa == 0) | (xa == n)) if (length(nok) > 0) x <- x[-nok,] xb <- x[,2] nok <- which((xb == 0) | (xb == n)) if (length(nok) > 0) x <- x[-nok,] nrow(x) p1 <- x[,1]/n p2 <- x[,2]/n num <- p1/(1 - p1) den <- p2/(1 - p2) y <- num/den table(sign(y - 1)) table(sign(y - 3)) # Figura 4.15 (pag. 223) # ------------------------------------------------------------------------------- # a y1 <- y xlb <- expression(hat(psi)) hist(y1,breaks=50,xlim=c(0.6,8),prob=TRUE,axes=FALSE,ylab="",xlab=xlb,main="") axis(1) box() # b ly1 <- log(y) xlb <- expression(log(hat(psi))) hist(ly1,breaks=50,xlim=c(-0.4,2.7),prob=TRUE,axes=FALSE,ylab="",xlab=xlb,main="") axis(1) box() curve(dnorm(x,log(3),sqrt(0.0966)),lwd=2,add=TRUE) # ------------------------------------------------------------------------------- # pag. 223 ly <- log(y) c(mean(ly),exp(mean(ly))) var(ly) ya <- n - x[,1] yb <- n - x[,2] X <- cbind(x,ya,yb) v <- 1/X[,1] + 1/X[,2] + 1/X[,3] + 1/X[,4] mean(v) # Il rischio relativo campionario (pag. 224) # Figura 4.16 (pag. 224) # ------------------------------------------------------------------------------- # a n <- 10; ma <- 8; mb <- 8 nrep <- 100000 set.seed(123456) xa <- rpois(nrep,ma*n) xb <- rpois(nrep,mb*n) x <- cbind(xa,xb)/n m1 <- x[,1] m2 <- x[,2] y <- m1 - m2 c(mean(y),var(y)) # y1 <- y xlb <- expression(hat(delta)) hist(y1,breaks=50,xlim=c(-6,6),prob=TRUE,axes=FALSE,ylab="",xlab=xlb,main="") axis(1) box() curve(dnorm(x,0,sqrt(1.6)),lwd=2,add=TRUE) # b n <- 10; ma <- 8; mb <- 4 nrep <- 100000 set.seed(123456) xa <- rpois(nrep,ma*n) xb <- rpois(nrep,mb*n) x <- cbind(xa,xb)/n m1 <- x[,1] m2 <- x[,2] y <- m1 - m2 c(mean(y),var(y)) # y2 <- y xlb <- expression(hat(delta)) hist(y2,breaks=50,xlim=c(-1,9),prob=TRUE,axes=FALSE,ylab="",xlab=xlb,main="") axis(1) box() curve(dnorm(x,4,sqrt(1.2)),lwd=2,add=TRUE) # ------------------------------------------------------------------------------- # Figura 4.17 (pag. 226) # ------------------------------------------------------------------------------- n <- 10; ma <- 8; mb <- 8 nrep <- 100000 set.seed(123456) xa <- rpois(nrep,ma*n) xb <- rpois(nrep,mb*n) x <- cbind(xa,xb)/n m1 <- x[,1] m2 <- x[,2] y <- m1/m2 table(sign(y - 1)) # ly <- log(y) c(mean(ly),exp(mean(ly))) var(ly) ta <- ma*n tb <- mb*n v <- 1/ta + 1/tb mean(v) # a y1 <- y xlb <- expression(hat(phi)) hist(y1,breaks=50,xlim=c(0.5,1.8),prob=TRUE,axes=FALSE,ylab="",xlab=xlb,main="") axis(1) box() # b ly1 <- log(y) xlb <- expression(log(hat(phi))) hist(ly1,breaks=50,xlim=c(-0.7,0.7),prob=TRUE,axes=FALSE,ylab="",xlab=xlb,main="") axis(1) box() curve(dnorm(x,0,sqrt(0.025)),lwd=2,add=TRUE) # ------------------------------------------------------------------------------- # Figura 4.18 (pag. 228) # ------------------------------------------------------------------------------- n <- 10; ma <- 8; mb <- 4 nrep <- 100000 set.seed(123456) xa <- rpois(nrep,ma*n) xb <- rpois(nrep,mb*n) x <- cbind(xa,xb)/n m1 <- x[,1] m2 <- x[,2] y <- m1/m2 table(sign(y - 1)) table(sign(y - 2)) # ly <- log(y) c(mean(ly),exp(mean(ly))) var(ly) ta <- ma*n tb <- mb*n v <- 1/ta + 1/tb mean(v) # a y1 <- y c(min(y1),max(y1)) xlb <- expression(hat(phi)) hist(y1,breaks=50,xlim=c(0.9,4),prob=TRUE,axes=FALSE,ylab="",xlab=xlb,main="") axis(1) box() # b ly1 <- log(y) c(min(ly1),max(ly1)) xlb <- expression(log(hat(phi))) hist(ly1,breaks=50,xlim=c(0,1.5),prob=TRUE,axes=FALSE,ylab="",xlab=xlb,main="") axis(1) box() curve(dnorm(x,log(2),sqrt(0.0386)),lwd=2,add=TRUE) # ------------------------------------------------------------------------------- # La distribuzione campionaria di un coefficiente di regressione (pag. 230) # modello lineare x <- seq(150,190,5) m <- -85 + 0.88*x # set.seed non è stato innescato e <- rnorm(9,0,10) y <- m + e rbind(x,m,e=round(e,1),y=round(y,1)) fit <- lm(y ~ x) fit$coefficients # a <- -85 b <- 0.880 nrep <- 100000 set.seed(654321) x <- seq(150,190,5) n <- length(x) m <- a + b*x e <- rnorm(nrep*n,0,10) e <- matrix(e,nrow=n) y <- m + e y <- t(y) # fit <- function(y){ x <- seq(150,190,5) fit <- lm(y ~ x) b <- fit$coefficients b[2] } b <- apply(y,1,fit) c(mean(b),var(b)) # Figura 4.19 (pag. 231) # ------------------------------------------------------------------------------- # a xlb <- expression(hat(b)[1]) hist(b,breaks=50,xlim=c(-0.2,2),prob=TRUE,axes=FALSE,ylab="",xlab=xlb,main="") axis(1) box() curve(dnorm(x,0.88,sqrt(1/15)),lwd=2,add=TRUE) # b qqn(b, main="") # ------------------------------------------------------------------------------- # modello logistico y <- c(54,31) x <- c(1,0) n <- c(100,100) fit <- glm(y/n ~ x, family=binomial("logit"), weights=n) c(fit$coefficients[2],log(3726/1426)) # regressione di Poisson y <- c(87,44) x <- c(1,0) n <- c(10,10) fit <- glm(y ~ x, family=poisson("log"), offset=n) c(fit$coefficients[2],log(8.7/4.4)) # Il bootstrap u <- c(1:100) (x <- sample(u,10,replace=TRUE)) # set.seed non è innescato # n <- length(x) (xb <- sample(x,n,replace=TRUE)) # setdiff(x,xb) # Esempio 4.1 (pag. 236) u <- c(1:100) set.seed(123456) (x <- sample(u,10,replace=TRUE)) # B <- 10000 n <- length(x) set.seed(987654) tmp <- sample(x,n*B,replace=TRUE) xb <- matrix(tmp, nrow = B) dim(xb) # m <- apply(xb,1,mean) var(m) # il bootstrap della mediana (pag. 237) u <- c(1:100) n <- 10 nrep <- 100000 set.seed(123456) x <- sample(u,n*nrep,replace=TRUE) x <- matrix(x, nrow=nrep) m <- apply(x,1,median) var(m) # Esempio 4.2 (pag. 237) m <- 60; s <- 3 n <- 101 set.seed(123456) x <- rnorm(n,m,s) # B <- 10000 n <- length(x) set.seed(654321) tmp <- sample(x,n*B,replace=TRUE) xb <- matrix(tmp, nrow = B) dim(xb) # v <- apply(xb,1,var) var(v) # Esempio 4.3 (pag. 238) n <- 400; pa <- 0.5; pb <- 0.25 nrep <- 100000 set.seed(123456) xa <- rbinom(nrep,n,pa) xb <- rbinom(nrep,n,pb) p1 <- xa/n p2 <- xb/n num <- p1/(1 - p1) den <- p2/(1 - p2) y <- num/den var(y) # n <- 400; pa <- 0.5; pb <- 0.25 set.seed(123456) xa <- rbinom(n,1,pa) xb <- rbinom(n,1,pb) p1 <- sum(xa)/n p2 <- sum(xb)/n num <- p1/(1 - p1) den <- p2/(1 - p2) y <- num/den c(sum(xa),sum(xb),y) # B <- 10000 set.seed(123456) na <- length(xa) y <- sample(xa,na*B, replace=TRUE) ya <- matrix(y,nrow=B) nb <- length(xb) y <- sample(xb,nb*B, replace=TRUE) yb <- matrix(y,nrow=B) dim(ya) dim(yb) # pa <- apply(ya,1,sum)/na pb <- apply(yb,1,sum)/nb num <- pa/(1 - pa) den <- pb/(1 - pb) y <- num/den var(y) # Esempio 4.4 (pag. 240) n <- 100; ma <- 8; mb <- 4 nrep <- 100000 set.seed(123456) xa <- rpois(nrep,n*ma) xb <- rpois(nrep,n*mb) y <- xa/xb var(y) # n <- 100; ma <- 8; mb <- 4 set.seed(123456) xa <- rpois(n,ma) xb <- rpois(n,mb) y <- sum(xa)/sum(xb) c(sum(xa),sum(xb),y) # B <- 10000 set.seed(45612) na <- length(xa) y <- sample(xa,na*B, replace=TRUE) ya <- matrix(y,nrow=B) nb <- length(xb) y <- sample(xb,nb*B, replace=TRUE) yb <- matrix(y,nrow=B) dim(ya) dim(yb) # num <- apply(ya,1,mean) den <- apply(yb,1,mean) y <- num/den var(y)