# Data-management walkthrough on the "studenti.txt" data set: import,
# inspection, missing values, recoding, derived variables, dates and
# row filtering. Most bare expressions are there only to print a result.
#
# NOTE(review): Missing(), map(), cutpoints() and summ() are not base R;
# presumably they come from the `rmf` package loaded below - confirm.
library(rmf)

# NOTE(review): hard-coded, machine-specific working directory. Kept so
# that the relative file names below (and any later code relying on the
# working directory) resolve as before; consider a project-relative path.
setwd("c:/R-dati/")

# Import ----
# Tab-delimited file with a header row; the value -1 is read as NA.
df <- read.delim("studenti.txt", header = TRUE, na.strings = "-1")

# Inspection ----
str(df)
names(df)
df$eta
df[18, ]
sort(df$eta)
sort(unique(df$eta))
df[18, 4]

# Export the data frame as read (write.table defaults).
write.table(df, file = "output.txt")

# Missing values ----
# Logical matrix flagging NA cells, then the NA count per column.
# is.na()/colSums() avoid apply() coercing the data frame to a matrix.
tmp <- is.na(df)
colSums(tmp)
Missing(df) # presumably a per-variable missing-value summary - confirm

# Recoding `residenza` ----
table(df$residenza)
table(df$residenza, exclude = NULL) # exclude = NULL also counts NA
# NOTE(review): map() here is not purrr::map(); presumably it recodes the
# values of `residenza` to the given labels - confirm against its docs.
tmp <- map(df$residenza, c("TN", "Nord", "CSI", "Estero"), factor = FALSE)
table(tmp)
table(tmp, df$residenza) # cross-check new codes against the old ones
df$residenza <- tmp
class(df$residenza)

# Type inspection and conversion ----
table(df$sex)
mode(df$sex)
class(df$sex)
str(df$sex)
tmp <- as.character(df$sex)
mode(tmp)
class(tmp)

# Factor with default (sorted) levels vs. explicitly ordered levels.
tmp <- factor(df$residenza)
class(tmp)
levels(tmp)
tmp <- factor(df$residenza, levels = c("TN", "Nord", "CSI", "Estero"))
levels(tmp)

# Age classes ----
table(df$eta)
# NOTE(review): presumably bins `eta` at the given cut points, with
# `destra = TRUE` closing intervals on the right - confirm.
tmp <- cutpoints(df$eta, c(19, 20, 22, 24, 56), destra = TRUE)
table(tmp)
class(tmp)
df$etacat <- tmp
names(df)

# Factor <-> numeric round trip ----
table(df$anno)
class(df$anno)
tmp <- factor(df$anno)
class(tmp)
# Go through as.character() so the level labels are converted, not the
# internal integer codes of the factor.
tmp <- as.numeric(as.character(tmp))
class(tmp)
table(tmp)

# Logical variables ----
tmp <- df$altezza > 170
table(tmp)
class(tmp)
tmp <- df$residenza == "TN"
table(tmp, df$residenza)
tmp <- df$residenza != "TN"
table(tmp, df$residenza)

# Derived variables ----
# bmi = peso / (altezza / 100)^2
# (presumably weight in kg and height in cm - confirm units).
df$bmi <- df$peso / ((df$altezza / 100)^2)

# Row means over columns 34 to 45: first propagating NA, then ignoring it.
tmp <- rowMeans(df[, 34:45])
summ(tmp)
tmp <- rowMeans(df[, 34:45], na.rm = TRUE)
summ(tmp)
# Rows whose mean is still missing even with na.rm = TRUE.
which(is.na(tmp))
# NOTE(review): rows 142 and 172 are hard-coded; presumably they are the
# rows reported by which() above - confirm if the input file changes.
df[c(142, 172), 34:45]

# Drop the column `voto` (no effect if it does not exist).
df$voto <- NULL

save(df, file = "studenti.rdata")

# Dates ----
# Simulate 10 day/month/year triples (reproducible via the seed), build
# "d/m/Y" strings and parse them into Date objects.
set.seed(123456)
giorno <- sample(1:28, size = 10, replace = TRUE)
mese <- sample(1:12, size = 10, replace = TRUE)
anno <- sample(1997:2004, size = 10, replace = TRUE)
cbind(giorno, mese, anno)
tmp <- paste0(giorno, "/", mese, "/", anno)
dcal <- as.Date(tmp, format = "%d/%m/%Y")
class(dcal)
data.frame(giorno, mese, anno, tmp, dcal)

# Row filters ----
# Each assignment overwrites the previous one: these are alternative
# examples, and only the last filter (on `peso`) is applied below.
filtro <- df$sex == "F"
filtro <- df$sex == "F" & df$residenza == "TN"
filtro <- df$residenza == "TN" | df$sex == "F"
filtro <- df$residenza != "TN" | df$sex == "F"
filtro <- df$eta >= 19 & df$eta <= 25
filtro <- df$peso >= 50 & df$peso <= 70
table(filtro, exclude = NULL)

# Logical indexing keeps an all-NA row for every NA in the filter ...
tmp <- df[filtro, ]
dim(df)
dim(tmp)
# ... whereas which() drops the NA positions, keeping only TRUE rows.
tmp <- df[which(filtro), ]
dim(tmp)