#------------------------------------
# AAK: Sun 9 Aug 2014 23:38:09 PDT
# Extracting data from a data frame
#
# Demonstrates reading a CSV file, then logical-index subsetting and
# NA handling on the built-in `airquality` data set.
#------------------------------------

# `airquality` ships in the `datasets` package.
library(datasets)

# Reading a CSV file ----
# The CSV example is skipped (with a message) when the file is absent so
# that the built-in data set examples below still run.
csv_path <- "mydata.csv"
if (file.exists(csv_path)) {
  data <- read.csv(csv_path)
  cat("The data has the names:\n")
  print(names(data))
  cat("The actual data is:\n")
  print(data)
} else {
  message("Skipping the CSV example: '", csv_path, "' was not found.")
}

# Built-in data set ----
# From here on `data` refers to airquality, replacing any CSV contents.
data <- airquality

# Getting rid of all rows that contain an NA value:
# first build a logical index (TRUE for rows with no NA in any column),
# then use it to select rows.
goods <- complete.cases(data)
datag <- data[goods, ]
cat("Good data is:\n")
print(datag)

# Extracting the rows whose "Ozone" value is NA.
datawithbadozone <- data[is.na(data$Ozone), ]
cat("Data with NA Ozone is:\n")
print(datawithbadozone)

# Extracting the rows with an available Ozone value and a Temp above 80.
# `&` (elementwise), not `&&`, because both operands are whole columns.
datagoodozhigh80 <- data[!is.na(data$Ozone) & data$Temp > 80, ]
cat("Data with avail Ozone and Temp > 80 is:\n")
print(datagoodozhigh80)

# Mean of Ozone in month 8, ignoring NA values (na.rm = TRUE).
meanval <- mean(data$Ozone[data$Month == 8], na.rm = TRUE)
# sep = "" plus an explicit "\n" so the output line is properly terminated.
cat("Mean of Ozone on month 8 after eliminating NA vals is: ", meanval, "\n",
    sep = "")