Skip to contents
# Define the 'adult' classification dataset through mldash::new_dataset().
# The `data` function downloads the raw UCI file, names its 15 columns and
# recodes the outcome column as a logical; `model` uses every other column
# as a predictor of `greater_than_50k`.
adult_data <- mldash::new_dataset(
    name = 'adult',
    type = 'classification',
    description = 'Prediction task is to determine whether a person makes over 50K a year.',
    source = 'https://archive.ics.uci.edu/ml/datasets/Adult',
    dir = 'inst/datasets',
    data = function() {
        destfile <- tempfile()
        # Remove the downloaded copy once the data frame has been built,
        # even if the download or the parse fails.
        on.exit(unlink(destfile), add = TRUE)
        download.file("https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data", destfile)
        # The raw file is read without a header row; names are assigned below.
        df <- read.csv(destfile, header = FALSE)
        names(df) <- c('age', 'workclass', 'fnlwgt', 'education', 'education_num', 'marital_status',
                       'occupation', 'relationship', 'race', 'sex', 'capital_gain', 'capital_loss',
                       'hours_per_week', 'native_country', 'greater_than_50k')
        # read.csv() does not strip whitespace by default, so the value is
        # matched with its leading space. TRUE means income above 50K.
        df$greater_than_50k <- df$greater_than_50k == ' >50K'
        df
    },
    model = greater_than_50k ~ .,
    overwrite = TRUE
)

Running the code above creates a file with the following contents:

name: adult
type: classification
description: Prediction task is to determine whether a person makes over 50K a year.
source: https://archive.ics.uci.edu/ml/datasets/Adult
reference: APA reference for the dataset.
data: function () 
    {
        destfile <- tempfile()
        download.file("https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data", 
            destfile)
        df <- read.csv(destfile, header = FALSE)
        names(df) <- c("age", "workclass", "fnlwgt", "education", 
            "education-num", "marital-status", "occupation", "relationship", 
            "race", "sex", "capital-gain", "capital-loss", "hours-per-week", 
            "native-country", "greater_than_50k")
        df$greater_than_50k <- df$greater_than_50k == " >50K"
        return(df)
    }
model: greater_than_50k ~ .
note: