Bioconductor Code: ClassifyR

Raw Blame Patch Log History
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/crissCrossValidate.R
\name{crissCrossValidate}
\alias{crissCrossValidate}
\title{A function to perform pairwise cross-validation}
\usage{
crissCrossValidate(
  measurements,
  outcomes,
  nFeatures = 20,
  selectionMethod = "auto",
  selectionOptimisation = "Resubstitution",
  trainType = c("modelTrain", "modelTest"),
  performanceType = "auto",
  doRandomFeatures = FALSE,
  runTOP = FALSE,
  classifier = "auto",
  nFolds = 5,
  nRepeats = 20,
  nCores = 1,
  verbose = 0
)
}
\arguments{
\item{measurements}{A \code{list} of either \code{\link{DataFrame}}, \code{\link{data.frame}} or \code{\link{matrix}} class measurements.}

\item{outcomes}{A \code{list} of vectors that respectively correspond to outcomes of the samples in the \code{measurements} list.
Factors should be coded such that the control class is the first level.}

\item{nFeatures}{The number of features to be used for modelling.}

\item{selectionMethod}{Default: \code{"auto"}. A character keyword of the feature selection algorithm to be used. If \code{"auto"}, then for a classification task,
t-test (two categories) / F-test (three or more categories) ranking and top \code{nFeatures} optimisation is done. For a survival task, the ranking method is
per-feature Cox proportional hazards p-value.}

\item{selectionOptimisation}{Default: \code{"Resubstitution"}. A character keyword, one of \code{"Resubstitution"}, \code{"Nested CV"} or \code{"none"}, specifying the approach used to optimise \code{nFeatures}.}

\item{trainType}{Default: \code{"modelTrain"}. A keyword, either \code{"modelTrain"} or \code{"modelTest"}, specifying whether a fully trained model is used to make
predictions on the test set or if only the feature identifiers are chosen using the training data set and a number of training-predictions are made by
cross-validation in the test set.}

\item{performanceType}{Default: \code{"auto"}. If \code{"auto"}, then balanced accuracy is used for classification or C-index for survival. Otherwise, any one of the
options described in \code{\link{calcPerformance}} may be specified.}

\item{doRandomFeatures}{Default: \code{FALSE}. Whether to perform random feature selection to establish a baseline performance. Either \code{FALSE} or \code{TRUE}
are permitted values.}

\item{runTOP}{Default: \code{FALSE}. If \code{TRUE}, perform the Transferable Omics Prediction (TOP) procedure in a leave-one-dataset-out manner.}

\item{classifier}{Default: \code{"auto"}. A character keyword of the modelling algorithm to be used. If \code{"auto"}, then a random forest is used
for a classification task or Cox proportional hazards model for a survival task.}

\item{nFolds}{A numeric specifying the number of folds to use for cross-validation.}

\item{nRepeats}{A numeric specifying the number of repeats or permutations to use for cross-validation.}

\item{nCores}{A numeric specifying the number of cores used if the user wants to use parallelisation.}

\item{verbose}{Default: 0. A number between 0 and 3 for the amount of progress messages to give.  A higher number will produce more messages.}
}
\value{
A list with elements \code{"real"} for the matrix of pairwise performance metrics using real
feature selection, \code{"random"} if \code{doRandomFeatures} is \code{TRUE} for metrics of random selection,
\code{"top"} if \code{runTOP} is \code{TRUE} for the results of the TOP procedure, and \code{"params"} for a list of parameters used.
}
\description{
This function has been designed to perform cross-validation and model prediction on datasets in a pairwise manner.
}
\author{
Harry Robertson
}