%**************************************************************
%* mex interface to Andy Liaw et al.'s C code (used in R package randomForest)
%* Added by Abhishek Jaiantilal ( [email protected] )
%* License: GPLv2
%* Version: 0.02
%
% Calls Classification Random Forest
% A wrapper matlab file that calls the mex file
% This does training given the data and labels
% Documentation copied from R-packages pdf
% http://cran.r-project.org/web/packages/randomForest/randomForest.pdf
% Tutorial on getting this working in tutorial_ClassRF.m
%**************************************************************
% function model = classRF_train(X,Y,ntree,mtry, extra_options)
%
% ___Options
% requires 2 arguments and the rest 3 are optional
% X: data matrix
% Y: target values
% ntree (optional): number of trees (default is 500). also if set to 0
% will default to 500
% mtry (optional): number of predictors sampled for splitting at each node
%       (default is floor(sqrt(D)), where D = size(X,2) is the number of
%       features in X). If set to 0 it will default to floor(sqrt(D)).
%
%
% Note: TRUE = 1 and FALSE = 0 below
% extra_options represent a structure containing various misc. options to
% control the RF
% extra_options.replace = 0 or 1 (default is 1) sampling with or without
% replacement
% extra_options.classwt = priors of classes. Here the function first gets
% the labels in ascending order and assumes the
% priors are given in the same order. So if the class
% labels are [-1 1 2] and classwt is [0.1 2 3] then
% there is a 1-1 correspondence. (ascending order of
% class labels). Note that once classwt is set, the class
% frequencies in the training data also affect the effective weighting.
% extra_options.cutoff (Classification only) = A vector of length equal to number of classes. The 'winning'
% class for an observation is the one with the maximum ratio of proportion
% of votes to cutoff. Default is 1/k where k is the number of classes (i.e., majority
% vote wins).
% extra_options.strata = (not yet stable in code) variable that is used for stratified
% sampling. I don't yet know how this works. Disabled
% by default
% extra_options.sampsize = Size(s) of sample to draw. For classification,
% if sampsize is a vector of the length the number of strata, then sampling is stratified by strata,
% and the elements of sampsize indicate the numbers to be
% drawn from the strata.
% extra_options.nodesize = Minimum size of terminal nodes. Setting this number larger causes smaller trees
% to be grown (and thus take less time). Note that the default values are different
% for classification (1) and regression (5).
% extra_options.importance = Should importance of predictors be assessed?
% extra_options.localImp = Should casewise importance measure be computed? (Setting this to TRUE will
% override importance.)
% extra_options.proximity = Should proximity measure among the rows be calculated?
% extra_options.oob_prox = Should proximity be calculated only on 'out-of-bag' data?
% extra_options.do_trace = If set to TRUE, give a more verbose output as randomForest is run. If set to
% some integer, then running output is printed for every
% do_trace trees.
% extra_options.keep_inbag Should an n by ntree matrix be returned that keeps track of which samples are
% 'in-bag' in which trees (but not how many times, if sampling with replacement)
%
% Options eliminated
% corr_bias, which applies only to regression, is omitted
% norm_votes - always set to return total votes for each class.
%
% ___Returns model which has
% importance = a matrix with nclass + 2 (for classification) or two (for regression) columns.
% For classification, the first nclass columns are the class-specific measures
% computed as mean decrease in accuracy. The nclass + 1st column is the
% mean decrease in accuracy over all classes. The last column is the mean decrease
% in Gini index. For Regression, the first column is the mean decrease in
% accuracy and the second the mean decrease in MSE. If importance=FALSE,
% the last measure is still returned as a vector.
% importanceSD = The 'standard errors' of the permutation-based importance measure. For classification,
% a p by nclass + 1 matrix corresponding to the first nclass + 1
% columns of the importance matrix. For regression, a length p vector.
% localImp = a p by n matrix containing the casewise importance measures, the [i,j] element
% of which is the importance of i-th variable on the j-th case. NULL if
% localImp=FALSE.
% ntree = number of trees grown.
% mtry = number of predictors sampled for splitting at each node.
% votes (classification only) a matrix with one row for each input data point and one
% column for each class, giving the fraction or number of 'votes' from the random
% forest.
% oob_times number of times cases are 'out-of-bag' (and thus used in computing OOB error
% estimate)
% proximity if proximity=TRUE when randomForest is called, a matrix of proximity
% measures among the input (based on the frequency that pairs of data points are
% in the same terminal nodes).
% errtr = first column is OOB Err rate, second is for class 1 and so on
function model = classRF_train(p_train, t_train, ntree, mtry, extra_options)
DEFAULTS_ON = 0;
TRUE = 1;
FALSE = 0;
DEBUG_ON = 0;
orig_labels = sort(unique(t_train));
Y_new = t_train;
new_labels = 1 : length(orig_labels);
for i = 1:length(orig_labels)
Y_new(t_train == orig_labels(i)) = Inf;
Y_new(isinf(Y_new)) = new_labels(i);
end
t_train = Y_new;
if exist('extra_options', 'var')
if isfield(extra_options, 'DEBUG_ON'); DEBUG_ON = extra_options.DEBUG_ON; end
if isfield(extra_options, 'replace'); replace = extra_options.replace; end
if isfield(extra_options, 'classwt'); classwt = extra_options.classwt; end
if isfield(extra_options, 'cutoff'); cutoff = extra_options.cutoff; end
if isfield(extra_options, 'strata'); strata = extra_options.strata; end
if isfield(extra_options, 'sampsize'); sampsize = extra_options.sampsize; end
if isfield(extra_options, 'nodesize'); nodesize = extra_options.nodesize; end
if isfield(extra_options, 'importance'); importance = extra_options.importance; end
if isfield(extra_options, 'localImp'); localImp = extra_options.localImp; end
if isfield(extra_options, 'nPerm'); nPerm = extra_options.nPerm; end
if isfield(extra_options, 'proximity'); proximity = extra_options.proximity; end
if isfield(extra_options, 'oob_prox'); oob_prox = extra_options.oob_prox; end
% if isfield(extra_options, 'norm_votes'); norm_votes = extra_options.norm_votes; end
if isfield(extra_options, 'do_trace'); do_trace = extra_options.do_trace; end
% if isfield(extra_options, 'corr_bias'); corr_bias = extra_options.corr_bias; end
if isfield(extra_options, 'keep_inbag'); keep_inbag = extra_options.keep_inbag; end
end
keep_forest = 1; % always save the trees :)
% set defaults if not already set
if ~exist('DEBUG_ON', 'var'); DEBUG_ON = FALSE; end
if ~exist('replace', 'var'); replace = TRUE; end
% if ~exist('classwt', 'var'); classwt = []; end % will handle these three later
% if ~exist('cutoff', 'var'); cutoff = 1; end
没有合适的资源?快使用搜索试试~ 我知道了~
温馨提示
Matlab遗传算法(GA)优化随机森林(RF)的分类预测,GA-RF分类预测模型,多输入单输出模型,Matlab完整源码和数据 Matlab遗传算法(GA)优化随机森林(RF)的分类预测,GA-RF分类预测模型,多输入单输出模型 多特征输入单输出的二分类及多分类模型。程序内注释详细,直接替换数据就可以用。程序语言为matlab,程序可出分类效果图,混淆矩阵图。
资源推荐
资源详情
资源评论



























收起资源包目录





















共 20 条
- 1
资源评论

- Y199912012024-04-03这个资源对我启发很大,受益匪浅,学到了很多,谢谢分享~

前程算法屋
- 粉丝: 7321
上传资源 快速赚钱
我的内容管理 展开
我的资源 快来上传第一个资源
我的收益
登录查看自己的收益我的积分 登录查看自己的积分
我的C币 登录后查看C币余额
我的收藏
我的下载
下载帮助


最新资源
- Java课程设计方案报告-酒店客房管理系统.doc
- 各国强化工业互联网战略标准化成重要切入点.docx
- ANSYS有限元软件建模基础.ppt
- 互联网+对高职学生思想政治教育的影响及其应对探析.docx
- 地铁弱电系统IP网络分配建议方案.docx
- 基于虚拟现实技术的网络会展发展展望.docx
- 数学物理化学生物地理常用软件介绍.doc
- 通信行业发展情况分析-行业集中度整体趋势上行.docx
- 大学设计方案松下FPC型PLC实现交通灯控制大学方案.doc
- 单片机乳化物干燥过程控制系统设计方案.docx
- 物联网工程专业C++程序设计教学改革探索.docx
- 单片机研究分析报告路抢答器.doc
- PLC控制的生活给水泵系统设计.doc
- 非授权移动接入在GSM网络应用中的安全分析.docx
- 2019年二级建造师建设工程项目管理精品小抄.doc
- 《数据库系统》教学设计.doc
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈



安全验证
文档复制为VIP权益,开通VIP直接复制
