function [come, rhoe, wgte, svme] = batch_data (dat, tdim, T, option, dir)
%BATCH_DATA Run emphasizing/de-emphasizing test on given data
%
%   [COME, RHOE, WGTE, SVME] = BATCH_DATA(DAT, TDIM, T, OPTION, DIR)
%
%   DAT     data matrix, one example per row; the last column is left
%           untouched and every other column is rescaled to [-1,1].
%   TDIM    passed to the external tester. A scalar is expanded to
%           [1 ni TDIM] (boosting); anything longer becomes [10 TDIM]
%           (NN with 10 trials). ni is the number of input columns.
%   T       number of random train/test splits (trials).
%   OPTION  optional overrides for the defaults
%           [0.6 2 -0.1 0.15 1.05 2.1 0.05 0.95], in order:
%             (1) fraction of examples used for training
%             (2) added to ni to give the 'nh' argument of nnrhor
%             (3,4) rho thresholds (noisy below 3, critical in [3,4])
%             (5,6) weight thresholds (critical in [5,6], noisy above 6)
%             (7,8) svm thresholds (noisy below 7, critical in [7,8])
%   DIR     directory holding nnrhor/nnboostr/nntestr and used for the
%           temporary files (default 'batch').
%
%   COME(t,:) holds the baseline errors of trial t (no example removed or
%   emphasized). RHOE, WGTE and SVME hold, for each trial, the errors
%   obtained for every special-example weight in [0 1 1.5 2 3 5 7] when
%   the noisy/critical sets come from rho, boosting weights and the SVM
%   output respectively.
%
%   BATCH_DATA(data, 500, 100, [], 'paper');
%   100 runs of AdaBoost (500 stumps), with all default settings.
%
%   See also BATCH, WGT_CALC, RHO_CALC.

% Ling Li, Caltech
% $Id: batch_data.m 2078 2005-05-18 05:04:13Z ling $

wgt_range = 0.7;
boost_type = 1;  % AdaBoost
wgt_alpha = 0;   % don't use alpha

% Define the outputs up front so that T == 0 returns empty matrices
% instead of failing with "output argument not assigned".
come = []; rhoe = []; wgte = []; svme = [];

[N, ni] = size(dat);
ni = ni - 1;
% normalize the data to [-1,1]
for i = 1:ni
    mi = min(dat(:,i));
    ma = max(dat(:,i));
    if ma == mi, ma = mi+1; end   % constant column: avoid division by zero
    dat(:,i) = (dat(:,i)-mi)/(ma-mi)*2-1;
end

% options:
% for training
if nargin < 4, option = []; end
% NOTE(review): OPTIONS is defined elsewhere; presumably it overlays
% OPTION on the default vector -- confirm.
opt = options([0.6 2 -0.1 0.15 1.05 2.1 0.05 0.95], option);
p_tr = opt(1);
nh = ni+opt(2);
rthr1 = opt(3); rthr2 = opt(4);
wthr1 = opt(5); wthr2 = opt(6);
sthr1 = opt(7); sthr2 = opt(8);
if nargin < 5, dir = 'batch'; end

if length(tdim) == 1
    tdim = [1 ni tdim];   % boosting
else
    tdim = [10 tdim];     % NN with 10 trials
end

% Fail fast, before any temporary file is written, if the external
% programs are missing.
if ~fexist([dir '/nnrhor']) || ~fexist([dir '/nnboostr'])
    error(['nnrhor or nnboostr doesn''t exist in ' dir '/']);
end

% Pick a random, currently unused file prefix for the temporary files.
rand('state',sum(100*clock));
while 1
    prefix = num2str(rand);
    datf = [dir '/' prefix '.d'];
    outf = [dir '/' prefix '.o'];
    if ~fexist(datf), break; end
end

ws = [0,1,1.5,2,3,5,7];
%ws = unique([1./fliplr(ws), ws]);  % weight of special examples

for t = 1:T
    disp(['Trial ' int2str(t) ':']);
    % randomly separate the data
    tr_idx = ~~randerr(1, N, round(p_tr*N));
    z = dat(tr_idx, :);
    save(datf, '-ascii', 'z');

    % rho-based selection
    run_cmd([dir '/nnrhor ' datf ' ' ...
             num2str([ni nh wgt_range]) ' 100 > ' outf]);
    rho = rho_calc(load(outf));
    rho_n = find(rho < rthr1);                  % noisy
    rho_c = find(rthr1 <= rho & rho <= rthr2);  % critical

    % boosting-weight-based selection
    run_cmd([dir '/nnboostr ' datf ' ' int2str(ni) ' 0 1000 ' outf]);
    [wgt,alpha] = wgt_calc(load(outf), boost_type);
    if wgt_alpha
        wgt = alpha'/sum(alpha)*wgt;
    else
        wgt = mean(wgt, 1);
    end
    % NOTE(review): the weights are scaled by N, the size of the full data
    % set, not by the size of the training subset -- confirm this is the
    % intended normalization for wthr1/wthr2.
    wgt_n = find(wgt*N > wthr2);
    wgt_c = find(wgt*N <= wthr2 & wgt*N >= wthr1);

    % SVM-based selection
    run_cmd(['svm-htlin/rungauss.sh ' datf ' ' outf]);
    svm = load(outf);
    svm_n = find(svm < sthr1);                  % noisy
    svm_c = find(sthr1 <= svm & svm <= sthr2);  % critical

    tz = dat(~tr_idx, :);
    % common part: no removal and no emphasize
    % Averaged over rows exactly as TEST does, so a multi-row result from
    % TEST_S still fits in one row of COME; a single row is unchanged.
    come(t,:) = mean(test_s(z, ones(size(z,1),1), tz, dir, tdim), 1);
    disp(['normal: ' num2str(come(t,2)*100)]);

    for i = 1:length(ws)
        e = test(z, rho_n, rho_c, ws(i), tz, dir, tdim);
        idx = ((i-1)*length(e)+1):(i*length(e));
        disp(['rho errs (wgt ' sprintf('%0.2f',ws(i)) '): ' ...
              num2str(e(2:2:end)*100)]);
        rhoe(t,idx) = e;

        e = test(z, wgt_n, wgt_c, ws(i), tz, dir, tdim);
        disp(['ada errs (wgt ' sprintf('%0.2f',ws(i)) '): ' ...
              num2str(e(2:2:end)*100)]);
        wgte(t,idx) = e;

        e = test(z, svm_n, svm_c, ws(i), tz, dir, tdim);
        disp(['svm errs (wgt ' sprintf('%0.2f',ws(i)) '): ' ...
              num2str(e(2:2:end)*100)]);
        svme(t,idx) = e;
    end
end

% The temporary files only exist if at least one trial wrote them.
if fexist(datf), delete(datf); end
if fexist(outf), delete(outf); end


function e = test (z, idx_n, idx_c, w, tz, dir, tdim)
%TEST Train on Z with the noisy examples removed and the critical ones
%   given weight W, then return the row-wise mean of the errors reported
%   by TEST_S.

% train on z
dw = ones(size(z,1), 1);
dw(idx_n) = 0;   % noisy examples are dropped
dw(idx_c) = w;   % critical examples get weight w (w == 0 drops them too)
%dw(dw==1) = (rand(sum(dw==1),1)>0.5);
e4 = test_s(z(dw~=0,:), dw(dw~=0,:), tz, dir, tdim);
e = mean(e4, 1);


function ee = test_s (z, w, tz, dir, tdim)
%TEST_S Run the external nntestr program on training set Z with example
%   weights W and test set TZ. Column 1 of the loaded result is divided
%   by the number of training rows and column 2 by the number of test
%   rows.

% Pick a random, currently unused file prefix for the temporary files.
while 1
    prefix = num2str(rand);
    datf = [dir '/' prefix '.d'];
    wgtf = [dir '/' prefix '.w'];
    tstf = [dir '/' prefix '.t'];
    outf = [dir '/' prefix '.o'];
    if ~fexist(datf), break; end
end
assert(outf(1) < '0' || outf(1) > '9');  % nntest needs this

w = w / sum(w);
save(datf, '-ascii', 'z');
save(wgtf, '-ascii', 'w');
save(tstf, '-ascii', 'tz');
run_cmd([dir '/nntestr ' datf ' ' wgtf ' ' tstf ' ' ...
         int2str(tdim) ' ' outf]);
ee = load(outf);
ee(:,1) = ee(:,1) / size(z,1);
ee(:,2) = ee(:,2) / size(tz,1);
delete(datf); delete(wgtf); delete(tstf); delete(outf);


function run_cmd (cmd)
%RUN_CMD Run an external command with its output captured (not echoed)
%   and warn when it reports a non-zero exit status. Only a warning is
%   raised because the exit-code conventions of the external programs are
%   not known here; it makes a silently failed step visible before its
%   (possibly stale) output file is loaded.
[status, out] = dos(cmd);
if status ~= 0
    warning('batch_data:commandFailed', ...
            'Command returned status %d: %s', status, cmd);
end