% som: a self-organizing map using a circular ring network topology.
% w = som(m,init_n,dec_n,init_d,Training,File)
% returns w, the m-by-n matrix of final weight vectors for the nodes of a
% circular ring topology (the time, learning rate, distance and error at
% each iteration are written to File; see below). Given Training, a p-by-n
% matrix of input vectors of dimension n, the network w is a 2-layer net
% constructed as an m-by-n matrix such that node i's weight vector is
% w(i,1:n). At each presentation of an input vector i(l), the winner w(j)
% is found, and the winner and its neighbors (according to the current
% distance function) are updated. Uses a circular ring topology such that
% node i is adjacent to node j if i = j+1 or i = j-1, and node 1 is
% adjacent to node m.
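% e.g., with m = 5 nodes and a neighborhood distance of 1, the neighborhood
% of node 1 (the node itself included) is nodes {5,1,2}.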
%
% NOTE: this version of som expects the input vectors to be of unit length
% and normalizes the weight vectors after each presentation such that they
% lie on the unit circle. The winner w(j) is the node whose weight vector
% has the smallest squared Euclidean distance to the presented input.
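% (For unit-length vectors x and w, ||x - w||^2 = 2 - 2*(x*w'), so
% minimizing the squared Euclidean distance over the nodes is equivalent
% to maximizing the dot product x*w'.)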
%
% Additionally, writes in File (binary format) the weight vectors for
% each time t=0..termination. The File has the following format:
% The header:
% - a 1-by-3 matrix [p,n,m], written with fwrite's default uint8 precision,
% where p is the number of input vectors, n is the dimension of the
% input vectors and m is the number of output nodes.
% - a p-by-n matrix defining the input vectors (written as doubles, as is
% everything that follows)
% For each t=0..termination
% - a 1-by-5 matrix [t,nt,dt,e,j] where t is the time, nt is the learning
% rate, dt is the neighborhood distance, e is the squared Euclidean
% distance of the selected input from the winner, and j is the winning node
% - a m-by-n matrix w, the weight vectors at time t
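%
% e.g., a minimal sketch of reading the file back (not part of som itself;
% it simply mirrors the fwrite calls in the code below):
%   fid = fopen(File,'rb');
%   hdr = fread(fid,3,'uint8');            % [p,n,m], fwrite's default precision
%   p = hdr(1); n = hdr(2); m = hdr(3);
%   Training = fread(fid,[p,n],'double');  % the input vectors
%   while true
%     rec = fread(fid,5,'double');         % [t,nt,dt,e,j]
%     if isempty(rec), break; end
%     w = fread(fid,[m,n],'double');       % the weights at time rec(1)
%   end
%   fclose(fid);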
%
% Input:
% m - the number of output nodes
% init_n - the learning rate n at time t=0
% dec_n - the amount to decrease n after each epoch
% init_d - the neighborhood distance d at time t=0
% Training - the p-by-n matrix of input vectors
% File - the name of a file to write the weights at each time to. This
% file will be written in binary.
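%
% Example (hypothetical parameter values; assumes Training holds unit-length
% 2-dimensional input vectors):
%   w = som(25, 0.5, 0.005, 12, Training, 'som_weights.bin');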
%
% NOTES:
% - Expects a function dt = distance(t) to be defined on the path or
% in the local directory that determines the neighborhood distance dt
% given the current time t (a commented sketch of one possible schedule
% appears at the end of these notes).
% - This SOM algorithm has been left as general as possible, but for this
% problem description (a unit circle) the function norm_circle expects
% 2-dimensional inputs (a commented sketch of norm_circle follows the
% neighbors sub procedure below).
% - Under the assumption that the number of nodes in the output layer is
% relatively small, som uses the following weight update rule:
% w = w + nt * (il-w) .* nj
% where the rows of nj are 1s for the winner and its neighbors and 0s
% otherwise, so that the effect of the rule is
% wj = wj + nt*(il-wj) if j is the winner or one of its neighbors
% wj = wj otherwise
% By doing so, the neighbors of the winner do not have to be removed from
% w, updated, and reinserted. If, however, the number of nodes is quite
% large, it may be less computationally expensive to only calculate
% il-w for the neighbor nodes (see the commented sketch below).
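%
% A minimal sketch of that large-m alternative (hypothetical; not what
% this implementation does) would update only the rows of w in the
% winner's neighborhood:
%   idx = find(neighbors(j,m,dt)); % rows of the winner and its neighbors
%   w(idx,:) = norm_circle(w(idx,:) + nt * (il(idx,:) - w(idx,:)));
%
% A minimal sketch of a distance(t) schedule (hypothetical constants; the
% schedule actually used for this problem may differ):
%   function dt = distance(t)
%   dt = max(0, 10 - floor(t/2500)); % start at 10, shrink by 1 every 2500 steps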
%
% Author: Dale Patterson
% $Version: 1.4.3 $ $Date: 3.30.06 $
%
function w = som(m,init_n,dec_n,init_d,Training,File)
nt = init_n; % current learning rate
dt = init_d; % current neighborhood distance
% attempt to open the output file
[fid,msg] = fopen(File,'wb');
if fid == -1
error('project2:cannotOpenReportFile','%s',msg);
end
% generate the initial weight vectors
% w is an m-by-n matrix such that w(i,1:n) is the weight vector of node i
% weights are generated randomly in the range [-1,1] and normalized
[p,n] = size(Training); % get # of samples and dimension
w = norm_circle(-1 + 2.*rand(m,n)); % and generate w
% since we're using matrix subtraction, we'll make a multidimensional array
% I of size m-by-n-by-p where each I(:,:,i) is an m-by-n matrix whose rows
% all equal Training(i,:), that is, each input vector is repeated m times
I = ones(m,n,p);
for i=1:p
I(:,:,i) = ones(m,1) * Training(i,:);
end
% write network parameters, input and initial weight vectors
t = 0; % start at time t=0
err = Inf; % and err as infinity
fwrite(fid,[p,n,m]); % header, fwrite's default uint8 precision (assumes p,n,m <= 255)
fwrite(fid,Training,'double'); % use double for precision
fwrite(fid,[t,nt,dt,err,NaN],'double');
fwrite(fid,w,'double');
% loop until the computational bounds are exceeded; we'll terminate after
% 120 epochs (for this problem, that means 20 epochs at d=0 and n=0.005)
while t < p*120
% permute the input so that there's a different order for each epoch
i = randperm(p); % i is a random permutation of 1..p, use i to index I
err = 0;
% loop over the input (an epoch)
for l=1:p
% update the time
t = t + 1;
il = I(:,:,i(l)); % select an input sample
% compute the difference between the input and each node's weight vector
dist = il-w; % m-by-n matrix of difference vectors
[v,j] = min(sum(dist.^2,2)); % j: winner index, v: its sq. euc. dist.
nj = neighbors(j,m,dt) * ones(1,n); % nj: rows of 1s for winner/neighbors, 0s otherwise
err = err + v; % tally the error
% update weight of winner and neighbors, and normalize
w = norm_circle(w + nt * dist .* nj);
% write period parameters and weight vectors to file
fwrite(fid,[t,nt,dt,v,j],'double');
fwrite(fid,w,'double');
end
% average the squared error over the epoch (not returned; per-step error is in File)
err = err / p;
% update distance and learning rate
if nt - dec_n > 0
nt = nt - dec_n;
end
dt = distance(t);
end
% clean up
fclose(fid); % and close the file
%%%% END som %%%%
%%%% BEGIN SUB PROCEDURES %%%%
% neighbors calculates the neighbors of a given node in a circular ring topology.
% n = neighbors(index,numNodes,distance)
% returns n, a numNodes-by-1 column vector of 0s and 1s {0=not a neighbor,
% 1=neighbor}, marking the node at index and every node within the given
% distance of it in a circular ring topology
function n = neighbors(i,ns,d)
n = zeros(ns,1); % initialize a vector of zeros
n(mod((-d:d)+(i-1),ns)+1) = 1; % and set to 1 each node within d of i (including i itself)
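% NOTE: norm_circle is expected to be available on the path or as a further
% sub procedure (not shown here). A minimal sketch, assuming it simply
% rescales each row (here a 2-D point) to unit Euclidean length so that it
% lies on the unit circle:
%   function W = norm_circle(W)
%   W = W ./ (sqrt(sum(W.^2,2)) * ones(1,size(W,2)));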