Commit 4f11ce96 authored by Antoine Lucas's avatar Antoine Lucas
Browse files

more comment on C++ code

parent 21e3d1e8
Mon Sep 31 2007: 0.8-2
* add kendall distance.
* change afc man page.
Mon Sep 31 2007: 0.8-1
* add parameter labels to plot.acp
* use matrix instead of data.frame in internal pca data.
* remove #include <pthread.h> when built with windows.
* k-means hclust and dist use common functions for distance computation
* k-means possible with spearman distance
Sat Sep 29 2007: 0.8
* clustering possible with float precision; use of templates
* suppression of duplicated code(used for no thread / multiple thread)
......
Package: amap
Version: 0.8
Date: 2007-09-29
Version: 0.8-2
Date: 2007-10-03
Suggests: Biobase
Title: Another Multidimensional Analysis Package
Author: Antoine Lucas
......
#-------------------------------------------------------
#
# Created : 29/10/02
# Last Modified : Time-stamp: <2005-11-14 22:09:16 antoine>
# Last Modified : Time-stamp: <2007-10-02 19:07:26 antoine>
#
# Description : Principal component analysis
#
......@@ -13,7 +13,6 @@
#-------------------------------------------------------
acp <- function(x,center=TRUE,reduce=TRUE,wI=rep(1,nrow(x)),wV=rep(1,ncol(x)))
{
x <- as.matrix(x)
......@@ -33,8 +32,8 @@ acp <- function(x,center=TRUE,reduce=TRUE,wI=rep(1,nrow(x)),wV=rep(1,ncol(x)))
scores <- x %*% V
V <- as.matrix(V)
scores <- as.matrix(scores)
V <- as.matrix(Re(V))
scores <- as.matrix(Re(scores))
dimnames(V)[[2]] <- paste("Comp",1:dim(x)[2])
if(!is.null( dimnames(x)[[2]] ))
......
......@@ -17,3 +17,21 @@ month = "October",
pages = "237-252",
}
@ARTICLE{mpetitjean,
author = "M. Petitjean",
title = "Agr\'egation des similarit\'es: une solution oubli\'ee.",
journal = "RAIRO Oper. Res.",
year = 2002,
volume = 36,
number=1,
pages = "101-108",
}
@BOOK{R:writtingRExt,
author = {R core},
title = {Writing R Extensions},
publisher = {Unknown},
year = 2007,
address = {Unknown},
abstract = {covers how to create your own packages, write R help files, and the foreign language (C, C++, Fortran, ...) interfaces.}
}
\ No newline at end of file
......@@ -17,6 +17,7 @@ afc(x)
}
\examples{
\dontrun{
color <- as.factor(c('blue','red','red','blue','red'))
size <- as.factor(c('large','large','small','medium','large'))
x <- data.frame(color,size)
......@@ -26,7 +27,7 @@ afc.2 <- afc(matlogic(x))
plotAll(afc.1)
plotAll(afc.2)
}
}
\keyword{multivariate}
......
......@@ -55,8 +55,35 @@
#define MIN( A , B ) ( ( A ) < ( B ) ? ( A ) : ( B ) )
/** \brief Distance euclidean (i.e. sqrt(sum of square) )
*/
// ---------------------------------------------------------
// Distance euclidean (i.e. sqrt(sum of square) )
//
// Euclidean distance between 2 vectors a,b is
// d = sqrt[ sum_i (a_i - b_i)^2 ]
//
// This function computes the distance between 2 vectors x[i1,] & y[i2,].
// x and y are matrices; only row i1 from x and row i2 from y are used.
// The number of columns (nc) is the same in x and y,
// the number of rows can differ (nr_x, nr_y).
//
// Flag will be set to 0 if NA value computed in distance
//
// When called by function distance or hclust, x and y are the same; it computes
// the distance between vectors x[i1,] and x[i2,]
//
// \param x matrix of size nr_x * nc; row i1 is of interest
// \param y matrix of size nr_y * nc; row i2 is of interest
// \param nr_x number of rows in matrix x
// \param nr_y number of rows in matrix y
// \param nc number of columns in matrix x or y
// \param i1 row chosen in matrix x
// \param i2 row chosen in matrix y
// \param flag set to 0 if NA value computed in distance
// \param opt: unused
//
// Return: distance value
//
// ---------------------------------------------------------
template<class T> T distance_T<T>::R_euclidean(double * x, double * y , int nr_x, int nr_y, int nc,
int i1, int i2,
int * flag, T_tri & opt)
......@@ -75,7 +102,7 @@ template<class T> T distance_T<T>::R_euclidean(double * x, double * y , int nr_
i1 += nr_x;
i2 += nr_y;
}
if(count == 0)
if(count == 0) // NA for all j:
{
*flag = 0;
return NA_REAL;
......@@ -85,8 +112,36 @@ template<class T> T distance_T<T>::R_euclidean(double * x, double * y , int nr_
return sqrt(dist);
}
/** \brief Distance maximum (supremum norm)
*/
// ---------------------------------------------------------
//
// Distance maximum (supremum norm)
//
// Maximum distance between 2 vectors a,b is
// d = max |ai - bi |
//
// This function computes the distance between 2 vectors x[i1,] & y[i2,].
// x and y are matrices; only row i1 from x and row i2 from y are used.
// The number of columns (nc) is the same in x and y,
// the number of rows can differ (nr_x, nr_y).
//
// Flag will be set to 0 if NA value computed in distance
//
// When called by function distance or hclust, x and y are the same; it computes
// the distance between vectors x[i1,] and x[i2,]
//
// \param x matrix of size nr_x * nc; row i1 is of interest
// \param y matrix of size nr_y * nc; row i2 is of interest
// \param nr_x number of rows in matrix x
// \param nr_y number of rows in matrix y
// \param nc number of columns in matrix x or y
// \param i1 row chosen in matrix x
// \param i2 row chosen in matrix y
// \param flag set to 0 if NA value computed in distance
// \param opt: unused
//
// Return: distance value
//
// ---------------------------------------------------------
template<class T> T distance_T<T>::R_maximum(double * x, double * y , int nr_x, int nr_y, int nc,
int i1, int i2,
int * flag, T_tri & opt)
......@@ -114,8 +169,36 @@ template<class T> T distance_T<T>::R_maximum(double * x, double * y , int nr_x,
return dist;
}
/** \brief Distance manhattan
*/
// ---------------------------------------------------------
// Distance manhattan (i.e. sum of abs difference )
//
// manhattan distance between 2 vectors a,b is
// d = sum_i |a_i - b_i |
//
// This function computes the distance between 2 vectors x[i1,] & y[i2,].
// x and y are matrices; only row i1 from x and row i2 from y are used.
// The number of columns (nc) is the same in x and y,
// the number of rows can differ (nr_x, nr_y).
//
// Flag will be set to 0 if NA value computed in distance
//
// When called by function distance or hclust, x and y are the same; it computes
// the distance between vectors x[i1,] and x[i2,]
//
// \param x matrix of size nr_x * nc; row i1 is of interest
// \param y matrix of size nr_y * nc; row i2 is of interest
// \param nr_x number of rows in matrix x
// \param nr_y number of rows in matrix y
// \param nc number of columns in matrix x or y
// \param i1 row chosen in matrix x
// \param i2 row chosen in matrix y
// \param flag set to 0 if NA value computed in distance
// \param opt: unused
//
// Return: distance value
//
// ---------------------------------------------------------
template<class T> T distance_T<T>::R_manhattan(double * x, double * y , int nr_x, int nr_y, int nc,
int i1, int i2,
int * flag, T_tri & opt)
......@@ -142,8 +225,35 @@ template<class T> T distance_T<T>::R_manhattan(double * x, double * y , int nr_
return dist;
}
/** \brief Distance canberra
*/
// ---------------------------------------------------------
// Distance Canberra
//
// Canberra distance between 2 vectors a,b is
// d = sum_i | a_i - b_i | / | a_i + b_i |
//
// This function computes the distance between 2 vectors x[i1,] & y[i2,].
// x and y are matrices; only row i1 from x and row i2 from y are used.
// The number of columns (nc) is the same in x and y,
// the number of rows can differ (nr_x, nr_y).
//
// Flag will be set to 0 if NA value computed in distance
//
// When called by function distance or hclust, x and y are the same; it computes
// the distance between vectors x[i1,] and x[i2,]
//
// \param x matrix of size nr_x * nc; row i1 is of interest
// \param y matrix of size nr_y * nc; row i2 is of interest
// \param nr_x number of rows in matrix x
// \param nr_y number of rows in matrix y
// \param nc number of columns in matrix x or y
// \param i1 row chosen in matrix x
// \param i2 row chosen in matrix y
// \param flag set to 0 if NA value computed in distance
// \param opt: unused
//
// Return: distance value
//
// ---------------------------------------------------------
template<class T> T distance_T<T>::R_canberra(double * x, double * y , int nr_x, int nr_y, int nc,
int i1, int i2,
int * flag, T_tri & opt)
......
......@@ -12,6 +12,7 @@ template<class T> class distance_T
enum { EUCLIDEAN=1, MAXIMUM, MANHATTAN, CANBERRA, BINARY ,PEARSON, CORRELATION, SPEARMAN,
KENDALL};
struct T_tri
{
double * data_tri_x;
......@@ -25,6 +26,7 @@ template<class T> class distance_T
private:
/** \brief arguments sent to distance thread */
struct T_argument
{
short int id;
......@@ -77,24 +79,151 @@ template<class T> class distance_T
static void* thread_dist(void* arguments);
/** \brief Euclidean distance (i.e. sqrt(sum of squares))
*
* Euclidean distance between 2 vectors a,b is
* \f[ d = \sqrt{ \sum_i^n (a_i - b_i)^2}
* \f]
*
* When an NA value is found for a_i or b_i, both a_i and b_i are
* skipped. The number of values skipped is counted (\#NA in the next formula)
*
* \f[ d = \sqrt{\frac{n}{\#NA} \sum_{i=1; a_i \in \Re; b_i \in \Re}^n (a_i - b_i)^2}
* \f]
*
* NOTE(review): the scale factor presumably should be n / (n - \#NA), i.e.
* n over the number of pairs actually used -- confirm against the implementation.
*
* This function computes the distance between 2 vectors x[i1,] & y[i2,].
* x and y are matrices; only row i1 from x and row i2 from y are used.
* The number of columns (nc) is the same in x and y,
* the number of rows can differ (nr_x, nr_y).
*
* Flag will be set to 0 if there are too many NA values to compute the distance
*
* When called by function distance or hclust, x and y are the same; it computes
* the distance between vectors x[i1,] and x[i2,]
*
* \param x matrix of size nr_x * nc; row i1 is of interest
* \param y matrix of size nr_y * nc; row i2 is of interest
* \param nr_x number of rows in matrix x
* \param nr_y number of rows in matrix y
* \param nc number of columns in matrix x or y
* \param i1 row chosen in matrix x
* \param i2 row chosen in matrix y
* \param flag set to 0 if NA value computed in distance
* \param opt unused
*
* \return distance value
*
*/
static T R_euclidean(double * x, double * y , int nr_x, int nr_y, int nc,
int i1, int i2,
int * flag, T_tri & opt);
/** \brief Maximum distance (supremum norm)
*
* Maximum distance between 2 vectors a,b is
* \f[ d = \max_i |a_i - b_i|
* \f]
*
* NA values are omitted.
*
* This function computes the distance between 2 vectors x[i1,] & y[i2,].
* x and y are matrices; only row i1 from x and row i2 from y are used.
* The number of columns (nc) is the same in x and y,
* the number of rows can differ (nr_x, nr_y).
*
* Flag will be set to 0 if there are too many NA values to compute the distance
*
* When called by function distance or hclust, x and y are the same; it computes
* the distance between vectors x[i1,] and x[i2,]
*
* \param x matrix of size nr_x * nc; row i1 is of interest
* \param y matrix of size nr_y * nc; row i2 is of interest
* \param nr_x number of rows in matrix x
* \param nr_y number of rows in matrix y
* \param nc number of columns in matrix x or y
* \param i1 row chosen in matrix x
* \param i2 row chosen in matrix y
* \param flag set to 0 if NA value computed in distance
* \param opt unused
*
* \return distance value
*
*/
static T R_maximum(double * x, double * y , int nr_x, int nr_y, int nc,
int i1, int i2,
int * flag, T_tri & opt);
/** \brief Distance manhattan
/** \brief Manhattan distance (i.e. sum of absolute differences)
*
* Manhattan distance between 2 vectors a,b is
* \f[ d = \sum_i^n |a_i - b_i|
* \f]
*
* When an NA value is found for a_i or b_i, both a_i and b_i are
* skipped. The number of values skipped is counted (\#NA in the next formula)
*
* \f[ d = \frac{n}{\#NA} \sum_{i=1; a_i \in \Re; b_i \in \Re}^n |a_i - b_i|
* \f]
*
* NOTE(review): the scale factor presumably should be n / (n - \#NA), i.e.
* n over the number of pairs actually used -- confirm against the implementation.
*
* This function computes the distance between 2 vectors x[i1,] & y[i2,].
* x and y are matrices; only row i1 from x and row i2 from y are used.
* The number of columns (nc) is the same in x and y,
* the number of rows can differ (nr_x, nr_y).
*
* Flag will be set to 0 if there are too many NA values to compute the distance
*
* When called by function distance or hclust, x and y are the same; it computes
* the distance between vectors x[i1,] and x[i2,]
*
* \param x matrix of size nr_x * nc; row i1 is of interest
* \param y matrix of size nr_y * nc; row i2 is of interest
* \param nr_x number of rows in matrix x
* \param nr_y number of rows in matrix y
* \param nc number of columns in matrix x or y
* \param i1 row chosen in matrix x
* \param i2 row chosen in matrix y
* \param flag set to 0 if NA value computed in distance
* \param opt unused
*
* \return distance value
*
*/
static T R_manhattan(double * x, double * y , int nr_x, int nr_y, int nc,
int i1, int i2,
int * flag, T_tri & opt);
/** \brief Distance canberra
/** \brief Canberra distance
*
* Canberra distance between 2 vectors a,b is
* \f[ d = \sum_i^n |a_i - b_i| / |a_i + b_i|
* \f]
*
* When an NA value is found for a_i or b_i, both a_i and b_i are
* skipped. The number of values skipped is counted (\#NA).
*
*
* This function computes the distance between 2 vectors x[i1,] & y[i2,].
* x and y are matrices; only row i1 from x and row i2 from y are used.
* The number of columns (nc) is the same in x and y,
* the number of rows can differ (nr_x, nr_y).
*
* Flag will be set to 0 if too many NA to compute distance
*
* When called by function distance or hclust, x and y are the same; it computes
* the distance between vectors x[i1,] and x[i2,]
*
* \param x matrix of size nr_x * nc; row i1 is of interest
* \param y matrix of size nr_y * nc; row i2 is of interest
* \param nr_x number of rows in matrix x
* \param nr_y number of rows in matrix y
* \param nc number of columns in matrix x or y
* \param i1 row chosen in matrix x
* \param i2 row chosen in matrix y
* \param flag set to 0 if NA value computed in distance
* \param opt unused
*
* \return distance value
*
*/
static T R_canberra(double * x, double * y , int nr_x, int nr_y, int nc,
int i1, int i2,
......@@ -120,15 +249,79 @@ template<class T> class distance_T
int i1, int i2,
int * flag, T_tri & opt);
/** \brief Spearman distance (rank-based metric)
* \note Added by A. Lucas
*
* Spearman distance between 2 vectors a,b is
* \f[ d = \sum_i^n (rank(a_i) - rank(b_i)) ^ 2
* \f]
*
* If an NA is found: returns NA, and flag is set to 0.
*
*
* This function computes the distance between 2 vectors x[i1,] & y[i2,].
* x and y are matrices; only row i1 from x and row i2 from y are used.
* The number of columns (nc) is the same in x and y,
* the number of rows can differ (nr_x, nr_y).
*
*
* When called by function distance or hclust, x and y are the same; it computes
* the distance between vectors x[i1,] and x[i2,]
*
* \param x matrix of size nr_x * nc; row i1 is of interest
* \param y matrix of size nr_y * nc; row i2 is of interest
* \param nr_x number of rows in matrix x
* \param nr_y number of rows in matrix y
* \param nc number of columns in matrix x or y
* \param i1 row chosen in matrix x
* \param i2 row chosen in matrix y
* \param flag set to 0 if NA value computed in distance
* \param opt a set of 6 vectors of size nc, allocated but uninitialised.
* The aim of this parameter is to avoid repeated vector allocations
*
* \return distance value
*
*/
static T R_spearman(double * x, double * y , int nr_x, int nr_y, int nc,
int i1, int i2,
int * flag, T_tri & opt);
/** \brief Kendall distance (rank base metric)
* \note Added by A. Lucas
*
* Kendall distance between 2 vectors a,b is
* \f[ d = \sum_{i,j} K_{i,j}(x,y)
* \f]
*
* With \f$ K_{i,j}(x,y)\f$ is 0 if
* \f$ (x_i, x_j) \f$ in same order as \f$ ( y_i,y_j) \f$,
* 1 if not.
*
*
* If a NA found: return NA, flag is set to 0.
*
*
* This function computes the distance between 2 vectors x[i1,] & y[i2,].
* x and y are matrices; only row i1 from x and row i2 from y are used.
* The number of columns (nc) is the same in x and y,
* the number of rows can differ (nr_x, nr_y).
*
*
* When called by function distance or hclust, x and y are the same; it computes
* the distance between vectors x[i1,] and x[i2,]
*
* \param x matrix of size nr_x * nc; row i1 is of interest
* \param y matrix of size nr_y * nc; row i2 is of interest
* \param nr_x number of rows in matrix x
* \param nr_y number of rows in matrix y
* \param nc number of columns in matrix x or y
* \param i1 row chosen in matrix x
* \param i2 row chosen in matrix y
* \param flag set to 0 if NA value computed in distance
* \param opt a set of 6 vectors of size nc, allocated but uninitialised.
* aim of this parameter is to avoid several vector allocation
*
* \return distance value
*
*/
static T R_kendall(double * x, double * y , int nr_x, int nr_y, int nc,
int i1, int i2,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment