Commit 1ddd589d authored by Antoine Lucas's avatar Antoine Lucas
Browse files

Sat Sep 29 2007: 0.8

  * clustering possible with float precision; use of templates
  * suppression of duplicated code(used for no thread / multiple thread)
  * use of more safe parameter to thead function
  * correction on man page: only ascii characters
parent 34dd89a0
Sat Sep 29 2007: 0.8
* clustering possible with float precision; use of templates
* suppression of duplicated code(used for no thread / multiple thread)
* use of more safe parameter to thead function
* correction on man page: only ascii characters
Mon Jan 22 2007: 0.7-3
* dimnames(x)[[1]] <- NULL seems not to be allowed anymore.
-> changes in acp.R acprob.R
......
Package: amap
Version: 0.7-3
Date: 2007-01-21
Version: 0.8
Date: 2007-09-29
Suggests: Biobase
Title: Another Multidimensional Analysis Package
Author: Antoine Lucas
......
......@@ -5,9 +5,7 @@ afc Correspondance factorial analysis.
burt Compute burt table from a factor dataframe.
diss Compute a dissimilarity matrix
Dist Distance Matrix Computation
distpar Parallelized Distance Matrix Computation
hcluster Hierarchical Clustering
hclusterpar Parallelized Hierarchical Clustering
Kmeans K-Means Clustering
lubisch Dataset Lubischew
plot.acp Graphics for Principal component Analysis
......
useDynLib(amap)
export("Kmeans","acp","pca","K","W","WsansC","varrob",
"varrobsansC","acpgen","acprob","plot2","plotAll",
"plot.acp","print.acp","biplot.acp","pop","diss")
export("Kmeans","acp","pca","K","W","varrob",
"acpgen","acprob","plot2","plotAll",
"plot.acp","print.acp","biplot.acp","pop","diss")
S3method("print","acp")
S3method("plot","acp")
S3method("biplot","acp")
S3method("print","pop")
export("Dist","distpar","hcluster","hclusterpar")
export("Dist","hcluster","hclusterpar")
export("matlogic","burt","afc")
......@@ -27,6 +27,7 @@ acp <- function(x,center=TRUE,reduce=TRUE,wI=rep(1,nrow(x)),wV=rep(1,ncol(x)))
EIG <- eigen( (t(x)* wI) %*% (x * wV) ,symmetric=FALSE)
V <- EIG$vector # ou bien: V=svd(x)$v
EIG$values <- Re(EIG$values)
V <- V %*% diag(sign(EIG$values))
val <- sqrt(abs(EIG$values))
......
......@@ -17,19 +17,19 @@ matlogic <- function(x)
res <- as.integer(matrix(0,ncol=k,nrow=n))
x <- c(x,recursive=TRUE)
res <- .C("matind",
as.integer(nblev),
as.integer(x),
res=res,
n,
m,
as.integer(k),
PACKAGE="amap")
res <- matrix(res$res,ncol=k)
rownames(res) <- rownames
colnames(res) <- colnamesnew
res
result <- .C("matind",
as.integer(nblev),
as.integer(x),
res=res,
as.integer(n),
as.integer(m),
as.integer(k),
PACKAGE="amap")
result <- matrix(result$res,ncol=k)
rownames(result) <- rownames
colnames(result) <- colnamesnew
result
}
......
......@@ -10,7 +10,7 @@
hcluster <- function (x, method = "euclidean", diag = FALSE, upper = FALSE, link = "complete", members = NULL, nbproc = 2, doubleprecision = TRUE)
hclusterpar <- hcluster <- function (x, method = "euclidean", diag = FALSE, upper = FALSE, link = "complete", members = NULL, nbproc = 2, doubleprecision = TRUE)
{
if(class(x) == "exprSet")
......
......@@ -271,7 +271,7 @@ PACKAGE_VERSION=
PACKAGE_STRING=
PACKAGE_BUGREPORT=
ac_unique_file="src/distancepar.c"
ac_unique_file="src/distance.cpp"
ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT PKG_CPPFLAGS PKG_LIBS LIBOBJS LTLIBOBJS'
ac_subst_files=''
......
dnl Process this file with autoconf to produce a configure script.
AC_INIT(src/distancepar.c)
AC_INIT(src/distance.cpp)
dnl People often put stuff in /usr/local.
......
......@@ -43,7 +43,7 @@ heatmap(as.matrix(USArrests),
@
On a multiprocessor computer:
<<echo=TRUE>>=
h = hclusterpar(USArrests,nbproc=4)
h = hcluster(USArrests,nbproc=4)
@
The K-means clustering:
<<echo=TRUE,results=hide>>=
......
......@@ -47,7 +47,7 @@ print.acp(x, ...)
\references{
A. Carlier
Analyse des données Multidimensionnelles
Analyse des donn\'ees Multidimensionnelles
\url{http://www.lsp.ups-tlse.fr/Carlier/enseignement.html}
}
......
......@@ -110,7 +110,7 @@ class(p)<- "acp"
Caussinus, H and Ruiz-Gazen, A. (1993): \emph{Projection Pursuit and
Generalized Principal Component Analyses, in New Directions in
Statistical Data Analysis and Robustness} (eds. Morgenthaler et
al.), pp. 35-46. Birkuser Verlag Basel.
al.), pp. 35-46. Birk\"auser Verlag Basel.
Caussinus, H. and Ruiz-Gazen, A. (1995). \emph{Metrics for Finding Typical
Structures by Means of Principal Component Analysis. In Data Science
......
......@@ -49,7 +49,7 @@ divide each column by standard deviation}
Caussinus, H and Ruiz-Gazen, A. (1993): \emph{Projection Pursuit and
Generalized Principal Component Analyses, in New Directions in
Statistical Data Analysis and Robustness} (eds. Morgenthaler et
al.), pp. 35-46. Birkäuser Verlag Basel.
al.), pp. 35-46. Birk\"auser Verlag Basel.
Caussinus, H. and Ruiz-Gazen, A. (1995). \emph{Metrics for Finding Typical
Structures by Means of Principal Component Analysis. In Data Science
......
......@@ -42,7 +42,7 @@ matrix(c(1,1,1,1,1
diss(data)
##With weights
## With weights
diss(data,w=c(1,1,2,2,3))
}
......
\name{hcluster}
\title{Hierarchical Clustering}
\alias{hcluster}
\alias{hclusterpar}
\description{
Hierarchical cluster analysis.
}
......@@ -125,7 +126,7 @@ plot(hc1, labels = FALSE, hang = -1, main = "Re-start from 10 clusters")
par(opar)
## other combinaison are possible
## other combinaison are possible
hc <- hcluster(USArrests,method = "euc",link = "ward", nbproc= 1,
doubleprecision = TRUE)
......
......@@ -23,12 +23,12 @@ or from an Array of Signed Similarities.
Theory is explained at \url{http://petitjeanmichel.free.fr/itoweb.petitjean.class.html}
Marcotorchino F.
\emph{Agrgation des similarits en classification automatique.}
Thse de Doctorat d'Etat en Mathmatiques,
Universit Paris VI, 25 June 1981.
\emph{Agr\'egation des similarit\'es en classification automatique.}
Th\'ese de Doctorat d'Etat en Math\'ematiques,
Universit\'e Paris VI, 25 June 1981.
Petitjean M.
\emph{Agrgation des similarits: une solution oublie.}
\emph{Agr\'egation des similarit\'es: une solution oubli\'ee.}
RAIRO Oper. Res. 2002,36[1],101-108.
}
......@@ -51,7 +51,7 @@ matrix(c(1,1,1,1,1
pop(diss(data))
##pop from a dissimilarity matrix
## pop from a dissimilarity matrix
d <-2 * matrix(c(9, 8, 5, 7, 7, 2
, 8, 9, 2, 5, 1, 7
......
......@@ -41,7 +41,7 @@ A matrix
\references{
H. Caussinus, S. Hakam, A. Ruiz-Gazen
Projections rvlatrices contrles: groupements et structures
Projections r\'ev\'elatrices contr\^ol\'ees: groupements et structures
diverses.
2002, to appear in Rev. Statist. Appli.
}
......
......@@ -20,9 +20,9 @@
void R_distance(double *x, int *nr, int *nc, double *d, int *diag, int *method,int *nbprocess, int * ierr)
{
distance_T::distance<double>(x,nr,nc, d,diag,method,
nbprocess, ierr);
distance_T<double>::distance(x,nr,nc, d,diag,method,
nbprocess, ierr);
}
......
......@@ -48,14 +48,10 @@
#define MAX( A , B ) ( ( A ) > ( B ) ? ( A ) : ( B ) )
#define MIN( A , B ) ( ( A ) < ( B ) ? ( A ) : ( B ) )
#define __MINGW_H 1
namespace distance_T
{
/** \brief Distance euclidean (i.e. sqrt(sum of square) )
*/
template <class T> T R_euclidean(double *x, int nr, int nc, int i1, int i2,int * flag, void ** opt)
/** \brief Distance euclidean (i.e. sqrt(sum of square) )
*/
template<class T> T distance_T<T>::R_euclidean(double *x, int nr, int nc, int i1, int i2,int * flag, void ** opt)
{
T dev, dist;
int count, j;
......@@ -83,7 +79,7 @@ namespace distance_T
/** \brief Distance maximum (supremum norm)
*/
template <class T> T R_maximum(double *x, int nr, int nc, int i1, int i2, int * flag, void ** opt)
template<class T> T distance_T<T>::R_maximum(double *x, int nr, int nc, int i1, int i2, int * flag, void ** opt)
{
T dev, dist;
int count, j;
......@@ -111,7 +107,7 @@ namespace distance_T
/** \brief Distance manhattan
*/
template< class T> T R_manhattan(double *x, int nr, int nc, int i1, int i2, int * flag, void ** opt)
template<class T> T distance_T<T>::R_manhattan(double *x, int nr, int nc, int i1, int i2, int * flag, void ** opt)
{
T dist;
int count, j;
......@@ -137,7 +133,7 @@ namespace distance_T
/** \brief Distance canberra
*/
template <class T> T R_canberra(double *x, int nr, int nc, int i1, int i2, int * flag, void ** opt)
template<class T> T distance_T<T>::R_canberra(double *x, int nr, int nc, int i1, int i2, int * flag, void ** opt)
{
T dist, sum, diff;
int count, j;
......@@ -167,7 +163,7 @@ namespace distance_T
/** \brief Distance binary
*/
template <class T> T R_dist_binary(double *x, int nr, int nc, int i1, int i2,int * flag, void ** opt)
template<class T> T distance_T<T>::R_dist_binary(double *x, int nr, int nc, int i1, int i2,int * flag, void ** opt)
{
int total, count, dist;
int j;
......@@ -200,7 +196,7 @@ namespace distance_T
/** \brief Pearson / Pearson centered (correlation)
* \note Added by A. Lucas
*/
template<class T> T R_pearson(double *x, int nr, int nc, int i1, int i2,int * flag, void ** opt)
template<class T> T distance_T<T>::R_pearson(double *x, int nr, int nc, int i1, int i2,int * flag, void ** opt)
{
T num,sum1,sum2, dist;
int count,j;
......@@ -233,7 +229,7 @@ namespace distance_T
/** \brief Distance correlation (Uncentered Pearson)
* \note Added by A. Lucas
*/
template<class T> T R_correlation(double *x, int nr, int nc, int i1, int i2,int * flag, void ** opt)
template<class T> T distance_T<T>::R_correlation(double *x, int nr, int nc, int i1, int i2,int * flag, void ** opt)
{
T num,denum,sumx,sumy,sumxx,sumyy,sumxy;
int count,j;
......@@ -271,7 +267,7 @@ namespace distance_T
/** \brief Spearman distance (rank base metric)
* \note Added by A. Lucas
*/
template <class T> T R_spearman(double *x, int nr, int nc, int i1, int i2,int * flag, void ** opt)
template<class T> T distance_T<T>::R_spearman(double *x, int nr, int nc, int i1, int i2,int * flag, void ** opt)
{
int j;
double * data_tri;
......@@ -320,8 +316,6 @@ namespace distance_T
}
/**
* R_distance: compute parallelized distance. Function called direclty by R
* \brief compute distance and call function thread_dist
......@@ -335,16 +329,19 @@ namespace distance_T
* \param nbprocess: number of threads to create
* \param ierr error return; 1 good; 0 missing values
*/
template <class T > void distance(double *x, int *nr, int *nc, T *d, int *diag, int *method,int *nbprocess, int * ierr)
template<class T> void distance_T<T>::distance(double *x, int *nr,
int *nc, T *d, int *diag,
int *method,int *nbprocess,
int * ierr)
{
int dc;
short int * jobs;
int i;
void ** arguments;
dc = (*diag) ? 0 : 1; /* diag=1: we do the diagonal */
int i;
T_argument * arguments;
bool dc = (*diag) ? 0 : 1; /* diag=1: we do the diagonal */
/*
* Arguments sent to thread (adress):
* number of thread
......@@ -356,27 +353,23 @@ namespace distance_T
* *method
* *ierr
*/
/* numero_thread=0;*/
arguments = (void **) malloc (9 * (*nbprocess) * sizeof( void *));
jobs = (short int *) malloc ( *nbprocess * sizeof(short int));
arguments = (T_argument * ) malloc ((*nbprocess) * sizeof( T_argument ));
for(i=0; i< *nbprocess; i++){jobs[i]=i;}
//printf("nb processs %d\n",*nbprocess);
for(i=0; i< *nbprocess; ++i)
{
arguments[0 + 9*i] = jobs+i;
arguments[1 + 9*i] = nr ;
arguments[2 + 9*i] = nc ;
arguments[3 + 9*i] = &dc ;
arguments[4 + 9*i] = x ;
arguments[5 + 9*i] = d ;
arguments[6 + 9*i] = method ;
arguments[7 + 9*i] = nbprocess;
arguments[8 + 9*i] = ierr;
arguments[i].id =i;
arguments[i].x=x;
arguments[i].nr = nr;
arguments[i].nc = nc;
arguments[i].dc = dc;
arguments[i].d = d;
arguments[i].method = method;
arguments[i].nbprocess= *nbprocess;
arguments[i].ierr=ierr;
}
*ierr = 1; /* res = 1 => no missing values
res = 0 => missings values */
......@@ -387,7 +380,7 @@ namespace distance_T
for (i=0; i < *nbprocess ; i++)
{
pthread_create(th+i,0,thread_dist<T>,(void *)(arguments+(i*9)));
pthread_create(th+i,0,distance_T<T>::thread_dist,(void *)(arguments+i));
}
/* Attends la fin */
......@@ -400,13 +393,13 @@ namespace distance_T
#else
// p_thread not yet used on windows.
int nombre=1;
arguments[7 ] = &nombre;
thread_dist<T>((void *)arguments);
arguments[0].nbprocess = 1;
thread_dist((void *)arguments);
#endif
free( arguments );
free( jobs);
......@@ -417,11 +410,11 @@ namespace distance_T
/** thread_dist function that compute distance.
*
*/
template <class T> void* thread_dist(void* arguments)
template <class T> void* distance_T<T>::thread_dist(void* arguments_void)
{
int nbprocess,nr,nc,i,j,dc,ij;
void ** arguments2;
T_argument * arguments = static_cast<T_argument*>(arguments_void);
T * d;
double * x;
int * method;
......@@ -432,46 +425,43 @@ namespace distance_T
int * order_tri;
int * rank_tri;
T (*distfun)(double*, int, int, int, int, int *, void **) = NULL;
arguments2 = (void **) arguments;
short int no = * (short int *) arguments2[0];
nr = * (int *) arguments2[1];
nc = * (int *) arguments2[2];
dc = * (int *) arguments2[3];
x = (double *) arguments2[4];
d = (T *) arguments2[5];
method = (int *) arguments2[6];
nbprocess = * (int *) arguments2[7];
ierr = (int *) arguments2[8];
short int no = arguments[0].id;
nr = *arguments[0].nr;
nc = *arguments[0].nc;
dc = arguments[0].dc;
x = arguments[0].x;
d = arguments[0].d;
method = arguments[0].method;
nbprocess = arguments[0].nbprocess;
ierr = arguments[0].ierr;
switch(*method) {
case EUCLIDEAN:
distfun = R_euclidean<T>;
distfun = R_euclidean;
break;
case MAXIMUM:
distfun = R_maximum<T>;
distfun = R_maximum;
break;
case MANHATTAN:
distfun = R_manhattan<T>;
distfun = R_manhattan;
break;
case CANBERRA:
distfun = R_canberra<T>;
distfun = R_canberra;
break;
case BINARY:
distfun = R_dist_binary<T>;
distfun = R_dist_binary;
break;
case PEARSON:
distfun = R_pearson<T>;
distfun = R_pearson;
break;
case CORRELATION:
distfun = R_correlation<T>;
distfun = R_correlation;
break;
case SPEARMAN:
distfun = R_spearman<T>;
distfun = R_spearman;
opt = (void ** ) malloc ( 3 * sizeof(void*));
data_tri = (double * ) malloc (2* (nc) * sizeof(double));
order_tri = (int * ) malloc (2 * (nc) * sizeof(int));
......@@ -499,12 +489,13 @@ namespace distance_T
int debut = (int) floor( ((nr+1.)*nbprocess - sqrt( (nr+1.)*(nr+1.) * nbprocess * nbprocess - (nr+1.)*(nr+1.) * nbprocess * no ) )/nbprocess);
int fin = (int) floor(((nr+1.)*nbprocess - sqrt( (nr+1.)*(nr+1.) * nbprocess * nbprocess - (nr+1.)*(nr+1.) * nbprocess * (no+1.) ) )/nbprocess);
printf("Thread %d debut %d fin %d\n",no,debut,fin);
//printf("Thread %d debut %d fin %d\n",no,debut,fin);
// for(j = 0 ; j <= nr ; j++)
// here: the computation !
// for(j = 0 ; j <= nr ; j++)
for(j = debut ; j < fin ; j++)
{
ij = (2 * (nr-dc) - j +1) * (j) /2 ;
......@@ -527,7 +518,7 @@ namespace distance_T
}
}
......
......@@ -5,57 +5,78 @@
/* == 1,2,..., defined by order in the r function dist */
enum { EUCLIDEAN=1, MAXIMUM, MANHATTAN, CANBERRA, BINARY ,PEARSON, CORRELATION, SPEARMAN};
namespace distance_T
template<class T> class distance_T
{
template <class T> struct arguments
{
double i;
T j;
private:
struct T_argument
{
short int id;
double * x;
int * nr;
int * nc;
bool dc;
T * d;
int * method;
int nbprocess;
int * ierr;
};
// only static functions; no attributes
distance_T();
~distance_T();
public:
static void distance(double *x, int *nr, int *nc, T *d, int *diag,
int *method,int *nbprocess, int * ierr);
private:
static void* thread_dist(void* arguments);
/** \brief Distance euclidean (i.e. sqrt(sum of square) )
*/
template <class T> T R_euclidean(double * x, int nr, int nc, int i1, int i2,int * flag, void ** opt);
static T R_euclidean(double * x, int nr, int nc, int i1, int i2,int * flag, void ** opt);
/** \brief Distance maximum (supremum norm)
*/
template <class T> T R_maximum(double * x, int nr, int nc, int i1, int i2, int * flag, void ** opt);
static T R_maximum(double * x, int nr, int nc, int i1, int i2, int * flag, void ** opt);
/** \brief Distance manhattan
*/
template< class T> T R_manhattan(double * x, int nr, int nc, int i1, int i2, int * flag, void ** opt);
static T R_manhattan(double * x, int nr, int nc, int i1, int i2, int * flag, void ** opt);
/** \brief Distance canberra
*/
template <class T> T R_canberra(double * x, int nr, int nc, int i1, int i2, int * flag, void ** opt);
static T R_canberra(double * x, int nr, int nc, int i1, int i2, int * flag, void ** opt);
/** \brief Distance binary
*/
template <class T> T R_dist_binary(double * x, int nr, int nc, int i1, int i2,int * flag, void ** opt);
static T R_dist_binary(double * x, int nr, int nc, int i1, int i2,int * flag, void ** opt);
/** \brief Pearson / Pearson centered (correlation)
* \note Added by A. Lucas
*/
template<class T> T R_pearson(double * x, int nr, int nc, int i1, int i2,int * flag, void ** opt);
static T R_pearson(double * x, int nr, int nc, int i1, int i2,int * flag, void ** opt);
/** \brief Distance correlation (Uncentered Pearson)
* \note Added by A. Lucas
*/
template<class T> T R_correlation(double * x, int nr, int nc, int i1, int i2,int * flag, void ** opt);
static T R_correlation(double * x, int nr, int nc, int i1, int i2,int * flag, void ** opt);
/** \brief Spearman distance (rank base metric)
* \note Added by A. Lucas
*/
template <class T> T R_spearman(double * x, int nr, int nc, int i1, int i2,int * flag, void ** opt);
static T R_spearman(double * x, int nr, int nc, int i1, int i2,int * flag, void ** opt);
template <class T > void distance(double *x, int *nr, int *nc, T *d, int *diag, int *method,int *nbprocess, int * ierr);
template <class T> void* thread_dist(void* arguments);
}
};
#endif