Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Sylvain Jasson
amap
Commits
21e3d1e8
Commit
21e3d1e8
authored
Oct 01, 2007
by
Antoine Lucas
Browse files
add kendall distance
parent
ae1d7dea
Changes
13
Expand all
Hide whitespace changes
Inline
Side-by-side
R/Kmeans.R
View file @
21e3d1e8
...
...
@@ -23,7 +23,7 @@ function(x, centers, iter.max = 10, nstart = 1,
METHODS
<-
c
(
"euclidean"
,
"maximum"
,
"manhattan"
,
"canberra"
,
"binary"
,
"pearson"
,
"correlation"
)
"binary"
,
"pearson"
,
"correlation"
,
"spearman"
,
"kendall"
)
method
<-
pmatch
(
method
,
METHODS
)
if
(
is.na
(
method
))
stop
(
"invalid distance method"
)
...
...
R/dist.R
View file @
21e3d1e8
...
...
@@ -10,7 +10,7 @@ Dist <- function(x, method="euclidean", nbproc = 1, diag=FALSE, upper=FALSE)
method
<-
"euclidean"
METHODS
<-
c
(
"euclidean"
,
"maximum"
,
"manhattan"
,
"canberra"
,
"binary"
,
"pearson"
,
"correlation"
,
"spearman"
)
"binary"
,
"pearson"
,
"correlation"
,
"spearman"
,
"kendall"
)
method
<-
pmatch
(
method
,
METHODS
)
if
(
is.na
(
method
))
stop
(
"invalid distance method"
)
...
...
R/hcluster.R
View file @
21e3d1e8
...
...
@@ -20,7 +20,7 @@ hclusterpar <- hcluster <- function (x, method = "euclidean", diag = FALSE, uppe
if
(
!
is.na
(
pmatch
(
method
,
"euclidian"
)))
method
<-
"euclidean"
METHODS
<-
c
(
"euclidean"
,
"maximum"
,
"manhattan"
,
"canberra"
,
"binary"
,
"pearson"
,
"correlation"
,
"spearman"
)
"binary"
,
"pearson"
,
"correlation"
,
"spearman"
,
"kendall"
)
method
<-
pmatch
(
method
,
METHODS
)
if
(
is.na
(
method
))
stop
(
"invalid distance method"
)
...
...
man/Kmeans.Rd
View file @
21e3d1e8
...
...
@@ -28,8 +28,8 @@ Kmeans(x, centers, iter.max = 10, nstart = 1,
should be chosen?}
\item{method}{the distance measure to be used. This must be one of
\code{"euclidean"}, \code{"maximum"}, \code{"manhattan"},
\code{"canberra"}, \code{"binary"}, \code{"pearson"}
or
\code{"correlation"}.
\code{"canberra"}, \code{"binary"}, \code{"pearson"}
,
\code{"correlation"}
, \code{"spearman"} or \code{"kendall"}
.
Any unambiguous substring can be given.}
}
\details{
...
...
man/dist.Rd
View file @
21e3d1e8
...
...
@@ -13,7 +13,7 @@ Dist(x, method = "euclidean", nbproc = 1, diag = FALSE, upper = FALSE)
\item{method}{the distance measure to be used. This must be one of
\code{"euclidean"}, \code{"maximum"}, \code{"manhattan"},
\code{"canberra"}, \code{"binary"}, \code{"pearson"},
\code{"correlation"}
or
\code{"spearman"}.
\code{"correlation"}
,
\code{"spearman"}
or \code{"kendall"}
.
Any unambiguous substring can be given.}
\item{nbproc}{integer, Number of subprocess for parallelization}
\item{diag}{logical value indicating whether the diagonal of the
...
...
@@ -68,6 +68,12 @@ Dist(x, method = "euclidean", nbproc = 1, diag = FALSE, upper = FALSE)
\code{cor.test(x[i,],x[j,],method="spearman")$statistic}
}
\item{\code{kendall}:}{Compute a distance based on rank.
\eqn{\sum_{i,j} K_{i,j}(x,y)} with \eqn{K_{i,j}(x,y)}
is 0 if \eqn{x_i, x_j} in same order as \eqn{y_i,y_j},
1 if not.
}
}
Missing values are allowed, and are excluded from all computations
...
...
@@ -107,6 +113,10 @@ Dist(x, method = "euclidean", nbproc = 1, diag = FALSE, upper = FALSE)
\references{
Mardia, K. V., Kent, J. T. and Bibby, J. M. (1979)
\emph{Multivariate Analysis.} London: Academic Press.
Wikipedia
\url{http://en.wikipedia.org/wiki/Kendall_tau_distance}
}
\seealso{
\code{\link[cluster]{daisy}} in the \file{cluster} package with more
...
...
man/hcluster.Rd
View file @
21e3d1e8
...
...
@@ -19,7 +19,7 @@ hcluster(x, method = "euclidean", diag = FALSE, upper = FALSE,
\item{method}{the distance measure to be used. This must be one of
\code{"euclidean"}, \code{"maximum"}, \code{"manhattan"},
\code{"canberra"} \code{"binary"} \code{"pearson"},
\code{"correlation"}
or
\code{"spearman"}.
\code{"correlation"}
,
\code{"spearman"}
or \code{"kendall"}
.
Any unambiguous substring can be given.}
\item{diag}{logical value indicating whether the diagonal of the
distance matrix should be printed by \code{print.dist}.}
...
...
man/plot.acp.Rd
View file @
21e3d1e8
...
...
@@ -8,7 +8,7 @@
\description{Graphics for Principal component Analysis}
\usage{
plot.acp(x,i=1,j=2,text=TRUE,label='Composants',col='darkblue',
main='Individuals PCA',variables=TRUE,...)
main='Individuals PCA',variables=TRUE,
labels=NULL,
...)
biplot.acp(x,i=1,j=2,label='Composants',col='darkblue',length=0.1,
main='Variables PCA',circle=TRUE,...)
plot2(x,pourcent=FALSE,eigen=TRUE,label='Comp.',col='lightgrey',
...
...
@@ -26,6 +26,7 @@ values}
\item{eigen}{a logical value indicating whether we use eigen values or
standard deviation}
\item{label}{label for X and Y axis}
\item{labels}{labels naming individuals}
\item{col}{Color of plot}
\item{main}{Title of graphic}
\item{ylab}{Y label}
...
...
src/distance_T.cpp
View file @
21e3d1e8
This diff is collapsed.
Click to expand it.
src/distance_T.h
View file @
21e3d1e8
...
...
@@ -2,12 +2,27 @@
#ifndef _AMAP_DISTANCE_TEMPLATE
#define _AMAP_DISTANCE_TEMPLATE 1
/* == 1,2,..., defined by order in the r function dist */
enum
{
EUCLIDEAN
=
1
,
MAXIMUM
,
MANHATTAN
,
CANBERRA
,
BINARY
,
PEARSON
,
CORRELATION
,
SPEARMAN
};
template
<
class
T
>
class
distance_T
{
public:
/* == 1,2,..., defined by order in the r function dist */
enum
{
EUCLIDEAN
=
1
,
MAXIMUM
,
MANHATTAN
,
CANBERRA
,
BINARY
,
PEARSON
,
CORRELATION
,
SPEARMAN
,
KENDALL
};
struct
T_tri
{
double
*
data_tri_x
;
int
*
order_tri_x
;
int
*
rank_tri_x
;
double
*
data_tri_y
;
int
*
order_tri_y
;
int
*
rank_tri_y
;
};
private:
struct
T_argument
...
...
@@ -36,44 +51,88 @@ template<class T> class distance_T
static
void
distance
(
double
*
x
,
int
*
nr
,
int
*
nc
,
T
*
d
,
int
*
diag
,
int
*
method
,
int
*
nbprocess
,
int
*
ierr
);
/** \brief R_distance_kms: compute distance between individual i1 and
* centroid i2
*
* compute distance and call one of function R_euclidean or R_...
* This function is called by kmeans_Lloyd2
*
* \param x input matrix (individuals)
* \param y input matrix (centroids)
* \param nr1,nr2,nc number of row (nr1:x, nr2:y) and columns
* nr individuals with nc values.
* \param i1, i2: indice of individuals (individual i1, centroid i2)
* \param method 1, 2,... method used
* \param ierr for NA 0 if no value can be comuted due to NA
* \param opt optional parameter required for spearman
*/
static
T
distance_kms
(
double
*
x
,
double
*
y
,
int
nr1
,
int
nr2
,
int
nc
,
int
i1
,
int
i2
,
int
*
method
,
int
*
ierr
,
T_tri
&
opt
);
private:
static
void
*
thread_dist
(
void
*
arguments
);
/** \brief Distance euclidean (i.e. sqrt(sum of square) )
*/
static
T
R_euclidean
(
double
*
x
,
int
nr
,
int
nc
,
int
i1
,
int
i2
,
int
*
flag
,
void
**
opt
);
static
T
R_euclidean
(
double
*
x
,
double
*
y
,
int
nr_x
,
int
nr_y
,
int
nc
,
int
i1
,
int
i2
,
int
*
flag
,
T_tri
&
opt
);
/** \brief Distance maximum (supremum norm)
*/
static
T
R_maximum
(
double
*
x
,
int
nr
,
int
nc
,
int
i1
,
int
i2
,
int
*
flag
,
void
**
opt
);
static
T
R_maximum
(
double
*
x
,
double
*
y
,
int
nr_x
,
int
nr_y
,
int
nc
,
int
i1
,
int
i2
,
int
*
flag
,
T_tri
&
opt
);
/** \brief Distance manhattan
*/
static
T
R_manhattan
(
double
*
x
,
int
nr
,
int
nc
,
int
i1
,
int
i2
,
int
*
flag
,
void
**
opt
);
static
T
R_manhattan
(
double
*
x
,
double
*
y
,
int
nr_x
,
int
nr_y
,
int
nc
,
int
i1
,
int
i2
,
int
*
flag
,
T_tri
&
opt
);
/** \brief Distance canberra
*/
static
T
R_canberra
(
double
*
x
,
int
nr
,
int
nc
,
int
i1
,
int
i2
,
int
*
flag
,
void
**
opt
);
static
T
R_canberra
(
double
*
x
,
double
*
y
,
int
nr_x
,
int
nr_y
,
int
nc
,
int
i1
,
int
i2
,
int
*
flag
,
T_tri
&
opt
);
/** \brief Distance binary
*/
static
T
R_dist_binary
(
double
*
x
,
int
nr
,
int
nc
,
int
i1
,
int
i2
,
int
*
flag
,
void
**
opt
);
static
T
R_dist_binary
(
double
*
x
,
double
*
y
,
int
nr_x
,
int
nr_y
,
int
nc
,
int
i1
,
int
i2
,
int
*
flag
,
T_tri
&
opt
);
/** \brief Pearson / Pearson centered (correlation)
* \note Added by A. Lucas
*/
static
T
R_pearson
(
double
*
x
,
int
nr
,
int
nc
,
int
i1
,
int
i2
,
int
*
flag
,
void
**
opt
);
static
T
R_pearson
(
double
*
x
,
double
*
y
,
int
nr_x
,
int
nr_y
,
int
nc
,
int
i1
,
int
i2
,
int
*
flag
,
T_tri
&
opt
);
/** \brief Distance correlation (Uncentered Pearson)
* \note Added by A. Lucas
*/
static
T
R_correlation
(
double
*
x
,
int
nr
,
int
nc
,
int
i1
,
int
i2
,
int
*
flag
,
void
**
opt
);
static
T
R_correlation
(
double
*
x
,
double
*
y
,
int
nr_x
,
int
nr_y
,
int
nc
,
int
i1
,
int
i2
,
int
*
flag
,
T_tri
&
opt
);
/** \brief Spearman distance (rank base metric)
* \note Added by A. Lucas
*/
static
T
R_spearman
(
double
*
x
,
int
nr
,
int
nc
,
int
i1
,
int
i2
,
int
*
flag
,
void
**
opt
);
static
T
R_spearman
(
double
*
x
,
double
*
y
,
int
nr_x
,
int
nr_y
,
int
nc
,
int
i1
,
int
i2
,
int
*
flag
,
T_tri
&
opt
);
/** \brief Kendall distance (rank base metric)
* \note Added by A. Lucas
*/
static
T
R_kendall
(
double
*
x
,
double
*
y
,
int
nr_x
,
int
nr_y
,
int
nc
,
int
i1
,
int
i2
,
int
*
flag
,
T_tri
&
opt
);
};
...
...
src/distance_kms.c
deleted
100644 → 0
View file @
ae1d7dea
/*! \file distance_kms.c
* \brief all functions requiered for C function kmeans_Lloyd2.
*
* \date Created: 2005
* \date Last modified: Time-stamp: <2005-10-09 13:38:54 antoine>
*
* \author Adapted from distance.c (R core members) by Antoine Lucas
*
* R : A Computer Language for Statistical Data Analysis
* Copyright (C) 1995, 1996 Robert Gentleman and Ross Ihaka
* Copyright (C) 1998, 2001 Robert Gentleman, Ross Ihaka and the
* R Development Core Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifdef HAVE_CONFIG_H
#include
<config.h>
#endif
#include
<float.h>
#include
<R_ext/Arith.h>
#include
<R_ext/Error.h>
#include
"mva.h"
/** \brief Distance euclidean (i.e. sqrt(sum of square) )
*/
double
R_euclidean_kms
(
double
*
x
,
double
*
y
,
int
nr1
,
int
nr2
,
int
nc
,
int
i1
,
int
i2
)
{
double
dev
,
dist
;
int
count
,
j
;
count
=
0
;
dist
=
0
;
for
(
j
=
0
;
j
<
nc
;
j
++
)
{
if
(
R_FINITE
(
x
[
i1
])
&&
R_FINITE
(
y
[
i2
]))
{
dev
=
(
x
[
i1
]
-
y
[
i2
]);
dist
+=
dev
*
dev
;
count
++
;
}
i1
+=
nr1
;
i2
+=
nr2
;
}
if
(
count
==
0
)
return
NA_REAL
;
if
(
count
!=
nc
)
dist
/=
((
double
)
count
/
nc
);
return
sqrt
(
dist
);
}
/** \brief Distance maximum (supremum norm)
*/
double
R_maximum_kms
(
double
*
x
,
double
*
y
,
int
nr1
,
int
nr2
,
int
nc
,
int
i1
,
int
i2
)
{
double
dev
,
dist
;
int
count
,
j
;
count
=
0
;
dist
=
-
DBL_MAX
;
for
(
j
=
0
;
j
<
nc
;
j
++
)
{
if
(
R_FINITE
(
x
[
i1
])
&&
R_FINITE
(
y
[
i2
]))
{
dev
=
fabs
(
x
[
i1
]
-
y
[
i2
]);
if
(
dev
>
dist
)
dist
=
dev
;
count
++
;
}
i1
+=
nr1
;
i2
+=
nr2
;
}
if
(
count
==
0
)
return
NA_REAL
;
return
dist
;
}
/** \brief Distance manhattan
*/
double
R_manhattan_kms
(
double
*
x
,
double
*
y
,
int
nr1
,
int
nr2
,
int
nc
,
int
i1
,
int
i2
)
{
double
dist
;
int
count
,
j
;
count
=
0
;
dist
=
0
;
for
(
j
=
0
;
j
<
nc
;
j
++
)
{
if
(
R_FINITE
(
x
[
i1
])
&&
R_FINITE
(
y
[
i2
]))
{
dist
+=
fabs
(
x
[
i1
]
-
y
[
i2
]);
count
++
;
}
i1
+=
nr1
;
i2
+=
nr2
;
}
if
(
count
==
0
)
return
NA_REAL
;
if
(
count
!=
nc
)
dist
/=
((
double
)
count
/
nc
);
return
dist
;
}
/** \brief Distance canberra
*/
double
R_canberra_kms
(
double
*
x
,
double
*
y
,
int
nr1
,
int
nr2
,
int
nc
,
int
i1
,
int
i2
)
{
double
dist
,
sum
,
diff
;
int
count
,
j
;
count
=
0
;
dist
=
0
;
for
(
j
=
0
;
j
<
nc
;
j
++
)
{
if
(
R_FINITE
(
x
[
i1
])
&&
R_FINITE
(
y
[
i2
]))
{
sum
=
fabs
(
x
[
i1
]
+
y
[
i2
]);
diff
=
fabs
(
x
[
i1
]
-
y
[
i2
]);
if
(
sum
>
DBL_MIN
||
diff
>
DBL_MIN
)
{
dist
+=
diff
/
sum
;
count
++
;
}
}
i1
+=
nr1
;
i2
+=
nr2
;
}
if
(
count
==
0
)
return
NA_REAL
;
if
(
count
!=
nc
)
dist
/=
((
double
)
count
/
nc
);
return
dist
;
}
/** \brief Distance binary
*/
double
R_dist_binary_kms
(
double
*
x
,
double
*
y
,
int
nr1
,
int
nr2
,
int
nc
,
int
i1
,
int
i2
)
{
int
total
,
count
,
dist
;
int
j
;
total
=
0
;
count
=
0
;
dist
=
0
;
for
(
j
=
0
;
j
<
nc
;
j
++
)
{
if
(
R_FINITE
(
x
[
i1
])
&&
R_FINITE
(
y
[
i2
]))
{
if
(
x
[
i1
]
||
y
[
i2
]){
count
++
;
if
(
!
(
x
[
i1
]
&&
y
[
i2
])
)
dist
++
;
}
total
++
;
}
i1
+=
nr1
;
i2
+=
nr2
;
}
if
(
total
==
0
)
return
NA_REAL
;
if
(
count
==
0
)
return
0
;
return
(
double
)
dist
/
count
;
}
/** \brief Pearson / Pearson centered (correlation)
* \note Added by A. Lucas
*/
double
R_pearson_kms
(
double
*
x
,
double
*
y
,
int
nr1
,
int
nr2
,
int
nc
,
int
i1
,
int
i2
)
{
double
num
,
sum1
,
sum2
,
dist
;
int
count
,
j
;
count
=
0
;
num
=
0
;
sum1
=
0
;
sum2
=
0
;
for
(
j
=
0
;
j
<
nc
;
j
++
)
{
if
(
R_FINITE
(
x
[
i1
])
&&
R_FINITE
(
y
[
i2
]))
{
num
+=
(
x
[
i1
]
*
y
[
i2
]);
sum1
+=
(
x
[
i1
]
*
x
[
i1
]);
sum2
+=
(
y
[
i2
]
*
y
[
i2
]);
count
++
;
}
i1
+=
nr1
;
i2
+=
nr2
;
}
if
(
count
==
0
)
return
NA_REAL
;
dist
=
1
-
(
num
/
sqrt
(
sum1
*
sum2
)
);
return
dist
;
}
/** \brief Distance correlation (Uncentered Pearson)
* \note Added by A. Lucas
*/
double
R_correlation_kms
(
double
*
x
,
double
*
y
,
int
nr1
,
int
nr2
,
int
nc
,
int
i1
,
int
i2
)
{
double
num
,
denum
,
sumx
,
sumy
,
sumxx
,
sumyy
,
sumxy
;
int
count
,
j
;
count
=
0
;
sumx
=
0
;
sumy
=
0
;
sumxx
=
0
;
sumyy
=
0
;
sumxy
=
0
;
for
(
j
=
0
;
j
<
nc
;
j
++
)
{
if
(
R_FINITE
(
x
[
i1
])
&&
R_FINITE
(
y
[
i2
]))
{
sumxy
+=
(
x
[
i1
]
*
y
[
i2
]);
sumx
+=
x
[
i1
];
sumy
+=
y
[
i2
];
sumxx
+=
x
[
i1
]
*
x
[
i1
];
sumyy
+=
y
[
i2
]
*
y
[
i2
];
count
++
;
}
i1
+=
nr1
;
i2
+=
nr2
;
}
if
(
count
==
0
)
return
NA_REAL
;
num
=
sumxy
-
(
sumx
*
sumy
/
count
);
denum
=
sqrt
(
(
sumxx
-
(
sumx
*
sumx
/
count
)
)
*
(
sumyy
-
(
sumy
*
sumy
/
count
)
)
);
return
1
-
(
num
/
denum
);
}
enum
{
EUCLIDEAN
=
1
,
MAXIMUM
,
MANHATTAN
,
CANBERRA
,
BINARY
,
PEARSON
,
CORRELATION
};
/* == 1,2,..., defined by order in the R function dist */
/**
* R_distance_kms: compute distance between individual i1 and
* centroid i2
* \brief compute distance and call one of function R_euclidean or R_...
* \brief This function is called by kmeans_Lloyd2
* \param x input matrix (individuals)
* \param y input matrix (centroids)
* \param nr1,nr2,nc number of row (nr1:x, nr2:y) and columns
* nr individuals with nc values.
* \param i1, i2: indice of individuals (individual i1, centroid i2)
* \param method 1, 2,... method used
*/
double
R_distance_kms
(
double
*
x
,
double
*
y
,
int
nr1
,
int
nr2
,
int
nc
,
int
i1
,
int
i2
,
int
*
method
)
{
/*
* compute distance x[i1,*] - y[i2,*]
* x matrix n x p
* y matrix m x p
* nr1 = n; nr2 = m; nc =p
*/
double
res
;
double
(
*
distfun
)(
double
*
,
double
*
,
int
,
int
,
int
,
int
,
int
)
=
NULL
;
switch
(
*
method
)
{
case
EUCLIDEAN
:
distfun
=
R_euclidean_kms
;
break
;
case
MAXIMUM
:
distfun
=
R_maximum_kms
;
break
;
case
MANHATTAN
:
distfun
=
R_manhattan_kms
;
break
;
case
CANBERRA
:
distfun
=
R_canberra_kms
;
break
;
case
BINARY
:
distfun
=
R_dist_binary_kms
;
break
;
case
PEARSON
:
distfun
=
R_pearson_kms
;
break
;
case
CORRELATION
:
distfun
=
R_correlation_kms
;
break
;
default:
error
(
"distance(): invalid distance"
);
}
res
=
distfun
(
x
,
y
,
nr1
,
nr2
,
nc
,
i1
,
i2
);
return
(
res
);
}
src/hclust.cpp
View file @
21e3d1e8
...
...
@@ -44,7 +44,6 @@
#include
<stdlib.h>
#include
<math.h>
#include
"mva.h"
#include
"hclust.h"
#include
"distance.h"
#include
"hclust_T.h"
...
...
src/hclust_T.cpp
View file @
21e3d1e8
...
...
@@ -5,6 +5,7 @@
#include
"hclust_T.h"
#include
"distance_T.h"
#include
"hclust.h"
#include
<stdio.h>
namespace
hclust_T
...
...
src/pop.f
View file @
21e3d1e8
...
...
@@ -300,7 +300,7 @@ C
C
DOUBLE PRECISION
COUTS
(
N
*
N
)
,
,
BORNTH
,
Z0
,
Z
,
DELTAZ
,
ZSAVE
,
ZNEW
,
,
FLOAT
,
ABS
,
ABS
C
INTEGER
FMBVR
,
TRIABS
,
ALLSOL
C
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment