Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Sylvain Jasson
amap
Commits
3e46b8d9
Commit
3e46b8d9
authored
Nov 13, 2008
by
Antoine Lucas
Browse files
new modif
parent
68568b65
Changes
9
Hide whitespace changes
Inline
Side-by-side
R/dist.R
View file @
3e46b8d9
Dist
<-
function
(
x
,
method
=
"euclidean"
,
nbproc
=
1
,
diag
=
FALSE
,
upper
=
FALSE
)
Dist
<-
function
(
x
,
method
=
"euclidean"
,
nbproc
=
2
,
diag
=
FALSE
,
upper
=
FALSE
)
{
if
(
class
(
x
)
==
"exprSet"
)
...
...
man/dist.Rd
View file @
3e46b8d9
\name{Dist}
\title{Distance Matrix Computation}
\usage{
Dist(x, method = "euclidean", nbproc =
1
, diag = FALSE, upper = FALSE)
Dist(x, method = "euclidean", nbproc =
2
, diag = FALSE, upper = FALSE)
}
\alias{Dist}
...
...
@@ -118,6 +118,8 @@ Dist(x, method = "euclidean", nbproc = 1, diag = FALSE, upper = FALSE)
\url{http://en.wikipedia.org/wiki/Kendall_tau_distance}
}
\note{Multi-threading (parallelisation) is disabled on Windows.}
\seealso{
\code{\link[cluster]{daisy}} in the \file{cluster} package with more
possibilities in the case of \emph{mixed} (continuous / categorical)
...
...
man/hcluster.Rd
View file @
3e46b8d9
...
...
@@ -31,7 +31,8 @@ hcluster(x, method = "euclidean", diag = FALSE, upper = FALSE,
\code{"average"}, \code{"mcquitty"}, \code{"median"} or
\code{"centroid"}.}
\item{members}{\code{NULL} or a vector with length size of \code{d}.}
\item{nbproc}{integer, number of subprocess for parallelization}
\item{nbproc}{integer, number of subprocesses for parallelization [Linux
& Mac only]}
\item{doubleprecision}{True: use of double precision for distance
matrix computation; False: use simple precision}
...
...
@@ -87,6 +88,9 @@ hcluster(x, method = "euclidean", diag = FALSE, upper = FALSE,
For more details, see documentation of \code{hclust} and \code{dist}.
}
\note{Multi-threading (parallelisation) is disabled on Windows.}
\author{
The \code{hcluster} function is based on C code adapted from Cran
Fortran routine
...
...
src/distance_T.cpp
View file @
3e46b8d9
...
...
@@ -2,7 +2,7 @@
* \brief all functions required for R dist function and C hcluster function.
*
* \date Created: probably in 1995
* \date Last modified: Time-stamp: <200
5
-10-0
9 13:12:06
antoine>
* \date Last modified: Time-stamp: <200
7
-10-0
3 20:05:41
antoine>
*
* \author R core members, and lately: Antoine Lucas
*
...
...
@@ -393,9 +393,38 @@ template<class T> T distance_T<T>::R_correlation(double * x, double * y , int n
return
1
-
(
num
/
denum
);
}
/** \brief Spearman distance (rank base metric)
* \note Added by A. Lucas
*/
// ---------------------------------------------------------
// Distance Spearman
//
// Spearman distance between 2 vectors a,b is
// d = sum_i (rank(a_i) - rank(b_i) )^2
//
// If one NA found: return NA
//
// This function computes the distance between 2 vectors x[i1,] & y[i2,].
// x and y are matrices; only row i1 of x and row i2 of y are used.
// The number of columns (nc) is the same in x and y;
// the number of rows can differ (nr_x, nr_y).
//
// Flag will be set to 0 if NA value computed in distance
//
// When called by function distance or hclust, x and y are the same; it computes
// the distance between vectors x[i1,] and x[i2,]
//
// \param x matrix of size nr_x * nc; line i1 is of interest
// \param y matrix of size nr_y * nc; line i2 is of interest
// \param nr_x number of row in matrix x
// \param nr_y number of row in matrix y
// \param nc number of column in matrix x or y
// \param i1 row chosen in matrix x
// \param i2 row chosen in matrix y
// \param flag set to 0 if NA value computed in distance
// \param opt: a set of 6 vectors of size nc, allocated but uninitialised.
// aim of this parameter is to avoid several vector allocation
//
// Return: distance value
//
// ---------------------------------------------------------
template
<
class
T
>
T
distance_T
<
T
>::
R_spearman
(
double
*
x
,
double
*
y
,
int
nr_x
,
int
nr_y
,
int
nc
,
int
i1
,
int
i2
,
int
*
flag
,
T_tri
&
opt
)
...
...
@@ -503,11 +532,41 @@ template<class T> T distance_T<T>::R_kendall_corr(double * x, double * y , int
}
*/
/** \brief Kendall distance (rank base metric)
*
*
* \note Added by A. Lucas
*/
// ---------------------------------------------------------
// Distance Kendall
//
// Kendall distance between 2 vectors a,b is
// d = sum_i Kij (x,y)
//
// With Kij(x,y) is 0 if xi,xj in same order as yi,yj;
// 1 if not
//
// If one NA found: return NA
//
// This function computes the distance between 2 vectors x[i1,] & y[i2,].
// x and y are matrices; only row i1 of x and row i2 of y are used.
// The number of columns (nc) is the same in x and y;
// the number of rows can differ (nr_x, nr_y).
//
// Flag will be set to 0 if NA value computed in distance
//
// When called by function distance or hclust, x and y are the same; it computes
// the distance between vectors x[i1,] and x[i2,]
//
// \param x matrix of size nr_x * nc; line i1 is of interest
// \param y matrix of size nr_y * nc; line i2 is of interest
// \param nr_x number of row in matrix x
// \param nr_y number of row in matrix y
// \param nc number of column in matrix x or y
// \param i1 row chosen in matrix x
// \param i2 row chosen in matrix y
// \param flag set to 0 if NA value computed in distance
// \param opt: a set of 6 vectors of size nc, allocated but uninitialised.
// aim of this parameter is to avoid several vector allocation
//
// Return: distance value
//
// ---------------------------------------------------------
template
<
class
T
>
T
distance_T
<
T
>::
R_kendall
(
double
*
x
,
double
*
y
,
int
nr_x
,
int
nr_y
,
int
nc
,
int
i1
,
int
i2
,
int
*
flag
,
T_tri
&
opt
)
...
...
@@ -562,19 +621,21 @@ template<class T> T distance_T<T>::R_kendall(double * x, double * y , int nr_x,
}
/**
* R_distance: compute parallelized distance. Function called direclty by R
* \brief compute distance and call function thread_dist
* that call one of function R_euclidean or R_...
* \param x input matrix
* \param nr,nc number of row and columns
* nr individuals with nc values.
* \param d distance half matrix.
* \param diag if we compute diagonal of dist matrix (usualy: no).
* \param method 1, 2,... method used
* \param nbprocess: number of threads to create
* \param ierr error return; 1 good; 0 missing values
*/
// ---------------------------------------------------------
//
// R_distance: compute parallelized distance. Function called directly by R
// \brief compute distance and call function thread_dist
// that call one of function R_euclidean or R_...
// \param x input matrix
// \param nr,nc number of row and columns
// nr individuals with nc values.
// \param d distance half matrix.
// \param diag if we compute diagonal of dist matrix (usually: no).
// \param method 1, 2,... method used
// \param nbprocess: number of threads to create
// \param ierr error return; 1 good; 0 missing values
//
// ---------------------------------------------------------
template
<
class
T
>
void
distance_T
<
T
>::
distance
(
double
*
x
,
int
*
nr
,
int
*
nc
,
T
*
d
,
int
*
diag
,
int
*
method
,
int
*
nbprocess
,
...
...
@@ -777,24 +838,24 @@ template <class T> void* distance_T<T>::thread_dist(void* arguments_void)
/
**
* R_distance_kms: compute distance between individual i1 and
* centroid i2
* \brief compute distance and call one of function R_euclidean or R_...
* \brief This function is called by kmeans_Lloyd2
*
\param
x
input matrix (
individual
s)
*
\param
y input matrix (centroids)
* \param nr1,nr2,nc number of row (nr1:x, nr2:y) and columns
* nr individuals with nc values.
*
\param
i1, i2: indice of individuals (individual i1, centroid i2)
*
\param
method 1, 2,... method used
*
\param
ierr for NA 0 if no value can be comuted due to NA
* \param opt optional parameter send to spearman dist.
*/
// ---------------------------------------------------------
//
// R_distance_kms: compute distance between individual i1 and
// centroid i2
/
/
// compute distance and call one of function R_euclidean or R_...
// This function is called by kmeans_Lloyd2
//
// \param x input matrix (individuals)
//
\param
y
input matrix (
centroid
s)
//
\param
nr1,nr2,nc number of row (nr1:x, nr2:y) and columns
// nr individuals with nc values.
// \param i1, i2: indice of individuals (individual i1, centroid i2)
//
\param
method 1, 2,... method used
//
\param
ierr for NA 0 if no value can be computed due to NA
//
\param
opt optional parameter send to spearman dist.
//
// ---------------------------------------------------------
template
<
class
T
>
T
distance_T
<
T
>::
distance_kms
(
double
*
x
,
double
*
y
,
int
nr1
,
int
nr2
,
int
nc
,
int
i1
,
int
i2
,
int
*
method
,
int
*
ierr
,
T_tri
&
opt
)
{
...
...
@@ -809,7 +870,7 @@ template <class T> T distance_T<T>::distance_kms(double *x,double *y, int nr1,in
T
(
*
distfun
)(
double
*
,
double
*
,
int
,
int
,
int
,
int
,
int
,
int
*
,
T_tri
&
)
=
NULL
;
// choice of distance
switch
(
*
method
)
{
case
EUCLIDEAN
:
distfun
=
R_euclidean
;
...
...
@@ -842,7 +903,8 @@ template <class T> T distance_T<T>::distance_kms(double *x,double *y, int nr1,in
default:
error
(
"distance(): invalid distance"
);
}
// here: distance computation
res
=
distfun
(
x
,
y
,
nr1
,
nr2
,
nc
,
i1
,
i2
,
ierr
,
opt
);
return
(
res
);
}
src/distance_T.h
View file @
3e46b8d9
...
...
@@ -50,6 +50,20 @@ template<class T> class distance_T
public:
/** \brief R_distance compute parallelized distance.
*
* compute distance and call function thread_dist
* that call one of function R_euclidean or R_...
*
* \param x input matrix
* \param nr,nc number of row and columns
* nr individuals with nc values.
* \param d distance half matrix.
* \param diag if we compute diagonal of dist matrix (usually: no).
* \param method 1, 2,... method used (correspond to the enum)
* \param nbprocess: number of threads to create
* \param ierr error return; 1 good; 0 missing values
*/
static
void
distance
(
double
*
x
,
int
*
nr
,
int
*
nc
,
T
*
d
,
int
*
diag
,
int
*
method
,
int
*
nbprocess
,
int
*
ierr
);
...
...
@@ -65,7 +79,7 @@ template<class T> class distance_T
* \param nr1,nr2,nc number of row (nr1:x, nr2:y) and columns
* nr individuals with nc values.
* \param i1, i2: indice of individuals (individual i1, centroid i2)
* \param method 1, 2,... method used
* \param method 1, 2,... method used
(correspond to the enum)
* \param ierr for NA 0 if no value can be computed due to NA
* \param opt optional parameter required for spearman
*/
...
...
src/hclust.cpp
View file @
3e46b8d9
...
...
@@ -3,7 +3,7 @@
* \brief Hierarchical Clustering.
*
* \date Created : 14/11/02
* \date Last Modified : Time-stamp: <200
5
-10-0
9 14:4
3:1
4
antoine>
* \date Last Modified : Time-stamp: <200
7
-10-0
3 20:3
3:1
1
antoine>
*
* \author F. Murtagh, ESA/ESO/STECF, Garching, February 1986.
...
...
@@ -122,20 +122,6 @@ void hcluster(double *x, int *nr, int *nc, int *diag, int *method, int *iopt ,in
/** \brief Position of the pair (i, j) in the packed distance vector.
 *
 * The upper half of the symmetric n x n distance matrix is kept as a single
 * vector; this maps row i and column j of that matrix onto the vector.
 * Intended for i < j; for example, (0, 1) maps to position 0.
 *
 * \param n number of individuals (distance matrix is n x n)
 * \param i row index in the matrix
 * \param j column index in the matrix
 * \return position of the (i, j) distance in the vector
 */
int ioffst(int n, int i, int j)
{
  const int row_base = i * n;
  const int triangle = (i + 1) * (i + 2) / 2;
  return j + row_base - triangle;
}
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* */
...
...
@@ -162,18 +148,7 @@ int ioffst(int n,int i,int j)
/* */
/*-------------------------------------------------------------*/
/** Hierachical clustering subroutine
* \brief compute hierachical clustering from a distance matrix
* This routine is called by hclust
* \param n number of individuals
* \param ia, ib result (merge)
* \param iia, iib result (merge)
* \param iorder result (order)
*
* \note this is an adaptation of the fortran function designed from the
* R core team.
*/
void
hcass2
(
int
*
n
,
int
*
ia
,
int
*
ib
,
int
*
iorder
,
int
*
iia
,
int
*
iib
)
void
hierclust
::
hcass2
(
int
*
n
,
int
*
ia
,
int
*
ib
,
int
*
iorder
,
int
*
iia
,
int
*
iib
)
{
int
i
,
j
,
k
,
k1
,
k2
,
loc
;
...
...
src/hclust.h
View file @
3e46b8d9
...
...
@@ -6,8 +6,20 @@ extern "C"
void
hcluster
(
double
*
x
,
int
*
nr
,
int
*
nc
,
int
*
diag
,
int
*
method
,
int
*
iopt
,
int
*
ia
,
int
*
ib
,
int
*
iorder
,
double
*
crit
,
double
*
membr
,
int
*
nbprocess
,
int
*
precision
,
int
*
result
);
int
ioffst
(
int
n
,
int
i
,
int
j
);
};
namespace
hierclust
{
/** Hierarchical clustering subroutine
* \brief compute hierarchical clustering from a distance matrix
* This routine is called by hclust
* \param n number of individuals
* \param ia, ib result (merge)
* \param iia, iib result (merge)
* \param iorder result (order)
*
* \note this is an adaptation of the fortran function designed from the
* R core team.
*/
void
hcass2
(
int
*
n
,
int
*
ia
,
int
*
ib
,
int
*
iorder
,
int
*
iia
,
int
*
iib
);
}
}
;
src/hclust_T.cpp
View file @
3e46b8d9
...
...
@@ -296,7 +296,7 @@ namespace hclust_T
hcass2
(
n
,
ia
,
ib
,
iorder
,
iia
,
iib
);
hierclust
::
hcass2
(
n
,
ia
,
ib
,
iorder
,
iia
,
iib
);
/*
...
...
src/hclust_T.h
View file @
3e46b8d9
...
...
@@ -17,6 +17,19 @@ namespace hclust_T
double
*
membr
,
T
*
diss
,
int
*
result
);
/** \brief Position of the pair (i, j) in the packed distance vector.
 *
 * The upper half of the symmetric n x n distance matrix is kept as a single
 * vector, so the distance between individuals i and j is stored at position
 * ioffst(n, i, j). For example, (0, 1) maps to position 0.
 *
 * \param n number of individuals (distance matrix is n x n)
 * \param i row index in the matrix
 * \param j column index in the matrix
 * \return position of the (i, j) distance in the vector
 */
inline int ioffst(int n, int i, int j)
{
  const int row_base = i * n;
  const int triangle = (i + 1) * (i + 2) / 2;
  return j + row_base - triangle;
}
}
#endif
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment