Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Sylvain Jasson
amap
Commits
7b439bb3
Commit
7b439bb3
authored
Nov 13, 2008
by
Antoine Lucas
Browse files
add
parent
3e46b8d9
Changes
2
Hide whitespace changes
Inline
Side-by-side
src/kmeans.cpp
0 → 100644
View file @
7b439bb3
/*! \file : kmeans.c
*
*
* \brief K-means clustering
*
* \date Created : before 2005
* \date Last Modified : Time-stamp: <2005-10-09 14:06:38 antoine>
*
* \author R core team. Modified by A. Lucas for distance choice.
*
* R : A Computer Language for Statistical Data Analysis
* Copyright (C) 2004 The R Development Core Team.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include
<R.h>
/*#include "modreg.h" */
/* for declarations for registration */
#include
"kmeans.h"
#include
"distance_T.h"
/** K-means clustering using Lloyd algorithm.
* \brief compute k-nearest centroid of our dataset.
* \param x matrix of size nxp: input data
* \param pn nb of individual (pn=n)
* \param pp number of observation by individual (pp=p)
* \param cen matrix of size k*p: centroids
* \param pk number of centroids (k)
* \param cl vector of flag of size n
* \param pmaxiter integer: maximum iteration
* \param nc vector of size k: number of individuals in cluster k.
* \param wss vector of size k: sum of square in each cluster.
* \param method: which method to use.
*/
void
kmeans_Lloyd2
(
double
*
x
,
int
*
pn
,
int
*
pp
,
double
*
cen
,
int
*
pk
,
int
*
cl
,
int
*
pmaxiter
,
int
*
nc
,
double
*
wss
,
int
*
method
)
{
/* n: nb of individuals
* k: nb of clusters
* p: number of abservations by individuals
* x: matrix of size nxp
* cen: matrix of size kxp
*/
int
n
=
*
pn
,
k
=
*
pk
,
p
=
*
pp
,
maxiter
=
*
pmaxiter
;
int
iter
,
i
,
j
,
c
,
it
,
inew
=
0
;
double
best
,
dd
;
Rboolean
updated
;
distance_T
<
double
>::
T_tri
opt
;
int
ierr
[
0
];
double
*
data_tri
;
int
*
order_tri
;
int
*
rank_tri
;
if
(
(
*
method
==
distance_T
<
double
>::
SPEARMAN
)
||
(
*
method
==
distance_T
<
double
>::
KENDALL
))
{
opt
.
data_tri_x
=
(
double
*
)
malloc
(
p
*
sizeof
(
double
));
opt
.
order_tri_x
=
(
int
*
)
malloc
(
p
*
sizeof
(
int
));
opt
.
rank_tri_x
=
(
int
*
)
malloc
(
p
*
sizeof
(
int
));
opt
.
data_tri_y
=
(
double
*
)
malloc
(
p
*
sizeof
(
double
));
opt
.
order_tri_y
=
(
int
*
)
malloc
(
p
*
sizeof
(
int
));
opt
.
rank_tri_y
=
(
int
*
)
malloc
(
p
*
sizeof
(
int
));
if
(
(
opt
.
data_tri_x
==
NULL
)
||
(
opt
.
order_tri_x
==
NULL
)
||
(
opt
.
rank_tri_x
==
NULL
)
||
(
opt
.
data_tri_y
==
NULL
)
||
(
opt
.
order_tri_y
==
NULL
)
||
(
opt
.
rank_tri_y
==
NULL
))
error
(
"distance(): unable to alloc memory"
);
}
for
(
i
=
0
;
i
<
n
;
i
++
)
cl
[
i
]
=
-
1
;
for
(
iter
=
0
;
iter
<
maxiter
;
iter
++
)
{
updated
=
FALSE
;
for
(
i
=
0
;
i
<
n
;
i
++
)
{
/* find nearest centre for each point */
best
=
R_PosInf
;
for
(
j
=
0
;
j
<
k
;
j
++
)
{
dd
=
distance_T
<
double
>::
distance_kms
(
x
,
cen
,
n
,
k
,
p
,
i
,
j
,
method
,
ierr
,
opt
);
/*printf("| %f",dd);
*/
if
(
dd
<
best
)
{
best
=
dd
;
inew
=
j
+
1
;
}
}
if
(
cl
[
i
]
!=
inew
)
{
updated
=
TRUE
;
cl
[
i
]
=
inew
;
}
}
if
(
!
updated
)
break
;
/* update each centre */
for
(
j
=
0
;
j
<
k
*
p
;
j
++
)
cen
[
j
]
=
0.0
;
for
(
j
=
0
;
j
<
k
;
j
++
)
nc
[
j
]
=
0
;
for
(
i
=
0
;
i
<
n
;
i
++
)
{
it
=
cl
[
i
]
-
1
;
nc
[
it
]
++
;
for
(
c
=
0
;
c
<
p
;
c
++
)
cen
[
it
+
c
*
k
]
+=
x
[
i
+
c
*
n
];
}
for
(
j
=
0
;
j
<
k
*
p
;
j
++
)
cen
[
j
]
/=
nc
[
j
%
k
];
}
*
pmaxiter
=
iter
+
1
;
/* for(j = 0; j < k; j++) wss[j] = 0.0; */
for
(
i
=
0
;
i
<
n
;
i
++
)
{
it
=
cl
[
i
]
-
1
;
wss
[
it
]
=
distance_T
<
double
>::
distance_kms
(
x
,
cen
,
n
,
k
,
p
,
i
,
it
,
method
,
ierr
,
opt
);
wss
[
it
]
=
wss
[
it
]
*
(
wss
[
it
])
;
/*
for(c = 0; c < p; c++) {
tmp = x[i+n*c] - cen[it+k*c];
wss[it] += tmp * tmp;
}*/
}
if
(
(
*
method
==
distance_T
<
double
>::
SPEARMAN
)
||
(
*
method
==
distance_T
<
double
>::
KENDALL
))
{
free
(
opt
.
data_tri_x
);
free
(
opt
.
rank_tri_x
);
free
(
opt
.
order_tri_x
);
free
(
opt
.
data_tri_y
);
free
(
opt
.
rank_tri_y
);
free
(
opt
.
order_tri_y
);
}
}
src/kmeans.h
0 → 100644
View file @
7b439bb3
#ifndef KMEANS_HEADER_AMAP
#define KMEANS_HEADER_AMAP 1
extern
"C"
{
void
kmeans_Lloyd2
(
double
*
x
,
int
*
pn
,
int
*
pp
,
double
*
cen
,
int
*
pk
,
int
*
cl
,
int
*
pmaxiter
,
int
*
nc
,
double
*
wss
,
int
*
method
);
}
#endif
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment