实验报告聚类分析报告.docx
- 文档编号:27950452
- 上传时间:2023-07-06
- 格式:DOCX
- 页数:25
- 大小:1.03MB
实验报告聚类分析报告.docx
《实验报告聚类分析报告.docx》由会员分享,可在线阅读,更多相关《实验报告聚类分析报告.docx(25页珍藏版)》请在冰豆网上搜索。
实验报告聚类分析报告
实验报告聚类分析
实验原理:
K均值聚类、中心点聚类、系统聚类和EM算法聚类分析技术。
实验题目:
用鸢尾花的数据集,进行聚类挖掘分析。
实验要求:
探索鸢尾花数据的基本特征,利用不同的聚类挖掘方法,获得基本
结论并简明解释。
实验题目一分析报告:
data(iris)
>rm(list=ls())
>gc()
         used (Mb) gc trigger (Mb) max used (Mb)
Ncells 431730 23.1     929718 49.7   607591 32.5
Vcells 787605  6.1    8388608 64.0  1592403 12.2
>
data(iris)
>
data<-iris
>
head(data)
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1
5.1
3.5
1.4
0.2
setosa
2
4.9
3.0
1.4
0.2
setosa
3
4.7
3.2
1.3
0.2
setosa
4
4.6
3.1
1.5
0.2
setosa
5
5.0
3.6
1.4
0.2
setosa
6
5.4
3.9
1.7
0.4
setosa
#Kmeans聚类分析
>newiris<-iris
>newiris$Species<-NULL
>(kc<-kmeans(newiris,3))
>
22222222222222222222
K-means clustering with 3 clusters of sizes 62, 50, 38
1
5.901613
2.748387
4.393548
1.433871
2
5.006000
3.428000
1.462000
0.246000
3
6.850000
3.073684
5.742105
2.071053
Cluster means:
Sepal.Length Sepal.Width Petal.Length Petal.Width
Clustering vector:
[1]22222222222222222
>table(iris$Species,kc$cluster)
              1  2  3
  setosa      0 50  0
  versicolor 48  0  2
  virginica  14  0 36
>plot(newiris[c("Sepal.Length","Sepal.Width")],col=kc$cluster)
>points(kc$centers[,c("Sepal.Length","Sepal.Width")],col=1:3,pch=8,cex=2)
SepalLength
#K-Medoids进行聚类分析
>install.packages("cluster")
>library(cluster)
>iris.pam<-pam(iris,3)
#hc
>iris.hc<-hclust(dist(iris[,1:4]))
>plot(iris.hc,hang=-1)
>plclust(iris.hc,labels=FALSE,hang=-1)
>re<-rect.hclust(iris.hc,k=3)
>iris.id<-cutree(iris.hc,3)
>
[36]
virginica
virginica
virginica
virginica
virginica
virginica
virginica
[43]
virginica
virginica
virginica
virginica
virginica
virginica
virginica
[50]
virginica
virginica
virginica
virginica
virginica
virginica
virginica
[57]
virginica
virginica
virginica
virginica
virginica
virginica
virginica
[64]
virginica
virginica
virginica
virginica
virginica
virginica
virginica
[71]
virginica
virginica
Levels:
setosa
versicolor
virginica
[[3]]
Levels:
setosaversicolorvirginica
>plot(iris.hc)
>rect.hclust(iris.hc,k=4,border="lightgrey")#用浅灰色矩形框出4分类聚类结果
>rect.hclust(iris.hc,k=3,border="darkgrey")#用深灰色矩形框出3分类聚类结果
>rect.hclust(iris.hc,k=7,which=c(2,6),border="darkgrey")
Cluster Dendrogram
dist(iris[,1:4])
#DBSCAN#基于密度的聚类
>install.packages("fpc")
>library(fpc)
>ds1=dbscan(iris[,1:4],eps=1,MinPts=5)#半径参数为1,密度阈值为5
>ds1
dbscan Pts=150 MinPts=5 eps=1
         1   2
border   0   1
seed    50  99
total   50 100
>ds2=dbscan(iris[,1:4],eps=4,MinPts=5)
>ds3=dbscan(iris[,1:4],eps=4,MinPts=2)
>ds4=dbscan(iris[,1:4],eps=8,MinPts=2)
>
par(mfcol=c(2,2))
>plot(ds1,iris[,1:4],main="1: MinPts=5 eps=1")
>plot(ds3,iris[,1:4],main="3: MinPts=2 eps=4")
>plot(ds2,iris[,1:4],main="2: MinPts=5 eps=4")
>plot(ds4,iris[,1:4],main="4: MinPts=2 eps=8")
4: MinPts=2 eps=8
2.03.04.00.51.52.5
4.55.56.57.5
Petal.Width
>d=dist(iris[,1:4])#计算数据集的距离矩阵d
>max(d);min(d)#计算数据集样本的距离的最值
[1]7.085196
[1]0
>install.packages("ggplot2")
>library(ggplot2)
>interval=cut_interval(d,30)
>table(interval)
interval
[0,0.236]
(0.236,0.472]
(0.472,0.709]
(0.709,0.945]
(0.945,1.18]
(1.18,1.42]
88
585
876
891
831
688
(1.42,1.65]
(1.65,1.89]
(1.89,2.13]
(2.13,2.36]
(2.36,2.6]
(2.6,2.83]
543
369
379
339
335
106
(2.83,3.07]
(3.07,3.31]
(3.31,3.54]
(3.54,3.78]
(3.78,4.01]
(4.01,4.25]
458
459
465
480
468
505
(4.25,4.49]
(4.49,4.72]
(4.72,4.96]
(4.96,5.2]
(5.2,5.43]
(5.43,5.67]
349
385
321
291
187
138
(5.67,5.9]
(5.9,6.14]
(6.14,6.38]
(6.38,6.61]
(6.61,6.85]
(6.85,7.09]
>which.max(table(interval))
(0.709,0.945]
>for(i in 3:5)
+{for(j in 1:10)
+{ds=dbscan(iris[,1:4],eps=i,MinPts=j)
+print(ds)
+}
+}
dbscan Pts=150 MinPts=1 eps=3
1
seed150
total150
dbscanPts=150MinPts=2eps=3
1
seed150
total150
dbscanPts=150MinPts=3eps=3
1
seed150
total150
dbscan Pts=150 MinPts=4 eps=3
1
seed150
total150
dbscanPts=150MinPts=5eps=3
1
seed150
total150
dbscanPts=150
1
MinPts=6
eps=3
seed150
total150
dbscanPts=150MinPts=7eps=3
1
seed150
total150
dbscanPts=150MinPts=8eps=3
1
seed150total150
dbscanPts=150
1
MinPts=9eps=3
seed150
total150
dbscanPts=150MinPts=10eps=3
1
seed150
total150
dbscan Pts=150 MinPts=1 eps=4
1
seed150
total150
dbscanPts=150MinPts=2eps=4
1
seed150
total150
dbscanPts=150MinPts=3eps=4
1
seed150
total150
dbscanPts=150MinPts=4eps=4
1
seed150
total150
dbscanPts=150MinPts=5eps=4
1
seed150
total150
dbscanPts=150MinPts=6eps=4
1
seed150
total150
dbscanPts=150MinPts=7eps=4
1
seed150
total150
dbscanPts=150MinPts=8eps=4
1
seed150
total150
dbscanPts=150MinPts=9eps=4
1
seed150
total150
dbscanPts=150MinPts=10eps=4
1
seed 150
total 150
dbscan Pts=150 MinPts=1 eps=5
1
seed150
total150
dbscanPts=150MinPts=2eps=5
1
seed150
total150
dbscanPts=150MinPts=3eps=5
1
seed150
total150
dbscanPts=150MinPts=4eps=5
1
seed150
total 150
dbscan Pts=150 MinPts=5 eps=5
1
seed 150
total 150
dbscan Pts=150 MinPts=6 eps=5
1
seed150
total150
dbscanPts=150MinPts=7eps=5
1
seed150
total150
dbscan Pts=150 MinPts=8 eps=5
1
seed 150
total 150
dbscan Pts=150 MinPts=9 eps=5
1
seed 150
total 150
dbscanPts=150MinPts=10eps=5
1
seed 150
total 150
#30次dbscan的聚类结果
>ds5=dbscan(iris[,1:4],eps=3,MinPts=2)
>ds6=dbscan(iris[,1:4],eps=4,MinPts=5)
>ds7=dbscan(iris[,1:4],eps=5,MinPts=9)
>par(mfcol=c(1,3))
>plot(ds5,iris[,1:4],main="1: MinPts=2 eps=3")
>plot(ds6,iris[,1:4],main="3: MinPts=5 eps=4")
>plot(ds7,iris[,1:4],main="2: MinPts=9 eps=5")
2:
MinPts=9eps=5
#EM期望最大化聚类
>install.packages("mclust")
>library(mclust)
>fit_EM=Mclust(iris[,1:4])
fitting ...
|========================================| 100%
>summary(fit_EM)
Gaussian finite mixture model fitted by EM algorithm
Mclust VEV (ellipsoidal, equal shape) model with 2 components:
 log.likelihood   n df       BIC       ICL
       -215.726 150 26 -561.7285 -561.7289
Clustering table:
  1   2
 50 100
>summary(fit_EM,parameters=TRUE)
Gaussian finite mixture model fitted by EM algorithm
Mclust VEV (ellipsoidal, equal shape) model with 2 components:
 log.likelihood   n df       BIC       ICL
       -215.726 150 26 -561.7285 -561.7289
Clustering table:
  1   2
 50 100
Mixing probabilities:
        1         2
0.3333319 0.6666681
Means:
                  [,1]     [,2]
Sepal.Length 5.0060022 6.261996
Sepal.Width  3.4280049 2.871999
Petal.Length 1.4620007 4.905992
Petal.Width  0.2459998 1.675997
Variances:
[,,1]
             Sepal.Length Sepal.Width Petal.Length Petal.Width
Sepal.Length   0.15065114  0.13080115   0.02084463  0.01309107
Sepal.Width    0.13080115  0.17604529   0.01603245  0.01221458
Petal.Length   0.02084463  0.01603245   0.02808260  0.00601568
Petal.Width    0.01309107  0.01221458   0.00601568  0.01042365
[,,2]
             Sepal.Length Sepal.Width Petal.Length Petal.Width
Sepal.Length    0.4000438  0.10865444    0.3994018  0.14368256
Sepal.Width     0.1086544  0.10928077    0.1238904  0.07284384
Petal.Length    0.3994018  0.12389040    0.6109024  0.25738990
Petal.Width     0.1436826  0.07284384    0.2573899  0.16808182
>plot(fit_EM)#对EM聚类结果作图
Model-based clustering plots:
1: BIC
2: classification
3: uncertainty
4: density
Selection:
(下面显示选项)
#选1
#选2
IIIII
□□g
□
□
cd
■
$轿
Sepal.Width
•£□
•a—「o—■un
uqnSBS-WrVn
□
□
Petal.Length
严
■
『魏吕
□cS
Petal.Width
■
圭
Sepal.Length
2.53.03.54.0
#选3
Sepal.Length
••1■
••••
•*.
.;.妙•・•g
援亠』
匕后T:
Sepal.Width
•・
直.•••・・
:
旷•强詡・「•
•・
:
.討“
•
•••
■■
Petal.Length
..誇:
:
!
・
—・・
•••
•:
.—:
••瀝:
•
••••
•••
••••••
•
••••.滋■
■••
••
••••・••
••••
■M
4:
Petal.Width
2.02.53.03.54.0
9
N
ID
x—
S
O
#选4
2.02.53.03.54.00.51.01.52.02.5
4.55.56.57.51234567
Selection:
0
>iris_BIC=mclustBIC(iris[,1:4])
fitting ...
>iris_BICsum=summary(iris_BIC,data=iris[,1:4])
>iris_BICsum#获取数据集iris在各模型和类别数下的BIC值
Best BIC values:
             VEV,2        VEV,3      VVV,2
BIC      -561.7285 -562.5522369 -574.01783
BIC diff    0.0000   -0.8237748  -12.28937
Classification table for model (VEV,2):
 50 100
>iris_BIC
Bayesian Information Criterion (BIC):
5
-782.6441
-742.6083
-741.9185
-688.3463
-766.8158
-711.4502
-604.8131
6
-715.7136
-705.7811
-693.7908
-676.1697
-774.0673
-707.2901
-609.8543
7
-731.8821
-698.5413
-713.1823
-680.7377
-813.5220
-766.6500
-632.4947
8
-725.0805
-701.4806
-691.4133
-679.4640
-740.4068
-764.1969
-639.2640
9
-694.5205
-700.0276
-696.2607
-702.0143
-767.8044
-755.8290
-653.0878
EVE
VEE
VVE
EEV
VEV
EVV
VVV
1
-829.9782
-829.9782-829.9782-829.9782
-829.9782
-829.9782-829.9782
Top 3 models based on the BIC criterion:
    VEV,2     VEV,3     VVV,2
-561.7285 -562.5522 -574.0178
>par(mfcol=c(1,1))
>plot(iris_BIC,G=1:7,col="yellow")
Number of components
>mclust2Dplot(iris[,1:2],
+classification=iris_BICsum$classification,
+parameters=iris_BICsum$parameters,col="yellow")
Sepal.Length
>iris_Dens=densityMclust(iris[,1:2])#对每一个样本进行密度估计
fitting ...
>iris_Dens
'densityMclust' model object: (VEV,2)
Available components:
>plot(iris_Dens,iris[,1:2],col="yellow",nlevels=55)##输入1或2
Model-based density estimation plots:
1: BIC
2: density
Selection:
(下面显示选项)
#选1
o
<£>—
2_
<0
123456
Numberofcomponents
#选2
o
4.55.05.56.06.57.07.58.0
Sepal.Length
Selection:
0
>plot(iris_Dens,type="persp",col=grey(0.8))
- 配套讲稿:
如PPT文件的首页显示word图标,表示该PPT已包含配套word讲稿。双击word图标可打开word文档。
- 特殊限制:
部分文档作品中含有的国旗、国徽等图片,仅作为作品整体效果示例展示,禁止商用。设计者仅对作品中独创性部分享有著作权。
- 关 键 词:
- 实验 报告 聚类分析