SAS程序汇总.docx
- 文档编号:6687576
- 上传时间:2023-01-09
- 格式:DOCX
- 页数:42
- 大小:234.01KB
SAS程序汇总.docx
《SAS程序汇总.docx》由会员分享,可在线阅读,更多相关《SAS程序汇总.docx(42页珍藏版)》请在冰豆网上搜索。
SAS程序汇总
SAS基础
创建数据集
其他软件产生的标准格式文件与SAS数据集之间的互相转换。
/* Assign libref C to the folder that stores the permanent SAS data set. */
libname c "F:\郑亚_32620151151279";

/* Convert an Excel workbook into a SAS data set.
   REPLACE overwrites the output data set if it already exists. */
proc import datafile='E:\data1_1.xls'
            dbms=excel
            replace
            out=c.data1_1_32620151151279;
    /* Optional: pick a worksheet with  sheet="sheet1$";  */
run;
从外部文件(文本文件)读取数据
/* Read five list-input fields per record from an external text file.
   A trailing $ marks the preceding variable as character. */
DATA b;
    INFILE 'E:\SYS.TXT';
    INPUT ID GENDER $ AGE BLOOD $ SURT;
RUN;
/* Build data set A from in-stream records, then route the listing to a file. */
DATA A;
    /* ID etc. are variable names; $ marks the preceding variable as character. */
    INPUT ID GENDER $ AGE BLOOD $ SURT;
    DATALINES;   /* in-stream data follow */
1 M 41 A 368
2 M 26 B 745
3 F 35 B 401
4 M 47 AB 552
5 F 37 A 478
6 F 39 O 628
7 M 28 O 549
8 M 31 B 128
9 M 43 AB 463
10 M 29 A 512
;

/* Send the following SAS output to a file that Excel can open. */
ODS HTML FILE='E:\RESULT.xls';
PROC PRINT DATA=A;
RUN;
ODS HTML CLOSE;   /* close the HTML destination */
/* Define value labels (formats) for the numerically coded variables. */
PROC FORMAT;
    VALUE GE 1='M' 2='F';
    VALUE BL 1='A' 2='B' 3='AB' 4='O';
RUN;

/* Same records as before, but GENDER and BLOOD are stored as numeric codes. */
DATA A;
    INPUT ID GENDER AGE BLOOD SURT;
    DATALINES;
1 1 41 1 368
2 1 26 2 745
3 2 35 2 401
4 1 47 3 552
5 2 37 1 478
6 2 39 4 628
7 1 28 4 549
8 1 31 2 128
9 1 43 3 463
10 1 29 1 512
;
RUN;

ODS HTML FILE='E:\RESULT.xls';
PROC PRINT DATA=A;
    FORMAT GENDER GE.;   /* apply the value labels when printing */
    FORMAT BLOOD BL.;
RUN;
ODS HTML CLOSE;
排序
/* Ascending sort of A by age, written to B. */
proc sort data=a out=b;
    by age;
run;

/* Descending sort of A by age, written to C. */
proc sort data=a out=c;
    by descending age;
run;
计算产生新变量
DATA B; SET A;
打开数据集B,并从数据集A读入数据。
/* Derive two new variables from A: SURT divided by 365, and an age-group flag. */
DATA D;
    FORMAT SURT_Y 5.3;        /* display SURT_Y with width 5 and 3 decimals */
    SET A;
    SURT_Y = SURT / 365;
    IF AGE > 40 THEN AGROUP = 1;
    ELSE AGROUP = 2;
RUN;
数据集的拆分
/* Source data for the split examples. */
DATA A;
    INPUT ID GENDER $ AGE BLOOD $ SURT;
    DATALINES;   /* in-stream data follow */
1 M 41 A 368
2 M 26 B 745
3 F 35 B 401
4 M 47 AB 552
5 F 37 A 478
6 F 39 O 628
7 M 28 O 549
8 M 31 B 128
9 M 43 AB 463
10 M 29 A 512
;

/* Split by rows: records with GENDER='F' go to B, all others to C. */
DATA B C;
    SET A;
    IF GENDER = 'F' THEN OUTPUT B;
    ELSE OUTPUT C;
RUN;

/* Split by columns: B omits GENDER, C omits BLOOD (both are rewritten here). */
DATA B(DROP=GENDER) C(DROP=BLOOD);
    SET A;
RUN;
数据集的合并
纵向连接
/* Vertical concatenation: stack ST1 and ST2 into ST. */
data st1;
    input x y @@;   /* trailing @@ keeps reading pairs from the same line */
    cards;
1 20 2 40
;
proc print;

data st2;
    input x y @@;
    cards;
3 80 4 160
;
proc print;

data st;
    set st1 st2;
proc print;
run;
横向合并
/* Horizontal merge: match records of A and B on the key NUM. */
data a;
    input num x @@;
    cards;
101 12 102 14 103 15
;
proc sort out=aa;   /* sorts the most recently created data set (A) */
    by num;

data b;
    input num y @@;
    cards;
101 100 102 134 103 145
;
proc sort out=bb;   /* sorts the most recently created data set (B) */
    by num;

data c;
    merge aa bb;
    by num;
proc print;
run;
计量资料的描述
计算几何均数
/* Geometric mean of frequency-weighted data: average log10(x) weighted by f,
   then back-transform with 10**mean. */
data ex2_5;
    input x f @@;
    y = log10(x);
    cards;
10 4
20 3
40 10
80 10
160 11
320 15
640 14
1280 2
;
proc means noprint;
    var y;
    freq f;                       /* f is the frequency of each x */
    output out=b mean=logmean;
run;

data c;
    set b;
    g = 10**logmean;              /* back-transform to the original scale */
proc print data=c;
    var g;
run;
制作频数表
/* Frequency table: z is x rounded down to the start of its class interval,
   with intervals of width DIS beginning at LOW. */
data ex2_1;
    input x @@;
    low = 3.07;
    dis = 0.2;
    z = x - mod(x - low, dis);
    cards;
3.96 4.23 4.42 3.59 5.12 4.02 4.32 3.72 4.76 4.16 4.61 4.26
3.77 4.20 4.36 3.07 4.89 3.97 4.28 3.64 4.66 4.04 4.55 4.25
4.63 3.91 4.41 3.52 5.03 4.01 4.30 4.19 4.75 4.14 4.57 4.26
4.56 3.79 3.89 4.21 4.95 3.98 4.29 3.67 4.69 4.12 4.56 4.26
4.66 4.28 3.83 4.20 5.24 4.02 4.33 3.76 4.81 4.17 3.96 3.27
4.61 4.26 3.96 4.23 3.76 4.01 4.29 3.67 3.39 4.12 4.27 3.61
4.98 4.24 3.83 4.20 3.71 4.03 4.34 4.69 3.62 4.18 4.26 4.36
5.28 4.21 4.42 4.36 3.66 4.02 4.31 4.83 3.59 3.97 3.96 4.49
5.11 4.20 4.36 4.54 3.72 3.97 4.28 4.76 3.21 4.04 4.56 4.25
4.92 4.23 4.47 3.60 5.23 4.02 4.32 4.68 4.76 3.69 4.61 4.26
3.89 4.21 4.36 3.42 5.01 4.01 4.29 3.68 4.71 4.13 4.57 4.26
4.03 5.46 4.16 3.64 4.16 3.76
;
proc freq;
    tables z;
run;
单变量描述
/* PROC MEANS with its default statistics. */
proc means data=ex2_1;
    var x;
run;

/* PROC MEANS requesting N, mean, SD, standard error, CV and confidence limits. */
proc means data=ex2_1 n mean std stderr cv clm;
    var x;
run;

/* PROC MEANS on a frequency table: each x is a class value and f is its count. */
data prg4_4;
    input x f @@;
    cards;
3.17 2
3.37 3
3.57 9
3.77 14
3.97 22
4.17 30
4.37 21
4.57 15
4.77 10
4.97 6
5.17 4
5.37 2
;
proc means;
    freq f;
    var x;
run;

/* Same analysis, limiting printed statistics to two decimals. */
proc means maxdec=2 data=prg4_4;
    freq f;
    var x;
run;
多变量描述
/* Descriptive statistics with PROC UNIVARIATE.
   NOTE(review): the original pointed at ex2_1, which has no variable f, so the
   FREQ statement would fail. prg4_4 is the data set that carries x and f;
   confirm this is the intended input. */
proc univariate data=prg4_4;
    freq f;
    var x;
run;

/* Compute the 2.5% and 97.5% percentiles and save them as p2_5 and p97_5. */
proc univariate data=prg4_4;
    freq f;
    var x;
    output out=pct pctlpre=p pctlpts=2.5 97.5;
run;
proc print data=pct;
run;

/* Normality tests plus stem-and-leaf, box and normal probability plots. */
proc univariate data=ex2_1 normal plot;
    var x;
run;
两样本均数的比较
单一总体均数的可信区间
/* 95% confidence interval for one population mean from summary statistics. */
data prg5_1;
    n = 10;
    mean = 166.95;
    std = 3.64;
    t = tinv(0.975, n-1);        /* two-sided 95% t quantile, n-1 df */
    in = t*std/sqrt(n);          /* half-width of the interval */
    lclm = mean - in;
    uclm = mean + in;
proc print;
    var lclm uclm;
run;
两总体均数相差的可信区间
/* 95% confidence interval for the difference of two means (pooled variance). */
data prg5_2;
    n1 = 29;
    n2 = 32;
    m1 = 20.10;
    m2 = 16.89;
    s1 = 7.02;
    s2 = 8.46;
    ss1 = s1**2*(n1-1);                  /* sum of squares, group 1 */
    ss2 = s2**2*(n2-1);                  /* sum of squares, group 2 */
    sc2 = (ss1+ss2)/(n1+n2-2);           /* pooled variance */
    se = sqrt(sc2*(1/n1+1/n2));          /* SE of the difference */
    t = tinv(0.975, n1+n2-2);
    lclm = (m1-m2) - t*se;
    uclm = (m1-m2) + t*se;
proc print;
    var t se lclm uclm;
run;
单样本均数的t检验
原始数据已知-ttest
/* Template: one-sample t test against H0 mean = 140 when raw data are available.
   Place the observations between DATALINES and the terminating semicolon. */
data prg5_1;
    input x @@;
    datalines;
;
run;

proc ttest h0=140;
    var x;
run;
原始数据未知
某医生测量了36名从事铅作业男性工人的血红蛋白含量,算得其均数为130.83g/L,标准差为25.74g/L。
问从事铅作业工人的血红蛋白是否不同于正常成年男性平均值140g/L?
/* One-sample t test computed from summary statistics only. */
data prg5_3;
    n = 36;
    s_m = 130.83;                            /* sample mean */
    std = 25.74;                             /* sample standard deviation */
    p_m = 140;                               /* hypothesised population mean */
    df = n - 1;
    t = abs(s_m - p_m)/(std/sqrt(n));
    p = (1 - probt(t, df))*2;                /* two-sided p-value */
proc print;
    var t p;
run;
配对资料两样本均数比较的t检验
/* 95% confidence interval for the difference of two means (pooled variance). */
data prg5_2;
    n1 = 29;
    n2 = 32;
    m1 = 20.10;
    m2 = 16.89;
    s1 = 7.02;
    s2 = 8.46;
    ss1 = s1**2*(n1-1);
    ss2 = s2**2*(n2-1);
    sc2 = (ss1+ss2)/(n1+n2-2);
    se = sqrt(sc2*(1/n1+1/n2));
    t = tinv(0.975, n1+n2-2);
    lclm = (m1-m2) - t*se;
    uclm = (m1-m2) + t*se;
proc print;
    var t se lclm uclm;
run;

/* One-sample t test from summary statistics. */
data prg5_3;
    n = 36;
    s_m = 130.83;
    std = 25.74;
    p_m = 140;
    df = n - 1;
    t = abs(s_m - p_m)/(std/sqrt(n));
    p = (1 - probt(t, df))*2;
proc print;
    var t p;
run;

/* One-sample t test from raw data against H0 mean = 140.
   NOTE(review): the field separators in these data lines were lost in the
   source; the split below is the most plausible reading - verify against
   the original document before relying on the results. */
data prg5_4;
    input x @@;
    datalines;
171 79 135 78 118 175 122 105 111 140
138 132 142 140 168 113 145 128 124
134 116 129 155 135 134 136 113 119 132
;
proc ttest h0=140;
    var x;
run;
MEANS
/* Paired t test with PROC MEANS: test whether the mean of d = x1 - x2 is zero. */
data prg5_5;
    input x1 x2 @@;
    d = x1 - x2;
    cards;
0.840 0.580
0.591 0.509
0.674 0.500
0.632 0.316
0.687 0.337
0.978 0.517
0.750 0.454
0.730 0.512
1.200 0.997
0.870 0.506
;
proc means n mean std stderr t prt;
    var d;
run;
UNIVARIATE
/* Paired t test with PROC UNIVARIATE, run on the differences d. */
proc univariate data=prg5_5;
    var d;
run;
TTest
/* Paired t test with PROC TTEST, run on the differences d. */
proc ttest data=prg5_5;
    var d;
run;
成组资料两样本均数比较的t检验
/* Two-sample t test; each observation is entered as a (value, group) pair. */
data prg5_6;
    input x c @@;
    datalines;
-0.7 1 -5.6 1 2 1 2.8 1 0.7 1
3.5 1 4 1 5.8 1 7.1 1 -0.5 1
2.5 1 -1.6 1 1.7 1 3 1 0.4 1
4.5 1 4.6 1 2.5 1 6 1 -1.4 1
3.7 2 6.5 2 5 2 5.2 2 0.8 2
0.2 2 0.6 2 3.4 2 6.6 2 -1.1 2
6 2 3.8 2 2 2 1.6 2 2 2
2.2 2 1.2 2 3.1 2 1.7 2 -2 2
;
proc ttest;
    var x;
    class c;
run;

/* Same test; the group is derived from the observation number:
   the first 20 values form group 1, the rest group 2. */
data prg5_7;
    input x @@;
    if _n_ < 21 then c = 1;
    else c = 2;
    cards;
-0.70 -5.60 2.00 2.80 0.70 3.50 4.00 5.80 7.10 -0.50
2.50 -1.60 1.70 3.00 0.40 4.50 4.60 2.50 6.00 -1.40
3.70 6.50 5.00 5.20 0.80 0.20 0.60 3.40 6.60 -1.10
6.00 3.80 2.00 1.60 2.00 2.20 1.20 3.10 1.70 -2.00
;
proc ttest;
    var x;
    class c;
run;
两样本均数的等效性检验
/* Equivalence test for two means from summary statistics;
   delta is the equivalence margin subtracted from the observed difference. */
data prg5_8;
    n1 = 102;
    n2 = 100;
    m1 = 0.87;
    m2 = 0.48;
    s1 = 0.89;
    s2 = 0.82;
    delta = 0.52;
    ss1 = s1**2*(n1-1);
    ss2 = s2**2*(n2-1);
    sc2 = (ss1+ss2)/(n1+n2-2);           /* pooled variance */
    se = sqrt(sc2*(1/n1+1/n2));
    t = (m1 - m2 - delta)/se;
    p = probt(t, n1+n2-2)*2;
proc print;
    var t p;
run;
完全随机设计资料的方差分析
/* One-way ANOVA for a completely randomized design:
   3 groups (c) with 12 observations each, read row by row. */
data pro_251;
    do c = 1 to 3;
        do i = 1 to 12;
            input x @@;
            output;
        end;
    end;
    datalines;
332.96 297.64 312.57 295.47 284.25 307.97 292.12 244.61 261.46 286.46 322.49 282.42
253.21 235.87 269.30 258.90 254.39 200.87 227.79 237.05 216.85 238.03 238.19 243.49
232.55 217.71 216.15 220.72 219.46 247.47 280.75 196.01 208.24 198.41 240.35 219.56
;
run;

ods html file='G:\新建文件夹\result.xls';
proc anova;
    class c;
    model x = c;
    means c;
    means c / lsd bon hovtest;   /* LSD and Bonferroni comparisons, variance homogeneity test */
run;
ods html close;

/* ANOVA for a randomized block design: 10 blocks (a) by 3 treatments (b). */
data proc_2541;
    do a = 1 to 10;
        do b = 1 to 3;
            input x @@;
            output;
        end;
    end;
    datalines;
2.21 2.91 4.25
2.32 2.64 4.56
3.15 3.67 4.33
1.86 3.29 3.89
2.56 2.45 3.78
1.98 2.74 4.62
2.37 3.15 4.71
2.88 3.44 3.56
3.05 2.61 3.77
3.42 2.86 4.23
;
run;

ods html file='G:\新建文件夹\result2.xls';
proc anova;
    class a b;
    model x = a b;
    means b / dunnett('3');   /* alternative: means b / snk; */
run;
ods html close;
相关
绘制散点图
/* Assign the library that stores the data set. */
libname c "E:\20121225\教学\2014-2015第一学期\研究生\SAS应用\二、相关与回归分析";

/* Create the data set and read (height, weight) pairs. */
data c.t1;
    label x='Height(cm)'
          y='Weight(Kg)';
    input x y @@;
    datalines;
149.4 30.8 167.6 42.6 146.3 33.1 170.7 44.0
161.5 36.3 164.6 40.8 155.5 32.7 158.5 35.4
149.4 33.1 152.4 31.8
;

/* filename output "E:\20121225\教学\2014-2015第一学期\研究生\SAS应用\二、相关与回归分析\t1scatter.jpeg"; */

/*************************************/
/* Graphics area settings            */
/* htext=0.25in : text height 0.25 inch */
/* ctext=black  : text colour        */
/*************************************/
goptions htext=0.25in ctext=black device=gif;

/*************************************/
/* Axis definitions                  */
/* h=0.25in      : axis label height 0.25 inch */
/* f=zapfbi      : axis label font   */
/* j=r           : right-justify the axis label */
/* "HEIGHT(cm)"  : axis label text   */
/* offset=(0.2in): leave 0.2 inch at the axis origin */
/* order=(145 to 175 by 10): major tick values */
/*************************************/
axis1 label=(h=0.25in f=zapfbi j=r "HEIGHT(cm)") offset=(0.2in) order=(145 to 175 by 10);
axis2 label=(a=90 h=0.25in f=zapfbi j=l "WEIGHT(Kg)") order=(30 to 45 by 5) offset=(0.2in);

ods html path="d:\" body="散点图.xls";

/*************************************/
/* Draw the scatter plot with GPLOT  */
/* plot    : variables to plot (y against x) */
/* haxis   : horizontal axis definition */
/* vaxis   : vertical axis definition */
/* noframe : no frame around the plot */
/* symbol  : appearance of the data points */
/* h=0.25in: point size 0.25 inch    */
/* c=black : point colour black      */
/*************************************/
proc gplot data=c.t1;
    plot y*x / haxis=axis1 vaxis=axis2 noframe;
    symbol h=0.25in c=black;
run;

ods html close;
绘制分层散点图
/* Create the data set with a stratification variable G:
   the first five observations are group 1, the rest group 2. */
data c.t2;
    label x='Height(cm)'
          y='Weight(Kg)';
    input x y @@;
    if _n_ <= 5 then g = 1;
    else g = 2;
    datalines;
149.4 30.8 167.6 42.6 146.3 33.1 170.7 44.0
161.5 36.3 164.6 40.8 155.5 32.7 158.5 35.4
149.4 33.1 152.4 31.8
;

goptions htext=0.25in ctext=black device=gif;
axis1 label=(h=0.25in f=zapfbi j=r "HEIGHT(cm)") offset=(0.2in) order=(145 to 175 by 10);
axis2 label=(a=90 h=0.25in f=zapfbi j=r "WEIGHT(Kg)") order=(30 to 45 by 5) offset=(0.2in);

ods html path="d:\" body="分层散点图.xls";
proc gplot data=c.t2;
ploty*x=G/HAXIS=AXIS1VAXIS
- 配套讲稿:
如PPT文件的首页显示word图标,表示该PPT已包含配套word讲稿。双击word图标可打开word文档。
- 特殊限制:
部分文档作品中含有的国旗、国徽等图片,仅作为作品整体效果示例展示,禁止商用。设计者仅对作品中独创性部分享有著作权。
- 关 键 词:
- SAS 程序 汇总