Transcript run

Output语句:输出观测到新创建的数据集中
语句格式:
output out=sas-data-set keyword=name
<…keyword=name>;
关于选项:
cookd=name /*Cook's D 统计量*/
h=name /*杠杆值 h_ii*/
L95=name /*因变量预测值的95%置信下限*/
U95=name /*因变量预测值的95%置信上限*/
L95M=name /*因变量期望值的95%置信下限*/
U95M=name /*因变量期望值的95%置信上限*/
p=name /*预测值*/
r=name /*残差*/
student=name /*学生化残差*/
Page 80 例子3.3
/* Example 3.3 (p. 80): read the (x, y) pairs into WORK.p80.
   The data lines themselves are elided in these notes. */
data p80;
   input x y;
   datalines;
…
;
run;

/* List the data set so the typed-in values can be checked. */
proc print data=p80;
run;

/* Scatter plot of y against x. */
proc plot data=p80;
   plot y*x;
run;

/* Simple linear regression of y on x, intercept included. */
proc reg data=p80;
   model y=x;
run;
/* Refit y on x without an intercept (NOINT), requesting predicted values (P),
   residuals (R), individual and mean confidence limits (CLI, CLM) and the
   Durbin-Watson statistic (DW). */
proc reg data=p80;
   model y=x / noint p r cli clm dw;
   /* Save the per-observation diagnostics in WORK.p80result. */
   output out=p80result
          p=yhat
          r=yresid
          cookd=cookd
          h=level
          student=stdr;
   plot r.*p.;   /* residuals against predicted values */
   plot r.*x;    /* residuals against x */
run;

/* Descriptive statistics, plots and normality tests for the residuals. */
proc univariate data=p80result plot normal;
   var yresid;
run;

/* Studentized residuals against fitted values, drawn with '*'. */
proc plot data=p80result;
   plot stdr*yhat='*';
run;

/* List every saved diagnostic. */
proc print data=p80result;
run;
/* Turn each studentized residual into the statistic
      t = sqrt(50*stdr**2 / (51 - stdr**2)),
   which the notes compare with a t(50) critical value to flag outliers.
   The DATA step already runs once for every observation read by SET, so the
   former "do i=1 to 53" wrapper only recomputed the same value 53 times and
   added a constant column i=54 to the output; it has been removed.
   NOTE(review): 50 and 51 look like n-p-2 and n-p-1 for n=53 and a model with
   an intercept, while p80result comes from a NOINT fit -- confirm the
   constants. */
data p80ttest;
   set p80result;
   t = sqrt((50*stdr**2) / (51 - stdr**2));
run;

proc print data=p80ttest;
run;
检查数据有无输错
Obs x y
1 679 0.79
2 292 0.44
3 1012 0.56
4 493 0.79
5 582 2.70
6 1156 3.64
7 997 4.73
8 2189 9.50
9 1097 5.34
10 2078 6.85
…..
模型有截距时方程显著性检验
结论:截距不显著
修改reg模型
/* Modified model: regression of y on x through the origin (NOINT).
   The diagnostics are written to WORK.p80result, the data set that the
   unchanged follow-up steps (PROC UNIVARIATE / PLOT / PRINT with
   data=p80result) read.  The previous "out=s.p80result" depended on a libref
   "s" that is never assigned anywhere in these notes. */
proc reg;
   model y=x / noint p r cli clm dw;
   output out=p80result p=yhat r=yresid
          cookd=cookd h=level student=stdr;
   plot r.*p.;   /* residuals against predicted values */
   plot r.*x;    /* residuals against x */
run;
其余不变
F值变大,模型更显著
回归系数显著
随机误差无1阶自回归
$d_L = 1.50,\ d_U = 1.59$
结论:方差齐性假设不合理
作变换z=sqrt(y),再进行诊断
/* Build WORK.p80II: the rows of p80 plus the transformed response
   z = sqrt(y). */
data p80II;
set p80;
/* NOTE(review): this loop repeats the same assignment 53 times for every
   observation; the DATA step already iterates over the rows of p80.  Its
   only visible effect is the extra variable i, which holds 54 when each row
   is written (the i column of the printed listing). */
do i=1 to 53 by 1;
z=sqrt(y);
end;
run;
/* No DATA= option: prints the most recently created data set (p80II). */
proc print;
run;
/* Regress the transformed response z on x (intercept kept), with the same
   options as before: P, R, CLI, CLM and the Durbin-Watson statistic. */
proc reg data=p80II;
   model z=x / p r cli clm dw;
   /* Save the diagnostics of the z model in WORK.p80result2.  The names
      yhat / yresid are kept from the earlier run but now refer to z. */
   output out=p80result2
          p=yhat
          r=yresid
          cookd=cookd
          h=level
          student=stdr;
   plot r.*p.;   /* residuals against predicted values */
   plot r.*x;    /* residuals against x */
run;

/* Normality checks on both the raw and the studentized residuals. */
proc univariate data=p80result2 plot normal;
   var yresid stdr;
run;

/* Studentized residuals against fitted values, drawn with '*'. */
proc plot data=p80result2;
   plot stdr*yhat='*';
run;

proc print data=p80result2;
run;
/* Turn each studentized residual of the z model into the statistic
      t = sqrt(50*stdr**2 / (51 - stdr**2)),
   which the notes compare with the t(50) critical value to flag outliers.
   The DATA step already runs once for every observation read by SET, so the
   former "do i=1 to 53" wrapper only recomputed the same value 53 times and
   added a constant column i=54 to the output; it has been removed. */
data p80ttest2;
   set p80result2;
   t = sqrt((50*stdr**2) / (51 - stdr**2));
run;

proc print data=p80ttest2;
run;
得到z的数据
Obs x y i z
1 679 0.79 54 0.88882
2 292 0.44 54 0.66332
3 1012 0.56 54 0.74833
4 493 0.79 54 0.88882
5 582 2.70 54 1.64317
6 1156 3.64 54 1.90788
7 997 4.73 54 2.17486
8 2189 9.50 54 3.08221
9 1097 5.34 54 2.31084
10 2078 6.85 54 2.61725
11 1818 5.84 54 2.41661
12 1700 5.21 54 2.28254
13 747 3.25 54 1.80278
14 2030 4.43 54 2.10476
。。。。。。。
结论:模型显著
结论:回归系数显著
$\hat{y} = (0.58223 + 0.00095286\,x)^2$
DW检验,残差无一阶自回归
• 结论:从标准化残差分析,序号为26的样
本点为异常点。从COOK距离分析,序号为
26,52的样本点为强影响点。
用t分布来检验异常点
$t_{0.975}(50) = 2.00856$
• 结论:序号为26,50,52的数据为异常点。对
策:检查原始数据的抄录是否有误。不然,
从原始数据中剔除它们。
对yresid作正态性检验
• 结论:接受0均值正态分布的假设。
残差图
z = 0.5822 +0.001 x
1.0
N
53
Rsq
0.6485
0.5
AdjRsq
0.6416
RMSE
0.464
0.0
-0.5
-1.0
-1.5
0.5
1.0
1.5
2.0
2.5
Predicted Value
3.0
3.5
4.0
z = 0.5822 +0.001 x
1.0
N
53
Rsq
0.6485
0.5
AdjRsq
0.6416
RMSE
0.464
0.0
-0.5
-1.0
-1.5
0
500
1000
1500
2000
x
2500
3000
3500
4000
Studentized Residual
2
1
0
-1
-2
-3
-4
0
1
2
3
4
Predicted Value of z
结论:已满足回归函数线性假设、
方差齐性假设
预测
/* Prediction example: ten (x, y) pairs followed by one row with x=0.06 and
   y missing.
   NOTE(review): the last row is presumably there so that the regression
   output includes a prediction at x=0.06 -- confirm. */
data example1;
   input x y;
   datalines;
0.03 40.5
0.04 39.5
0.05 41
0.07 41.5
0.09 43
0.1 42
0.12 45
0.15 47.5
0.17 53
0.2 56
0.06 .
;
run;

/* Scatter plot of y against x. */
proc plot data=example1;
   plot y*x;
run;

/* Fit y on x and keep, for every row, the predicted value (phat) and the
   lower / upper 95% limits for an individual prediction (a, b). */
proc reg data=example1;
   model y=x / p r cli clm;
   output out=aa
          p=phat
          L95=a
          U95=b;
run;

proc print data=aa;
run;
从外部调入数据
先在d:\user目录下建立一个txt文件chengji.txt,内容为
Zhao 65 84 72 78
Qian 85 90 86 76
Sun 78 65 60 88
Li 71 60 55 72
Zhou 89 80 79 82
Wu 73 80 79 82
/* Point libref y at d:\user so that y.chengji is stored in that folder. */
libname y 'd:\user';

/* Read the external text file: a character name followed by four numeric
   fields x1-x4, then derive two summary columns. */
data y.chengji;
   infile 'd:\user\chengji.txt';
   input name $ x1-x4;
   y = max(of x1-x4);    /* largest of the four values */
   z = mean(of x1-x4);   /* their arithmetic mean */
run;

proc print data=y.chengji;
run;
/* Same import as before: libref y -> d:\user, chengji.txt -> y.chengji with
   the derived columns y (maximum) and z (mean) of x1-x4. */
libname y 'd:\user';

data y.chengji;
   infile 'd:\user\chengji.txt';
   input name $ x1-x4;
   y = max(of x1-x4);
   z = mean(of x1-x4);
run;

proc print data=y.chengji;
run;

/* Copy only the variable z into a second permanent data set; the KEEP=
   data set option drops every other variable as the input is read. */
data y.newset;
   set y.chengji (keep=z);
run;

proc print data=y.newset;
run;
If语句取子集
libname y 'd:\user';

/* Subsetting IF: an observation is written to WORK.a only when the
   condition x3 < 60 holds; all other observations are skipped. */
data a;
   set y.chengji;
   if x3 < 60;
run;

proc print data=a;
run;
If …then(或else)语句
libname y 'd:\user';

/* IF ... THEN / ELSE with explicit OUTPUT: split y.chengji into two data
   sets in a single pass -- rows with x3 < 60 go to a, the rest to b. */
data a b;
   set y.chengji;
   if x3 < 60 then
      output a;
   else
      output b;
run;

proc print data=a;
run;

proc print data=b;
run;
Delete语句
libname y 'd:\user';

/* DELETE statement: observations with x3 >= 60 are discarded, so WORK.a
   receives only the remaining rows. */
data a;
   set y.chengji;
   if x3 >= 60 then delete;
run;

proc print data=a;
run;
retain语句
/* RETAIN demo: sx1 is the running sum of x and sx2 the running sum of x*x.
   RETAIN keeps both values from one observation to the next and starts them
   at 0.
   The double trailing @@ holds the data line across iterations so that each
   blank-separated value becomes its own observation.  The values therefore
   have to be separated by blanks; the blanks (assumed lost in transcription,
   the line read "254367") have been restored -- as a single token the step
   would create just one observation. */
data a;
   input x @@;
   retain sx1 0 sx2 0;
   sx1 = sx1 + x;
   sx2 = sx2 + x*x;
   cards;
2 5 4 3 6 7
;
run;
proc print data=a;
run;
计算pi值
/* Monte Carlo estimate of pi.  Each iteration draws a point (x, y) with
   uniform(0) and counts in m the draws that satisfy y < 1/(1+x*x); the step
   then reports 4*m/n as the estimate. */
data a;
   retain m 0;                        /* hit counter, starts at 0 */
   do n = 1 to 100000;
      x = uniform(0);                 /* or x=ranuni(0); */
      y = uniform(0);                 /* or y=ranuni(0); */
      if y < 1/(1 + x*x) then m = m + 1;
      if n > 99900 then pi = 4*m/n;   /* pi is computed only for the last 100 draws */
      output;                         /* one observation per draw */
   end;
run;

proc print data=a;
run;
Obs m n x y pi
…………….
99991 78585 99991 0.81735 0.07297 3.14368
99992 78586 99992 0.11826 0.16715 3.14369
99993 78587 99993 0.29977 0.58634 3.14370
99994 78588 99994 0.14505 0.22211 3.14371
99995 78588 99995 0.79454 0.98015 3.14368
99996 78588 99996 0.70701 0.84419 3.14365
99997 78589 99997 0.16529 0.08583 3.14365
99998 78589 99998 0.84441 0.99073 3.14362
99999 78590 99999 0.09851 0.70711 3.14363
100000 78591 100000 0.71970 0.18154 3.14364