This section contains brief description for a list of some program, which are available as a zip file and a PDF documentation.
The master program
It includes and calls individuals components and accumulates outputs.
/*9-5-2007 MRC-Epid JHZ*/
%include setup; /* set up options and paths
*/
%include map; /* map information */
%include
trait; /* trait information */
%include desc; /* summary
statistics */
%include exclude;/* a list of individuals to be
excluded*/
%include data; /* genotype data */
%include
split; /* data splitting */
%include code; /* minor and
major alleles */
%include pg; /* merge of phenotype
and genotype */
%include bt; /* binary strait
analysis (LOGISTIC) */
%include qt; /* quantitative
trait analysis */
%include hwe; /* Hardy-Weinberg
equilibrium tests*/
%include ld; /* LD plot
*/
%include tidy; /* space release */
%include
fdr; /* false discovery rate */
%include allele; /*
chromosome X analysis */
%include xhwe; /* Hardy-Weinberg
equilibrium tests for chromosome X */
%macro
do_chr(i,ns=30);
%map(&i);
%data(&i);
%split(&i,ns=&ns);
%do j=1 %to
&ns;
proc
sql;
create table
scratch.data&i as select a.rsn, a.id, substr(a1a2,1,1) as a1,
substr(a1a2,2,1) as
a2
from obesity.data&i as a, rsn&i as b where a.rsn=b.rsn and
group=&j;
create table
scratch.map&i (drop=nn group rsn) as select
*
from rsn&i where group=&j;
quit;
%if %upcase(&i)=X %then
%do;
%allele(&i);
%hwe(&i,where=wheresex=2);<br> %xhwe(data=scratch.data&i,where=where
cohort="1");
%if (&j=1) %then
%do;
proc
sql;
create table scratch.chisq&i as select * from
chisq;
quit;
%end;
%else
%do;
proc
append base=scratch.chisq&i
data=chisq;
run;
%end;
%end;
%else
%do;
%code(&i);
%pg(&i);
%hwe(&i);
%desc(&i);
%bt(&i,trait=case);
%qt(&i,trait=bmi);
%end;
proc
datasets;
append
base=obesity.long&i
data=scratch.long&i;
append base=scratch.cma&i
data=ma&i;
append
base=scratch.cpm&i
data=pm&i;
append
base=scratch.clpm&i
data=clpm&i;
append
base=scratch.canova&i
data=anova&i;
append
base=scratch.cparms&i
data=parms&i;
append
base=scratch.caf&i
data=af&i;
append
base=scratch.cms&i
data=ms&i;
append
base=scratch.cmeans&i
data=means&i;
append
base=scratch.cfreq&i data=freq&i;
quit;
%tidy(&i);
%end;
%mend
do_chr;
%do_chr(01);
%do_chr(02);
%do_chr(03);
%do_chr(04);
%do_chr(05);
%do_chr(06);
%do_chr(07);
%do_chr(08);
%do_chr(09);
%do_chr(10);
%do_chr(11);
%do_chr(12);
%do_chr(13);
%do_chr(14);
%do_chr(15);
%do_chr(16);
%do_chr(17);
%do_chr(18);
%do_chr(19);
%do_chr(20);
%do_chr(21);
%do_chr(22);
%do_chr(X);
/* Command to run with space in /scratch: sas -work /scratch go.sas & */
This program sets up the libnames which contain location of data. It also defines two macros to be used.
/*21-4-2007 MRC-Epid JHZ*/
options replace=yes mprint spool nocenter ps=max ls= max;
libname affy500k
'/data/genetics/GWA/annotate';
libname obesity
'/data/genetics/scratch/obesity500k';
libname scratch
'/data/genetics/scratch';
libname epic5k '.';
%let home=/data/genetics/GWA/Obesity/apr-2007/;
%global nobs cases controls;
%macro count(dsn);
data
_null_;
if 0 then set &dsn
nobs=count;
call
symput('nobs',left(put(count,16.)));
stop;
run;
%mend count;
%include ds2text;
proc
format;
value agefmt low-<50='1' 50-<60='2'
60-<70='3' 70-high='4';
run;
/*4-5-2007 MRC-Epid JHZ*/
%macro data(i);
title2 genotypic information;
%if
%upcase(&i)=X %then %do;
%let j=X;
%end;
%else
%do;
%let j=%eval(&i);
%end;
filename EPIC5k
pipe "gunzip -c
&home.Affx_20070307fs1_gt_OB_BRLMM96_&i..txt.gz";
data case
(drop=score b1b2);
infile EPIC5k
dlm='09'x;
format rsn $15. id
$15.;
input rsn$ id$ b1b2$
score;
a1a2=translate(b1b2,'
','N');
run;
%count(case);
data _null_;
nn=&nobs;
call
symput('cases',left(put(nn,16.)));
put
nn=;
run;
filename EPIC5k pipe "gunzip -c
&home.Affx_20070307fs1_gt_OBC_BRLMM96_&i..txt.gz";
data control
(drop=score b1b2);
infile EPIC5k
dlm='09'x;
format rsn $15. id
$15.;
input rsn$ id$ b1b2$
score;
a1a2=translate(b1b2,'
','N');
run;
%count(control);
data _null_;
nn=&nobs;
call
symput('controls',left(put(nn,16.)));
put
nn=;
run;
data data&i;
set case
control;
run;
proc sql;
create table exclude
as select * from epic5k.exclude;
create table
obesity.data&i as select * from
data&i
where id not in (select wtccc_id from exclude);
quit;
%mend
data;
/*27-3-2007 MRC-Epid JHZ*/
title2 map
information;
%macro map(i);
data
epic5k.map&i;
%if %upcase(&i)=X %then
%do;
infile
"('Affx_20061113fa1_snp_23.txt', 'Affx_20061113fa1_snp_X.txt')"
dlm='09'x;
%end;
%else
%do;
infile
"Affx_20061113fa1_snp_&i..txt" dlm='09'x;
%end;
format rsn $15.;
input idn rsn$ chr location allele1$ allele2$;
drop
idn;
name=rsn;
run;
proc sort
data=epic5k.map&i;
by rsn;
run;
%mend map;
The data sets contains age, sex, case-control labels and bosdy mass index (BMI).
/*4-5-2007 MRC-Epid JHZ*/
title phenotypic
information;
proc sort data= epic5k.newtrait(rename=(wtccc_id=id
case_20070321=case cohort_20070321=cohort))
out=newtrait;
format wtccc_id
$15.;
by sex age;
run;
data
stdtrait;
set newtrait;
zbmi=log10(bmi);
zheight=height;
run;
proc
standard data=stdtrait out=stdheight mean=0 std=1;
by sex age;
var zheight;
format age agefmt.;
run;
proc sort
data=stdtrait;
by cohort sex age;
run;
proc
standard data=stdtrait out=stdbmi mean=0 std=1;
by
cohort sex age;
var
zbmi;
format age agefmt.;
run;
proc
sql;
create table epic5k.trait as select a.cohort,
a.id, a.case, a.age, a.sex, a.hba1c, a.hdl, a.height,
a.zheight,
a.bmi, b.zbmi from stdheight a left join stdbmi b on a.id=b.id;
quit;
proc
sort data=epic5k.trait;
by id cohort case age sex
bmi zbmi height zheight hba1c hdl;
run;
/*
data
epic5k.trait;
format sample_id
$15.;
set epic5k.qc (keep=sample_id
sample_selected_to_genotype case bmi age sex
cohort);
rename sample_id=id
sample_selected_to_genotype=typed;
run;
proc sort
data=epic5k.trait;
by id typed case bmi age
sex;
run;
*/s
This program will code the character labels for additive, dominant and recessive models.
/*8-2-2007 MRC-Epid JHZ*/
%macro code(i);
data
a1a2;
set
scratch.data&i;
ab=a1;output;
ab=a2;output;
run;
ods select
none;
proc sort data=a1a2;
by
rsn;
run;
proc freq data=a1a2;
by
rsn;
ods output
onewayfreqs=ab;
table ab;
run;
ods select
all;
proc sql;
create table ma0 as select rsn,
table, ab, frequency, percent from
ab
group
by rsn having frequency=min(frequency);
create table
mb0 as select rsn, table, ab, frequency, percent from
ab
group
by rsn having frequency=max(frequency);
quit;
proc sort
data=ma0;
by rsn ab;
run;
data
ma;
set ma0;
by rsn
ab;
if percent=50 & last.rsn then
delete;
run;
proc sort data=mb0;
by rsn ab;
run;
data mb;
set mb0;
if percent=50& first.rsn then delete;
run;
data
ma&i(drop=table);<br> mergema(rename=(ab=a1 frequency=n1 percent= p1))mb(rename=(ab=a2 frequency=n2 percent=p2));
by rsn;
run;
proc sort data=scratch.data&i out=data&i;
by rsn;
run;
data scratch.g&i (drop=table frequency ab percent);
merge ma data&i;
by rsn;
if (a1 ne
'')&(a2ne'')thendo;<br> rec0=(a1=ab & a2=ab);
dom0=(a1=ab|a2=ab);
add0=(a1=ab)+(a2=ab);
end;
run;
%mend code;
/*3-5-2007 MRC-Epid JHZ*/
%macro pg(i);
proc
sql;
create table long&i as select * from
scratch.g&i order by id;
quit;
data long&i
(drop=t);
set
long&i;
a1a2='
';
t=' ';
if a1 ne ' '
& a2 ne ' ' then do;
t=a1;
if a1>a2 then
do;
a1=a2;
a2=t;
end;
a1a2=compress(a1||'/'||a2);
end;
drop a1 a2;
run;
proc transpose
data=long&i out=wide&i;
by
id;
id rsn;
var add0
dom0 rec0 a1a2;
run;
data scratch.wide&i (where=(compress(id) ne
""));
merge epic5k.trait
wide&i;
by id;
run;
proc transpose
data=scratch.wide&i out=long&i (drop=
_LABEL_rename=(_NAME_=rsn));
by id cohort case age sex bmi
zbmi height zheight hba1c hdl;
var
_all_;
run;
proc sql;
create table long2&i
(where=(upcase(rsn) not
in('_NAME_','ID','COHORT','CASE','AGE','SEX','BMI','ZBMI','HEIGHT','ZHEIGHT$
as select * from long&i order by rsn, id;
quit;
data
scratch.long&i (drop=add0 dom0 rec0 where=(compress(rsn) ne
""));
set long2&i;
add=input(compress(add0),3.);
dom=input(compress(dom0),3.);
rec=input(compress(rec0),3.);
run;
%mend pg;
/*4-5-2007 MRC-Epid JHZ*/
%macro hwe(i,where=);
ods
select none;
proc sort data= scratch.long&i(where=(cohort="1") keep=rsn
id sex a1a2 cohort) out=control&i (drop=cohort
sex);
&where;
by id
rsn;
run;
proc transpose data=control&i out=control2&i
(drop=_NAME_ id);
id
rsn;
by id;
var
a1a2;
run;
proc allele data=control2&i
genocol;
var
_CHARACTER_;
ods output markersumm=ms&i
allelefreq=af&i;
run;
proc sort data=scratch.long&i (keep=rsn id
sex a1a2 cohort) out=control&i (drop=cohort
sex);
&where;
by id
rsn;
run;
proc transpose data=control&i out=control2&i
(drop=_NAME_ id);
id
rsn;
by id;
var
a1a2;
run;
proc allele data=control2&i
genocol;
var
_CHARACTER_;
ods output markersumm=ams&i
allelefreq=aaf&i;
run;
ods select all;
%mend hwe;s
/*26-4-2007 MRC-Epid JHZ*/
title2 quantitative trait in cohort;
%macro qt(i,trait=bmi);
ods
select none;
proc reg data=
scratch.long&i(where=(cohort="1"));
ods output ANOVA=anova&i
ParameterEstimates=parms&i;
by
rsn;
add: model &trait=add /b
stb;
dom: model &trait=dom /b
stb;
rec: model &trait=rec /b
stb;
quit;
data parms&i (drop=variable);
set parms&i (where=(variable ne "Intercept") drop=label);
run;
%mend qt;s
It will splits the raw data into pieces each to be analysed
/*5-3-2007 MRC-Epid JHZ*/
%macro split(i,ns=2);
proc
sql;
create table rsn&i as select unique(rsn)
from case;
create table rsn2&i as select
unique(rsn) from control;
quit;
data
rsn&i;s
set rsn&i rsn2&i;
run;
proc
sort data=rsn&i (keep=rsn) nodupkey;
by
rsn;
run;
%count(rsn&i);
data
rsn&i;
set
rsn&i;
by rsn;
nn=&nobs;
group=ceil(&ns/nn*_n_);
name=rsn;
run;
%mend split;
/*
proc sort data=obesity.data&i (keep=rsn)
out=rsn&i nodupkey;
by rsn;
run;
proc
sql;
create table rsn&i as select unique(rsn)
from obesity.data&i;
quit;
proc sql;
create table rsn&i as select rsn from
case
union select rsn from control;
quit;
*/
/*1-5-2007MRC-EpidJHZ*/
%include setup; /* set up
options and paths */
%include map; /* map information
*/
%include data; /* genotype data */
%include
ld; /* LDplot*/
%macro extract(i);
proc
sql;
create table long as select
*
from
obesity.long&i where rsn in (select rsn from
rsn);
create table scratch.long as select
*
from
long as a left join rsn as b on
a.rsn=b.rsn
order by pos;
quit;
proc sort
data=scratch.long;
by id age sex bmi case
cohort;
run;
proc transpose data=scratch.long
out=scratch.a1a2&i;
var
a1a2;
id rsn;
by id age
sex bmi case cohort;
run;
proc transpose data=scratch.long
out=scratch.add&i;
var
add;
id rsn;
by id age
sex bmi case cohort;
run;
proc transpose data=scratch.long
out=scratch.dom&i;
var
dom;
id rsn;
by id age
sex bmi case cohort;
run;
proc transpose data=scratch.long
out=scratch.rec&i;
var
rec;
id rsn;
by id age
sex bmi case cohort;
run;
%mendextract;
%*map(16);
%*data(16);
%*include exclude;/*
individuals tobeexcluded*/
libname obesity
'/data/genetics/GWA/1-5-7/obesity500k';
libname scratch
'/data/genetics/scratch';
proc format;s
value
agefmt low-<40='1' 40-<50='2' 50-<60='3' 60-<70='4'
70-high='5';
run;
%ld(16,startpos=52290376,endpos=52710882);
data
rsn;
infile 'ld16_rs7202836-rs2075205.info'
dlm='09'x;
format rsn
$15.;
input rsn$ pos;
rsn=compress(rsn);
run;
%extract(16);
/*9-5-2007 MRC-Epid JHZ*/
title2 quantitative trait in cases;
%macroqlim(i,data=data,trait=bmi,x=add);
ods select none;
proc
qlim data= &data(where=(case="1" and
cohort="0"));
ods output
parameterEstimates=&x;
by
rsn;
model
&trait=&x;
hetero &trait ~ &x /
link=linear noconst;
endogenous &trait ~
truncated (lb=30);
output out=
m&x(rename=(mills_&trait=mills resid_&trait=residual)) mills
residual;
run;
%mend
qlim;