The programs

This section contains brief description for a list of some program, which are available as  a zip file and  a PDF documentation.

 

The master program

 

It includes and calls individuals components and accumulates outputs.

 

/*9-5-2007 MRC-Epid JHZ*/

%include setup;  /* set up options and paths */
%include map;    /* map information */
%include trait;  /* trait information */
%include desc;   /* summary statistics */
%include exclude;/* a list of individuals to be excluded*/
%include data;   /* genotype data */
%include split;  /* data splitting */
%include code;   /* minor and major alleles */
%include pg;     /* merge of phenotype and genotype */
%include bt;     /* binary strait analysis (LOGISTIC) */
%include qt;     /* quantitative trait analysis */
%include hwe;    /* Hardy-Weinberg equilibrium tests*/
%include ld;     /* LD plot */
%include tidy;   /* space release */
%include fdr;    /* false discovery rate */
%include allele; /* chromosome X analysis */
%include xhwe;   /* Hardy-Weinberg equilibrium tests for chromosome X */

%macro do_chr(i,ns=30);
%map(&i);
%data(&i);

%split(&i,ns=&ns);
%do j=1 %to &ns;
    proc sql;
         create table scratch.data&i as select a.rsn, a.id, substr(a1a2,1,1) as a1, substr(a1a2,2,1) as a2
                from obesity.data&i as a, rsn&i as b where a.rsn=b.rsn and group=&j;
         create table scratch.map&i (drop=nn group rsn) as select *
                from rsn&i where group=&j;
    quit;
    %if %upcase(&i)=X %then %do;
        %allele(&i);
       %hwe(&i,where=wheresex=2);<br>       %xhwe(data=scratch.data&i,where=where cohort="1");
        %if (&j=1) %then %do;
            proc sql;
                 create table scratch.chisq&i as select * from chisq;
            quit;
        %end;
        %else %do;
            proc append base=scratch.chisq&i data=chisq;
            run;
        %end;
    %end;
    %else %do;
        %code(&i);
        %pg(&i);        %hwe(&i);
        %desc(&i);
        %bt(&i,trait=case);
        %qt(&i,trait=bmi);
    %end;
    proc datasets;
         append base=obesity.long&i data=scratch.long&i;
         append base=scratch.cma&i data=ma&i;
         append base=scratch.cpm&i data=pm&i;
         append base=scratch.clpm&i data=clpm&i;
         append base=scratch.canova&i data=anova&i;
         append base=scratch.cparms&i data=parms&i;
         append base=scratch.caf&i data=af&i;
         append base=scratch.cms&i data=ms&i;
         append base=scratch.cmeans&i data=means&i;
         append base=scratch.cfreq&i data=freq&i;
    quit;
    %tidy(&i);
%end;
%mend do_chr;
%do_chr(01);
%do_chr(02);
%do_chr(03);
%do_chr(04);
%do_chr(05);
%do_chr(06);
%do_chr(07);
%do_chr(08);
%do_chr(09);
%do_chr(10);
%do_chr(11);
%do_chr(12);
%do_chr(13);
%do_chr(14);
%do_chr(15);
%do_chr(16);
%do_chr(17);
%do_chr(18);
%do_chr(19);
%do_chr(20);
%do_chr(21);
%do_chr(22);
%do_chr(X);

/* Command to run with space in /scratch: sas -work /scratch go.sas & */

 

 

The setup

This program sets up the libnames which contain location of data. It also defines two macros to be used.

/*21-4-2007 MRC-Epid JHZ*/

options replace=yes mprint spool nocenter ps=max ls= max;

libname affy500k '/data/genetics/GWA/annotate';
libname obesity '/data/genetics/scratch/obesity500k';
libname scratch '/data/genetics/scratch';
libname epic5k '.';

%let home=/data/genetics/GWA/Obesity/apr-2007/;

%global nobs cases controls;

%macro count(dsn);
data _null_;
     if 0 then set &dsn nobs=count;
     call symput('nobs',left(put(count,16.)));
     stop;
run;
%mend count;

%include ds2text;

proc format;
     value agefmt low-<50='1' 50-<60='2' 60-<70='3' 70-high='4';
run;

The data

It reads zipped file directly and store as SAS databases. It also excludes individuals who fail the quality controls.

 

/*4-5-2007 MRC-Epid JHZ*/

%macro data(i);
title2 genotypic information;
%if %upcase(&i)=X %then %do;
    %let j=X;
%end;
%else %do;
    %let j=%eval(&i);
%end;
filename EPIC5k pipe "gunzip -c &home.Affx_20070307fs1_gt_OB_BRLMM96_&i..txt.gz";
data case (drop=score b1b2);
     infile EPIC5k dlm='09'x;
     format rsn $15. id $15.;
     input rsn$ id$ b1b2$ score;
     a1a2=translate(b1b2,' ','N');
run;
%count(case);
data _null_;
     nn=&nobs;
     call symput('cases',left(put(nn,16.)));
     put nn=;
run;
filename EPIC5k pipe "gunzip -c &home.Affx_20070307fs1_gt_OBC_BRLMM96_&i..txt.gz";
data control (drop=score b1b2);
     infile EPIC5k dlm='09'x;
     format rsn $15. id $15.;
     input rsn$ id$ b1b2$ score;
     a1a2=translate(b1b2,' ','N');
run;
%count(control);
data _null_;
     nn=&nobs;
     call symput('controls',left(put(nn,16.)));
     put nn=;
run;
data data&i;
     set case control;
run;
proc sql;
     create table exclude as select * from epic5k.exclude;
     create table obesity.data&i as select * from data&i
                  where id not in (select wtccc_id from exclude);
quit;
%mend data;

The map information

/*27-3-2007 MRC-Epid JHZ*/

title2 map information;
%macro map(i);
data epic5k.map&i;
     %if %upcase(&i)=X %then %do;
         infile "('Affx_20061113fa1_snp_23.txt', 'Affx_20061113fa1_snp_X.txt')" dlm='09'x;
     %end;
     %else %do;
         infile "Affx_20061113fa1_snp_&i..txt" dlm='09'x;
     %end;
     format rsn $15.;
     input idn rsn$ chr location allele1$ allele2$;
     drop idn;
     name=rsn;
run;
proc sort data=epic5k.map&i;
     by rsn;
run;
%mend map;

The phenotypes

The data sets contains age, sex, case-control labels and bosdy mass index (BMI).

/*4-5-2007 MRC-Epid JHZ*/

title phenotypic information;
proc sort data= epic5k.newtrait(rename=(wtccc_id=id case_20070321=case cohort_20070321=cohort)) out=newtrait;
     format wtccc_id $15.;
     by sex age;
run;
data stdtrait;
     set newtrait;
     zbmi=log10(bmi);
     zheight=height;
run;
proc standard data=stdtrait out=stdheight mean=0 std=1;
     by sex age;
     var zheight;
     format age agefmt.;
run;
proc sort data=stdtrait;
     by cohort sex age;
run;
proc standard data=stdtrait out=stdbmi mean=0 std=1;
     by cohort sex age;
     var zbmi;
     format age agefmt.;
run;
proc sql;
     create table epic5k.trait as select a.cohort, a.id, a.case, a.age, a.sex, a.hba1c, a.hdl, a.height, a.zheight,
                                         a.bmi, b.zbmi from stdheight a left join stdbmi b on a.id=b.id;
quit;
proc sort data=epic5k.trait;
     by id cohort case age sex bmi zbmi height zheight hba1c hdl;
run;

/*
data epic5k.trait;
     format sample_id $15.;
     set epic5k.qc (keep=sample_id sample_selected_to_genotype case bmi age sex cohort);
     rename sample_id=id sample_selected_to_genotype=typed;
run;
proc sort data=epic5k.trait;
     by id typed case bmi age sex;
run;
*/s

The coding

This program will code the character labels for additive, dominant and recessive models.

/*8-2-2007 MRC-Epid JHZ*/

%macro code(i);
data a1a2;
     set scratch.data&i;
     ab=a1;output;
     ab=a2;output;
run;
ods select none;
proc sort data=a1a2;
     by rsn;
run;
proc freq data=a1a2;
     by rsn;
     ods output onewayfreqs=ab;
     table ab;
run;
ods select all;
proc sql;
     create table ma0 as select rsn, table, ab, frequency, percent from ab
            group by rsn having frequency=min(frequency);
     create table mb0 as select rsn, table, ab, frequency, percent from ab
            group by rsn having frequency=max(frequency);
quit;
proc sort data=ma0;
     by rsn ab;
run;
data ma;
     set ma0;
     by rsn ab;
     if percent=50 & last.rsn then delete;
run;
proc sort data=mb0;
by rsn ab; run;
data mb;
set mb0; by rsn ab;
if percent=50& first.rsn then delete;
run;
data ma&i(drop=table);<br>    mergema(rename=(ab=a1 frequency=n1 percent= p1))mb(rename=(ab=a2 frequency=n2 percent=p2));
     by rsn;
run;
proc sort data=scratch.data&i out=data&i;
     by rsn;
run;
data scratch.g&i (drop=table frequency ab percent);
     merge ma data&i;
     by rsn;
     if (a1 ne '')&(a2ne'')thendo;<br>       rec0=(a1=ab & a2=ab);
       dom0=(a1=ab|a2=ab);
       add0=(a1=ab)+(a2=ab);
     end;
run;
%mend code;

Combining genotypes and phenotype

/*3-5-2007 MRC-Epid JHZ*/

%macro pg(i);
proc sql;
     create table long&i as select * from scratch.g&i order by id;
quit;
data long&i (drop=t);
     set long&i;
     a1a2='   ';
     t=' ';
     if a1 ne ' ' & a2 ne ' ' then do;
        t=a1;
        if a1>a2 then do;
           a1=a2;
           a2=t;
        end;
        a1a2=compress(a1||'/'||a2);
     end;
     drop a1 a2;
run;
proc transpose data=long&i out=wide&i;
     by id;
     id rsn;
     var add0 dom0 rec0 a1a2;
run;
data scratch.wide&i (where=(compress(id) ne ""));
     merge epic5k.trait wide&i;
     by id;
run;
proc transpose data=scratch.wide&i out=long&i (drop= _LABEL_rename=(_NAME_=rsn));
     by id cohort case age sex bmi zbmi height zheight hba1c hdl;
     var _all_;
run;
proc sql;
     create table long2&i (where=(upcase(rsn) not in('_NAME_','ID','COHORT','CASE','AGE','SEX','BMI','ZBMI','HEIGHT','ZHEIGHT$
     as select * from long&i order by rsn, id;
quit;
data scratch.long&i (drop=add0 dom0 rec0 where=(compress(rsn) ne ""));
     set long2&i;
     add=input(compress(add0),3.);
     dom=input(compress(dom0),3.);
     rec=input(compress(rec0),3.);
run;
%mend pg;

Marker information and Hardy-Weinberg equilibrium tests

/*4-5-2007 MRC-Epid JHZ*/

%macro hwe(i,where=);
ods select none;
proc sort data= scratch.long&i(where=(cohort="1") keep=rsn id sex a1a2 cohort) out=control&i (drop=cohort sex);
     &where;
     by id rsn;
run;
proc transpose data=control&i out=control2&i (drop=_NAME_ id);
     id rsn;
     by id;
     var a1a2;
run;
proc allele data=control2&i genocol;
     var _CHARACTER_;
     ods output markersumm=ms&i allelefreq=af&i;
run;
proc sort data=scratch.long&i (keep=rsn id sex a1a2 cohort) out=control&i (drop=cohort sex);
     &where;
     by id rsn;
run;
proc transpose data=control&i out=control2&i (drop=_NAME_ id);
     id rsn;
     by id;
     var a1a2;
run;
proc allele data=control2&i genocol;
     var _CHARACTER_;
     ods output markersumm=ams&i allelefreq=aaf&i;
run;
ods select all;
%mend hwe;s

Analysis on case-control labels

/*8-2-2006 MRC-Epid JHZ*/
 
stitle2 binary trait;
%macro bt0(i,trait=case);
ods select none;
proc casecontrol data=scratch.long&i tall marker=rsn indiv=id genotype trend outstat=scratch.pm&i;
     trait &trait;
     var a1 a2;
run;
ods select all;
%mend bt0;
%macro bt(i,trait=case);
options nonotes;
ods select none;
proc logistic data=scratch.long&i descending;
     ods output parameterestimates=pm&i CLOddsPL=clpm&i;
     model &trait=add / expb clodds=pl;
     by rsn;
run;
proc logistic data=scratch.long&i descending;
     ods output parameterestimates=dom&i CLOddsPL=cldom&i;
     model &trait=dom / expb clodds=pl;
     by rsn;
run;
proc logistic data=scratch.long&i descending;
     ods output parameterestimates=rec&i CLOddsPL=clrec&i;
     model &trait=rec / expb clodds=pl;
     by rsn;
run;
data pm&i (where=(variable ne "Intercept"));
     set pm&i dom&i rec&i;
run;
proc sort data=pm&i;
     by rsn;
run;
data clpm&i;
     set clpm&i cldom&i clrec&i;
run;
proc sort data=clpm&i;
     by rsn;
run;
ods select all;
options notes;
%mend bt;

Analysis as continuous trait

/*26-4-2007 MRC-Epid JHZ*/

title2 quantitative trait in cohort;

%macro qt(i,trait=bmi);
ods select none;
proc reg data= scratch.long&i(where=(cohort="1"));
     ods output ANOVA=anova&i ParameterEstimates=parms&i;
     by rsn;
     add: model &trait=add /b stb;
     dom: model &trait=dom /b stb;
     rec: model &trait=rec /b stb;
quit;
data parms&i (drop=variable);
     set parms&i (where=(variable ne "Intercept") drop=label);
run;
%mend qt;s


The data partitioning

It will splits the raw data into pieces each to be analysed

/*5-3-2007 MRC-Epid JHZ*/

%macro split(i,ns=2);
proc sql;
     create table rsn&i as select unique(rsn) from case;
     create table rsn2&i as select unique(rsn) from control;
quit;
data rsn&i;s
     set rsn&i rsn2&i;
run;
proc sort data=rsn&i (keep=rsn) nodupkey;
     by rsn;
run;
%count(rsn&i);
data rsn&i;
     set rsn&i;
     by rsn;
     nn=&nobs;
     group=ceil(&ns/nn*_n_);
     name=rsn;
run;
%mend split;

/*
proc sort data=obesity.data&i (keep=rsn) out=rsn&i nodupkey;
     by rsn;
run;
proc sql;
     create table rsn&i as select unique(rsn) from obesity.data&i;
quit;
proc sql;
     create table rsn&i as select rsn from case
                  union select rsn from control;
quit;
*/

Extraction of FTO SNPs

/*1-5-2007MRC-EpidJHZ*/

%include setup;  /* set up options and paths */
%include map;    /* map information */
%include data;   /* genotype data */
%include ld;     /* LDplot*/

%macro extract(i);
proc sql;
     create table long as select *
            from obesity.long&i where rsn in (select rsn from rsn);
     create table scratch.long as select *
            from long as a left join rsn as b on a.rsn=b.rsn
            order by pos;
quit;
proc sort data=scratch.long;
     by id age sex bmi case cohort;
run;
proc transpose data=scratch.long out=scratch.a1a2&i;
     var a1a2;
     id rsn;
     by id age sex bmi case cohort;
run;
proc transpose data=scratch.long out=scratch.add&i;
     var add;
     id rsn;
     by id age sex bmi case cohort;
run;
proc transpose data=scratch.long out=scratch.dom&i;
     var dom;
     id rsn;
     by id age sex bmi case cohort;
run;
proc transpose data=scratch.long out=scratch.rec&i;
     var rec;
     id rsn;
     by id age sex bmi case cohort;
run;
%mendextract;

%*map(16);
%*data(16);
%*include exclude;/* individuals tobeexcluded*/

libname obesity '/data/genetics/GWA/1-5-7/obesity500k';
libname scratch '/data/genetics/scratch';
proc format;s
     value agefmt low-<40='1' 40-<50='2' 50-<60='3' 60-<70='4' 70-high='5';
run;
%ld(16,startpos=52290376,endpos=52710882);

data rsn;
     infile 'ld16_rs7202836-rs2075205.info' dlm='09'x;
     format rsn $15.;
     input rsn$ pos;
     rsn=compress(rsn);
run;

%extract(16);

Quantitative traits in cases

/*9-5-2007 MRC-Epid JHZ*/

title2 quantitative trait in cases;

%macroqlim(i,data=data,trait=bmi,x=add);
ods select none;
proc qlim data= &data(where=(case="1" and cohort="0"));
     ods output parameterEstimates=&x;
     by rsn;
     model &trait=&x;
     hetero &trait ~ &x / link=linear noconst;
     endogenous &trait ~ truncated (lb=30);
     output out= m&x(rename=(mills_&trait=mills resid_&trait=residual)) mills residual;
run;
%mend qlim;