/*************** SAS code for Microarray cDNA data analysis with two Dyes **************/

options ls=80 ps=70 nodate nocenter pageno=1 formdlim="-";

* A library folder will be created with the name "DemokA" in Libraries and all the output files will be saved in the 
  folder/directory given ("C:\.....\folder_name"); 
libname DemokA "C:\temp\Demo\SAS_Analysis";

* Goal: Read the input file into SAS and change the column headers to correspond to the Array IDs used in the design file 
  and give it new name (i.e. "Data"); 
* Read the *.csv input data file (the "forsasinput.csv" file generated from the filtering/normalization procedures) that 
  contains the geneIDs and the intensity values (for both dyes) for each of the genes in each Array, and create the file 
  "Data" which will be used for SAS analysis;
* Type the Array IDs corresponding to your experimental design (e.i. Array011 Array012 Array021 Array022 Array031...so on). 
  The # means number of Array and the % means the Dye used depending on the Dye that was used first in the experimental 
  design (i.e. 1 for Cy3 and 2 for Cy5);
* This code example is written for an experiment with 7 Arrays, 4 replications, hence we have 28 Arrays;
data DemokA.Data;
infile "C:\temp\Demo\SAS_Analysis\forsasinput.csv" LRECL=1000 firstobs=2 dlm=",";
input metarow metacolumn gridrow gridcolumn geneID: $15. Array011 Array012 Array021 Array022 Array031 Array032 Array041 Array042 Array051 Array052 Array061 Array062 Array071 Array072 Array081 Array082 Array091 Array092 Array101 Array102 Array111 Array112 Array121 Array122 Array131 Array132 Array141 Array142 Array151 Array152 Array161 Array162 Array171 Array172 Array181 Array182 Array191 Array192 Array201 Array202 Array211 Array212 Array221 Array222 Array231 Array232 Array241 Array242 Array251 Array252 Array261 Array262 Array271 Array272 Array281 Array282;
run;

* Goal: Read the replicates file ("replicatesforeachgene.csv") into SAS;
* Read a .csv input data file that contains the number of biological replicates for each gene in each array, and create 
  "Reps" file to be used at the end of the SAS analysis;
data DemokA.Reps;
infile "C:\temp\Demo\SAS_Analysis\replicatesforeachgene.csv" LRECL=1000 firstobs=2 dlm=",";
input metarow metacolumn gridrow gridcolumn geneID: $15. Array1 Array2 Array3 Array4 Array5 Array6 Array7;
run;

* Clean "Data" by removing unnecessary variables;
data DemokA.Data;
set DemokA.Data;
drop metarow metacolumn gridrow gridcolumn;
output;
run;

* Clean "Reps" by removing unnecessary variables;
data DemokA.Reps;
set DemokA.Reps;
drop metarow metacolumn gridrow gridcolumn;
output;
run;

* Sort "Data" file by geneID;
proc sort data=DemokA.Data;
by geneID;
run;

* Transpose "Data" table by geneID to get the adequate format for the analysis and create a new file "Data1";
proc transpose data=DemokA.Data out=DemokA.Data1;
by geneID;
run;

* Rename _NAME_ and COL1 variables as Array and log_intensity, respectively. Drop the unnecessary variables;
data DemokA.Data1;
set DemokA.Data1;
Array=_NAME_;					* Array = Number of Array and Dye information (1=Cy3 and 2=Cy5); 
log_intensity=COL1;				* log_intensity = column 1;
drop _NAME_ COL1;
output;
run;

* The design file used in the pre-analysis steps (filtering and normalization) needs to be revised to include columns for 
  Treatment and Time variables and to rename arrays as Array011, Array012, Array021, Array022,... etc. This step is done
  manually in Excel and saved as "DesignFile.csv";
* Read the DesignFile.csv input data file that contains the Experimental Design information (Array Dye Sample Treatment 
  Time) into SAS and create "DesignFile" file to be used in the SAS analysis;
* The Array variable should be written as Array011, Array012, Array021, Array022,... etc., where the firsts two digits 
  indicate the Array number a the third digit the Dye (i.e. Array011 represents Array 1 and Cy3 label, Array012 represents 
  Array 1 and Cy5 label);
* This code is written for an experiment with 7 Arrays, 4 replications, hence we have 28 Arrays;
data DemokA.DesignFile;
infile "C:\temp\Demo\SAS_Analysis\DesignFile.csv" LRECL=1000 firstobs=2 dlm=",";
input Array $ Dye $ Sample Treatment $ Time;
run;

* Sort "Data1" and "DesignFile" files by Array and merge both data sets into a new file "Data2". Also, drop Sample variable;
proc sort data=DemokA.Data1;
by Array;
proc sort data=DemokA.DesignFile;
by Array;
data DemokA.Data2;
merge DemokA.Data1 DemokA.DesignFile;
by Array;
drop Sample;
output;
run;

* Sort "Data2" file by geneID;
proc sort data=DemokA.Data2;
by geneID;
run;

* Change Array variable values to get the adequate format for the analysis (correction of the number of the Arrays) 
  and save as "Data3" file;
data DemokA.Data3;
i=1;							* i = counter;
n=28;							* n = number of Arrays used in the experiment;
geneID="AF126021a";				* geneID name for the first gene in "Data2" file;
do while (i<=n);
	do j=1 to 2 by 1;
		set DemokA.Data2;
		if geneID=geneID then
			do;
				Array=i;
				output;
			end;
		else
			i=1;
	end;
	i=i+1;
end;
run;

* Drop unnecessary variables from "Data3" file;
data DemokA.Data3;
set DemokA.Data3;
drop i n j Slide;
output;
run;

* Visualize the normality of the log_intensity values in "Data3";
proc univariate data=DemokA.Data3 normal plot;
var log_intensity;
histogram;
run;


* Goal: Run ANOVA to examine the ANOVA assumption of normality of the residuals;
* Run mixed model with the whole data set "Data3" to study effect across all Treatments;
* Array is a random effect since there is expected manufacturing variation from slide to slide with cDNA slides
  (such as sticking pins). In addition, choosing "Random" allows us to conclude that changes are independent of slides used;
* Time(Treatment) because Time and Treatment are unbalanced since there are 2 Treatments (Mock and Inoc) for Time points 
  T1, T2, and T3, but there is only one Treatment (Mock) for T0;
proc mixed data=DemokA.Data3;
ods listing close;									* Closes the listing destination so that no listing output is created;
by geneID;
class Array Dye Treatment Time;
model log_intensity= Dye Treatment Time(Treatment)/residual outp=DemokA.Resid;
random Array;
run;

* Visualize the normality of the Studentized residual values in "Resid";
* Shows normality plot of the entire data set in one graph;
ods listing;
proc univariate data=DemokA.Resid normal plot;
var StudentResid;
histogram;
run;


* Goal: Run ANOVA to calculate differences (actually ratios as we are working with log values) between Cy3 and Cy5 samples
  due to treatment and how these differences differ from the mean difference of all genes as well as associated error and p-values;
* Run mixed model with the whole data set "Data3" to study effect across all Treatments;
* Array is a random effect since there is expected manufacturing variation from slide to slide with cDNA slides
  (such as sticking pins). In addition, choosing "Random" allows us to conclude that changes are independent of slides used;
* Time(Treatment) because Time and Treatment are unbalanced since there are 2 Treatments (Mock and Inoc) for Time points 
  T1, T2, and T3, but there is only one Treatment (Mock) for T0;
proc mixed data=DemokA.Data3;
ods listing close;									* Closes the listing destination so that no listing output is created;
by geneID;
class Array Dye Treatment Time;
model log_intensity= Dye Treatment Time(Treatment)/outp=DemokA.pred;
random Array;
lsmeans Dye Treatment Time(Treatment)/diff;			* diff= Pairwise differences from LSMEANS;
ods output tests3=DemokA.test diffs=DemokA.diff;	* Create "diff" file with the Differences of Least Squares Means;
run;

/*****************************************************************************************************/
* Determine intensity ratio level of significance for all genes of ANOVA, using FDR adjustment;
* Determine if any genes show a significant Dye bias independent of Treatment or Time;
* Save "diff" file into "diff_Dye" file in order to keep the original "diff" for future calculations;
data DemokA.diff_Dye;
set DemokA.diff;
where effect="Dye" and Dye="Cy3" and _Dye="Cy5";
raw_p=probt;										* Change variable name "probt" to "raw_p";
* The MULTTEST procedure performs many hypothesis tests on the same data set;
* PROC MULTTEST approaches the multiple testing problem by adjusting the p-values;
* An adjusted p-value is defined as the smallest significance level for which the given hypothesis would be rejected;
* The decision rule is to reject the null hypothesis when the adjusted p-value is less than alpha;
* We are using the following p_value adjustments:
  Holm's step-down Bonferroni method, Hochberg's step-up Bonferroni method, Benjamini and Hochberg's method (FDR);
proc multtest pdata=DemokA.diff_Dye holm hoc fdr out=DemokA.diff_Dye noprint;
run;

* Create "sig_Dye" file with the significant genes due to dye effect across all the experiment (fdr_p<0.01);
data DemokA.sig_Dye;
set DemokA.diff_Dye;
where fdr_p<0.01;
run;

* Back transform the log_intensity Cy3/Cy5 ratios and save the values as "sig_Dye" file;
data DemokA.sig_Dye;
set DemokA.sig_Dye;
Intensity_Ratio=2**(1*estimate);				* or use "Intensity_Ratio=2**(-1*estimate)" if you'd rather see Cy5/Cy3; 
run;									

* Clean the "sig_Dye" file for the researcher final output file;
data DemokA.sig_Dye;
set DemokA.sig_Dye;
where raw_p~=.;
keep geneid Intensity_Ratio fdr_p;
run;

* Write the number of replications for each gene;
* Sort "sig_Dye" and "Reps" files by geneID;
* Merge "sig_Dye" and "Reps" files into "sig_Dye_R" file;
proc sort data=DemokA.sig_Dye;
by geneID;
proc sort data=DemokA.Reps;
by geneID;
data DemokA.sig_Dye_R;
merge DemokA.sig_Dye DemokA.Reps;
by geneID;
data DemokA.sig_Dye_R;
modify DemokA.sig_Dye_R;
if Intensity_Ratio=. then remove;
run;

*Save a version in Excel format; 
proc export data=DemokA.sig_Dye_R outfile="C:\temp\Demo\SAS_Analysis\Sig_Dye.xls" DBMS=EXCEL REPLACE;
run;

/*****************************************************************************************************/
* Determine intensity ratio and level of significance for all genes by ANOVA, using FDR adjustment;
* Looking only at the Treatment effect (across all Time points);
* Later one can restrict this list by choosing p-value cutoff of their choice;
* Save "diff" file into "diff_AT" file in order to keep the original "diff" for future calculations;
* This experiment has two Treatments "Mock" and "Inoc" and four Time points= 0, 1, 2, and 3 days;
data DemokA.diff_AT;
set DemokA.diff;
where effect="Treatment" and Treatment="Inoc" and _Treatment="Mock";
raw_p=probt;										* Change variable name "probt" to "raw_p";
* The MULTTEST procedure performs many hypothesis tests on the same data set;
* PROC MULTTEST approaches the multiple testing problem by adjusting the p-values;
* An adjusted p-value is defined as the smallest significance level for which the given hypothesis would be rejected;
* The decision rule is to reject the null hypothesis when the adjusted p-value is less than alpha;
* We are using the following p-value adjustments:
  Holm's step-down Bonferroni method, Hochberg's step-up Bonferroni method, Benjamini and Hochberg's method (FDR);
proc multtest pdata=DemokA.diff_AT holm hoc fdr out=DemokA.sig_AT noprint;
run;

* Back transform the log_intensity Inoc/Mock ratios and save the values as "sig_AT" file;
data DemokA.sig_AT;
set DemokA.sig_AT;
Intensity_Ratio=2**(1*estimate);			* or use "Intensity_Ratio=2**(-1*estimate)" if you'd rather see Mock/Inoc; 
run;									

* Clean the "sig_AT" file for the researcher final output file;
data DemokA.sig_AT;
set DemokA.sig_AT;
where raw_p~=.;
keep geneid Intensity_Ratio fdr_p;
run;

* Write the number of replications for each gene;
* Sort "sig_AT" and "Reps" files by geneID;
* Merge "sig_AT" and "Reps" files into "sig_AT_R" file;
proc sort data=DemokA.sig_AT;
by geneID;
proc sort data=DemokA.Reps;
by geneID;
data DemokA.sig_AT_R;
merge DemokA.sig_AT DemokA.Reps;
by geneID;
data DemokA.sig_AT_R;
modify DemokA.sig_AT_R;
if Intensity_Ratio=. then remove;
run;

* Save a version in Excel format;
proc export data=DemokA.sig_AT_R outfile="C:\temp\Demo\SAS_Analysis\Sig_AllTime.xls" DBMS=EXCEL REPLACE;
run;

/*****************************************************************************************************/
* Goal: see effect of Treatment during every Time point;
* Determine intensity ratio and level of significance for all genes due to Treatment effect at each Time point, starting 
  with first Time point having an Inoc to Mock comparison (the 1 day post inoculation Time point);
* This experiment has 4 Time points= 0, 1, 2, 3 days.
* As we are interested in the Treatment effect across one Time point, we use the "Time(Treatment)" variable;   
* For each Time point an analysis is required;
data DemokA.diff1;
set DemokA.diff;
where effect="Time(Treatment)" and Time=1 and _Time=1 and Treatment="Inoc" and _Treatment="Mock";
raw_p=probt;
proc multtest pdata=DemokA.diff1 holm hoc fdr out=DemokA.sig1 noprint;
data DemokA.sig1;
set DemokA.sig1;
Intensity_Ratio=2**(1*estimate);			* or use "Intensity_Ratio=2**(-1*estimate)" if you'd rather see Mock/Inoc; 
data DemokA.sig1;
set DemokA.sig1;
where raw_p~=.;
keep geneid Intensity_Ratio fdr_p;
run;

* Write the number of replications for each gene;
proc sort data=DemokA.sig1;
by geneID;
proc sort data=DemokA.Reps;
by geneID;
data DemokA.sig1_R;
merge DemokA.sig1 DemokA.Reps;
by geneID;
data DemokA.sig1_R;
modify DemokA.sig1_R;
if Intensity_Ratio=. then remove;
run;

* Save a version in Excel format;
proc export data=DemokA.sig1_R outfile="C:\temp\Demo\SAS_Analysis\Sig_Time1.xls" DBMS=EXCEL REPLACE;
run;

* Determine intensity ratio and level of significance for all genes due to Treatment effect at 2 day post inoculation; 
data DemokA.diff2;
set DemokA.diff;
where effect="Time(Treatment)" and Time=2 and _Time=2 and Treatment="Inoc" and _Treatment="Mock";
raw_p=probt;
proc multtest pdata=DemokA.diff2 holm hoc fdr out=DemokA.sig2 noprint;
data DemokA.sig2;
set DemokA.sig2;
Intensity_Ratio=2**(1*estimate);		* or use "Intensity_Ratio=2**(-1*estimate)" if you'd rather see Mock/Inoc;
data DemokA.sig2;
set DemokA.sig2;
where raw_p~=.;
keep geneid Intensity_Ratio fdr_p;
run;

* Write the number of replications for each gene;
proc sort data=DemokA.sig2;
by geneID;
proc sort data=DemokA.Reps;
by geneID;
data DemokA.sig2_R;
merge DemokA.sig2 DemokA.Reps;
by geneID;
data DemokA.sig2_R;
modify DemokA.sig2_R;
if Intensity_Ratio=. then remove;
run;

* Save a version in Excel format;
proc export data=DemokA.sig2_R outfile="C:\temp\Demo\SAS_Analysis\Sig_Time2.xls" DBMS=EXCEL REPLACE;
run;

* Determine intensity ratio and level of significance for all genes due to Treatment effect at 3 day post inoculation; 
data DemokA.diff3;
set DemokA.diff;
where effect="Time(Treatment)" and Time=3 and _Time=3 and Treatment="Inoc" and _Treatment="Mock";
raw_p=probt;
proc multtest pdata=DemokA.diff3 holm hoc fdr out=DemokA.diff3 noprint;
data DemokA.sig3;
set DemokA.sig3;
Intensity_Ratio=2**(1*estimate);		* or use "Intensity_Ratio=2**(-1*estimate)" if you'd rather see Mock/Inoc;
data DemokA.sig3;
set DemokA.sig3;
where raw_p~=.;
keep geneid Intensity_Ratio fdr_p;
run;

* Write the number of replications for each gene;
proc sort data=DemokA.sig3;
by geneID;
proc sort data=DemokA.Reps;
by geneID;
data DemokA.sig3_R;
merge DemokA.sig3 DemokA.Reps;
by geneID;
data DemokA.sig3_R;
modify DemokA.sig3_R;
if Intensity_Ratio=. then remove;
run;

* Save a version in Excel format;
proc export data=DemokA.sig3_R outfile="C:\temp\Demo\SAS_Analysis\Sig_Time3.xls" DBMS=EXCEL REPLACE;
run;

/*****************************************************************************************************/
* Sort "Data3" file by geneID;
proc sort data=DemokA.Data3;
by geneID;
run;

* Goal: Run ANOVA to calculate differences (actually ratios as we are working with log values) between Cy3 and Cy5 samples
  due to time and how these differences differ from the mean difference of all genes as well as associated error and p-values;
* Run mixed model with the whole data set "Data3" to study effect across all Time points;
* Array is a random effect since there is expected manufacturing variation from slide to slide with cDNA slides
  (such as sticking pins). In addition, choosing "Random" allows us to conclude that changes are independent of slides used;
* Treatment(Time) because Time and Treatment are unbalanced since there are 2 Treatments (Mock and Inoc) for Time points 
  T1, T2, and T3, but there is only one Treatment (Mock) for T0;
proc mixed data=DemokA.Data3;
ods listing close;									* Closes the listing destination so that no listing output is created;
by geneID;
class Array Dye Treatment Time;
model log_intensity= Dye Treatment(Time) Time/outp=DemokA.predB;
random Array;
lsmeans Treatment(Time) Time/diff;					* diff= Pairwise differences from LSMEANS;
ods output tests3=DemokA.testB diffs=DemokA.diffB;	* Create "diff" file with the Differences of Least Squares Means;
run;

/*****************************************************************************************************/
* Determine intensity ratio and level of significance for all genes by ANOVA, using FDR adjustment;
* Looking at the Time effect, starting with Time=0 vs. Time=1 comparison;
* Later one can restrict this list by choosing p-value cutoff of their choice;
* This experiment has two Treatments "Mock" and "Inoc" and four Time points= 0, 1, 2, and 3 days;
* For each Time point comparison an analysis is required;
* This data is taken from Table "diffB". In table "diffB" the column "Time" goes from 0-3 and column "_Time" goes from 1-3.
  Because the program takes the values from the columns in numerical order, when setting the first ratio (T1 to T0) you 
  have to write it as T0 over T1 and then take the negative to have T1/T0;
data DemokA.diff_T1_T0;
set DemokA.diffB;
where effect="Time" and Time=0 and _Time=1;
raw_p=probt;
proc multtest pdata=DemokA.diff_T1_T0 holm hoc fdr out=DemokA.diff_T1_T0 noprint;
data DemokA.sig_T1_T0;
set DemokA.sig_T1_T0;
Intensity_Ratio=2**(-1*estimate);		* or use "Intensity_Ratio=2**(1*estimate)" if you'd rather see T0/T1;
data DemokA.sig_T1_T0;
set DemokA.sig_T1_T0;
where raw_p~=.;
keep geneid Intensity_Ratio fdr_p;
run;

* Write the number of replications for each gene;
proc sort data=DemokA.sig_T1_T0;
by geneID;
proc sort data=DemokA.Reps;
by geneID;
data DemokA.sig_T1_T0_R;
merge DemokA.sig_T1_T0 DemokA.Reps;
by geneID;
data DemokA.sig_T1_T0_R;
modify DemokA.sig_T1_T0_R;
if Intensity_Ratio=. then remove;
run;

* Save a version in Excel format;
proc export data=DemokA.sig_T1_T0_R outfile="C:\temp\Demo\SAS_Analysis\Sig_Time1vs0.xls" DBMS=EXCEL REPLACE;
run;
* As in T1 to T0 above, because the program takes the values from the columns in numerical order, when setting 
the ratio (T2 to T0) you have to write it as T0 over T2 and then take the negative to have T2/T0;
* Comparison for time=2 vs. time=0 days;
data DemokA.diff_T2_T0;
set DemokA.diffB;
where effect="Time" and Time=0 and _Time=2;
raw_p=probt;
proc multtest pdata=DemokA.diff_T2_T0 holm hoc fdr out=DemokA.diff_T2_T0 noprint;
data DemokA.sig_T2_T0;
set DemokA.sig_T2_T0;
Intensity_Ratio=2**(-1*estimate);		* or use "Intensity_Ratio=2**(1*estimate)" if you'd rather see T0/T2;
data DemokA.sig_T2_T0;
set DemokA.sig_T2_T0;
where raw_p~=.;
keep geneid Intensity_Ratio fdr_p;
run;

* Write the number of replication for each gene;
proc sort data=DemokA.sig_T2_T0;
by geneID;
proc sort data=DemokA.Reps;
by geneID;
data DemokA.sig_T2_T0_R;
merge DemokA.sig_T2_T0 DemokA.Reps;
by geneID;
data DemokA.sig_T2_T0_R;
modify DemokA.sig_T2_T0_R;
if Intensity_Ratio=. then remove;
run;

* Save a version in Excel format;
proc export data=DemokA.sig_T2_T0_R outfile="C:\temp\Demo\SAS_Analysis\Sig_Time2vs0.xls" DBMS=EXCEL REPLACE;
run;

* As in T1 to T0 above, because the program takes the values from the columns in numerical order, when setting 
the ratio (T3 to T0) you have to write it as T0 over T3 and then take the negative to have T3/T0;
* Comparison for time=3 vs. time=0 days;
data DemokA.diff_T3_T0;
set DemokA.diffB;
where effect="Time" and Time=0 and _Time=3;
raw_p=probt;
proc multtest pdata=DemokA.diff_T3_T0 holm hoc fdr out=DemokA.diff_T3_T0 noprint;
data DemokA.sig_T3_T0;
set DemokA.sig_T3_T0;
Intensity_Ratio=2**(-1*estimate);		* or use "Intensity_Ratio=2**(1*estimate)" if you'd rather see T0/T3;
data DemokA.sig_T3_T0;
set DemokA.sig_T3_T0;
where raw_p~=.;
keep geneid Intensity_Ratio fdr_p;
run;

* Write the number of replication for each gene;
proc sort data=DemokA.sig_T3_T0;
by geneID;
proc sort data=DemokA.Reps;
by geneID;
data DemokA.sig_T3_T0_R;
merge DemokA.sig_T3_T0 DemokA.Reps;
by geneID;
data DemokA.sig_T3_T0_R;
modify DemokA.sig_T3_T0_R;
if Intensity_Ratio=. then remove;
run;

* Save a version in Excel format;
proc export data=DemokA.sig_T3_T0_R outfile="C:\temp\Demo\SAS_Analysis\Sig_Time3vs0.xls" DBMS=EXCEL REPLACE;
run;

/*****************************************************************************************************/
* Goal: see effect of Time during the Mock Treatment;
* Determine intensity ratio and level of significance for all genes by ANOVA, using FDR adjustment;
* Looking at the Time effect across Mock Treatment, starting with Time=0 vs. Time=1 comparison;
* Later one can restrict this list by choosing p-value cutoff of their choice;
* This experiment has two Treatments "Mock" and "Inoc" and four Time points= 0, 1, 2, and 3 days;
* For each Time point comparison an analysis is required;
* This data is taken from Table "diffB". In table "diffB" the column "Time" goes from 0-3 and column "_Time" goes from 1-3.
  Because the program takes the values from the columns in numerical order, when setting the first ratio (T1 to T0) you 
  have to write it as T0 over T1 and then take the negative to have T1/T0;
* As we are interested in the Time effect across only one of the Treatments (the Mock Treatment), we use the 
  "Treatment(Time)" variable;   
* Comparison for Time=1 vs. Time=0 with Treatment=Mock;
data DemokA.diff_Mock_T1_T0;
set DemokA.diffB;
where effect="Treatment(Time)" and Treatment="Mock" and _Treatment="Mock" and Time=0 and _Time=1;
raw_p=probt;
proc multtest pdata=DemokA.diff_Mock_T1_T0 holm hoc fdr out=DemokA.diff_Mock_T1_T0 noprint;
data DemokA.sig_Mock_T1_T0;
set DemokA.sig_Mock_T1_T0;
Intensity_Ratio=2**(-1*estimate);		* or use "Intensity_Ratio=2**(1*estimate)" if you'd rather see T0/T1;
data DemokA.sig_Mock_T1_T0;
set DemokA.sig_Mock_T1_T0;
where raw_p~=.;
keep geneid Intensity_Ratio fdr_p;
run;

* Write the number of replication for each gene;
proc sort data=DemokA.sig_Mock_T1_T0;
by geneID;
proc sort data=DemokA.Reps;
by geneID;
data DemokA.sig_Mock_T1_T0_R;
merge DemokA.sig_Mock_T1_T0 DemokA.Reps;
by geneID;
data DemokA.sig_Mock_T1_T0_R;
modify DemokA.sig_Mock_T1_T0_R;
if Intensity_Ratio=. then remove;
run;

* Save a version in Excel format;
proc export data=DemokA.sig_Mock_T1_T0_R outfile="C:\temp\Demo\SAS_Analysis\Sig_Mock_Time1vs0.xls" DBMS=EXCEL REPLACE;
run;

* Comparison for Time=2 vs. Time=0 days with Treatment=Mock;
data DemokA.diff_Mock_T2_T0;
set DemokA.diffB;
where effect="Treatment(Time)" and Treatment="Mock" and _Treatment="Mock" and Time=0 and _Time=2;
raw_p=probt;
proc multtest pdata=DemokA.diff_Mock_T2_T0 holm hoc fdr out=DemokA.diff_Mock_T2_T0 noprint;
data DemokA.sig_Mock_T2_T0;
set DemokA.sig_Mock_T2_T0;
Intensity_Ratio=2**(-1*estimate);		* or use "Intensity_Ratio=2**(1*estimate)" if you'd rather see T0/T2;
data DemokA.sig_Mock_T2_T0;
set DemokA.sig_Mock_T2_T0;
where raw_p~=.;
keep geneid Intensity_Ratio fdr_p;
run;

* Write the number of replication for each gene;
proc sort data=DemokA.sig_Mock_T2_T0;
by geneID;
proc sort data=DemokA.Reps;
by geneID;
data DemokA.sig_Mock_T2_T0_R;
merge DemokA.sig_Mock_T2_T0 DemokA.Reps;
by geneID;
data DemokA.sig_Mock_T2_T0_R;
modify DemokA.sig_Mock_T2_T0_R;
if Intensity_Ratio=. then remove;
run;

* Save a version in Excel format;
proc export data=DemokA.sig_Mock_T2_T0_R outfile="C:\temp\Demo\SAS_Analysis\Sig_Mock_Time2vs0.xls" DBMS=EXCEL REPLACE;
run;

* Comparison for Time=3 vs. Time=0 days with Treatment=Mock;
data DemokA.diff_Mock_T3_T0;
set DemokA.diffB;
where effect="Treatment(Time)" and Treatment="Mock" and _Treatment="Mock" and Time=0 and _Time=3;
raw_p=probt;
proc multtest pdata=DemokA.diff_Mock_T3_T0 holm hoc fdr out=DemokA.diff_Mock_T3_T0 noprint;
data DemokA.sig_Mock_T3_T0;
set DemokA.sig_Mock_T3_T0;
Intensity_Ratio=2**(-1*estimate);		* or use "Intensity_Ratio=2**(1*estimate)" if you'd rather see T0/T3;
data DemokA.sig_Mock_T3_T0;
set DemokA.sig_Mock_T3_T0;
where raw_p~=.;
keep geneid Intensity_Ratio fdr_p;
run;

* Write the number of replication for each gene;
proc sort data=DemokA.sig_Mock_T3_T0;
by geneID;
proc sort data=DemokA.Reps;
by geneID;
data DemokA.sig_Mock_T3_T0_R;
merge DemokA.sig_Mock_T3_T0 DemokA.Reps;
by geneID;
data DemokA.sig_Mock_T3_T0_R;
modify DemokA.sig_Mock_T3_T0_R;
if Intensity_Ratio=. then remove;
run;

* Save a version in Excel format;
proc export data=DemokA.sig_Mock_T3_T0_R outfile="C:\temp\Demo\SAS_Analysis\Sig_Mock_Time3vs0.xls" DBMS=EXCEL REPLACE;
run;

* Comparison for Time=1 vs. Time=2 days with Treatment=Mock;
data DemokA.diff_Mock_T2_T1;
set DemokA.diffB;
where effect="Treatment(Time)" and Treatment="Mock" and _Treatment="Mock" and Time=1 and _Time=2;
raw_p=probt;
proc multtest pdata=DemokA.diff_Mock_T2_T1 holm hoc fdr out=DemokA.diff_Mock_T2_T1 noprint;
data DemokA.sig_Mock_T2_T1;
set DemokA.sig_Mock_T2_T1;
Intensity_Ratio=2**(-1*estimate);		* or use "Intensity_Ratio=2**(1*estimate)" if you'd rather see T1/T2;
data DemokA.sig_Mock_T2_T1;
set DemokA.sig_Mock_T2_T1;
where raw_p~=.;
keep geneid Intensity_Ratio fdr_p;
run;

* Write the number of replication for each gene;
proc sort data=DemokA.sig_Mock_T2_T1;
by geneID;
proc sort data=DemokA.Reps;
by geneID;
data DemokA.sig_Mock_T2_T1_R;
merge DemokA.sig_Mock_T2_T1 DemokA.Reps;
by geneID;
data DemokA.sig_Mock_T2_T1_R;
modify DemokA.sig_Mock_T2_T1_R;
if Intensity_Ratio=. then remove;
run;

* Save a version in Excel format;
proc export data=DemokA.sig_Mock_T2_T1_R outfile="C:\temp\Demo\SAS_Analysis\Sig_Mock_Time2vs1.xls" DBMS=EXCEL REPLACE;
run;

* Comparison for Time=2 vs. Time=3 days with Treatment=Mock;
data DemokA.diff_Mock_T3_T2;
set DemokA.diffB;
where effect="Treatment(Time)" and Treatment="Mock" and _Treatment="Mock" and Time=2 and _Time=3;
raw_p=probt;
proc multtest pdata=DemokA.diff_Mock_T3_T2 holm hoc fdr out=DemokA.diff_Mock_T3_T2 noprint;
data DemokA.sig_Mock_T3_T2;
set DemokA.sig_Mock_T3_T2;
Intensity_Ratio=2**(-1*estimate);		* or use "Intensity_Ratio=2**(1*estimate)" if you'd rather see T2/T3;
data DemokA.sig_Mock_T3_T2;
set DemokA.sig_Mock_T3_T2;
where raw_p~=.;
keep geneid Intensity_Ratio fdr_p;
run;

* Write the number of replication for each gene;
proc sort data=DemokA.sig_Mock_T3_T2;
by geneID;
proc sort data=DemokA.Reps;
by geneID;
data DemokA.sig_Mock_T3_T2_R;
merge DemokA.sig_Mock_T3_T2 DemokA.Reps;
by geneID;
data DemokA.sig_Mock_T3_T2_R;
modify DemokA.sig_Mock_T3_T2_R;
if Intensity_Ratio=. then remove;
run;

* Save a version in Excel format;
proc export data=DemokA.sig_Mock_T3_T2_R outfile="C:\temp\Demo\SAS_Analysis\Sig_Mock_Time3vs2.xls" DBMS=EXCEL REPLACE;
run;