UMR GDEC
BWGS

Repository

Date: July 6, 2018
Title: BWGS - BreedWheat Genomic Selection pipeline
Version: 1.12
Date: 2018-07-06
Author:
  - Gilles CHARMET
  - Louis Gautier TRAN
Description: Package for Breed Wheat Genomic Selection pipeline
# script for BWGS tutorial


#YieldGBLUP <-bwgs.cv (TRAIN47K, YieldBLUE, geno.impute.method="mni", predict.method= "gblup", nFolds=10, nTimes=10 )
#YieldLASSO <-bwgs.cv (TRAIN47K, YieldBLUE, geno.impute.method="mni", predict.method= "LASSO", nFolds=10, nTimes=10 )
#YieldBA <-bwgs.cv (TRAIN47K, YieldBLUE, geno.impute.method="mni", predict.method= "BA", nFolds=10, nTimes=10 )
#YieldRKHS <-bwgs.cv (TRAIN47K, YieldBLUE, geno.impute.method="mni", predict.method= "RKHS", nFolds=10, nTimes=10 )
#YieldEGBLUP <-bwgs.cv (TRAIN47K, YieldBLUE, geno.impute.method="mni", predict.method= "EGBLUP", nFolds=10, nTimes=10 )

#compareM=cbind(YieldGBLUP$cv, YieldLASSO$cv, YieldBA$cv, YieldRKHS$cv, YieldEGBLUP$cv)
#colnames(compareM) = c("GBLUP","LASSO","BayesA","RKHS","EGBLUP")
#boxplot(compareM,xlab="Prediction method",ylab="predictive ability",main="Predictive ability of 5 methods. Yield with 47K markers")


#YieldGBLUP100 <-bwgs.cv (TRAIN47K, YieldBLUE,pop.reduct.method="RANDOM", sample.pop.size=100, geno.impute.method="mni", predict.method="gblup", nFolds=10, nTimes=10 )
#YieldGBLUP300 <-bwgs.cv (TRAIN47K, YieldBLUE,pop.reduct.method="RANDOM", sample.pop.size=300, geno.impute.method="mni", predict.method="gblup", nFolds=10, nTimes=10 )
#YieldGBLUP500 <-bwgs.cv (TRAIN47K, YieldBLUE, pop.reduct.method="RANDOM",sample.pop.size=500, geno.impute.method="mni", predict.method="gblup", nFolds=10, nTimes=10 )

#boxplot(cbind(YieldGBLUP100$cv, YieldGBLUP300$cv, YieldGBLUP500$cv, YieldGBLUP$cv))

#CompareSize=cbind(YieldGBLUP100$cv, YieldGBLUP300$cv, YieldGBLUP500$cv, YieldGBLUP$cv)
#colnames(CompareSize)=c("N=100","N=300","N=500","N=700")
#boxplot(CompareSize,xlab="Training POP size",ylab="Predictive ability",main="Effect of TRAINING POPULATION SIZE")

#testPREDICT_GBLUP=bwgs.predict(geno_train=TRAIN47K,pheno_train=YieldBLUE,geno_target=TARGET47K,MAXNA=0.2,MAF=0.05,geno.reduct.method="NULL",reduct.size="NULL",r2="NULL",pval="NULL",
#MAP="NULL",geno.impute.method="MNI",predict.method="GBLUP")

#testPREDICT_EGBLUP=bwgs.predict(geno_train=TRAIN47K,pheno_train=YieldBLUE,geno_target=TARGET47K,MAXNA=0.2,MAF=0.05,geno.reduct.method="NULL",reduct.size="NULL",r2="NULL",pval="NULL",
#MAP="NULL",geno.impute.method="MNI",predict.method="EGBLUP")

#testPREDICT_LASSO=bwgs.predict(geno_train=TRAIN47K,pheno_train=YieldBLUE,geno_target=TARGET47K,MAXNA=0.2,MAF=0.05,geno.reduct.method="NULL",reduct.size="NULL",r2="NULL",pval="NULL",
#MAP="NULL",geno.impute.method="MNI",predict.method="LASSO")

#testPREDICT_RKHS=bwgs.predict(geno_train=TRAIN47K,pheno_train=YieldBLUE,geno_target=TARGET47K,MAXNA=0.2,MAF=0.05,geno.reduct.method="NULL",reduct.size="NULL",r2="NULL",pval="NULL",
#MAP="NULL",geno.impute.method="MNI",predict.method="RKHS")

#testPREDICT_BayesA=bwgs.predict(geno_train=TRAIN47K,pheno_train=YieldBLUE,geno_target=TARGET47K,MAXNA=0.2,MAF=0.05,geno.reduct.method="NULL",reduct.size="NULL",r2="NULL",pval="NULL",
#MAP="NULL",geno.impute.method="MNI",predict.method="BA")

#ComparePRED=cbind(testPREDICT_GBLUP[,1] ,testPREDICT_BayesA[,1] ,testPREDICT_LASSO[,1], testPREDICT_RKHS[,1], testPREDICT_EGBLUP[,1])
#colnames(ComparePRED)=c("GBLUP","BayesA","LASSO","RKHS","EGBLUP")
#pairs(ComparePRED,lower.panel = panel.smooth,upper.panel = panel.cor,diag.panel=panel.hist)

# Tutorial: effect of simulated trait heritability on predictive ability.
#
# Steps:
#   1. Run MNI() on the training genotypes so the simulation input carries
#      no missing values (as the _NO_NA suffix indicates).
#   2. Simulate a trait with 100 QTL at h2 = 0.3, 0.5 and 0.8.
#   3. Cross-validate GBLUP on each simulated data set (5 folds, 20 times).
#   4. Compare the three distributions in a boxplot.
#
# NOTE(review): TRAIN47K is not created in this script; it must already
# exist in the workspace -- confirm where it is loaded from.
TRAIN47K_NO_NA <- MNI(TRAIN47K)

datasim03 <- qtlSIM(TRAIN47K_NO_NA, NQTL = 100, h2 = 0.3)
datasim05 <- qtlSIM(TRAIN47K_NO_NA, NQTL = 100, h2 = 0.5)
datasim08 <- qtlSIM(TRAIN47K_NO_NA, NQTL = 100, h2 = 0.8)

# Side-by-side print of genotype row names and phenotype names, to check
# by eye that both are in the same order.
cbind(rownames(datasim03$newSNP), names(datasim03$pheno))

SIM03 <- bwgs.cv(datasim03$newSNP, datasim03$pheno,
                 geno.impute.method = "MNI", predict.method = "gblup",
                 nTimes = 20, nFolds = 5)
SIM05 <- bwgs.cv(datasim05$newSNP, datasim05$pheno,
                 geno.impute.method = "MNI", predict.method = "gblup",
                 nTimes = 20, nFolds = 5)
# Fixed: the original passed the bare symbol `nTimes20` (an undefined
# object) instead of `nTimes = 20`.
SIM08 <- bwgs.cv(datasim08$newSNP, datasim08$pheno,
                 geno.impute.method = "MNI", predict.method = "gblup",
                 nTimes = 20, nFolds = 5)

# Fixed: bind the $cv component of each result, as every other comparison
# in this file does, rather than the whole result objects.
# NOTE(review): assumes $cv holds the per-replicate values -- confirm
# against the bwgs.cv documentation.
CompareH2 <- cbind(SIM03$cv, SIM05$cv, SIM08$cv)

colnames(CompareH2) <- c("h²=0.3", "h²=0.5", "h²=0.8")
boxplot(CompareH2,
        xlab = "Simulated Trait heritability",
        ylab = "Predictive ability",
        main = "Effect of TRAIT heritability")

AM(geno)
# Example: load the bwgs package and the inra data, then apply AM() to
# geno47K and keep the result.
# NOTE(review): geno47K is presumably provided by data(inra) -- confirm.
library (bwgs)

data (inra)
geno47K_AM <- AM(geno47K)
ANO (pheno , geno, pval)
# Example: run MNI() on the genotypes, then apply ANO() with pval = 0.001.
#
# Fixes relative to the original snippet:
#   * `Library` / `Data` -> `library` / `data` (R is case-sensitive).
#   * the MNI() result was stored as `geno_impote` but read back as
#     `geno_impute`; one name is now used throughout.
#
# NOTE(review): geno47K and pheno are presumably provided by data(inra).
# Later examples in this file reference `geno_shrink0001`, not
# `geno_shrink001` -- confirm which name is intended.
library(bwgs)

data(inra)
geno_impute <- MNI(geno47K)
geno_shrink001 <- ANO(pheno, geno_impute, pval = 0.001)
bwgs.cv (geno, pheno, MAXNA=0.2, MAF=0.05, pop.reduct.method="NULL",
sample.pop.size="NULL", geno.reduct.method="NULL",
reduct.marker.size="NULL", pval="NULL", r2="NULL", MAP="NULL",
geno.impute.method="NULL", predict.method="NULL", nFolds, nTimes)
# Examples for bwgs.cv(): marker reduction, marker selection, and
# training-population sampling.
#
# Fixes relative to the original snippet:
#   * argument names broken by stray spaces were rejoined
#     (geno.reduct.method, pop.reduct.method, nTimes).
#   * corrupted "NULL" sentinels ("NUL L", "NULL ") were restored.
#   * `testRKHSANO<bwgs.cv(...)` compared instead of assigning; it is
#     now `<-`.
#   * the boxplot referenced the undefined `testGBLUPRMR3000`; the object
#     created here is `testGBLUPRMR5000`.
#   * `reduct.size` / `random.pop.size` are not arguments in the bwgs.cv
#     usage shown in this file; they were replaced by
#     `reduct.marker.size` / `sample.pop.size` -- TODO confirm against
#     the installed package version.
library(bwgs)

data(inra)

# GBLUP with RMR (reduct.marker.size = 5000)
testGBLUPRMR5000 <- bwgs.cv(
  geno47K, pheno,
  sample.pop.size = "NULL",
  geno.reduct.method = "RMR",
  reduct.marker.size = 5000,
  geno.impute.method = "MNI",
  predict.method = "GBLUP",
  nFolds = 10, nTimes = 50
)

# RKHS with marker selection by ANOVA (pval = 0.001)
testRKHSANO <- bwgs.cv(
  geno47K, pheno,
  geno.reduct.method = "ANO",
  pval = 0.001,
  geno.impute.method = "mni",
  predict.method = "rkhs",
  nFolds = 10, nTimes = 50
)

# Boxplot to compare prediction methods
boxplot(cbind(testGBLUPRMR5000$cv, testRKHSANO$cv))

# NOTE(review): geno_shrink0001 is not created in this snippet -- confirm
# where it comes from before running the two calls below.

# Sampling the training population at random (300 lines)
testBWGSRPS300 <- bwgs.cv(
  geno_shrink0001, pheno,
  MAXNA = 0.2, MAF = 0.05,
  pop.reduct.method = "RANDOM",
  sample.pop.size = 300,
  geno.reduct.method = "NULL",
  reduct.marker.size = "NULL",
  pval = "NULL", r2 = "NULL", MAP = "NULL",
  geno.impute.method = "MNI",
  predict.method = "GBLUP",
  nFolds = 10, nTimes = 50
)

# Optimizing a subset for training (300 lines)
testBWGSOPT300 <- bwgs.cv(
  geno_shrink0001, pheno,
  MAXNA = 0.2, MAF = 0.05,
  pop.reduct.method = "OPTI",
  sample.pop.size = 300,
  geno.reduct.method = "NULL",
  reduct.marker.size = "NULL",
  pval = "NULL", r2 = "NULL", MAP = "NULL",
  geno.impute.method = "MNI",
  predict.method = "GBLUP",
  nFolds = 10, nTimes = 5
)
bwgs.predict (geno_train, pheno_train, geno_valid, MAXNA=0.2,MAF=0.05, geno.impute.method="NULL", geno.reduct.method="NULL",reduct.size="NULL", pval="NULL", r2="NULL", MAP="NULL",predict.method="GBLUP")
# Examples for bwgs.predict(): predict the same target set with three
# methods (GBLUP, EGBLUP, BA) and correlate the predictions.
#
# Fixes relative to the original snippet:
#   * `Library` / `Data` -> `library` / `data` (R is case-sensitive).
#   * `pheno\train=` -> `pheno_train=` in the EGBLUP call.
#   * `cor` stood alone on one line, followed by `cbind((...))`, which
#     does not parse; the two are now the single call `cor(cbind(...))`.
#
# NOTE(review): geno_shrink0001 and XTARGET are not created in this
# snippet -- confirm where they come from before running.
library(bwgs)

data(inra)

testPREDICT_GBLUP <- bwgs.predict(
  geno_train = geno_shrink0001, pheno_train = pheno, geno_target = XTARGET,
  MAXNA = 0.2, MAF = 0.05,
  geno.reduct.method = "NULL", reduct.size = "NULL",
  r2 = "NULL", pval = "NULL", MAP = "NULL",
  geno.impute.method = "MNI", predict.method = "GBLUP"
)

testPREDICT_EGBLUP <- bwgs.predict(
  geno_train = geno_shrink0001, pheno_train = pheno, geno_target = XTARGET,
  MAXNA = 0.2, MAF = 0.05,
  geno.reduct.method = "NULL", reduct.size = "NULL",
  r2 = "NULL", pval = "NULL", MAP = "NULL",
  geno.impute.method = "MNI", predict.method = "EGBLUP"
)

testPREDICT_BA <- bwgs.predict(
  geno_train = geno_shrink0001, pheno_train = pheno, geno_target = XTARGET,
  MAXNA = 0.2, MAF = 0.05,
  geno.reduct.method = "NULL", reduct.size = "NULL",
  r2 = "NULL", pval = "NULL", MAP = "NULL",
  geno.impute.method = "MNI", predict.method = "BA"
)

# Correlation between prediction methods (first column of each result).
cor(cbind(testPREDICT_GBLUP[, 1], testPREDICT_EGBLUP[, 1], testPREDICT_BA[, 1]))
CHROMLD (geno,R2seuil,MAP)
library(bwgs)

data(inra)

# Apply CHROMLD() to geno47K with an R2 threshold (R2seuil) of 0.95:
# NOTE(review): MAP is passed as an existing object; presumably it is
# provided by data(inra) -- confirm.

genoLD95 <- CHROMLD(geno47K, R2seuil=0.95, MAP)
EMI (geno)
# Example: load the bwgs package and the inra data, then run EMI() on
# geno47K.
library(bwgs)

data(inra)

# Impute using EMI:

geno_EMI <- EMI(geno47K)
data(inra)
MNI(geno)
# Example: load the bwgs package and the inra data, then run MNI() on
# geno47K.
library(bwgs)

data(inra)

geno_MNI <- MNI(geno47K)
optiTRAIN (geno, NSample=100, Nopti=1000)
library(bwgs)

data(inra)

# Call optiTRAIN() on geno47K with the positional arguments 300 and 1000:

Train_opti300 <- optiTRAIN (geno47K, 300, 1000)
qtlSIM (geno, NQTL, h2)
result <- list(newSNP=X,pheno=QT, TBV=TBV, Effects=Effects, h2QTL=h2QTL )
# Example: simulate a trait controlled by 100 QTL with h2 = 0.3.
#
# Fixed: the argument was spelled `NQLT`; the tutorial calls in this file
# use `NQTL`.
# NOTE(review): the tutorial runs MNI() on the genotypes before calling
# qtlSIM(); confirm whether geno47K can be passed without that step.
library(bwgs)

data(inra)

# QTL simulation:

TestQTL <- qtlSIM(geno47K, NQTL = 100, h2 = 0.3)
RMR (geno, N)
# Example: load the bwgs package and the inra data, then call RMR() on
# geno47K with N = 5000.
library(bwgs)

data(inra)

# Select 5000 random markers:

Geno5K <- RMR(geno47K, 5000)
RPS(geno, N)
# Example: load the bwgs package and the inra data, then call RPS() on
# geno47K with N = 200.
library(bwgs)

data(inra)

# Select 200 random lines:

Sample200 <- RPS(geno47K, 200)