# For this tutorial I assume that you have the following: # # An account on the linux cluster (Helen is here to help today if you need it) # A basic level of command line skills (I'm awful so you clearly don't need to be an expert) # We cannot share real genetic data (but there are lots of places where you can get some - and I know you have some). # Therefore, for the tutorial, we are going to use simulated data. #If you want to play around with simulating data, I put the script in the folder, but it is tedious, so I've cooked up a dataset we can use today. # If you want to simulate similar genomic data, use: PlinkSim.R # If you want to simulate similar phenotypic data use GW-SEMdemoDataSim.R # There are four steps that are required for any GW-SEM GWAS (regardless of the specific model): # 1) Load the phenotypic data # 2) Build the model # 3) Run the GWAS # 4) Look at the results # Note: Not all of the steps have the same level of difficulty. ################################################################################################ ### Step 0: Log on to the cluster ### ################################################################################################ # For the tutorial, I would suggest that you used two terminal windows. One for R, and one for file management # In the file management window, create a folder called: GWSEMday mkdir GWSEMday cd GWSEMday cp ../../braver/GWSEMday/chr1* . cp ../../braver/GWSEMday/oneFacPhenoData.txt # Next, you can use scp (not in R) to transfer your results file to your computer. scp UserName@genesandsem.mpi.nl:/home/braver/GWSEMday/GWSEMdemo2023.R . ##### FIX # In the "R" window move to the GWSEMday folder and start R. cd GWSEMday R # Now you're in R # Load any necessary packages library(gwsem) ################################################################################################ ### Step 1: Load the phenotypic data ### ################################################################################################ # Read the phenotype data into R and look at the data phenoData <- read.table("oneFacPhenoData.txt", header=TRUE) head(phenoData) phenoData$age <- as.numeric(phenoData$age) phenoData$sex <- as.numeric(phenoData$sex) # This would be a great time to recode the data, transform it, and tell R if we have ordinal or binary indicators using mxFactor() # The data for this example are simulated and continuous, and therefore, we will not be doing anything now, but if you would like # to chop the indicators up into binary or ordinal variables, this would be a great time to do it. ################################################################################################ ### Step 2: Build the Model ### ################################################################################################ # The first thing that we would want to do is build a general model. # This model takes a series of arguments: addFac <- buildOneFac(phenoData = phenoData, # what the data object is (which you read in above) itemNames = c("tobacco", "cannabis", "alcohol"), # what the items of the latent factor are covariates=c('age', 'sex','pc1','pc2','pc3','pc4','pc5'), # what covariates that you want to include in the analysis fitfun = "WLS") # and the fit function that you would like to use (WLS is much faster than ML) # You can take this object (addFac) which is technically an OpenMx model, and simply fit it using mxRun() addFacFit <- mxRun(addFac) summary(addFacFit) # This is strongly advised, as it is a great time to learn if your model is being specified in a way that you didn't expect or that # the model is not giving you the answers that you are expecting ################################################################################################ ### Step 3: Run the GWAS ### ################################################################################################ # Now, provided that the model looks reasonable, you can plug the model that you have built into the GWAS function # To fit this function, you must tell GW-SEM : GWAS(model = addFac, # what model object you would like to fit snpData = 'chr1.pgen', # that path to the snpData file. out="latFac1a.log", # the file that you would like to save the full results into SNP=1:2000) # the index of the snps (how many) you would like to fit # If you want to monitor the GWAS progress (in the file management window) then you can use this command (note: change 1a to your file) wc -l latFac1a.log # Then in the file management window copy the file over to the "shared results folder": GenesandsemResults cp latFac1a.log ../../GenesandsemResults/. GWAS(model = addFac, snpData = 'chr1.pgen', out="latFac1b.log", SNP=2001:4000) # Typically, each of these lines of code would be submitted as a different GWAS(model = addFac, snpData = 'chr1.pgen', out="latFac1c.log", SNP=4001:6000) # job (with the necessary data cleaning and model building steps above) GWAS(model = addFac, snpData = 'chr1.pgen', out="latFac1d.log", SNP=6001:8000) GWAS(model = addFac, snpData = 'chr1.pgen', out="latFac1e.log", SNP=8001:9854) # Note about snpData: The path to your snpData will likely include switching to a different directory (as you will likely do your analysis in a different # folder than your SNP data). All you need to do is point to the data using relative paths. Further, it is able to take plink bed/bim/fam or pgen/psam/pvar # data or bgen data (oxford format) # Note about SNP: This can be used to run a limited number of SNP (i.e. not the whole snp file). This is particularly useful if you would like to run chop up a # chr into several parts without cutting you actual genotype data into seperate files. # This will take a while and frequently be done on a cluster. If you chop up this script and remove the comments and insert you data # The next step is to read the data in. While you are unlikely to want to read all of the output into R, it is useful to do this on a small test GWAS analysis # to ensure that you are getting reasonable results. ################################################################################################ ### Step 4: Look at the Results ### ################################################################################################ FullResult <- read.delim("latFac1a.log") # This is didactically useful, but it contains much more information than most people want head(FullResult) # Then, we can read the results into R for a specific parameter. This function takes takes two arguments: the path to the data and the column in the results # file for the parameter that you want to examine. ### Now it's time to pull all of the data together into a single "succinct" object # I like grabbing each file name (not in R) and putting them into a single object (or building the file list with a paste function) c1 <- c("../../GenesandsemResults/latFac1a.log", "../../GenesandsemResults/latFac1b.log", "../../GenesandsemResults/latFac1c.log", "../../GenesandsemResults/latFac1d.log", "../../GenesandsemResults/latFac1e.log") # Then you can load all the files in a single step succinct <- loadResults(path = c1, focus = "snp_to_F", # You must provide the path to the files and the key parameter you want to load (probably the SNP regression) extraColumns = c("MAF", "OneFac.data.nrow", "lambda_tobacco", "lambda_cannabis", "lambda_alcohol")) # it is often useful to grab a few extra columns such as MAF, sample size, and for a latent factor model, the factor loadings. # Now it's time to calculate z and p values for the interesting parameters succinct <- signif(succinct, focus = "snp_to_F") # Let's look at the output head(succinct[order(abs(succinct$Z), decreasing = T),c("SNP", "snp_to_F", "Z", "P")], 20) ## For simplicity, these are the results: # SNP snp_to_F Z P # 1: rs2548 0.19917658 27.71650 4.417794e-169 LATENT # 2: rs6276 0.15292332 22.39894 4.030709e-111 TOB # 3: rs7994 0.12290516 20.86442 1.127601e-96 CAN # 4: rs1254 0.12267590 20.84224 1.792645e-96 TOB # 5: rs3129 0.10873347 19.52113 7.260856e-85 CAN # 6: rs2546 0.14680318 19.16834 6.804020e-82 # 7: rs2549 0.11445230 17.98909 2.372304e-72 # 8: rs2089 0.15503005 17.82258 4.720667e-71 LATENT # 9: rs2547 0.09530169 16.81987 1.745402e-63 # 10: rs1779 0.15995333 16.74285 6.385586e-63 ALC # 11: rs2550 0.14063169 16.72938 8.006838e-63 # 12: rs2545 0.11588786 16.60480 6.432946e-62 # 13: rs2542 0.09527286 16.35538 3.982145e-60 # 14: rs2552 -0.16518422 -16.04233 6.467854e-58 # 15: rs2551 0.10261793 16.01077 1.074792e-57 # 16: rs2544 0.11409312 15.85982 1.202379e-56 # 17: rs2555 0.09616745 15.54753 1.653637e-54 # 18: rs2560 0.10020870 15.14600 8.051688e-52 # 19: rs2556 0.08350638 14.92706 2.197589e-50 # 20: rs2554 0.12440751 14.58157 3.679737e-48 # hits [1] 2548 2089 # hits_tob [1] 6276 1254 # hits_can [1] 7994 3129 # hits_alc [1] 1779 9599 succinct[succinct$SNP == "rs2548"] succinct[succinct$SNP == "rs2089"] succinct[succinct$SNP == "rs6276"] succinct[succinct$SNP == "rs1254"] succinct[succinct$SNP == "rs7994"] succinct[succinct$SNP == "rs3129"] succinct[succinct$SNP == "rs1779"] succinct[succinct$SNP == "rs9599"] # Then, you can plot the results (this works much better on your local machine) so let's walk through how we are going to get the results onto your machine. # At this point its good to have a quick look to see what you can remove from the data before you transfer it. head(succinct) table(succinct$statusCode) # Normally you can remove the statusCode and catch1 columns (if they are empty) table(succinct$catch1) succinct$statusCode <- NULL succinct$catch1 <- NULL write.table(succinct, "latFac.txt", row.names = F, quote = F) # Write the data to disk on the cluster # Next, you can use scp (not in R) to transfer your results file to your computer. scp UserName@genesandsem.mpi.nl:/home/UserName/GWSEMday/latFac.txt . # You will need to type in your password to transfer the data. # Once the results file is on your computer, you can run the following lines of code to get a manhattan plot. latFac <- read.table("latFac.txt", header = T) plot(-log10(latFac$P), ylab = "-log10(p)") points(c(2548, 2089), c(-log10(latFac$P[2548]),-log10(latFac$P[2089])), cex = 2, pch= "L", col = "red") points(c(6276, 1254), c(-log10(latFac$P[6276]),-log10(latFac$P[1254])), cex = 2, pch= "T", col = "lightblue") points(c(7994, 3129), c(-log10(latFac$P[7994]),-log10(latFac$P[3129])), cex = 2, pch= "C", col = "green") points(c(1779, 9599), c(-log10(latFac$P[1779]),-log10(latFac$P[9599])), cex = 2, pch= "A", col = "orange") abline(h = -log10(5e-8), col = "red", lwd = 3) # If this was real data, we would plot a Manhattan Plot, but with simulated data it doesn't really work like that. # here is some useful code in case if you want to use qqman to make a manhattan plot #png("my_Manhattan_plot.png", width=1500, height=750) #par(cex.lab = 2, mai = c(1, 1, .1, .1) + 0.1, bg="white") #manhattan(myResults, p = "P") #dev.off() ################################################################################################ ### The Residuals Model ### ################################################################################################ # The process is very similar for the Latent factor model except that we are regressing the individual items on the SNP # Again, the first thing that we would want to do is build a general model. # The function tells GW-SEM that you want to run the residuals model. This model takes a series of arguments: # You must tell GW-SEM: addFacRes <- buildOneFacRes(phenoData, # what the data object is (which you read in above) c("tobacco", "cannabis", "alcohol"), # what the items of the latent factor are covariates=c('pc1','pc2','pc3','pc4','pc5'), # what covariates that you want to include in the analysis fitfun = "WLS") # and the fit function that you would like to use (WLS is much faster than ML) # Again, You can take this object (addFac) which is technically an OpenMx model, and simply fit it using mxRun() addFacResFit <- mxRun(addFacRes) summary(addFacResFit) # Then you will use the same function that we used for the latent variable model, with a different model object GWAS(model = addFacRes, snpData = 'chr1.pgen', out="facRes1a.log", SNP=1:2000) GWAS(model = addFacRes, snpData = 'chr1.pgen', out="facRes1b.log", SNP=2001:4000) GWAS(model = addFacRes, snpData = 'chr1.pgen', out="facRes1c.log", SNP=4001:6000) GWAS(model = addFacRes, snpData = 'chr1.pgen', out="facRes1d.log", SNP=6001:8000) GWAS(model = addFacRes, snpData = 'chr1.pgen', out="facRes1e.log", SNP=8001:9854) # Then in the terminal window copy the file over to the "shared results folder": GenesandsemResults cp facRes1a.log ../../GenesandsemResults/. # Again, it is instructive to look at the results again, but this might be easier to do in linux rather than R # This is how you load it into R ResResult <- read.delim("facRes1a.log") head(ResResult) # Now that we have run the residual's model, we have multiple parameters that we want to load into R (Three in this case) # The function to do this is the same as for one parameter, but you need to do it several times (once for each paramter). This gives several R objects. Rc1 <- c("../../GenesandsemResults/facRes1a.log", "../../GenesandsemResults/facRes1b.log", "../../GenesandsemResults/facRes1c.log", "../../GenesandsemResults/facRes1d.log", "../../GenesandsemResults/facRes1e.log") Tob <- loadResults(path = Rc1, focus = "snp_to_tobacco", extraColumns = c("MAF", "lambda_tobacco", "lambda_cannabis", "lambda_alcohol")) Tob <- signif(Tob, , focus = "snp_to_tobacco") Can <- loadResults(path = Rc1, focus = "snp_to_cannabis", extraColumns = c("MAF", "lambda_tobacco", "lambda_cannabis", "lambda_alcohol")) Can <- signif(Can, focus = "snp_to_cannabis") Alc <- loadResults(path = Rc1, focus = "snp_to_alcohol", extraColumns = c("MAF", "lambda_tobacco", "lambda_cannabis", "lambda_alcohol")) Alc <- signif(Alc, "snp_to_alcohol") # Now we can build datasets for each model and write them to the cluster head(Tob) table(Tob$statusCode) # Normally you can remove the statusCode and catch1 columns (if they are empty) table(Tob$catch1) Tob$statusCode <- NULL Tob$catch1 <- NULL write.table(Tob, "TobRes.txt", row.names = F, quote = F) # Write the data to disk head(Can) table(Can$statusCode) # Normally you can remove the statusCode and catch1 columns (if they are empty) table(Can$catch1) Can$statusCode <- NULL Can$catch1 <- NULL write.table(Can, "CanRes.txt", row.names = F, quote = F) # Write the data to disk head(Alc) table(Alc$statusCode) # Normally you can remove the statusCode and catch1 columns (if they are empty) table(Alc$catch1) Alc$statusCode <- NULL Alc$catch1 <- NULL write.table(Alc, "AlcRes.txt", row.names = F, quote = F) # Write the data to disk ## Looking into the Tobacco Residual Results head(Tob[order(abs(Tob$Z), decreasing = T),c("SNP", "snp_to_tobacco", "Z", "P")], 20) # hits [1] 2548 2089 # hits_tob [1] 6276 1254 # hits_can [1] 7994 3129 # hits_alc [1] 1779 9599 Tob[Tob$SNP == "rs2548"] Tob[Tob$SNP == "rs2089"] Tob[Tob$SNP == "rs6276"] Tob[Tob$SNP == "rs1254"] Tob[Tob$SNP == "rs7994"] Tob[Tob$SNP == "rs3129"] Tob[Tob$SNP == "rs1779"] Tob[Tob$SNP == "rs9599"] ## Looking into the Cannabis Residual Results head(Can[order(Can$Z, decreasing = T),c("SNP", "snp_to_cannabis", "Z", "P")], 20) # hits [1] 2548 2089 # hits_tob [1] 6276 1254 # hits_can [1] 7994 3129 # hits_alc [1] 1779 9599 Can[Can$SNP == "rs2548"] Can[Can$SNP == "rs2089"] Can[Can$SNP == "rs6276"] Can[Can$SNP == "rs1254"] Can[Can$SNP == "rs7994"] Can[Can$SNP == "rs3129"] Can[Can$SNP == "rs1779"] Can[Can$SNP == "rs9599"] ## Looking into the Alcohol Residual Results head(Alc[order(Alc$Z, decreasing = T),c("SNP", "snp_to_alcohol", "Z", "P")], 20) # hits [1] 2548 2089 # hits_tob [1] 6276 1254 # hits_can [1] 7994 3129 # hits_alc [1] 1779 9599 Alc[Alc$SNP == "rs2548"] Alc[Alc$SNP == "rs2089"] Alc[Alc$SNP == "rs6276"] Alc[Alc$SNP == "rs1254"] Alc[Alc$SNP == "rs7994"] Alc[Alc$SNP == "rs3129"] Alc[Alc$SNP == "rs1779"] Alc[Alc$SNP == "rs9599"] # Let's plot them on your local machine (so we need to transfer the data there first) scp UserName@genesandsem.mpi.nl:/home/UserName/GWSEMday/TobRes.txt . scp UserName@genesandsem.mpi.nl:/home/UserName/GWSEMday/CanRes.txt . scp UserName@genesandsem.mpi.nl:/home/UserName/GWSEMday/AlcRes.txt . # Now we can read the data into R and make manhattan plots. TobRes <- read.table("TobRes.txt", header = T) CanRes <- read.table("CanRes.txt", header = T) AlcRes <- read.table("AlcRes.txt", header = T) ### Make a few manhattan plots par(mfrow = c(1, 3)) plot(-log10(TobRes$P), main = "Tobacco", ylab = "-log10(p)") points(c(2548, 2089), c(-log10(TobRes$P[2548]),-log10(TobRes$P[2089])), cex = 2, pch= "L", col = "red") points(c(6276, 1254), c(-log10(TobRes$P[6276]),-log10(TobRes$P[1254])), cex = 2, pch= "T", col = "lightblue") points(c(7994, 3129), c(-log10(TobRes$P[7994]),-log10(TobRes$P[3129])), cex = 2, pch= "C", col = "green") points(c(1779, 9599), c(-log10(TobRes$P[1779]),-log10(TobRes$P[9599])), cex = 2, pch= "A", col = "orange") abline(h = -log10(5e-8), col = "red", lwd = 3) plot(-log10(CanRes$P), main = "Cannabis", ylab = "-log10(p)") points(c(2548, 2089), c(-log10(CanRes$P[2548]),-log10(CanRes$P[2089])), cex = 2, pch= "L", col = "red") points(c(6276, 1254), c(-log10(CanRes$P[6276]),-log10(CanRes$P[1254])), cex = 2, pch= "T", col = "lightblue") points(c(7994, 3129), c(-log10(CanRes$P[7994]),-log10(CanRes$P[3129])), cex = 2, pch= "C", col = "green") points(c(1779, 9599), c(-log10(CanRes$P[1779]),-log10(CanRes$P[9599])), cex = 2, pch= "A", col = "orange") abline(h = -log10(5e-8), col = "red", lwd = 3) plot(-log10(AlcRes$P), main = "Alcohol", ylab = "-log10(p)") points(c(2548, 2089), c(-log10(AlcRes$P[2548]),-log10(AlcRes$P[2089])), cex = 2, pch= "L", col = "red") points(c(6276, 1254), c(-log10(AlcRes$P[6276]),-log10(AlcRes$P[1254])), cex = 2, pch= "T", col = "lightblue") points(c(7994, 3129), c(-log10(AlcRes$P[7994]),-log10(AlcRes$P[3129])), cex = 2, pch= "C", col = "green") points(c(1779, 9599), c(-log10(AlcRes$P[1779]),-log10(AlcRes$P[9599])), cex = 2, pch= "A", col = "orange") abline(h = -log10(5e-8), col = "red", lwd = 3) # hits [1] 2548 2089 # hits_tob [1] 6276 1254 # hits_can [1] 7994 3129 # hits_alc [1] 1779 9599 ################################################################################################ ### The Residuals Model (Alternative Specification) ### ################################################################################################ # Tobacco FacTobRes <- buildOneFacRes(phenoData, factor = T, itemNames = c("tobacco", "cannabis", "alcohol"), res = "tobacco", covariates=c('age','sex','pc1','pc2','pc3','pc4','pc5'), fitfun = "WLS") FacTobResFit <- mxRun(FacTobRes) summary(FacTobResFit) GWAS(model = FacTobRes, snpData = 'chr1.pgen', out="facTobRes1a.log", SNP=1:2000) GWAS(model = FacTobRes, snpData = 'chr1.pgen', out="facTobRes1b.log", SNP=2001:4000) GWAS(model = FacTobRes, snpData = 'chr1.pgen', out="facTobRes1c.log", SNP=4001:6000) GWAS(model = FacTobRes, snpData = 'chr1.pgen', out="facTobRes1d.log", SNP=6001:8000) GWAS(model = FacTobRes, snpData = 'chr1.pgen', out="facTobRes1e.log", SNP=8001:9854) # Then in the terminal window copy the files over to the "shared results folder": GenesandsemResults cp facTobRes1a.log ../../GenesandsemResults/. cp facTobRes1b.log ../../GenesandsemResults/. cp facTobRes1c.log ../../GenesandsemResults/. cp facTobRes1d.log ../../GenesandsemResults/. cp facTobRes1e.log ../../GenesandsemResults/. # Cannabis FacCanRes <- buildOneFacRes(phenoData, factor = T, itemNames = c("tobacco", "cannabis", "alcohol"), res = "cannabis", covariates=c('age','sex','pc1','pc2','pc3','pc4','pc5'), fitfun = "WLS") FacCanResFit <- mxRun(FacCanRes) summary(FacCanResFit) GWAS(model = FacCanRes, snpData = 'chr1.pgen', out="facCanRes1a.log", SNP=1:2000) GWAS(model = FacCanRes, snpData = 'chr1.pgen', out="facCanRes1b.log", SNP=2001:4000) GWAS(model = FacCanRes, snpData = 'chr1.pgen', out="facCanRes1c.log", SNP=4001:6000) GWAS(model = FacCanRes, snpData = 'chr1.pgen', out="facCanRes1d.log", SNP=6001:8000) GWAS(model = FacCanRes, snpData = 'chr1.pgen', out="facCanRes1e.log", SNP=8001:9854) # Then in the terminal window copy the files over to the "shared results folder": GenesandsemResults cp facCanRes1a.log ../../GenesandsemResults/. cp facCanRes1b.log ../../GenesandsemResults/. cp facCanRes1c.log ../../GenesandsemResults/. cp facCanRes1d.log ../../GenesandsemResults/. cp facCanRes1e.log ../../GenesandsemResults/. # Alcohol FacAlcRes <- buildOneFacRes(phenoData, factor = T, itemNames = c("tobacco", "cannabis", "alcohol"), res = "alcohol", covariates=c('age','sex','pc1','pc2','pc3','pc4','pc5'), fitfun = "WLS") FacAlcResFit <- mxRun(FacAlcRes) summary(FacAlcResFit) GWAS(model = FacAlcRes, snpData = 'chr1.pgen', out="facAlcRes1a.log", SNP=1:2000) GWAS(model = FacAlcRes, snpData = 'chr1.pgen', out="facAlcRes1b.log", SNP=2001:4000) GWAS(model = FacAlcRes, snpData = 'chr1.pgen', out="facAlcRes1c.log", SNP=4001:6000) GWAS(model = FacAlcRes, snpData = 'chr1.pgen', out="facAlcRes1d.log", SNP=6001:8000) GWAS(model = FacAlcRes, snpData = 'chr1.pgen', out="facAlcRes1e.log", SNP=8001:9854) # Then in the terminal window copy the files over to the "shared results folder": GenesandsemResults cp facAlcRes1a.log ../../GenesandsemResults/. cp facAlcRes1b.log ../../GenesandsemResults/. cp facAlcRes1c.log ../../GenesandsemResults/. cp facAlcRes1d.log ../../GenesandsemResults/. cp facAlcRes1e.log ../../GenesandsemResults/. ### FT1 <- c("../../GenesandsemResults/facTobRes1a.log", "../../GenesandsemResults/facTobRes1b.log", "../../GenesandsemResults/facTobRes1c.log", "../../GenesandsemResults/facTobRes1d.log", "../../GenesandsemResults/facTobRes1e.log") FC1 <- c("../../GenesandsemResults/facCanRes1a.log", "../../GenesandsemResults/facCanRes1b.log", "../../GenesandsemResults/facCanRes1c.log", "../../GenesandsemResults/facCanRes1d.log", "../../GenesandsemResults/facCanRes1e.log") FA1 <- c("../../GenesandsemResults/facAlcRes1a.log", "../../GenesandsemResults/facAlcRes1b.log", "../../GenesandsemResults/facAlcRes1c.log", "../../GenesandsemResults/facAlcRes1d.log", "../../GenesandsemResults/facAlcRes1e.log") FacTobRes <- loadResults(path = FT1, focus = "snp_to_tobacco", extraColumns = c("snp_to_F", "Vsnp_to_F:snp_to_F", "MAF", "lambda_tobacco", "lambda_cannabis", "lambda_alcohol")) FacTobRes$statusCode <- NULL FacTobRes$catch1 <- NULL FacTobRes <- signif(FacTobRes, focus = "snp_to_tobacco"); colnames(FacTobRes)[15:16] <- c("Ztob","Ptob") FacTobRes <- signif(FacTobRes, focus = "snp_to_F"); colnames(FacTobRes)[17:18] <- c("Zfac","Pfac") FacCanRes <- loadResults(path = FC1, focus = "snp_to_cannabis", extraColumns = c("snp_to_F", "Vsnp_to_F:snp_to_F", "MAF", "lambda_tobacco", "lambda_cannabis", "lambda_alcohol")) FacCanRes$statusCode <- NULL FacCanRes$catch1 <- NULL FacCanRes <- signif(FacCanRes, focus = "snp_to_cannabis"); colnames(FacCanRes)[15:16] <- c("Zcan","Pcan") FacCanRes <- signif(FacCanRes, focus = "snp_to_F"); colnames(FacCanRes)[17:18] <- c("Zfac","Pfac") FacAlcRes <- loadResults(path = FA1, focus = "snp_to_alcohol", extraColumns = c("snp_to_F", "Vsnp_to_F:snp_to_F", "MAF", "lambda_tobacco", "lambda_cannabis", "lambda_alcohol")) FacAlcRes$statusCode <- NULL FacAlcRes$catch1 <- NULL FacAlcRes <- signif(FacAlcRes, "snp_to_alcohol"); colnames(FacAlcRes)[15:16] <- c("Zalc","Palc") FacAlcRes <- signif(FacAlcRes, "snp_to_F"); colnames(FacAlcRes)[17:18] <- c("Zfac","Pfac") head(FacTobRes) head(FacCanRes) head(FacAlcRes) write.table(FacTobRes, "FacTobRes.txt", row.names = F, quote = F) # Write the data to disk write.table(FacCanRes, "FacCanRes.txt", row.names = F, quote = F) # Write the data to disk write.table(FacAlcRes, "FacAlcRes.txt", row.names = F, quote = F) # Write the data to disk scp UserName@genesandsem.mpi.nl:/home/UserName/GWSEMday/FacTobRes.txt . scp UserName@genesandsem.mpi.nl:/home/UserName/GWSEMday/FacCanRes.txt . scp UserName@genesandsem.mpi.nl:/home/UserName/GWSEMday/FacAlcRes.txt . FacTobRes <- read.table("FacTobRes.txt", header = T) FacCanRes <- read.table("FacCanRes.txt", header = T) FacAlcRes <- read.table("FacAlcRes.txt", header = T) ### Make a few manhattan plots par(mfrow = c(3, 2)) plot(-log10(FacTobRes$Ptob), main = "Tobacco Residual", ylab = "-log10(p)") points(c(2548, 2089), c(-log10(FacTobRes$Ptob[2548]),-log10(FacTobRes$Ptob[2089])), cex = 2, pch= "L", col = "red") points(c(6276, 1254), c(-log10(FacTobRes$Ptob[6276]),-log10(FacTobRes$Ptob[1254])), cex = 2, pch= "T", col = "lightblue") points(c(7994, 3129), c(-log10(FacTobRes$Ptob[7994]),-log10(FacTobRes$Ptob[3129])), cex = 2, pch= "C", col = "green") points(c(1779, 9599), c(-log10(FacTobRes$Ptob[1779]),-log10(FacTobRes$Ptob[9599])), cex = 2, pch= "A", col = "orange") abline(h = -log10(5e-8), col = "red", lwd = 3) plot(-log10(FacTobRes$Pfac), main = "Latent Factor (Tobacco)", ylab = "-log10(p)") points(c(2548, 2089), c(-log10(FacTobRes$Pfac[2548]),-log10(FacTobRes$Pfac[2089])), cex = 2, pch= "L", col = "red") points(c(6276, 1254), c(-log10(FacTobRes$Pfac[6276]),-log10(FacTobRes$Pfac[1254])), cex = 2, pch= "T", col = "lightblue") points(c(7994, 3129), c(-log10(FacTobRes$Pfac[7994]),-log10(FacTobRes$Pfac[3129])), cex = 2, pch= "C", col = "green") points(c(1779, 9599), c(-log10(FacTobRes$Pfac[1779]),-log10(FacTobRes$Pfac[9599])), cex = 2, pch= "A", col = "orange") abline(h = -log10(5e-8), col = "red", lwd = 3) plot(-log10(FacCanRes$Pcan), main = "Cannabis Residual", ylab = "-log10(p)") points(c(2548, 2089), c(-log10(FacCanRes$Pcan[2548]),-log10(FacCanRes$Pcan[2089])), cex = 2, pch= "L", col = "red") points(c(6276, 1254), c(-log10(FacCanRes$Pcan[6276]),-log10(FacCanRes$Pcan[1254])), cex = 2, pch= "T", col = "lightblue") points(c(7994, 3129), c(-log10(FacCanRes$Pcan[7994]),-log10(FacCanRes$Pcan[3129])), cex = 2, pch= "C", col = "green") points(c(1779, 9599), c(-log10(FacCanRes$Pcan[1779]),-log10(FacCanRes$Pcan[9599])), cex = 2, pch= "A", col = "orange") abline(h = -log10(5e-8), col = "red", lwd = 3) plot(-log10(FacCanRes$Pfac), main = "Latent Factor (Cannabis)", ylab = "-log10(p)") points(c(2548, 2089), c(-log10(FacCanRes$Pfac[2548]),-log10(FacCanRes$Pfac[2089])), cex = 2, pch= "L", col = "red") points(c(6276, 1254), c(-log10(FacCanRes$Pfac[6276]),-log10(FacCanRes$Pfac[1254])), cex = 2, pch= "T", col = "lightblue") points(c(7994, 3129), c(-log10(FacCanRes$Pfac[7994]),-log10(FacCanRes$Pfac[3129])), cex = 2, pch= "C", col = "green") points(c(1779, 9599), c(-log10(FacCanRes$Pfac[1779]),-log10(FacCanRes$Pfac[9599])), cex = 2, pch= "A", col = "orange") abline(h = -log10(5e-8), col = "red", lwd = 3) plot(-log10(FacAlcRes$Palc), main = "Alcohol Residual", ylab = "-log10(p)") points(c(2548, 2089), c(-log10(FacAlcRes$Palc[2548]),-log10(FacAlcRes$Palc[2089])), cex = 2, pch= "L", col = "red") points(c(6276, 1254), c(-log10(FacAlcRes$Palc[6276]),-log10(FacAlcRes$Palc[1254])), cex = 2, pch= "T", col = "lightblue") points(c(7994, 3129), c(-log10(FacAlcRes$Palc[7994]),-log10(FacAlcRes$Palc[3129])), cex = 2, pch= "C", col = "green") points(c(1779, 9599), c(-log10(FacAlcRes$Palc[1779]),-log10(FacAlcRes$Palc[9599])), cex = 2, pch= "A", col = "orange") abline(h = -log10(5e-8), col = "red", lwd = 3) plot(-log10(FacAlcRes$Pfac), main = "Latent Factor (Alcohol)", ylab = "-log10(p)") points(c(2548, 2089), c(-log10(FacAlcRes$Pfac[2548]),-log10(FacAlcRes$Pfac[2089])), cex = 2, pch= "L", col = "red") points(c(6276, 1254), c(-log10(FacAlcRes$Pfac[6276]),-log10(FacAlcRes$Pfac[1254])), cex = 2, pch= "T", col = "lightblue") points(c(7994, 3129), c(-log10(FacAlcRes$Pfac[7994]),-log10(FacAlcRes$Pfac[3129])), cex = 2, pch= "C", col = "green") points(c(1779, 9599), c(-log10(FacAlcRes$Pfac[1779]),-log10(FacAlcRes$Pfac[9599])), cex = 2, pch= "A", col = "orange") abline(h = -log10(5e-8), col = "red", lwd = 3)