Garrido_Maraver_MERCS

Garrido-Maraver et al. 2019

General information

This is a data analysis pipeline linked to a manuscript in review in Biology Open, entitled “Forcing contacts between mitochondria and the endoplasmic reticulum extends lifespan in a Drosophila model of Alzheimer’s disease”. This pipeline relates to data included in Figure 3.

The input files for this analysis pipeline were deposited in ArrayExpress:

Experiment ArrayExpress accession: E-MTAB-8468

Title: mRNA transcription profiling by array of adult flies expressing a artificial construct designed to bring mitochondria and the endoplasmic reticulum in close proximity

The other relevant files are in this GitHub repository, following this link

Load CEL files into R using the oligo package since the old affy package does not work with ST arrays Make sure you run the commands inside the CEL Files folder

Microarray data analysis using R and Bioconductor

Load microarray data for analysis

#Load CEL files into R using the oligo package since the old affy package does not work with ST arrays
#Make sure you run the commands inside the CEL Files folder
library(oligo)
setwd("/Users/miguel/Dropbox/RStudio/Juan/Data/CELFiles")
list = list.files(full.names=TRUE)
data = oligo::read.celfiles(list)

## Reading in : ./Titan_0180_11614MM_A05.CEL
## Reading in : ./Titan_0180_11614MM_A09.CEL
## Reading in : ./Titan_0180_11614MM_B05.CEL
## Reading in : ./Titan_0180_11614MM_B09.CEL
## Reading in : ./Titan_0180_11614MM_C05.CEL
## Reading in : ./Titan_0180_11614MM_C09.CEL
## Reading in : ./Titan_0180_11614MM_D05.CEL
## Reading in : ./Titan_0180_11614MM_D09.CEL
## Reading in : ./Titan_0180_11614MM_E05.CEL
## Reading in : ./Titan_0180_11614MM_E07.CEL
## Reading in : ./Titan_0180_11614MM_F05.CEL
## Reading in : ./Titan_0180_11614MM_F07.CEL
## Reading in : ./Titan_0180_11614MM_G05.CEL
## Reading in : ./Titan_0180_11614MM_G07.CEL
## Reading in : ./Titan_0180_11614MM_H05.CEL
## Reading in : ./Titan_0180_11614MM_H07.CEL

ph = data@phenoData
feat = data@featureData

Load the sample information

#Merge sample metadata from Edinburgh Genomics with Juan's sample information
#Load required libraries
library(readr)
library(tidyr)
library(dplyr)
setwd("/Users/miguel/Dropbox/RStudio/Juan")
metadata_juan <- read.csv("data/11614MM_SampleData_BioOpen.csv")
metadata_edinburghG <- read.csv("data/11614MM_HybSetup_BioOpen.csv")
metadata <- inner_join (metadata_juan,metadata_edinburghG)

ph@data[ ,1] = c("Control_3days_01","Linker_30days_01",
                 "Control_3days_02","Linker_30days_02",
                 "Control_3days_03","Linker_30days_03",
                 "Control_3days_04","Linker_30days_04",
                 "Linker_3days_01","Control_30days_01",
                 "Linker_3days_02","Control_30days_02",
                 "Linker_3days_03","Control_30days_03",
                 "Linker_3days_04","Control_30days_04")

Pset = fitProbeLevelModel(data)

Quality control of microarray data.

Histograms of raw data

#Create histograms of microarray data, using ggplot
library(ggplot2)
pmexp = pm(data)
sampleNames = vector()
logs =  vector()
for (i in 1:16)
{
  sampleNames = c(sampleNames,rep(ph@data[i,1],dim(pmexp)[1]))
  logs = c(logs,log2(pmexp[,i]))
}
logData = data.frame(logInt=logs,sampleName=sampleNames)
dataHist2 = ggplot(logData, aes(logInt, colour = sampleName))
dataHist2 + geom_density()

Boxplots of raw data

#Create a BoxPlot of the raw data
dataBox = ggplot(logData,aes(sampleName,logInt))
dataBox + geom_boxplot() + theme(axis.text.x = element_text(angle = 45, hjust = 1))

Normalize the data using Robust Multi-array Average (RMA)

data.rma = rma(data)

## Background correcting
## Normalizing
## Calculating Expression

library(pd.drogene.1.1.st) # Load the platform design info for Affymetrix DroGene-1_1-st
data.rma_V02 = rma(data, target="core") #Summaries at the transcript level

## Background correcting
## Normalizing
## Calculating Expression

data.matrix = exprs(data.rma)

Annotate affymetrix probes using the affycoretools package

# Add annotation information using the affycoretools package
library(affycoretools)
library(pd.drogene.1.1.st) # Load the platform design info for Affymetrix DroGene-1_1-st
data.rma_V02 <- annotateEset(data.rma_V02, pd.drogene.1.1.st)

Quality control of the normalized data using box plots

#Create a BoxPlot of normalized intensities
sampleNames = vector()
normlogs = vector()
for (i in 1:16)
{
  sampleNames = c(sampleNames,rep(ph@data[i,1],dim(data.matrix)[1]))
  normlogs = c(normlogs,data.matrix[,i])
}
normData = data.frame(norm_logInt=normlogs,sampleName=sampleNames)
dataBox = ggplot(normData, aes(sampleName,norm_logInt))
dataBox + geom_boxplot() + ylim(2,16) + ggtitle("after normalization") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Summarize the variations in the data set using principal component analysis

#PCA Analysis
#Create a PCA plot using prcomp (default R)
#color=c('green','green','green','green','red','red','red','red','blue','blue','blue','blue','purple','purple','purple','purple','cyan','cyan','cyan','cyan','black','black','black','black')
#data.PC = prcomp(t(data.matrix),scale.=TRUE)
#plot(data.PC$x[1:24],col=color)

library("FactoMineR")
library("factoextra")

data_as_frame <- as.data.frame(t(data.matrix))
data_as_frame$sample <-  c("Control_3days_01","Linker_30days_01",
                           "Control_3days_02","Linker_30days_02",
                           "Control_3days_03","Linker_30days_03",
                           "Control_3days_04","Linker_30days_04",
                           "Linker_3days_01","Control_30days_01",
                           "Linker_3days_02","Control_30days_02",
                           "Linker_3days_03","Control_30days_03",
                           "Linker_3days_04","Control_30days_04")

data_as_frame$sample_class <-  c("Control_3days","Linker_30days",
                                 "Control_3days","Linker_30days",
                                 "Control_3days","Linker_30days",
                                 "Control_3days","Linker_30days",
                                 "Linker_3days","Control_30days",
                                 "Linker_3days","Control_30days",
                                 "Linker_3days","Control_30days",
                                 "Linker_3days","Control_30days")

data_as_frame_ordered <- data_as_frame[, c(16324, 1:16323)]
data_sample_class <- data_as_frame_ordered[,1:16323]
PCA_mitoER <- PCA(data_sample_class[,-1], scale.unit = TRUE, ncp = 5, graph = FALSE)
fviz_pca_ind(PCA_mitoER, geom.ind = "point", col.ind = data_as_frame_ordered$sample_class,
             addEllipses = TRUE,
             legend.title = "Sample Class")

The plot above shows that with age, the variation in the data increases.

I therefore decided to focus on the arrays from young flies to assess the transcriptional changes in flies expressing the linker.

Identification of differentially expressed genes

ph@data[ ,2] = c("Control_3days","Linker_30days",
                 "Control_3days","Linker_30days",
                 "Control_3days","Linker_30days",
                 "Control_3days","Linker_30days",
                 "Linker_3days","Control_30days",
                 "Linker_3days","Control_30days",
                 "Linker_3days","Control_30days",
                 "Linker_3days","Control_30days")

colnames(ph@data)[2]="source"
ph@data

##                                        index         source
## Titan_0180_11614MM_A05.CEL  Control_3days_01  Control_3days
## Titan_0180_11614MM_A09.CEL  Linker_30days_01  Linker_30days
## Titan_0180_11614MM_B05.CEL  Control_3days_02  Control_3days
## Titan_0180_11614MM_B09.CEL  Linker_30days_02  Linker_30days
## Titan_0180_11614MM_C05.CEL  Control_3days_03  Control_3days
## Titan_0180_11614MM_C09.CEL  Linker_30days_03  Linker_30days
## Titan_0180_11614MM_D05.CEL  Control_3days_04  Control_3days
## Titan_0180_11614MM_D09.CEL  Linker_30days_04  Linker_30days
## Titan_0180_11614MM_E05.CEL   Linker_3days_01   Linker_3days
## Titan_0180_11614MM_E07.CEL Control_30days_01 Control_30days
## Titan_0180_11614MM_F05.CEL   Linker_3days_02   Linker_3days
## Titan_0180_11614MM_F07.CEL Control_30days_02 Control_30days
## Titan_0180_11614MM_G05.CEL   Linker_3days_03   Linker_3days
## Titan_0180_11614MM_G07.CEL Control_30days_03 Control_30days
## Titan_0180_11614MM_H05.CEL   Linker_3days_04   Linker_3days
## Titan_0180_11614MM_H07.CEL Control_30days_04 Control_30days

groups = ph@data$source
f = factor(groups,levels=c("Control_3days","Control_30days","Linker_3days","Linker_30days"))
design = model.matrix(~ 0 + f)
colnames(design) = c("Control_3days","Control_30days","Linker_3days","Linker_30days")
design

##    Control_3days Control_30days Linker_3days Linker_30days
## 1              1              0            0             0
## 2              0              0            0             1
## 3              1              0            0             0
## 4              0              0            0             1
## 5              1              0            0             0
## 6              0              0            0             1
## 7              1              0            0             0
## 8              0              0            0             1
## 9              0              0            1             0
## 10             0              1            0             0
## 11             0              0            1             0
## 12             0              1            0             0
## 13             0              0            1             0
## 14             0              1            0             0
## 15             0              0            1             0
## 16             0              1            0             0
## attr(,"assign")
## [1] 1 1 1 1
## attr(,"contrasts")
## attr(,"contrasts")$f
## [1] "contr.treatment"

library(limma)
data.fit = lmFit(data.rma_V02,design)
data.fit$coefficients[1:10,]

##          Control_3days Control_30days Linker_3days Linker_30days
## 18130001      1.845124       2.482081     2.290334      2.207351
## 18130003      3.816978       3.923237     4.005682      4.435453
## 18130005      3.340945       3.642506     4.081426      3.841263
## 18130007      1.599987       2.457995     1.651805      2.421834
## 18130009      2.549814       3.733490     2.533831      3.385800
## 18130011      2.201017       2.477259     2.047083      2.184250
## 18130013      3.629739       3.888842     2.857710      3.464217
## 18130015      2.199753       3.176709     2.162428      2.612304
## 18130017      2.538511       3.115224     2.785309      3.099264
## 18130019      1.795543       1.640211     1.711993      1.791020

contrast.matrix = makeContrasts(Linker_3days-Control_3days,
                                Linker_30days-Control_30days,
                                levels=design)
data.fit.con = contrasts.fit(data.fit,contrast.matrix)
data.fit.eb = eBayes(data.fit.con)
contrasts <- as.data.frame(data.fit.eb)

DEresults = decideTests(data.fit.eb,method='global',adjust.method="BH",p.value=0.05,lfc=1)

DEresults <- as.data.frame(DEresults)
write.csv(DEresults, "DEresults_V02.csv")
DEresults_filtered = subset(DEresults,
                           DEresults[,1] !=0 | DEresults[,2] !=0)

library(dplyr)
contrasts_filtered <- contrasts %>%
  filter(!is.na(genes.SYMBOL))

After filtering, there are 14981 mapped probe IDs. I will now merge the results in the “DE_results_filtered” with the “contrasts_filtered” data frame

DEresults_filtered <- cbind(genes.PROBEID = rownames(DEresults_filtered), DEresults_filtered)
contrasts_DEresults <- inner_join(contrasts_filtered, DEresults_filtered, by = "genes.PROBEID")
contrasts_DEresultsV02 <- contrasts_DEresults[,c(9,1:2,10)]

The coefficients in “contrasts_DEresultsV02” define the log fold changes for the comparisons Now we will create two separate files for significant changes (-2 > FC > 2 and FDR < 5%) for 3 days and 30 days

# A list of genes with altered expression at 3 days
DEresults_3d <- contrasts_DEresultsV02 %>%
  select (genes.SYMBOL,coefficients.Control_3days...Linker_3days,genes.GENENAME) %>%
  filter (coefficients.Control_3days...Linker_3days >= 1 | coefficients.Control_3days...Linker_3days <= -1)

The code above generates a list of 58 genes with altered expression in 3-day-old flies

# A list of genes with altered expression at 30 days
DEresults_30d <- contrasts_DEresultsV02 %>%
  select (genes.SYMBOL,coefficients.Control_30days...Linker_30days,genes.GENENAME) %>%
  filter (coefficients.Control_30days...Linker_30days >= 1 | coefficients.Control_30days...Linker_30days <= -1)

The code above generates a list of 49 genes with altered expression in 30-day-old flies

Downstream analysis using Cytoscape and GeneMANIA

Output both lists above as csv files for downstream analysis using the GeneMANIA Cytoscape plugin

write.csv(DEresults_3d, "GeneMANIAInputData_3d.csv")
write.csv(DEresults_30d, "GeneMANIAInputData_30d.csv")

GeneMANIA identified an enrichment by co-expression, 9.60, described by Zhao and colleagues.

Load the enriched network as a new matrix

GeneMANIA_Output <- read.csv("GeneMANIA_3d_Zhao_Haddad_2010.csv")
# Change the column name for genes in the GeneMANIA dataset so that it matches the name in the DEresults_3d
colnames(GeneMANIA_Output)[colnames(GeneMANIA_Output)=="gene.name"] <- "genes.SYMBOL"
# Join the two tables by genes.SYMBOL
GeneMANIA <- inner_join(DEresults_3d,GeneMANIA_Output, by="genes.SYMBOL")

## Warning: Column `genes.SYMBOL` joining factors with different levels,
## coercing to character vector

This resulted in 28 genes belonging to the Zhao dataset (genes involved in tolerance to high oxygen levels)

write.csv(GeneMANIA, "Genemania_output.csv")

GeneMANIA networks Above is the Cytoscape GUI displaying the hypoxia network (left) and the score for that network, surrounded by a red box on the right.

Display the 28 genes associated with tolerance to hyperoxia in flies:

GeneMANIA[,"genes.SYMBOL"]

##  [1] "CG17322" "CG15263" "CG11034" "CG17107" "CG33679" "CG43402" "CG16713"
##  [8] "CG10659" "Mtk"     "GstE1"   "Mal-A6"  "Cht4"    "CG12780" "Damm"   
## [15] "CG15870" "CG3264"  "CG34040" "PGRP-SD" "CG10516" "CG11458" "Drsl4"  
## [22] "CG7298"  "TotA"    "Diedel"  "Npc2e"   "Fst"     "CG9616"  "CG43125"