7 Influenza DGE DESeq2

7.1 Transcriptome - DESeq2 - Females

7.1.1 Libraries

library(tidyverse)
library(DESeq2)
library(ggpubr)

7.1.2 Data

Unknown = female

SampleInfo <- 
  read.csv("Input Files/metadata.csv", 
           stringsAsFactors = FALSE) %>% 
  mutate(year=as.factor(year)) %>% 
  filter(!sex=="male") %>% 
  mutate(RNA = ifelse(disease_stage == "acute", "viruspos", 
                        ifelse(disease_stage == "peak", "viruspos", "virusneg")))

Raw <- read.csv(file = "Output Files/txome_genecounts_locid_nohemo.csv", 
                stringsAsFactors = FALSE,
                header = TRUE, 
                row.names = 1) %>% 
  select(SampleInfo$sample)

SampleInfo <- 
  SampleInfo %>% 
  column_to_rownames(var="sample")

7.1.3 Viral RNA

7.1.3.1 Create DESeq Dataset (dds)

count_matrix <- as.matrix(Raw)

dds_virus <- DESeqDataSetFromMatrix(countData = round(count_matrix), 
                              colData = SampleInfo, 
                              design = ~ RNA)
dds_virus

7.1.3.2 Filter to remove lowly expressed genes (<10 counts)

keep <- rowSums(counts(dds_virus)) >= 10
dds_virus <- dds_virus[keep,]

7.1.3.3 Set reference level to virus neg group - which group to compare against

dds_virus$RNA <- factor(dds_virus$RNA, levels = c("virusneg","viruspos"))
dds_virus$RNA <- relevel(dds_virus$RNA, ref = "virusneg")

7.1.3.4 Test for differentially expressed genes

cooksCutoff = results function automatically flags genes that contain a Cook’s distance above a cutoff for samples which have 3 or more replicates

dds_virus <- DESeq(dds_virus)

results_virus <- 
  results(dds_virus, contrast=c("RNA", "viruspos", "virusneg"), 
          cooksCutoff=FALSE) %>% 
  as.data.frame() %>% 
  rownames_to_column(var="gene")

7.1.3.5 Save results

write.csv(results_virus, "Output Files/txome_female_deseq_viralRNA_DGE_results.csv", row.names=FALSE)

7.1.3.6 Select DE genes with absolute logFC>1 and p-value<0.05

de_virus <- 
  results_virus %>% 
  filter(pvalue<0.05 & abs(log2FoldChange)>1) 

7.1.3.7 Save results

write.csv(de_virus, "Output Files/txome_female_deseq_viralRNA_DEG.csv", row.names=FALSE)

7.1.4 Influenza Stage

7.1.4.1 Create DESeq Dataset (dds)

count_matrix <- as.matrix(Raw)

dds <- DESeqDataSetFromMatrix(countData = round(count_matrix), 
                              colData = SampleInfo, 
                              design = ~ disease_stage)
dds

7.1.4.2 Filter to remove lowly expressed genes (<10 counts)

keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]

7.1.4.3 Set reference level to control group - which group to compare against

dds$disease_stage <- factor(dds$disease_stage, levels = c("control","acute","peak","late"))
dds$disease_stage <- relevel(dds$disease_stage, ref = "control")

7.1.4.4 Test for differentially expressed genes

cooksCutoff = results function automatically flags genes that contain a Cook’s distance above a cutoff for samples which have 3 or more replicates

dds <- DESeq(dds)

results_acute <- 
  results(dds, contrast=c("disease_stage", "acute", "control"), 
          cooksCutoff=FALSE) %>% 
  as.data.frame() %>% 
  rownames_to_column(var="gene")

results_peak <- 
  results(dds, contrast=c("disease_stage", "peak", "control"), 
          cooksCutoff=FALSE) %>% 
  as.data.frame() %>% 
  rownames_to_column(var="gene")

results_late <- 
  results(dds, contrast=c("disease_stage", "late", "control"), 
          cooksCutoff=FALSE) %>% 
  as.data.frame() %>% 
  rownames_to_column(var="gene")

7.1.4.5 Save results

write.csv(results_acute, "Output Files/txome_female_deseq_DGE_results_acute.csv", row.names=FALSE)
write.csv(results_peak, "Output Files/txome_female_deseq_DGE_results_peak.csv", row.names=FALSE)
write.csv(results_late, "Output Files/txome_female_deseq_DGE_results_late.csv", row.names=FALSE)

7.1.4.6 Select DE genes with absolute logFC>1 and p-value<0.05

de_acute <- 
  results_acute %>% 
  filter(pvalue<0.05 & abs(log2FoldChange)>1) 

de_peak <-
  results_peak %>% 
  filter(pvalue<0.05, abs(log2FoldChange)>1)

de_late <-
  results_late %>% 
  filter(pvalue<0.05, abs(log2FoldChange)>1) 

7.1.4.7 Save results

write.csv(de_acute, "Output Files/txome_female_deseq_DEG_acute.csv", row.names=FALSE)
write.csv(de_peak, "Output Files/txome_female_deseq_DEG_peak.csv", row.names=FALSE)
write.csv(de_late, "Output Files/txome_female_deseq_DEG_late.csv", row.names=FALSE)

7.2 Genome - DESeq2 - Females

7.2.1 Libraries

library(tidyverse)
library(DESeq2)
library(ggpubr)

7.2.2 Data

Unknown = female

SampleInfo <- 
  read.csv("Input Files/metadata.csv", 
           stringsAsFactors = FALSE) %>% 
  mutate(year=as.factor(year)) %>% 
  filter(!sex=="male") %>% 
  mutate(RNA = ifelse(disease_stage == "acute", "viruspos", 
                        ifelse(disease_stage == "peak", "viruspos", "virusneg")))

Raw <- read.csv(file = "Output Files/gnome_genecounts_locid_nohemo.csv", 
                stringsAsFactors = FALSE,
                header = TRUE, 
                row.names = 1) %>% 
  select(SampleInfo$sample)

SampleInfo <- 
  SampleInfo %>% 
  column_to_rownames(var="sample")

7.2.3 Viral RNA

7.2.3.1 Create DESeq Dataset (dds)

count_matrix <- as.matrix(Raw)

dds_virus <- DESeqDataSetFromMatrix(countData = round(count_matrix), 
                              colData = SampleInfo, 
                              design = ~ RNA)
dds_virus

7.2.3.2 Filter to remove lowly expressed genes (<10 counts)

keep <- rowSums(counts(dds_virus)) >= 10
dds_virus <- dds_virus[keep,]

7.2.3.3 Set reference level to virus neg group - which group to compare against

dds_virus$RNA <- factor(dds_virus$RNA, levels = c("virusneg","viruspos"))
dds_virus$RNA <- relevel(dds_virus$RNA, ref = "virusneg")

7.2.3.4 Test for differentially expressed genes

cooksCutoff = results function automatically flags genes that contain a Cook’s distance above a cutoff for samples which have 3 or more replicates

dds_virus <- DESeq(dds_virus)

results_virus <- 
  results(dds_virus, contrast=c("RNA", "viruspos", "virusneg"), 
          cooksCutoff=FALSE) %>% 
  as.data.frame() %>% 
  rownames_to_column(var="gene")

7.2.3.5 Save results

write.csv(results_virus, "Output Files/gnome_female_deseq_viralRNA_DGE_results.csv", row.names=FALSE)

7.2.3.6 Select DE genes with absolute logFC>1 and p-value<0.05

de_virus <- 
  results_virus %>% 
  filter(pvalue<0.05 & abs(log2FoldChange)>1) 

7.2.3.7 Save results

write.csv(de_virus, "Output Files/gnome_female_deseq_viralRNA_DEG.csv", row.names=FALSE)

7.2.4 Influenza Stage

7.2.4.1 Create DESeq Dataset (dds)

count_matrix <- as.matrix(Raw)

dds <- DESeqDataSetFromMatrix(countData = round(count_matrix), 
                              colData = SampleInfo, 
                              design = ~ disease_stage)
dds

7.2.4.2 Filter to remove lowly expressed genes (<10 counts)

keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]

7.2.4.3 Set reference level to control group - which group to compare against

dds$disease_stage <- factor(dds$disease_stage, levels = c("control","acute","peak","late"))
dds$disease_stage <- relevel(dds$disease_stage, ref = "control")

7.2.4.4 Test for differentially expressed genes

cooksCutoff = results function automatically flags genes that contain a Cook’s distance above a cutoff for samples which have 3 or more replicates

dds <- DESeq(dds)

results_acute <- 
  results(dds, contrast=c("disease_stage", "acute", "control"), 
          cooksCutoff=FALSE) %>% 
  as.data.frame() %>% 
  rownames_to_column(var="gene")

results_peak <- 
  results(dds, contrast=c("disease_stage", "peak", "control"), 
          cooksCutoff=FALSE) %>% 
  as.data.frame() %>% 
  rownames_to_column(var="gene")

results_late <- 
  results(dds, contrast=c("disease_stage", "late", "control"), 
          cooksCutoff=FALSE) %>% 
  as.data.frame() %>% 
  rownames_to_column(var="gene")

7.2.4.5 Save results

write.csv(results_acute, "Output Files/gnome_female_deseq_DGE_results_acute.csv", row.names=FALSE)
write.csv(results_peak, "Output Files/gnome_female_deseq_DGE_results_peak.csv", row.names=FALSE)
write.csv(results_late, "Output Files/gnome_female_deseq_DGE_results_late.csv", row.names=FALSE)

7.2.4.6 Select DE genes with absolute logFC>1 and p-value<0.05

de_acute <- 
  results_acute %>% 
  filter(pvalue<0.05 & abs(log2FoldChange)>1) 

de_peak <-
  results_peak %>% 
  filter(pvalue<0.05, abs(log2FoldChange)>1)

de_late <-
  results_late %>% 
  filter(pvalue<0.05, abs(log2FoldChange)>1) 

7.2.4.7 Save results

write.csv(de_acute, "Output Files/gnome_female_deseq_DEG_acute.csv", row.names=FALSE)
write.csv(de_peak, "Output Files/gnome_female_deseq_DEG_peak.csv", row.names=FALSE)
write.csv(de_late, "Output Files/gnome_female_deseq_DEG_late.csv", row.names=FALSE)