Load required packages

library(readr)
library(DESeq2)
library(EnhancedVolcano)
library(dplyr)
library(tibble)
library(gridExtra)

Load data

chrRNA_counts_path <- "/path/to/folder/with/iXistChrX_YY1-FKBP_chrRNA/data/"
data_path <- "/path/to/folder/with/chrRNA/data/"
aF1_chrRNA_ES_d0_un <- read_table2(paste0(chrRNA_counts_path,"aF1_chrRNA_ES_d0_un.sortn.count"), skip = 1)
aF1_chrRNA_ES_2dDox_un <- read_table2(paste0(chrRNA_counts_path,"aF1_chrRNA_ES_2dDox_un.sortn.count"), skip = 1)
aF1_chrRNA_ES_6dDox_un <- read_table2(paste0(chrRNA_counts_path,"aF1_chrRNA_ES_6dDox_un.sortn.count"), skip = 1)

aF1_chrRNA_ES_d0_dTag52h <- read_table2(paste0(chrRNA_counts_path,"aF1_chrRNA_ES_d0_dTag52h.sortn.count"), skip = 1)
aF1_chrRNA_ES_2dDox_dTag52h <- read_table2(paste0(chrRNA_counts_path,"aF1_chrRNA_ES_2dDox_dTag52h.sortn.count"), skip = 1)
aF1_chrRNA_ES_6dDox_dTag52h <- read_table2(paste0(chrRNA_counts_path,"aF1_chrRNA_ES_6dDox_dTag52h.sortn.count"), skip = 1)

cC3_chrRNA_ES_d0_un <- read_table2(paste0(chrRNA_counts_path,"cC3_chrRNA_ES_d0_un.sortn.count"), skip = 1)
cC3_chrRNA_ES_2dDox_un <- read_table2(paste0(chrRNA_counts_path,"cC3_chrRNA_ES_2dDox_un.sortn.count"), skip = 1)
cC3_chrRNA_ES_6dDox_un <- read_table2(paste0(chrRNA_counts_path,"cC3_chrRNA_ES_6dDox_un.sortn.count"), skip = 1)

cC3_chrRNA_ES_d0_dTag52h <- read_table2(paste0(chrRNA_counts_path,"cC3_chrRNA_ES_d0_dTag52h.sortn.count"), skip = 1)
cC3_chrRNA_ES_2dDox_dTag52h <- read_table2(paste0(chrRNA_counts_path,"cC3_chrRNA_ES_2dDox_dTag52h.sortn.count"), skip = 1)
cC3_chrRNA_ES_6dDox_dTag52h <- read_table2(paste0(chrRNA_counts_path,"cC3_chrRNA_ES_6dDox_dTag52h.sortn.count"), skip = 1)

DEseq2 differential expression analysis

aF1 (iXist-ChrX-Dom YY1-FKBP12-3xT7)

#make cts
cts_aF1 <- cbind(aF1_chrRNA_ES_d0_un[7], aF1_chrRNA_ES_2dDox_un[7], aF1_chrRNA_ES_6dDox_un[7],
             aF1_chrRNA_ES_d0_dTag52h[7], aF1_chrRNA_ES_2dDox_dTag52h[7], aF1_chrRNA_ES_6dDox_dTag52h[7])
genelist <- as.list(aF1_chrRNA_ES_d0_un[,1])
rownames(cts_aF1) <- genelist$Geneid
sample <- c("aF1_d0_un", "aF1_d2_un","aF1_d6_un",
            "aF1_d0_dTag", "aF1_d2_dTag","aF1_d6_dTag")
colnames(cts_aF1) <- sample
dTAG <- c(rep("un", times=3), rep("dTAG", times=3))
coldata <- cbind(sample, dTAG)
#make DESeq data set
dds_aF1 <- DESeqDataSetFromMatrix(countData = cts_aF1,
                              colData = coldata,
                              design= ~ dTAG)

#Set untreated "un" as the reference factor
dds_aF1$dTAG <- relevel(dds_aF1$dTAG, ref = "un")
dds_aF1 <- DESeq(dds_aF1)
res <- results(dds_aF1)
plotMA(res, ylim=c(-3,3))

#perform shrinkage
resLFC_aF1 <- lfcShrink(dds_aF1, coef="dTAG_dTAG_vs_un", type="apeglm")
plotMA(resLFC_aF1, ylim=c(-3,3))

aF1_plotMA <- plotMA(resLFC_aF1, ylim=c(-3,3), returnData = TRUE)

cC3 (iXist-ChrX-Cast YY1-FKBP12-3xT7)

#make cts 
cts_cC3 <- cbind(cC3_chrRNA_ES_d0_un[7], cC3_chrRNA_ES_2dDox_un[7], cC3_chrRNA_ES_6dDox_un[7],
                 cC3_chrRNA_ES_d0_dTag52h[7], cC3_chrRNA_ES_2dDox_dTag52h[7], cC3_chrRNA_ES_6dDox_dTag52h[7])
genelist <- as.list(cC3_chrRNA_ES_d0_un[,1])
rownames(cts_cC3) <- genelist$Geneid
sample <- c("cC3_d0_un", "cC3_d2_un","cC3_d6_un",
            "cC3_d0_dTag", "cC3_d2_dTag","cC3_d6_dTag")
colnames(cts_cC3) <- sample
#make DESeq data set
dds_cC3 <- DESeqDataSetFromMatrix(countData = cts_cC3,
                                  colData = coldata,
                                  design= ~ dTAG)
#Set untreated "un" as the reference factor
dds_cC3$dTAG <- relevel(dds_cC3$dTAG, ref = "un")
dds_cC3 <- DESeq(dds_cC3)
res <- results(dds_cC3)
plotMA(res, ylim=c(-3,3))

#perform shrinkage
resLFC_cC3 <- lfcShrink(dds_cC3, coef="dTAG_dTAG_vs_un", type="apeglm")
plotMA(resLFC_cC3, ylim=c(-3,3))

cC3_plotMA <- plotMA(resLFC_cC3, ylim=c(-3,3), returnData = TRUE)

Are the same gene differentially regulated in aF1 and cC3?

aF1_down <- aF1_plotMA[aF1_plotMA$isDE == TRUE & aF1_plotMA$lfc < 0,]
cC3_down <- cC3_plotMA[cC3_plotMA$isDE == TRUE & cC3_plotMA$lfc < 0,]

both_down <- sum(rownames(cC3_down) %in% rownames(aF1_down) == TRUE)
paste0(both_down, " of ", nrow(cC3_down), " downregulated genes in cC3 are also downregulated in aF1 (",100*(both_down/nrow(cC3_down)),"%)")
## [1] "916 of 1219 downregulated genes in cC3 are also downregulated in aF1 (75.143560295324%)"
aF1_up <- aF1_plotMA[aF1_plotMA$isDE == TRUE & aF1_plotMA$lfc > 0,]
cC3_up <- cC3_plotMA[cC3_plotMA$isDE == TRUE & cC3_plotMA$lfc > 0,]

both_up <- sum(rownames(cC3_up) %in% rownames(aF1_up) == TRUE)
paste0(both_up, " of ", nrow(cC3_up), " upregulated genes in cC3 are also upregulated in aF1 (",100*(both_up/nrow(cC3_up)),"%)")
## [1] "537 of 863 upregulated genes in cC3 are also upregulated in aF1 (62.2247972190035%)"

Volcano plots

Volcano plots for aF1 and cC3 lines separately

# plot only signficant genes on volcano plot
padj.cutoff <- 0.01
lfc.cutoff <- 0.58 #this is equivalent to a fold change of 1.5

#for cC3
resLFC_cC3_tb <- resLFC_cC3 %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()

sig_resLFC_cC3 <- resLFC_cC3_tb %>%
  filter(padj < padj.cutoff & abs(log2FoldChange) > lfc.cutoff)

signif_up_cC3 <- subset(sig_resLFC_cC3, log2FoldChange > 0)
signif_down_cC3 <- subset(sig_resLFC_cC3, log2FoldChange < 0)

keyvals.colour_cC3 <- ifelse(
  rownames(resLFC_cC3) %in% signif_up_cC3$gene, 'orange',
    ifelse(rownames(resLFC_cC3) %in% signif_down_cC3$gene, "blue",
           'black'))
keyvals.colour_cC3[is.na(keyvals.colour_cC3)] <- 'black'
names(keyvals.colour_cC3)[keyvals.colour_cC3 == 'orange'] <- paste("up n=",nrow(signif_up_cC3))
names(keyvals.colour_cC3)[keyvals.colour_cC3 == 'blue'] <- paste("up n=",nrow(signif_down_cC3))
names(keyvals.colour_cC3)[keyvals.colour_cC3 == 'black'] <- 'non-sig'

volcano_cC3 <- EnhancedVolcano(resLFC_cC3,
                               title = 'cC3 - dTAG vs untreated',
                               lab = NA,
                               xlim = c(-4,3),
                               ylim = c(0,50),
                               x = 'log2FoldChange',
                               y = 'pvalue',
                               pCutoff = 10e-4,
                               FCcutoff = 0.58,
                               colCustom = keyvals.colour_cC3,
                               gridlines.minor = FALSE)
volcano_cC3

#for aF1
resLFC_aF1_tb <- resLFC_aF1 %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()

sig_resLFC_aF1 <- resLFC_aF1_tb %>%
  filter(padj < padj.cutoff & abs(log2FoldChange) > lfc.cutoff)

signif_up_aF1 <- subset(sig_resLFC_aF1, log2FoldChange > 0)
signif_down_aF1 <- subset(sig_resLFC_aF1, log2FoldChange < 0)

keyvals.colour_aF1 <- ifelse(
  rownames(resLFC_aF1) %in% signif_up_aF1$gene, 'orange',
  ifelse(rownames(resLFC_aF1) %in% signif_down_aF1$gene, "blue",
         'black'))
keyvals.colour_aF1[is.na(keyvals.colour_aF1)] <- 'black'
names(keyvals.colour_aF1)[keyvals.colour_aF1 == 'orange'] <- paste("up n=",nrow(signif_up_aF1))
names(keyvals.colour_aF1)[keyvals.colour_aF1 == 'blue'] <- paste("up n=",nrow(signif_down_aF1))
names(keyvals.colour_aF1)[keyvals.colour_aF1 == 'black'] <- 'non-sig'

volcano_aF1 <- EnhancedVolcano(resLFC_aF1,
                               title = 'aF1 - dTAG vs untreated',
                               lab = NA,
                               xlim = c(-4,3),
                               ylim = c(0,50),
                               pCutoff = 10e-4,
                               FCcutoff = 0.58,
                               x = 'log2FoldChange',
                               y = 'pvalue',
                               colCustom = keyvals.colour_aF1,
                               gridlines.minor = FALSE)
volcano_aF1

Analyse all samples together

#make cts
cts <- cbind(aF1_chrRNA_ES_d0_un[7], aF1_chrRNA_ES_2dDox_un[7], aF1_chrRNA_ES_6dDox_un[7],
             cC3_chrRNA_ES_d0_un[7], cC3_chrRNA_ES_2dDox_un[7], cC3_chrRNA_ES_6dDox_un[7],
             aF1_chrRNA_ES_d0_dTag52h[7], aF1_chrRNA_ES_2dDox_dTag52h[7], aF1_chrRNA_ES_6dDox_dTag52h[7],
             cC3_chrRNA_ES_d0_dTag52h[7], cC3_chrRNA_ES_2dDox_dTag52h[7], cC3_chrRNA_ES_6dDox_dTag52h[7])
genelist <- as.list(aF1_chrRNA_ES_d0_un[,1])
rownames(cts) <- genelist$Geneid
sample <- c("aF1_d0_un", "aF1_d2_un","aF1_d6_un",
            "cC3_d0_un", "cC3_d2_un","cC3_d6_un",
            "aF1_d0_dTag", "aF1_d2_dTag","aF1_d6_dTag",
            "cC3_d0_dTag", "cC3_d2_dTag","cC3_d6_dTag")
colnames(cts) <- sample
dTAG <- c(rep("un", times=6), rep("dTAG", times=6))
coldata <- cbind(sample, dTAG)
#make DESeq data set
dds <- DESeqDataSetFromMatrix(countData = cts,
                              colData = coldata,
                              design= ~ dTAG)

#Set untreated "un" as the reference factor
dds$dTAG <- relevel(dds$dTAG, ref = "un")
dds <- DESeq(dds)
res <- results(dds)
plotMA(res, ylim=c(-3,3))

#perform shrinkage
resLFC <- lfcShrink(dds, coef="dTAG_dTAG_vs_un", type="apeglm")
plotMA(resLFC, ylim=c(-3,3))

Make volcano plot all samples together

#volcano plot
padj.cutoff <- 0.01
lfc.cutoff <- 0.58 #this is equivalent to a fold change of 1.5

resLFC_tb <- resLFC %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()

sig_resLFC <- resLFC_tb %>%
  filter(padj < padj.cutoff & abs(log2FoldChange) > lfc.cutoff)

signif_up <- subset(sig_resLFC, log2FoldChange > 0)
signif_down <- subset(sig_resLFC, log2FoldChange < 0)

keyvals.colour <- ifelse(
  rownames(resLFC) %in% signif_up$gene, 'orange',
  ifelse(rownames(resLFC) %in% signif_down$gene, "blue",
         'black'))
keyvals.colour[is.na(keyvals.colour)] <- 'black'
names(keyvals.colour)[keyvals.colour == 'orange'] <- paste("up n=",nrow(signif_up))
names(keyvals.colour)[keyvals.colour == 'blue'] <- paste("down n=",nrow(signif_down))
names(keyvals.colour)[keyvals.colour == 'black'] <- 'non-sig'

volcano_6samples <- EnhancedVolcano(resLFC,
                           title = 'both lines: dTAG vs untreated',
                           lab = NA,
                           xlim = c(-4,3),
                           ylim = c(0,50),
                           x = 'log2FoldChange',
                           y = 'pvalue',
                           colCustom = keyvals.colour,
                           pCutoff = 10e-4,
                           FCcutoff = 0.58,
                           gridlines.minor = FALSE)
volcano_6samples

All samples, but only plot YY1 targets

GeneTSS_YY1prom <- read.delim(paste0(data_path,"GeneTSS_YY1prom.bed"), header=FALSE)

YY1targets <- rownames(resLFC[rownames(resLFC) %in% GeneTSS_YY1prom$V4,])

resLFC_YY1 <- resLFC[rownames(resLFC[rownames(resLFC) %in% GeneTSS_YY1prom$V4,]),]

#volcano plot
padj.cutoff <- 0.01
lfc.cutoff <- 0.58 #this is equivalent to a fold change of 1.5

resLFC_YY1_tb <- resLFC_YY1 %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()

sig_resLFC_YY1 <- resLFC_YY1_tb %>%
  filter(padj < padj.cutoff & abs(log2FoldChange) > lfc.cutoff)

signif_up_YY1 <- subset(sig_resLFC_YY1, log2FoldChange > 0)
signif_down_YY1 <- subset(sig_resLFC_YY1, log2FoldChange < 0)

keyvals.colour <- ifelse(
  rownames(resLFC_YY1) %in% signif_up_YY1$gene, 'orange',
  ifelse(rownames(resLFC_YY1) %in% signif_down_YY1$gene, "blue",
         'black'))
keyvals.colour[is.na(keyvals.colour)] <- 'black'
names(keyvals.colour)[keyvals.colour == 'orange'] <- paste("up n=",nrow(signif_up_YY1))
names(keyvals.colour)[keyvals.colour == 'blue'] <- paste("down n=",nrow(signif_down_YY1))
names(keyvals.colour)[keyvals.colour == 'black'] <- 'non-sig'

volcano_6samples_YY1 <- EnhancedVolcano(resLFC_YY1,
                                    title = 'both lines: dTAG vs untreated',
                                    lab = NA,
                                    xlim = c(-4,3),
                                    ylim = c(0,50),
                                    x = 'log2FoldChange',
                                    y = 'pvalue',
                                    colCustom = keyvals.colour,
                                    pCutoff = 10e-4,
                                    FCcutoff = 0.58,
                                    gridlines.minor = FALSE)
#plot whole-genome and YY1 targets
volcano_6samples_YY1

All samples, but split by ChrX vs Autosomes

xlinked_genes <- resLFC[rownames(resLFC) %in% aF1_chrRNA_ES_d0_un[(aF1_chrRNA_ES_d0_un$Chr %in% "chrX"),]$Geneid,]
xlinked_gene_names <- rownames(xlinked_genes)

resLFC_chrX <- resLFC[rownames(resLFC[rownames(resLFC) %in% xlinked_gene_names,]),]

#volcano plot
padj.cutoff <- 0.01
lfc.cutoff <- 0.58 #this is equivalent to a fold change of 1.5

resLFC_chrX_tb <- resLFC_chrX %>%
  data.frame() %>%
  rownames_to_column(var="gene") %>% 
  as_tibble()

sig_resLFC_chrX <- resLFC_chrX_tb %>%
  filter(padj < padj.cutoff & abs(log2FoldChange) > lfc.cutoff)

signif_up_chrX <- subset(sig_resLFC_chrX, log2FoldChange > 0)
signif_down_chrX <- subset(sig_resLFC_chrX, log2FoldChange < 0)

keyvals.colour <- ifelse(
  rownames(resLFC_chrX) %in% signif_up_chrX$gene, 'orange',
  ifelse(rownames(resLFC_chrX) %in% signif_down_chrX$gene, "blue",
         'black'))
keyvals.colour[is.na(keyvals.colour)] <- 'black'
names(keyvals.colour)[keyvals.colour == 'orange'] <- paste("up n=",nrow(signif_up_chrX))
names(keyvals.colour)[keyvals.colour == 'blue'] <- paste("down n=",nrow(signif_down_chrX))
names(keyvals.colour)[keyvals.colour == 'black'] <- 'non-sig'

volcano_6samples_chrX <- EnhancedVolcano(resLFC_chrX,
                                    title = 'both lines: dTAG vs untreated',
                                    lab = rownames(resLFC_chrX),
                                    xlim = c(-4,3),
                                    ylim = c(0,50),
                                    x = 'log2FoldChange',
                                    y = 'pvalue',
                                    colCustom = keyvals.colour,
                                    pCutoff = 10e-4,
                                    FCcutoff = 0.58,
                                    gridlines.minor = FALSE)
volcano_6samples_chrX