Load required packages and functions
#Packages
library(readr)
library(stringr)
library(ggplot2)
library(ggridges)
library(stringr)
library(dplyr)
library(forcats)
library(minpack.lm)
library(mefa4)
library(reshape)
library(gridExtra)
library(ggpubr)
#Functions
LoadingCountFile <- function(file=""){
Count <- read.table(file, header=T)
return(Count)
}
Allelic.Ratio <- function(g1_counts,g2_counts){
Xa <- as.numeric(g1_counts)
Xi <- as.numeric(g2_counts)
AllelicRatio <- ( Xi / (Xi + Xa))
return(AllelicRatio)
}
Allelic.Ratio.Reverse <- function(g1_counts,g2_counts){
Xi <- as.numeric(g1_counts)
Xa <- as.numeric(g2_counts)
AllelicRatio <- ( Xi / (Xi + Xa))
return(AllelicRatio)
}
AR.table.reverse <- function(counts_table){
AllelicRatio_table <- counts_table[1:6]
for (j in seq(8,ncol(counts_table),3)){
AllelicRatio_table <- cbind(AllelicRatio_table,
Allelic.Ratio.Reverse(as.matrix(counts_table[j]),
as.matrix(counts_table[j+1]) ) )
}
return(AllelicRatio_table)
}
AR.table <- function(counts_table){
AllelicRatio_table <- counts_table[1:6]
for (j in seq(8,ncol(counts_table),3)){
AllelicRatio_table <- cbind(AllelicRatio_table,
Allelic.Ratio(as.matrix(counts_table[j]),
as.matrix(counts_table[j+1]) ) )
}
return(AllelicRatio_table)
}
chrX1.filter <- function(counts_table){
chrRNA_counts_table_chrX <- filter(counts_table, !grepl("chrX",counts_table$Chr))
chrRNA_counts_table_chrX <- counts_table[which(grepl("chrX",counts_table$Chr) == TRUE), ]
numextract <- function(string){
str_extract(string, "\\-*\\d+\\.*\\d*")
}
chrRNA_chrX1_filter_End <- cbind(chrRNA_counts_table_chrX[1], as.numeric(numextract(chrRNA_counts_table_chrX$End)))
chrRNA_chrX1_filter_End <- chrRNA_chrX1_filter_End[which(chrRNA_chrX1_filter_End$`as.numeric(numextract(chrRNA_counts_table_chrX$End))` < 103483233), ]
chrRNA_filter_chrX1 <- counts_table[
which(counts_table$Geneid %in% chrRNA_chrX1_filter_End$Geneid ), ]
return(chrRNA_filter_chrX1)
}
chrRNA.RPM <- function(counts_table){
#Calculate sum of counts for each sample and divide through (per million) to make RPM
countsSums <- colSums(counts_table[7:ncol(counts_table)])
GeneNormFactors <- (rep(countsSums[seq(1,length(countsSums),3)], each=3))/1000000
chrRNA_RPM <- cbind(counts_table[1:6], sweep(counts_table[7:ncol(counts_table)], 2, GeneNormFactors, "/"))
return(chrRNA_RPM)
}
ARtable.melt <- function(ARtable){
ARtable_melt <- melt(ARtable[,c(1:ncol(ARtable))], id=c("Geneid","Chr","Start","End","Strand","Length","YY1target"))
sample_info <- cbind(
str_extract(ARtable_melt[,(ncol(ARtable_melt)-1)], "[^_]*$"),
str_extract(ARtable_melt[,(ncol(ARtable_melt)-1)], ".+?(?=_)"),
str_extract(ARtable_melt[,(ncol(ARtable_melt)-1)], c("ES|NPC")))
colnames(sample_info) <- c("day","sample","protocol")
sample_info <- as.data.frame(sample_info)
ARtable_melt <- cbind(ARtable_melt[1:(ncol(ARtable_melt)-2)],
sample_info$day,
sample_info$sample,
sample_info$protocol,
ARtable_melt$value)
colnames(ARtable_melt)[(ncol(ARtable_melt)-3):ncol(ARtable_melt)] <- c("day","sample","protocol","ratio")
return(ARtable_melt)
}
RPMtable.melt <- function(RPMtable){
RPMtable_melt <- melt(RPMtable[,c(1:ncol(RPMtable))], id=c("Geneid","Chr","Start","End","Strand","Length"))
sample_info <- as.data.frame(
cbind(
str_extract(RPMtable_melt[,(ncol(RPMtable_melt)-1)], c("cC3|aF1")),
str_extract(RPMtable_melt[,(ncol(RPMtable_melt)-1)], c("d0|2dDox|6dDox")),
str_extract(RPMtable_melt[,(ncol(RPMtable_melt)-1)], c("un|dTag52h")),
str_extract(RPMtable_melt[,(ncol(RPMtable_melt)-1)], c("genome1|genome2|sorted"))))
colnames(sample_info) <- c("line","day","treatment","allele")
sample_info <- as.data.frame(sample_info)
sample_info[sample_info == "sorted"] <- "total"
RPMtable_melt <- data.frame(RPMtable_melt[1:(ncol(RPMtable_melt)-2)],
sample_info$line,
sample_info$day,
sample_info$treatment,
sample_info$allele,
RPMtable_melt$value)
colnames(RPMtable_melt)[(ncol(RPMtable_melt)-4):ncol(RPMtable_melt)] <- c("line","day","treatment","allele","RPM")
return(RPMtable_melt)
}
AllelicFoldChangetable.melt <- function(AllelicFoldChangetable){
AllelicFoldChangetable_melt <- melt(AllelicFoldChangetable[,c(1:ncol(AllelicFoldChangetable))], id=c("Geneid","Chr","Start","End","Strand","Length","YY1target"))
sample_info <- as.data.frame(
cbind(
str_extract(AllelicFoldChangetable_melt[,(ncol(AllelicFoldChangetable_melt)-1)], c("cC3|aF1")),
str_extract(AllelicFoldChangetable_melt[,(ncol(AllelicFoldChangetable_melt)-1)], c("d0|2d|6d")),
str_extract(AllelicFoldChangetable_melt[,(ncol(AllelicFoldChangetable_melt)-1)], c("g1|g2"))) )
colnames(sample_info) <- c("line","day","allele")
sample_info <- as.data.frame(sample_info)
AllelicFoldChangetable_melt <- data.frame(AllelicFoldChangetable_melt[1:(ncol(AllelicFoldChangetable_melt)-2)],
sample_info$line,
sample_info$day,
sample_info$allele,
AllelicFoldChangetable_melt$value)
colnames(AllelicFoldChangetable_melt)[(ncol(AllelicFoldChangetable_melt)-3):ncol(AllelicFoldChangetable_melt)] <- c("line","treatment","allele","log2foldchange")
return(AllelicFoldChangetable_melt)
}
data_path <- "/path/to/folder/with/chrRNA/data/"
chrRNA_counts_path <- "/path/to/folder/with/iXistChrX_YY1-FKBP_chrRNA/data/"
GeneInfo_A11B2 <- read_delim(paste0(data_path,"GeneInfo_A11B2.txt"),
"\t", escape_double = FALSE, col_names = TRUE, trim_ws = TRUE)
colnames(GeneInfo_A11B2) <- c("Geneid","Halftime","Starts_numeric","Dist_Xist","InitialTPM",
"Distance_Xist","Expression_group","SilencingRate","SmcHD1_dependence")
GeneInfo_C7H8 <- read_delim(paste0(data_path,"GeneInfo_C7H8.txt"),
"\t", escape_double = FALSE, col_names = TRUE, trim_ws = TRUE)
colnames(GeneInfo_C7H8) <- c("Geneid","Halftime","Starts_numeric","Dist_Xist","InitialTPM",
"Distance_Xist","Expression_group","SilencingRate","SmcHD1_dependence")
#Load in files of TSS which have YY1 binding at promoter
GeneTSS_YY1prom <- read.delim(paste0(data_path,"GeneTSS_YY1prom.bed"), header=FALSE)
Load counts files for cC3 and aF1
#Load in cC3
cC3_chrRNA_ES_d0_un <- LoadingCountFile(paste0(chrRNA_counts_path,"cC3_chrRNA_ES_d0_un.sortn.count"))
cC3_chrRNA_ES_d0_dTag52h <- LoadingCountFile(paste0(chrRNA_counts_path,"cC3_chrRNA_ES_d0_dTag52h.sortn.count"))
cC3_chrRNA_ES_2dDox_un <- LoadingCountFile(paste0(chrRNA_counts_path,"cC3_chrRNA_ES_2dDox_un.sortn.count"))
cC3_chrRNA_ES_2dDox_dTag52h <- LoadingCountFile(paste0(chrRNA_counts_path,"cC3_chrRNA_ES_2dDox_dTag52h.sortn.count"))
cC3_chrRNA_ES_6dDox_un <- LoadingCountFile(paste0(chrRNA_counts_path,"cC3_chrRNA_ES_6dDox_un.sortn.count"))
cC3_chrRNA_ES_6dDox_dTag52h <- LoadingCountFile(paste0(chrRNA_counts_path,"cC3_chrRNA_ES_6dDox_dTag52h.sortn.count"))
cC3_counts_table <- cbind(cC3_chrRNA_ES_d0_un[1:9], cC3_chrRNA_ES_d0_dTag52h[7:9],
cC3_chrRNA_ES_2dDox_un[7:9],cC3_chrRNA_ES_2dDox_dTag52h[7:9],
cC3_chrRNA_ES_6dDox_un[7:9],cC3_chrRNA_ES_6dDox_dTag52h[7:9])
aF1_chrRNA_ES_d0_un <- LoadingCountFile(paste0(chrRNA_counts_path,"aF1_chrRNA_ES_d0_un.sortn.count"))
aF1_chrRNA_ES_d0_dTag52h <- LoadingCountFile(paste0(chrRNA_counts_path,"aF1_chrRNA_ES_d0_dTag52h.sortn.count"))
aF1_chrRNA_ES_2dDox_un <- LoadingCountFile(paste0(chrRNA_counts_path,"aF1_chrRNA_ES_2dDox_un.sortn.count"))
aF1_chrRNA_ES_2dDox_dTag52h <- LoadingCountFile(paste0(chrRNA_counts_path,"aF1_chrRNA_ES_2dDox_dTag52h.sortn.count"))
aF1_chrRNA_ES_6dDox_un <- LoadingCountFile(paste0(chrRNA_counts_path,"aF1_chrRNA_ES_6dDox_un.sortn.count"))
aF1_chrRNA_ES_6dDox_dTag52h <- LoadingCountFile(paste0(chrRNA_counts_path,"aF1_chrRNA_ES_6dDox_dTag52h.sortn.count"))
aF1_counts_table <- cbind(aF1_chrRNA_ES_d0_un[1:9], aF1_chrRNA_ES_d0_dTag52h[7:9],
aF1_chrRNA_ES_2dDox_un[7:9],aF1_chrRNA_ES_2dDox_dTag52h[7:9],
aF1_chrRNA_ES_6dDox_un[7:9],aF1_chrRNA_ES_6dDox_dTag52h[7:9])
#cC3
cC3_counts_table_filt <- cC3_counts_table[which(cC3_counts_table$Geneid %in% GeneInfo_C7H8$Geneid),]
cC3_AR_table <- AR.table.reverse(cC3_counts_table_filt)
colnames(cC3_AR_table)[7:ncol(cC3_AR_table)] <- c("cC3ctrl_ES_d0","cC3dTAG_ES_d0",
"cC3ctrl_ES_d2","cC3dTAG_ES_d2",
"cC3ctrl_ES_d6","cC3dTAG_ES_d6")
cC3_AR_table_byYY1 <- mutate(cC3_AR_table,
YY1target = case_when(
cC3_AR_table$Geneid %in% GeneTSS_YY1prom[,4] ~ "YY1",
!(cC3_AR_table$Geneid %in% GeneTSS_YY1prom[,4]) ~ "nonYY1") )
#aF1
aF1_counts_table_filt <- aF1_counts_table[which(aF1_counts_table$Geneid %in% GeneInfo_A11B2$Geneid),]
aF1_AR_table <- AR.table(aF1_counts_table_filt)
colnames(aF1_AR_table)[7:ncol(aF1_AR_table)] <- c("aF1ctrl_ES_d0","aF1dTAG_ES_d0",
"aF1ctrl_ES_d2","aF1dTAG_ES_d2",
"aF1ctrl_ES_d6","aF1dTAG_ES_d6")
aF1_AR_table_byYY1 <- mutate(aF1_AR_table,
YY1target = case_when(
aF1_AR_table$Geneid %in% GeneTSS_YY1prom[,4] ~ "YY1",
!(aF1_AR_table$Geneid %in% GeneTSS_YY1prom[,4]) ~ "nonYY1") )
#melt
cC3_AR_table_byYY1_melt <- ARtable.melt(cC3_AR_table_byYY1)
aF1_AR_table_byYY1_melt <- ARtable.melt(aF1_AR_table_byYY1)
Allelic ratio boxplots
#plot just +/- dTAG13
cC3_boxplot <- mutate(cC3_AR_table_byYY1_melt, sample = factor(sample, levels=c("cC3ctrl","cC3dTAG"))) %>%
mutate(day = factor(day, levels=c("d0","d2","d6"))) %>%
ggplot(aes(x=day,y=ratio,fill=sample)) + theme_bw() +
geom_boxplot() +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
geom_hline(yintercept=0.5, linetype="dashed") + stat_compare_means(method = "t.test",label.y = 0.9,paired = TRUE)
aF1_boxplot <- mutate(aF1_AR_table_byYY1_melt, sample = factor(sample, levels=c("aF1ctrl","aF1dTAG"))) %>%
mutate(day = factor(day, levels=c("d0","d2","d6"))) %>%
ggplot(aes(x=day,y=ratio,fill=sample)) + theme_bw() +
geom_boxplot() +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
geom_hline(yintercept=0.5, linetype="dashed") + stat_compare_means(method = "t.test",label.y = 0.9,paired = TRUE)
aF1_boxplot
cC3_boxplot
#plot boxplot by Yy1target
cC3_boxplot_byYY1 <- subset(cC3_AR_table_byYY1_melt, sample = factor(sample, levels=c("cC3ctrl","cC3dTAG"))) %>%
mutate(YY1target = factor(YY1target, levels=c("YY1","nonYY1"))) %>%
mutate(day = factor(day, levels=c("d0","d2","d6"))) %>%
ggplot(aes(x=day,y=ratio,fill=sample)) +
facet_grid(cols = vars(YY1target)) + theme_bw() + coord_cartesian(ylim=c(0,0.7)) +
geom_boxplot() +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
geom_hline(yintercept=0.5, linetype="dashed")
#plot boxplot by Yy1target
aF1_boxplot_byYY1 <- subset(aF1_AR_table_byYY1_melt, sample = factor(sample, levels=c("aF1ctrl","aF1dTAG"))) %>%
mutate(YY1target = factor(YY1target, levels=c("YY1","nonYY1"))) %>%
mutate(day = factor(day, levels=c("d0","d2","d6"))) %>%
ggplot(aes(x=day,y=ratio,fill=sample)) +
facet_grid(cols = vars(YY1target)) + theme_bw() + coord_cartesian(ylim=c(0,0.7)) +
geom_boxplot() +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
geom_hline(yintercept=0.5, linetype="dashed")
aF1_boxplot_byYY1
cC3_boxplot_byYY1
Reads Per Million - Xist and other genes
#cC3
cC3_RPM_table <- chrRNA.RPM(cC3_counts_table)
cC3_RPM_table <- as.matrix(cC3_RPM_table[,c(seq(7,ncol(cC3_RPM_table),3))])
colnames(cC3_RPM_table) <- colnames(cC3_AR_table)[7:ncol(cC3_AR_table)]
cC3_RPM_table <- as.data.frame(cC3_RPM_table)
cC3_RPM_table <- cbind(cC3_counts_table$Geneid, cC3_RPM_table)
colnames(cC3_RPM_table) <- c("Geneid","cC3_d0_un","cC3_d0_dTAG","cC3_d2_un","cC3_d2_dTAG","cC3_d6_un","cC3_d6_dTAG")
sample <- c("Geneid","cC3_d0_un","cC3_d0_dTAG","cC3_d2_un","cC3_d2_dTAG","cC3_d6_un","cC3_d6_dTAG")
cC3_RPM_table <- as.data.frame(cC3_RPM_table)
cC3_RPM_table_melt <- melt(cC3_RPM_table, id=c("Geneid"))
colnames(cC3_RPM_table_melt)[2:3] <- c("sample","RPM")
cC3_Xist <- ggplot(subset(cC3_RPM_table_melt, Geneid %in% c("Xist")), aes(x=sample, y=RPM)) + ylim(0,8000) +
geom_bar(stat = "identity", width=0.4) + theme_bw() + facet_wrap(~Geneid, scales="free_y")
#aF1
aF1_RPM_table <- chrRNA.RPM(aF1_counts_table)
aF1_RPM_table <- as.matrix(aF1_RPM_table[,c(seq(7,ncol(aF1_RPM_table),3))])
colnames(aF1_RPM_table) <- colnames(aF1_AR_table)[7:ncol(aF1_AR_table)]
aF1_RPM_table <- as.data.frame(aF1_RPM_table)
aF1_RPM_table <- cbind(aF1_counts_table$Geneid, aF1_RPM_table)
colnames(aF1_RPM_table) <- c("Geneid","aF1_d0_un","aF1_d0_dTAG","aF1_d2_un","aF1_d2_dTAG","aF1_d6_un","aF1_d6_dTAG")
sample <- c("Geneid","aF1_d0_un","aF1_d0_dTAG","aF1_d2_un","aF1_d2_dTAG","aF1_d6_un","aF1_d6_dTAG")
aF1_RPM_table <- as.data.frame(aF1_RPM_table)
aF1_RPM_table_melt <- melt(aF1_RPM_table, id=c("Geneid"))
colnames(aF1_RPM_table_melt)[2:3] <- c("sample","RPM")
aF1_Xist <- ggplot(subset(aF1_RPM_table_melt, Geneid %in% c("Xist")), aes(x=sample, y=RPM)) + ylim(0,8000) +
geom_bar(stat = "identity", width=0.4) + theme_bw() + facet_wrap(~Geneid, scales="free_y")
grid.arrange(aF1_Xist, cC3_Xist, nrow = 1)
#Check some other genes involved in XCI in case they can explain the greater silencing in YY1 dTAG
cC3_Genes <- ggplot(subset(cC3_RPM_table_melt, Geneid %in% c("Spen","Pcgf3","Hnrnpk","Xist","Zfp42","Rnf12","Rlim")),
aes(x=sample, y=RPM)) +
geom_bar(stat = "identity", width=0.4) + theme_bw() + facet_wrap(~Geneid, scales="free_y")
cC3_Genes
Allelic RPM
cC3_RPM_table <- as.data.frame(chrRNA.RPM(cC3_counts_table))
aF1_RPM_table <- as.data.frame(chrRNA.RPM(aF1_counts_table))
cC3_RPM_table_all <- cC3_RPM_table[,c(1,7:ncol(cC3_RPM_table))]
RPM_table_all <- merge(cC3_RPM_table,aF1_RPM_table)
RPM_table_melt <- RPMtable.melt(RPM_table_all)
#Ddx3x, Hcfc1, Ogt, Rbmx
cC3_allelic <- subset(RPM_table_melt, Geneid %in% c("Hcfc1","Ddx3x","Ogt","Rbmx","Xist")) %>%
subset(allele %in% c("genome1","genome2")) %>%
mutate(allele = factor(allele, levels=c("genome2","genome1"))) %>%
subset(line %in% c("cC3")) %>%
mutate(day = factor(day, levels=c("d0","2dDox","6dDox"))) %>%
mutate(treatment = factor(treatment, levels=c("un","dTag52h"))) %>%
ggplot(aes(x=day,y=RPM,fill=treatment)) +
geom_bar(stat = "identity", width=0.4, position="dodge") +
facet_grid(rows=vars(Geneid), cols= vars(allele),scales="free_y") + theme_bw()
cC3_allelic
aF1_allelic <- subset(RPM_table_melt, Geneid %in% c("Hcfc1","Ddx3x","Ogt","Rbmx","Xist")) %>%
subset(allele %in% c("genome1","genome2")) %>%
mutate(allele = factor(allele, levels=c("genome1","genome2"))) %>%
subset(line %in% c("aF1")) %>%
mutate(day = factor(day, levels=c("d0","2dDox","6dDox"))) %>%
mutate(treatment = factor(treatment, levels=c("un","dTag52h"))) %>%
ggplot(aes(x=day,y=RPM,fill=treatment)) +
geom_bar(stat = "identity", width=0.4, position="dodge") +
facet_grid(rows=vars(Geneid), cols= vars(allele),scales="free_y") + theme_bw()
aF1_allelic
Allelic Log2 Fold Change plot
library(IDPmisc) #need to remove NAs in logFC tables
#aF1
aF1_RPM_table_filt <- aF1_RPM_table[which(aF1_RPM_table$Geneid %in% GeneInfo_A11B2$Geneid),]
aF1_Allelic_FoldChange <- data.frame(aF1_RPM_table_filt[,c(1:6)],
log2(aF1_RPM_table_filt$aF1_chrRNA_ES_d0_dTag52h.genome1.sorted.bam /
aF1_RPM_table_filt$aF1_chrRNA_ES_d0_un.genome1.sorted.bam),
log2(aF1_RPM_table_filt$aF1_chrRNA_ES_d0_dTag52h.genome2.sorted.bam /
aF1_RPM_table_filt$aF1_chrRNA_ES_d0_un.genome2.sorted.bam),
log2(aF1_RPM_table_filt$aF1_chrRNA_ES_2dDox_dTag52h.genome1.sorted.bam /
aF1_RPM_table_filt$aF1_chrRNA_ES_2dDox_un.genome1.sorted.bam),
log2(aF1_RPM_table_filt$aF1_chrRNA_ES_2dDox_dTag52h.genome2.sorted.bam /
aF1_RPM_table_filt$aF1_chrRNA_ES_2dDox_un.genome2.sorted.bam),
log2(aF1_RPM_table_filt$aF1_chrRNA_ES_6dDox_dTag52h.genome1.sorted.bam /
aF1_RPM_table_filt$aF1_chrRNA_ES_6dDox_un.genome1.sorted.bam),
log2(aF1_RPM_table_filt$aF1_chrRNA_ES_6dDox_dTag52h.genome2.sorted.bam /
aF1_RPM_table_filt$aF1_chrRNA_ES_6dDox_un.genome2.sorted.bam))
colnames(aF1_Allelic_FoldChange)[7:ncol(aF1_Allelic_FoldChange)] <- c("aF1_d0_g1","aF1_d0_g2",
"aF1_2d_g1","aF1_2d_g2",
"aF1_6d_g1","aF1_6d_g2")
aF1_Allelic_FoldChange_filt <- NaRV.omit(aF1_Allelic_FoldChange)
aF1_Allelic_FoldChange_yy1 <- mutate(aF1_Allelic_FoldChange_filt,
YY1target = case_when(
aF1_Allelic_FoldChange_filt$Geneid %in% GeneTSS_YY1prom[,4] ~ "YY1",
!(aF1_Allelic_FoldChange_filt$Geneid %in% GeneTSS_YY1prom[,4]) ~ "nonYY1") )
aF1_Allelic_FoldChange_nonfilt_yy1 <- mutate(aF1_Allelic_FoldChange,
YY1target = case_when(
aF1_Allelic_FoldChange$Geneid %in% GeneTSS_YY1prom[,4] ~ "YY1",
!(aF1_Allelic_FoldChange$Geneid %in% GeneTSS_YY1prom[,4]) ~ "nonYY1") )
aF1_Allelic_FoldChange_melt <- AllelicFoldChangetable.melt(aF1_Allelic_FoldChange_yy1)
#cC3
cC3_RPM_table_filt <- cC3_RPM_table[which(cC3_RPM_table$Geneid %in% GeneInfo_C7H8$Geneid),]
cC3_Allelic_FoldChange <- data.frame(cC3_RPM_table_filt[,c(1:6)],
log2(cC3_RPM_table_filt$cC3_chrRNA_ES_d0_dTag52h.genome1.sorted.bam /
cC3_RPM_table_filt$cC3_chrRNA_ES_d0_un.genome1.sorted.bam),
log2(cC3_RPM_table_filt$cC3_chrRNA_ES_d0_dTag52h.genome2.sorted.bam /
cC3_RPM_table_filt$cC3_chrRNA_ES_d0_un.genome2.sorted.bam),
log2(cC3_RPM_table_filt$cC3_chrRNA_ES_2dDox_dTag52h.genome1.sorted.bam /
cC3_RPM_table_filt$cC3_chrRNA_ES_2dDox_un.genome1.sorted.bam),
log2(cC3_RPM_table_filt$cC3_chrRNA_ES_2dDox_dTag52h.genome2.sorted.bam /
cC3_RPM_table_filt$cC3_chrRNA_ES_2dDox_un.genome2.sorted.bam),
log2(cC3_RPM_table_filt$cC3_chrRNA_ES_6dDox_dTag52h.genome1.sorted.bam /
cC3_RPM_table_filt$cC3_chrRNA_ES_6dDox_un.genome1.sorted.bam),
log2(cC3_RPM_table_filt$cC3_chrRNA_ES_6dDox_dTag52h.genome2.sorted.bam /
cC3_RPM_table_filt$cC3_chrRNA_ES_6dDox_un.genome2.sorted.bam))
colnames(cC3_Allelic_FoldChange)[7:ncol(cC3_Allelic_FoldChange)] <- c("cC3_d0_g1","cC3_d0_g2",
"cC3_2d_g1","cC3_2d_g2",
"cC3_6d_g1","cC3_6d_g2")
cC3_Allelic_FoldChange_filt <- NaRV.omit(cC3_Allelic_FoldChange)
cC3_Allelic_FoldChange_yy1 <- mutate(cC3_Allelic_FoldChange_filt,
YY1target = case_when(
cC3_Allelic_FoldChange_filt$Geneid %in% GeneTSS_YY1prom[,4] ~ "YY1",
!(cC3_Allelic_FoldChange_filt$Geneid %in% GeneTSS_YY1prom[,4]) ~ "nonYY1") )
cC3_Allelic_FoldChange_nonfilt_yy1 <- mutate(cC3_Allelic_FoldChange,
YY1target = case_when(
cC3_Allelic_FoldChange$Geneid %in% GeneTSS_YY1prom[,4] ~ "YY1",
!(cC3_Allelic_FoldChange$Geneid %in% GeneTSS_YY1prom[,4]) ~ "nonYY1") )
cC3_Allelic_FoldChange_melt <- AllelicFoldChangetable.melt(cC3_Allelic_FoldChange_yy1)
# plot boxplots of log2foldchange
#aF1
aF1_log2foldchange <- mutate(aF1_Allelic_FoldChange_melt, treatment = factor(treatment, levels=c("d0","2d","6d"))) %>%
ggplot(aes(x=treatment,y=log2foldchange,fill=allele) ) + theme_bw() + theme(axis.text.x=element_blank()) +
geom_boxplot() +
scale_y_continuous(expand = c(0, 0), limits = c(-4, 4)) +
geom_hline(yintercept=0, linetype="dashed")
#cC3
cC3_log2foldchange <- mutate(cC3_Allelic_FoldChange_melt, treatment = factor(treatment, levels=c("d0","2d","6d"))) %>%
ggplot(aes(x=treatment,y=log2foldchange,fill=allele) ) + theme_bw() + theme(axis.text.x=element_blank()) +
geom_boxplot() +
scale_y_continuous(expand = c(0, 0), limits = c(-4, 4)) +
geom_hline(yintercept=0, linetype="dashed") # + stat_compare_means(method = "t.test",label.y = 3,paired = TRUE) # + facet_grid(cols= vars(YY1target))
grid.arrange(aF1_log2foldchange, cC3_log2foldchange, nrow = 1)
# plot boxplots of log2foldchange by YY1, plot all alleles
aF1_Allelic_FoldChange_melt$alleleYY1 <- interaction(aF1_Allelic_FoldChange_melt$YY1target,aF1_Allelic_FoldChange_melt$allele)
aF1_log2foldchange_byYY1_all <- mutate(aF1_Allelic_FoldChange_melt, treatment = factor(treatment, levels=c("d0","2d","6d"))) %>%
ggplot(aes(x=treatment,y=log2foldchange,fill=alleleYY1) ) + theme_bw() + theme(axis.text.x=element_blank()) +
geom_boxplot() +
scale_y_continuous(expand = c(0, 0), limits = c(-4, 4)) +
geom_hline(yintercept=0, linetype="dashed")
cC3_Allelic_FoldChange_melt$alleleYY1 <- interaction(cC3_Allelic_FoldChange_melt$YY1target,cC3_Allelic_FoldChange_melt$allele)
cC3_log2foldchange_byYY1_all <- mutate(cC3_Allelic_FoldChange_melt, treatment = factor(treatment, levels=c("d0","2d","6d"))) %>%
ggplot(aes(x=treatment,y=log2foldchange,fill=alleleYY1) ) + theme_bw() + theme(axis.text.x=element_blank()) +
geom_boxplot() +
scale_y_continuous(expand = c(0, 0), limits = c(-4, 4)) +
geom_hline(yintercept=0, linetype="dashed")
grid.arrange(aF1_log2foldchange_byYY1_all, cC3_log2foldchange_byYY1_all, nrow = 1)
# Plot only shows allele containing _iXist_, but with calculations of p values Wilcoxon rank-sum test
cC3_Xi_pvals <- subset(cC3_Allelic_FoldChange_melt, allele %in% c("g1")) %>%
mutate(treatment = factor(treatment, levels=c("d0","2d","6d"))) %>%
ggplot(aes(x=treatment,y=log2foldchange,fill=alleleYY1) ) +
theme_bw() + theme(axis.text.x=element_blank()) +
geom_boxplot() +
scale_y_continuous(expand = c(0, 0), limits = c(-4, 4)) +
geom_hline(yintercept=0, linetype="dashed") +
stat_compare_means(method = "wilcox.test", label="p.format",label.y=1)
cC3_Xi_pvals
aF1_Xi_pvals <- subset(aF1_Allelic_FoldChange_melt, allele %in% c("g2")) %>%
mutate(treatment = factor(treatment, levels=c("d0","2d","6d"))) %>%
ggplot(aes(x=treatment,y=log2foldchange,fill=alleleYY1) ) +
theme_bw() + theme(axis.text.x=element_blank()) +
geom_boxplot() +
scale_y_continuous(expand = c(0, 0), limits = c(-4, 4)) +
geom_hline(yintercept=0, linetype="dashed") +
stat_compare_means(method = "wilcox.test", label="p.format",label.y=1)
aF1_Xi_pvals
# p values for Xa differences with a Wilcoxon rank-sum test
cC3_Xi_pvals <- subset(cC3_Allelic_FoldChange_melt, allele %in% c("g2")) %>%
mutate(treatment = factor(treatment, levels=c("d0","2d","6d"))) %>%
ggplot(aes(x=treatment,y=log2foldchange,fill=alleleYY1) ) +
theme_bw() + theme(axis.text.x=element_blank()) +
geom_boxplot() +
scale_y_continuous(expand = c(0, 0), limits = c(-4, 4)) +
geom_hline(yintercept=0, linetype="dashed") +
stat_compare_means(method = "wilcox.test", label="p.format",label.y=1)
cC3_Xi_pvals
aF1_Xi_pvals <- subset(aF1_Allelic_FoldChange_melt, allele %in% c("g1")) %>%
mutate(treatment = factor(treatment, levels=c("d0","2d","6d"))) %>%
ggplot(aes(x=treatment,y=log2foldchange,fill=alleleYY1) ) +
theme_bw() + theme(axis.text.x=element_blank()) +
geom_boxplot() +
scale_y_continuous(expand = c(0, 0), limits = c(-4, 4)) +
geom_hline(yintercept=0, linetype="dashed") +
stat_compare_means(method = "wilcox.test", label="p.format",label.y=1)
aF1_Xi_pvals