Load required packages and functions
library(readr)
library(plyr)
library(dplyr)
library(reshape)
library(ggpubr)
library(gridExtra)
#custom functions
give.n <- function(x){
return(c(y = 0, label = length(x)))}
AR.table <- function(counts_table){
AllelicRatio_table <- counts_table[1:6]
for (j in seq(8,ncol(counts_table),3)){
AllelicRatio_table <- cbind(AllelicRatio_table,
Allelic.Ratio(as.matrix(counts_table[j]),
as.matrix(counts_table[j+1]) ) )
}
return(AllelicRatio_table)
}
Allelic.Ratio <- function(g1_counts,g2_counts){
Xa <- as.numeric(g1_counts)
Xi <- as.numeric(g2_counts)
AllelicRatio <- ( Xi / (Xi + Xa))
return(AllelicRatio)
}
Allelic.Filter.OverFracSamples <- function(counts_table,cutoff,fraction){
counts_table_g1 <- counts_table[,seq(8,ncol(counts_table),3)]
counts_table_g2 <- counts_table[,seq(9,ncol(counts_table),3)]
counts_g1g2 <- counts_table_g1 + counts_table_g2
counts_table_filter <- data.frame()
for (i in c(1:nrow(counts_g1g2))){
pass <- length((which(counts_g1g2[i,] > cutoff) == "TRUE"))
if (pass >= fraction*(ncol(counts_g1g2))){
counts_table_filter <- rbind(counts_table_filter,counts_table[i,])
}
}
return(counts_table_filter)
}
Allelic.Ratio.Reverse <- function(g1_counts,g2_counts){
Xi <- as.numeric(g1_counts)
Xa <- as.numeric(g2_counts)
AllelicRatio <- ( Xi / (Xi + Xa))
return(AllelicRatio)
}
AR.table.reverse <- function(counts_table){
AllelicRatio_table <- counts_table[1:6]
for (j in seq(8,ncol(counts_table),3)){
AllelicRatio_table <- cbind(AllelicRatio_table,
Allelic.Ratio.Reverse(as.matrix(counts_table[j]),
as.matrix(counts_table[j+1]) ) )
}
return(AllelicRatio_table)
}
SmcHD1_ATAC_counts_path <- "/path/to/folder/with/SmcHD1_KO/ATAC/data/"
data_path <- "/path/to/folder/with/chrRNA/data/"
#iXist-ChrX-Dom WT vs SmcHD1 KO ATAC-seq counts
TN_A11B2_ATAC_ftCounts_chrX1 <- read_delim(paste0(SmcHD1_ATAC_counts_path,"A11B2_SmcHD1_ATAC_ftCounts_chrX1"),
delim = "\t", escape_double = FALSE, trim_ws = TRUE, skip = 1)
colnames(TN_A11B2_ATAC_ftCounts_chrX1) <- c("Peakid","Chr","Start","End","Strand","Length",
"A11B2_ES_d0","A11B2_ES_d0.genome1","A11B2_ES_d0.genome2",
"A11B2_NPC_d3","A11B2_NPC_d3.genome1","A11B2_NPC_d3.genome2",
"A11B2_NPC_d5","A11B2_NPC_d5.genome1","A11B2_NPC_d5.genome2",
"A11B2_NPC_d7","A11B2_NPC_d7.genome1","A11B2_NPC_d7.genome2",
"A11B2_NPC_d15","A11B2_NPC_d15.genome1","A11B2_NPC_d15.genome2",
"SmA3_ES_d0","SmA3_ES_d0.genome1","SmA3_ES_d0.genome2",
"SmA3_NPC_d3","SmA3_NPC_d3.genome1","SmA3_NPC_d3.genome2",
"SmA3_NPC_d5","SmA3_NPC_d5.genome1","SmA3_NPC_d5.genome2",
"SmA3_NPC_d7","SmA3_NPC_d7.genome1","SmA3_NPC_d7.genome2",
"SmA3_NPC_d15","SmA3_NPC_d15.genome1","SmA3_NPC_d15.genome2")
#iXist-ChrX-Cast WT vs SmcHD1 KO ATAC-seqcounts
TN_C7H8_ATAC_ftCounts_chrX <- read_delim(paste0(SmcHD1_ATAC_counts_path,"C7H8_SmcHD1_ATAC_ftCounts_chrX"),
delim = "\t", escape_double = FALSE, trim_ws = TRUE, skip = 1)
colnames(TN_C7H8_ATAC_ftCounts_chrX) <- c("Peakid","Chr","Start","End","Strand","Length",
"C7H8_ES_d0","C7H8_ES_d0.genome1","C7H8_ES_d0.genome2",
"C7H8_NPC_d3","C7H8_NPC_d3.genome1","C7H8_NPC_d3.genome2",
"C7H8_NPC_d5","C7H8_NPC_d5.genome1","C7H8_NPC_d5.genome2",
"C7H8_NPC_d7","C7H8_NPC_d7.genome1","C7H8_NPC_d7.genome2",
"C7H8_NPC_d15","C7H8_NPC_d15.genome1","C7H8_NPC_d15.genome2",
"SmD5_ES_d0","SmD5_ES_d0.genome1","SmD5_ES_d0.genome2",
"SmD5_NPC_d3","SmD5_NPC_d3.genome1","SmD5_NPC_d3.genome2",
"SmD5_NPC_d5","SmD5_NPC_d5.genome1","SmD5_NPC_d5.genome2",
"SmD5_NPC_d7","SmD5_NPC_d7.genome1","SmD5_NPC_d7.genome2",
"SmD5_NPC_d15","SmD5_NPC_d15.genome1","SmD5_NPC_d15.genome2")
#Load annotations of YY1-binding ATAC-seq peaks
A11B2_ConsensusPeaks_YY1anno_chrX <- read_table(paste0(SmcHD1_ATAC_counts_path,"A11B2_ConsensusPeaks.YY1anno.chrX1.bed"),
col_names = FALSE)
C7H8_ConsensusPeaks_YY1anno_chrX <- read_table(paste0(SmcHD1_ATAC_counts_path,"C7H8_ConsensusPeaks.YY1anno.chrX.bed"),
col_names = FALSE)
#Load annotatiosn of CTCF-binding ATAC-seq peaks
A11B2_ConsensusPeaks_CTCFanno_chrX <- read_table(paste0(SmcHD1_ATAC_counts_path,"A11B2_ConsensusPeaks.CTCFanno.chrX1.bed"),
col_names = FALSE)
C7H8_ConsensusPeaks_CTCFanno_chrX <- read_table(paste0(SmcHD1_ATAC_counts_path,"C7H8_ConsensusPeaks.CTCFanno.chrX.bed"),
col_names = FALSE)
## Subset peaks by allelic filters for ATAC peaks
TN_A11B2_ATAC_ftCounts_filt <- Allelic.Filter.OverFracSamples(TN_A11B2_ATAC_ftCounts_chrX1,10,0.8)
#Make AR table
TN_A11B2_ATAC_AR_table <- AR.table(TN_A11B2_ATAC_ftCounts_filt)
colnames(TN_A11B2_ATAC_AR_table) <- c("Peakid","Chr","Start","End","Strand","Length",
"A11B2_ES_d0","A11B2_NPC_d3","A11B2_NPC_d5","A11B2_NPC_d7","A11B2_NPC_d15",
"SmA3_ES_d0","SmA3_NPC_d3", "SmA3_NPC_d5","SmA3_NPC_d7","SmA3_NPC_d15")
#apply initial allelic filter by average of day 0 in A11B2 and SmA3
AR_table_filter1 <- subset(TN_A11B2_ATAC_AR_table, 0.2 <= rowMeans(TN_A11B2_ATAC_AR_table[,c("A11B2_ES_d0","SmA3_ES_d0")]))
TN_A11B2_ATAC_AR_table <- subset(AR_table_filter1, 0.8 >= rowMeans(AR_table_filter1[,c("A11B2_ES_d0","SmA3_ES_d0")]))
rm(TN_A11B2_ATAC_ftCounts_filt, AR_table_filter1)
#Allelic Ratio boxplot
boxplot.matrix(as.matrix(TN_A11B2_ATAC_AR_table[7:ncol(TN_A11B2_ATAC_AR_table)]), use.cols=TRUE,
ylab = 'chrRNA Allelic Ratio (Xi/(Xi+Xa))',
ylim=c(0,1))
abline(h=0.5)
#Label peaks by YY1 binding
A11B2_ConsensusPeaks_YY1anno_chrX <- A11B2_ConsensusPeaks_YY1anno_chrX[which(A11B2_ConsensusPeaks_YY1anno_chrX$X1 %in% TN_A11B2_ATAC_AR_table$Peakid),]
TN_A11B2_ATAC_AR_table <- mutate(TN_A11B2_ATAC_AR_table,
YY1target = case_when(
A11B2_ConsensusPeaks_YY1anno_chrX$X5 == 1 ~ "YY1bound",
A11B2_ConsensusPeaks_YY1anno_chrX$X5 == 0 ~ "nonYY1"))
#plot ribbon WT vs SmcHD1 KO
#separate WT from SmA3
TN_A11B2_WT_AR_table <- TN_A11B2_ATAC_AR_table[,c(1:6,17,7:11)]
TN_A11B2_SmA3_AR_table <- TN_A11B2_ATAC_AR_table[,c(1:6,17,12:16)]
timepoints <- c(0,3,5,7,15)
colnames(TN_A11B2_WT_AR_table)[8:12] <- as.numeric(timepoints)
colnames(TN_A11B2_SmA3_AR_table)[8:12] <- as.numeric(timepoints)
TN_A11B2_WT_ARsummary <- as.array(apply(TN_A11B2_WT_AR_table[,8:ncol(TN_A11B2_WT_AR_table)], 2, summary))
TN_A11B2_WT_ARsummary_t <- data.frame(timepoints, t(TN_A11B2_WT_ARsummary))
colnames(TN_A11B2_WT_ARsummary_t) <- c("time","Min","Lower","Median","Mean","Upper","Max")
TN_A11B2_SmA3_ARsummary <- as.array(apply(TN_A11B2_SmA3_AR_table[,8:ncol(TN_A11B2_SmA3_AR_table)], 2, summary))
TN_A11B2_SmA3_ARsummary_t <- data.frame(timepoints, t(TN_A11B2_SmA3_ARsummary))
colnames(TN_A11B2_SmA3_ARsummary_t) <- c("time","Min","Lower","Median","Mean","Upper","Max")
A11B2_WTvSmA3_ribbon <- ggplot() + theme_light() +
coord_cartesian(xlim =c(0,15), ylim = c(0,0.8),expand = FALSE) +
#WT
geom_ribbon(data=TN_A11B2_WT_ARsummary_t, aes(x=time, y=Median, group=1, ymin=Lower, ymax=Upper), alpha=0.4, fill="darkgrey") +
geom_point(data=TN_A11B2_WT_ARsummary_t, aes(x=time,y=Median, group=1)) +
geom_line(data=TN_A11B2_WT_ARsummary_t, aes(x=time,y=Median, group=1), linetype="dotted") +
#SmA3
geom_ribbon(data=TN_A11B2_SmA3_ARsummary_t, aes(x=time, y=Median, group=1, ymin=Lower, ymax=Upper), alpha=0.2, fill="purple") +
geom_point(data=TN_A11B2_SmA3_ARsummary_t, aes(x=time,y=Median, group=1), colour="purple") +
geom_line(data=TN_A11B2_SmA3_ARsummary_t, aes(x=time,y=Median, group=1), colour="purple", linetype="dotted")
A11B2_WTvSmA3_ribbon
#plot boxplot WT vs mutant
TN_A11B2_ATAC_AR_table_melt <- melt(TN_A11B2_ATAC_AR_table, id=c("Peakid","Chr","Start","End","Strand","Length","YY1target"),
stringsAsFactors = FALSE)
colnames(TN_A11B2_ATAC_AR_table_melt)[(ncol(TN_A11B2_ATAC_AR_table_melt)-1):ncol(TN_A11B2_ATAC_AR_table_melt)] <- c("sample","ratio")
TN_A11B2_ATAC_AR_table_melt$sample <- as.character(TN_A11B2_ATAC_AR_table_melt$sample)
TN_A11B2_ATAC_AR_YY1target_boxplot <- subset(TN_A11B2_ATAC_AR_table_melt, sample %in% c("A11B2_ES_d0","A11B2_NPC_d3","A11B2_NPC_d5","A11B2_NPC_d7","A11B2_NPC_d15")) %>%
mutate(YY1target = factor(YY1target, levels=c("nonYY1", "YY1bound"))) %>%
mutate(sample = factor(sample, levels=c("A11B2_ES_d0","A11B2_NPC_d3","A11B2_NPC_d5","A11B2_NPC_d7","A11B2_NPC_d15"))) %>%
ggplot(aes(x=sample,y=ratio,fill=YY1target) ) + theme_bw() + theme(axis.text.x=element_blank()) +
geom_boxplot() +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
geom_hline(yintercept=0.5, linetype="dashed") +
stat_compare_means(method = "t.test", label="p.format",label.y=1, vjust = 2)
TN_SmA3_ATAC_AR_YY1target_boxplot <- subset(TN_A11B2_ATAC_AR_table_melt, sample %in% c("SmA3_ES_d0","SmA3_NPC_d3", "SmA3_NPC_d5","SmA3_NPC_d7","SmA3_NPC_d15")) %>%
mutate(YY1target = factor(YY1target, levels=c("nonYY1", "YY1bound"))) %>%
mutate(sample = factor(sample, levels=c("SmA3_ES_d0","SmA3_NPC_d3", "SmA3_NPC_d5","SmA3_NPC_d7","SmA3_NPC_d15"))) %>%
ggplot(aes(x=sample,y=ratio,fill=YY1target) ) + theme_bw() + theme(axis.text.x=element_blank()) +
geom_boxplot() +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
geom_hline(yintercept=0.5, linetype="dashed") +
stat_compare_means(method = "t.test", label="p.format",label.y=1, vjust = 2)
grid.arrange(TN_A11B2_ATAC_AR_YY1target_boxplot, TN_SmA3_ATAC_AR_YY1target_boxplot, nrow = 1)
grid.arrange(TN_A11B2_ATAC_AR_YY1target_boxplot, TN_SmA3_ATAC_AR_YY1target_boxplot, nrow = 2)
#only day 3 and day 7
TN_A11B2_ATAC_AR_YY1target_boxplot_day3day7 <- subset(TN_A11B2_ATAC_AR_table_melt, sample %in% c("A11B2_NPC_d3","A11B2_NPC_d7")) %>%
mutate(YY1target = factor(YY1target, levels=c("nonYY1", "YY1bound"))) %>%
mutate(sample = factor(sample, levels=c("A11B2_NPC_d3","A11B2_NPC_d7"))) %>%
ggplot(aes(x=sample,y=ratio,fill=YY1target) ) + theme_bw() + theme(axis.text.x=element_blank()) +
geom_boxplot() +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
geom_hline(yintercept=0.5, linetype="dashed")
TN_SmA3_ATAC_AR_YY1target_boxplot_day3day7 <- subset(TN_A11B2_ATAC_AR_table_melt, sample %in% c("SmA3_NPC_d3","SmA3_NPC_d7")) %>%
mutate(YY1target = factor(YY1target, levels=c("nonYY1", "YY1bound"))) %>%
mutate(sample = factor(sample, levels=c("SmA3_NPC_d3","SmA3_NPC_d7"))) %>%
ggplot(aes(x=sample,y=ratio,fill=YY1target) ) + theme_bw() + theme(axis.text.x=element_blank()) +
geom_boxplot() +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
geom_hline(yintercept=0.5, linetype="dashed")
grid.arrange(TN_A11B2_ATAC_AR_YY1target_boxplot_day3day7, TN_SmA3_ATAC_AR_YY1target_boxplot_day3day7, nrow = 1)
Save output file of ATAC-seq allelic ratios for iXist-ChrX-Dom vs SmA3
#write.table(TN_A11B2_ATAC_AR_table,
# row.names = FALSE, col.names = TRUE, quote = FALSE,
# paste0(data_path,"iXistChrX_Dom_WTvsSmA3_AR_table.txt"), sep="\t")
## Subset peaks by allelic filters for ATAC peaks
TN_C7H8_ATAC_ftCounts_filt <- Allelic.Filter.OverFracSamples(TN_C7H8_ATAC_ftCounts_chrX,10,0.8)
#Make AR table and AR filter
TN_C7H8_ATAC_AR_table <- AR.table.reverse(TN_C7H8_ATAC_ftCounts_filt)
colnames(TN_C7H8_ATAC_AR_table) <- c("Peakid","Chr","Start","End","Strand","Length",
"C7H8_ES_d0","C7H8_NPC_d3","C7H8_NPC_d5","C7H8_NPC_d7","C7H8_NPC_d15",
"SmD5_ES_d0","SmD5_NPC_d3", "SmD5_NPC_d5","SmD5_NPC_d7","SmD5_NPC_d15")
#apply initial allelic filter by average of day 0 in C7H8 and SmD5
AR_table_filter1 <- subset(TN_C7H8_ATAC_AR_table, 0.2 <= rowMeans(TN_C7H8_ATAC_AR_table[,c("C7H8_ES_d0","SmD5_ES_d0")]))
TN_C7H8_ATAC_AR_table <- subset(AR_table_filter1, 0.8 >= rowMeans(AR_table_filter1[,c("C7H8_ES_d0","SmD5_ES_d0")]))
rm(TN_C7H8_ATAC_ftCounts_filt, AR_table_filter1)
#remove row with NA
TN_C7H8_ATAC_AR_table <- na.omit(TN_C7H8_ATAC_AR_table)
#Allelic Ratio boxplot
boxplot.matrix(as.matrix(TN_C7H8_ATAC_AR_table[7:ncol(TN_C7H8_ATAC_AR_table)]), use.cols=TRUE,
ylab = 'chrRNA Allelic Ratio (Xi/(Xi+Xa))',
ylim=c(0,1))
abline(h=0.5)
#Label peaks by YY1 binding
C7H8_ConsensusPeaks_YY1anno_chrX <- C7H8_ConsensusPeaks_YY1anno_chrX[which(C7H8_ConsensusPeaks_YY1anno_chrX$X1 %in% TN_C7H8_ATAC_AR_table$Peakid),]
TN_C7H8_ATAC_AR_table <- mutate(TN_C7H8_ATAC_AR_table,
YY1target = case_when(
C7H8_ConsensusPeaks_YY1anno_chrX$X5 == 1 ~ "YY1bound",
C7H8_ConsensusPeaks_YY1anno_chrX$X5 == 0 ~ "nonYY1"))
#plot ribbon WT vs SmcHD1 KO
#separate WT from SmD5
TN_C7H8_WT_AR_table <- TN_C7H8_ATAC_AR_table[,c(1:6,17,7:11)]
TN_C7H8_SmD5_AR_table <- TN_C7H8_ATAC_AR_table[,c(1:6,17,12:16)]
timepoints <- c(0,3,5,7,15)
colnames(TN_C7H8_WT_AR_table)[8:12] <- as.numeric(timepoints)
colnames(TN_C7H8_SmD5_AR_table)[8:12] <- as.numeric(timepoints)
TN_C7H8_WT_ARsummary <- as.array(apply(TN_C7H8_WT_AR_table[,8:ncol(TN_C7H8_WT_AR_table)], 2, summary))
TN_C7H8_WT_ARsummary_t <- data.frame(timepoints, t(TN_C7H8_WT_ARsummary))
colnames(TN_C7H8_WT_ARsummary_t) <- c("time","Min","Lower","Median","Mean","Upper","Max")
TN_C7H8_SmD5_ARsummary <- as.array(apply(TN_C7H8_SmD5_AR_table[,8:ncol(TN_C7H8_SmD5_AR_table)], 2, summary))
TN_C7H8_SmD5_ARsummary_t <- data.frame(timepoints, t(TN_C7H8_SmD5_ARsummary))
colnames(TN_C7H8_SmD5_ARsummary_t) <- c("time","Min","Lower","Median","Mean","Upper","Max")
C7H8_WTvSmD5_ribbon <- ggplot() + theme_light() +
coord_cartesian(xlim =c(0,15), ylim = c(0,0.8),expand = FALSE) +
#WT
geom_ribbon(data=TN_C7H8_WT_ARsummary_t, aes(x=time, y=Median, group=1, ymin=Lower, ymax=Upper), alpha=0.4, fill="darkgrey") +
geom_point(data=TN_C7H8_WT_ARsummary_t, aes(x=time,y=Median, group=1)) +
geom_line(data=TN_C7H8_WT_ARsummary_t, aes(x=time,y=Median, group=1), linetype="dotted") +
#SmD5
geom_ribbon(data=TN_C7H8_SmD5_ARsummary_t, aes(x=time, y=Median, group=1, ymin=Lower, ymax=Upper), alpha=0.2, fill="purple") +
geom_point(data=TN_C7H8_SmD5_ARsummary_t, aes(x=time,y=Median, group=1), colour="purple") +
geom_line(data=TN_C7H8_SmD5_ARsummary_t, aes(x=time,y=Median, group=1), colour="purple", linetype="dotted")
C7H8_WTvSmD5_ribbon
#plot boxplot WT vs mutant
TN_C7H8_ATAC_AR_table_melt <- melt(TN_C7H8_ATAC_AR_table, id=c("Peakid","Chr","Start","End","Strand","Length","YY1target"),
stringsAsFactors = FALSE)
colnames(TN_C7H8_ATAC_AR_table_melt)[(ncol(TN_C7H8_ATAC_AR_table_melt)-1):ncol(TN_C7H8_ATAC_AR_table_melt)] <- c("sample","ratio")
TN_C7H8_ATAC_AR_table_melt$sample <- as.character(TN_C7H8_ATAC_AR_table_melt$sample)
TN_C7H8_ATAC_AR_YY1target_boxplot <- subset(TN_C7H8_ATAC_AR_table_melt, sample %in% c("C7H8_ES_d0","C7H8_NPC_d3","C7H8_NPC_d5","C7H8_NPC_d7","C7H8_NPC_d15")) %>%
mutate(YY1target = factor(YY1target, levels=c("nonYY1", "YY1bound"))) %>%
mutate(sample = factor(sample, levels=c("C7H8_ES_d0","C7H8_NPC_d3","C7H8_NPC_d5","C7H8_NPC_d7","C7H8_NPC_d15"))) %>%
ggplot(aes(x=sample,y=ratio,fill=YY1target) ) + theme_bw() + theme(axis.text.x=element_blank()) +
geom_boxplot() +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
geom_hline(yintercept=0.5, linetype="dashed") +
stat_compare_means(method = "t.test", label="p.format",label.y=1, vjust = 2)
TN_SmD5_ATAC_AR_YY1target_boxplot <- subset(TN_C7H8_ATAC_AR_table_melt, sample %in% c("SmD5_ES_d0","SmD5_NPC_d3", "SmD5_NPC_d5","SmD5_NPC_d7","SmD5_NPC_d15")) %>%
mutate(YY1target = factor(YY1target, levels=c("nonYY1", "YY1bound"))) %>%
mutate(sample = factor(sample, levels=c("SmD5_ES_d0","SmD5_NPC_d3", "SmD5_NPC_d5","SmD5_NPC_d7","SmD5_NPC_d15"))) %>%
ggplot(aes(x=sample,y=ratio,fill=YY1target) ) + theme_bw() + theme(axis.text.x=element_blank()) +
geom_boxplot() +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
geom_hline(yintercept=0.5, linetype="dashed") +
stat_compare_means(method = "t.test", label="p.format",label.y=1, vjust = 2)
grid.arrange(TN_C7H8_ATAC_AR_YY1target_boxplot, TN_SmD5_ATAC_AR_YY1target_boxplot, nrow = 1)
grid.arrange(TN_C7H8_ATAC_AR_YY1target_boxplot, TN_SmD5_ATAC_AR_YY1target_boxplot, nrow = 2)
#only day 3 and day 7
TN_C7H8_ATAC_AR_YY1target_boxplot_day3day7 <- subset(TN_C7H8_ATAC_AR_table_melt, sample %in% c("C7H8_NPC_d3","C7H8_NPC_d7","SmD5_NPC_d3","SmD5_NPC_d7")) %>%
mutate(YY1target = factor(YY1target, levels=c("nonYY1", "YY1bound"))) %>%
mutate(sample = factor(sample, levels=c("C7H8_NPC_d3","SmD5_NPC_d3","C7H8_NPC_d7","SmD5_NPC_d7"))) %>%
ggplot(aes(x=sample,y=ratio,fill=YY1target) ) + theme_bw() + theme(axis.text.x=element_blank()) +
geom_boxplot() +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
geom_hline(yintercept=0.5, linetype="dashed")
TN_C7H8_ATAC_AR_YY1target_boxplot_day3day7
Save output file of ATAC-seq allelic ratios for iXist-ChrX-Cast vs SmD5
#write.table(TN_C7H8_ATAC_AR_table,
# row.names = FALSE, col.names = TRUE, quote = FALSE,
# paste0(data_path,"iXistChrX_Cast_WTvsSmD5_AR_table.txt"), sep="\t")
iXist-ChrX-Dom WT vs SmcHD1 KO (SmA3) - allelic ratio plots by CTCF binding
#Label peaks by CTCF binding
A11B2_ConsensusPeaks_CTCFanno_chrX <- A11B2_ConsensusPeaks_CTCFanno_chrX[which(A11B2_ConsensusPeaks_CTCFanno_chrX$X1 %in% TN_A11B2_ATAC_AR_table$Peakid),]
TN_A11B2_ATAC_AR_table <- mutate(TN_A11B2_ATAC_AR_table,
CTCFtarget = case_when(
A11B2_ConsensusPeaks_CTCFanno_chrX$X5 == 1 ~ "CTCFbound",
A11B2_ConsensusPeaks_CTCFanno_chrX$X5 == 0 ~ "nonCTCF"))
#plot boxplot WT vs mutant
TN_A11B2_ATAC_AR_table_melt <- melt(TN_A11B2_ATAC_AR_table, id=c("Peakid","Chr","Start","End","Strand","Length","YY1target","CTCFtarget"),
stringsAsFactors = FALSE)
colnames(TN_A11B2_ATAC_AR_table_melt)[(ncol(TN_A11B2_ATAC_AR_table_melt)-1):ncol(TN_A11B2_ATAC_AR_table_melt)] <- c("sample","ratio")
TN_A11B2_ATAC_AR_table_melt$sample <- as.character(TN_A11B2_ATAC_AR_table_melt$sample)
TN_A11B2_ATAC_AR_CTCFtarget_boxplot <- subset(TN_A11B2_ATAC_AR_table_melt, sample %in% c("A11B2_ES_d0","A11B2_NPC_d3","A11B2_NPC_d5","A11B2_NPC_d7","A11B2_NPC_d15")) %>%
mutate(CTCFtarget = factor(CTCFtarget, levels=c("nonCTCF", "CTCFbound"))) %>%
mutate(sample = factor(sample, levels=c("A11B2_ES_d0","A11B2_NPC_d3","A11B2_NPC_d5","A11B2_NPC_d7","A11B2_NPC_d15"))) %>%
ggplot(aes(x=sample,y=ratio,fill=CTCFtarget) ) + theme_bw() + theme(axis.text.x=element_blank()) +
geom_boxplot() +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
geom_hline(yintercept=0.5, linetype="dashed") +
stat_compare_means(method = "wilcox.test", label="p.format",label.y=1, vjust = 2)
TN_SmA3_ATAC_AR_CTCFtarget_boxplot <- subset(TN_A11B2_ATAC_AR_table_melt, sample %in% c("SmA3_ES_d0","SmA3_NPC_d3", "SmA3_NPC_d5","SmA3_NPC_d7","SmA3_NPC_d15")) %>%
mutate(CTCFtarget = factor(CTCFtarget, levels=c("nonCTCF", "CTCFbound"))) %>%
mutate(sample = factor(sample, levels=c("SmA3_ES_d0","SmA3_NPC_d3", "SmA3_NPC_d5","SmA3_NPC_d7","SmA3_NPC_d15"))) %>%
ggplot(aes(x=sample,y=ratio,fill=CTCFtarget) ) + theme_bw() + theme(axis.text.x=element_blank()) +
geom_boxplot() +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
geom_hline(yintercept=0.5, linetype="dashed") +
stat_compare_means(method = "wilcox.test", label="p.format",label.y=1, vjust = 2)
grid.arrange(TN_A11B2_ATAC_AR_CTCFtarget_boxplot, TN_SmA3_ATAC_AR_CTCFtarget_boxplot, nrow = 1)
grid.arrange(TN_A11B2_ATAC_AR_CTCFtarget_boxplot, TN_SmA3_ATAC_AR_CTCFtarget_boxplot, nrow = 2)
iXist-ChrX-Cast WT vs SmcHD1 KO (SmD5) - allelic ratio plots by CTCF
#Label peaks by YY1 binding
C7H8_ConsensusPeaks_CTCFanno_chrX <- C7H8_ConsensusPeaks_CTCFanno_chrX[which(C7H8_ConsensusPeaks_CTCFanno_chrX$X1 %in% TN_C7H8_ATAC_AR_table$Peakid),]
TN_C7H8_ATAC_AR_table <- mutate(TN_C7H8_ATAC_AR_table,
CTCFtarget = case_when(
C7H8_ConsensusPeaks_CTCFanno_chrX$X5 == 1 ~ "CTCFbound",
C7H8_ConsensusPeaks_CTCFanno_chrX$X5 == 0 ~ "nonCTCF"))
#plot boxplot WT vs mutant
TN_C7H8_ATAC_AR_table_melt <- melt(TN_C7H8_ATAC_AR_table, id=c("Peakid","Chr","Start","End","Strand","Length","YY1target","CTCFtarget"),
stringsAsFactors = FALSE)
colnames(TN_C7H8_ATAC_AR_table_melt)[(ncol(TN_C7H8_ATAC_AR_table_melt)-1):ncol(TN_C7H8_ATAC_AR_table_melt)] <- c("sample","ratio")
TN_C7H8_ATAC_AR_table_melt$sample <- as.character(TN_C7H8_ATAC_AR_table_melt$sample)
TN_C7H8_ATAC_AR_CTCFtarget_boxplot <- subset(TN_C7H8_ATAC_AR_table_melt, sample %in% c("C7H8_ES_d0","C7H8_NPC_d3","C7H8_NPC_d5","C7H8_NPC_d7","C7H8_NPC_d15")) %>%
mutate(CTCFtarget = factor(CTCFtarget, levels=c("nonCTCF", "CTCFbound"))) %>%
mutate(sample = factor(sample, levels=c("C7H8_ES_d0","C7H8_NPC_d3","C7H8_NPC_d5","C7H8_NPC_d7","C7H8_NPC_d15"))) %>%
ggplot(aes(x=sample,y=ratio,fill=CTCFtarget) ) + theme_bw() + theme(axis.text.x=element_blank()) +
geom_boxplot() +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
geom_hline(yintercept=0.5, linetype="dashed") +
stat_compare_means(method = "t.test", label="p.format",label.y=1, vjust = 2)
TN_SmD5_ATAC_AR_CTCFtarget_boxplot <- subset(TN_C7H8_ATAC_AR_table_melt, sample %in% c("SmD5_ES_d0","SmD5_NPC_d3", "SmD5_NPC_d5","SmD5_NPC_d7","SmD5_NPC_d15")) %>%
mutate(CTCFtarget = factor(CTCFtarget, levels=c("nonCTCF", "CTCFbound"))) %>%
mutate(sample = factor(sample, levels=c("SmD5_ES_d0","SmD5_NPC_d3", "SmD5_NPC_d5","SmD5_NPC_d7","SmD5_NPC_d15"))) %>%
ggplot(aes(x=sample,y=ratio,fill=CTCFtarget) ) + theme_bw() + theme(axis.text.x=element_blank()) +
geom_boxplot() +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
geom_hline(yintercept=0.5, linetype="dashed") +
stat_compare_means(method = "t.test", label="p.format",label.y=1, vjust = 2)
grid.arrange(TN_C7H8_ATAC_AR_CTCFtarget_boxplot, TN_SmD5_ATAC_AR_CTCFtarget_boxplot, nrow = 1)
grid.arrange(TN_C7H8_ATAC_AR_CTCFtarget_boxplot, TN_SmD5_ATAC_AR_CTCFtarget_boxplot, nrow = 2)