Load required packages and functions

library(readr)
library(plyr)
library(dplyr)
library(reshape)
library(ggpubr)
library(gridExtra)

#custom functions
give.n <- function(x){
  return(c(y = 0, label = length(x)))}
AR.table <- function(counts_table){
  AllelicRatio_table <- counts_table[1:6]
  for (j in seq(8,ncol(counts_table),3)){
    AllelicRatio_table <- cbind(AllelicRatio_table,
                                Allelic.Ratio(as.matrix(counts_table[j]),
                                              as.matrix(counts_table[j+1]) ) )
  }
  return(AllelicRatio_table)
}
Allelic.Ratio <- function(g1_counts,g2_counts){
  Xa <- as.numeric(g1_counts)
  Xi <- as.numeric(g2_counts)
  AllelicRatio <- ( Xi / (Xi + Xa))
  return(AllelicRatio)
}
Allelic.Filter.OverFracSamples <- function(counts_table,cutoff,fraction){
  counts_table_g1 <- counts_table[,seq(8,ncol(counts_table),3)]
  counts_table_g2 <- counts_table[,seq(9,ncol(counts_table),3)]
  counts_g1g2 <- counts_table_g1 + counts_table_g2
  counts_table_filter <- data.frame()
  for (i in c(1:nrow(counts_g1g2))){
    pass <- length((which(counts_g1g2[i,] > cutoff) == "TRUE"))
    if (pass >= fraction*(ncol(counts_g1g2))){
      counts_table_filter <- rbind(counts_table_filter,counts_table[i,])
    }
  }
  return(counts_table_filter)
}
Allelic.Ratio.Reverse <- function(g1_counts,g2_counts){
  Xi <- as.numeric(g1_counts)
  Xa <- as.numeric(g2_counts)
  AllelicRatio <- ( Xi / (Xi + Xa))
  return(AllelicRatio)
}
AR.table.reverse <- function(counts_table){
  AllelicRatio_table <- counts_table[1:6]
  for (j in seq(8,ncol(counts_table),3)){
    AllelicRatio_table <- cbind(AllelicRatio_table,
                                Allelic.Ratio.Reverse(as.matrix(counts_table[j]),
                                                      as.matrix(counts_table[j+1]) ) )
  }
  return(AllelicRatio_table)
}

Load required data files

SmcHD1_ATAC_counts_path <- "/path/to/folder/with/SmcHD1_KO/ATAC/data/"
data_path <- "/path/to/folder/with/chrRNA/data/"
#iXist-ChrX-Dom WT vs SmcHD1 KO ATAC-seq counts
TN_A11B2_ATAC_ftCounts_chrX1 <- read_delim(paste0(SmcHD1_ATAC_counts_path,"A11B2_SmcHD1_ATAC_ftCounts_chrX1"), 
                                           delim = "\t", escape_double = FALSE, trim_ws = TRUE, skip = 1)
colnames(TN_A11B2_ATAC_ftCounts_chrX1) <- c("Peakid","Chr","Start","End","Strand","Length",
                                            "A11B2_ES_d0","A11B2_ES_d0.genome1","A11B2_ES_d0.genome2",
                                            "A11B2_NPC_d3","A11B2_NPC_d3.genome1","A11B2_NPC_d3.genome2",
                                            "A11B2_NPC_d5","A11B2_NPC_d5.genome1","A11B2_NPC_d5.genome2",
                                            "A11B2_NPC_d7","A11B2_NPC_d7.genome1","A11B2_NPC_d7.genome2",
                                            "A11B2_NPC_d15","A11B2_NPC_d15.genome1","A11B2_NPC_d15.genome2",
                                            "SmA3_ES_d0","SmA3_ES_d0.genome1","SmA3_ES_d0.genome2",
                                            "SmA3_NPC_d3","SmA3_NPC_d3.genome1","SmA3_NPC_d3.genome2",
                                            "SmA3_NPC_d5","SmA3_NPC_d5.genome1","SmA3_NPC_d5.genome2",
                                            "SmA3_NPC_d7","SmA3_NPC_d7.genome1","SmA3_NPC_d7.genome2",
                                            "SmA3_NPC_d15","SmA3_NPC_d15.genome1","SmA3_NPC_d15.genome2")
#iXist-ChrX-Cast WT vs SmcHD1 KO ATAC-seqcounts
TN_C7H8_ATAC_ftCounts_chrX <- read_delim(paste0(SmcHD1_ATAC_counts_path,"C7H8_SmcHD1_ATAC_ftCounts_chrX"), 
                                           delim = "\t", escape_double = FALSE, trim_ws = TRUE, skip = 1)
colnames(TN_C7H8_ATAC_ftCounts_chrX) <- c("Peakid","Chr","Start","End","Strand","Length",
                                            "C7H8_ES_d0","C7H8_ES_d0.genome1","C7H8_ES_d0.genome2",
                                            "C7H8_NPC_d3","C7H8_NPC_d3.genome1","C7H8_NPC_d3.genome2",
                                            "C7H8_NPC_d5","C7H8_NPC_d5.genome1","C7H8_NPC_d5.genome2",
                                            "C7H8_NPC_d7","C7H8_NPC_d7.genome1","C7H8_NPC_d7.genome2",
                                            "C7H8_NPC_d15","C7H8_NPC_d15.genome1","C7H8_NPC_d15.genome2",
                                            "SmD5_ES_d0","SmD5_ES_d0.genome1","SmD5_ES_d0.genome2",
                                            "SmD5_NPC_d3","SmD5_NPC_d3.genome1","SmD5_NPC_d3.genome2",
                                            "SmD5_NPC_d5","SmD5_NPC_d5.genome1","SmD5_NPC_d5.genome2",
                                            "SmD5_NPC_d7","SmD5_NPC_d7.genome1","SmD5_NPC_d7.genome2",
                                            "SmD5_NPC_d15","SmD5_NPC_d15.genome1","SmD5_NPC_d15.genome2")

#Load annotations of YY1-binding ATAC-seq peaks
A11B2_ConsensusPeaks_YY1anno_chrX <- read_table(paste0(SmcHD1_ATAC_counts_path,"A11B2_ConsensusPeaks.YY1anno.chrX1.bed"), 
                                                col_names = FALSE)
C7H8_ConsensusPeaks_YY1anno_chrX <- read_table(paste0(SmcHD1_ATAC_counts_path,"C7H8_ConsensusPeaks.YY1anno.chrX.bed"), 
                                                col_names = FALSE)
#Load annotatiosn of CTCF-binding ATAC-seq peaks
A11B2_ConsensusPeaks_CTCFanno_chrX <- read_table(paste0(SmcHD1_ATAC_counts_path,"A11B2_ConsensusPeaks.CTCFanno.chrX1.bed"), 
                                                col_names = FALSE)

C7H8_ConsensusPeaks_CTCFanno_chrX <- read_table(paste0(SmcHD1_ATAC_counts_path,"C7H8_ConsensusPeaks.CTCFanno.chrX.bed"), 
                                                col_names = FALSE)

iXist-ChrX-Dom WT vs SmcHD1 KO (SmA3) - plots by allelic ratio

## Subset peaks by allelic filters for ATAC peaks
TN_A11B2_ATAC_ftCounts_filt <- Allelic.Filter.OverFracSamples(TN_A11B2_ATAC_ftCounts_chrX1,10,0.8)

#Make AR table
TN_A11B2_ATAC_AR_table <- AR.table(TN_A11B2_ATAC_ftCounts_filt)
colnames(TN_A11B2_ATAC_AR_table) <- c("Peakid","Chr","Start","End","Strand","Length",
                                      "A11B2_ES_d0","A11B2_NPC_d3","A11B2_NPC_d5","A11B2_NPC_d7","A11B2_NPC_d15",
                                      "SmA3_ES_d0","SmA3_NPC_d3", "SmA3_NPC_d5","SmA3_NPC_d7","SmA3_NPC_d15")

#apply initial allelic filter by average of day 0 in A11B2 and SmA3
AR_table_filter1 <- subset(TN_A11B2_ATAC_AR_table, 0.2 <= rowMeans(TN_A11B2_ATAC_AR_table[,c("A11B2_ES_d0","SmA3_ES_d0")]))
TN_A11B2_ATAC_AR_table <- subset(AR_table_filter1, 0.8 >= rowMeans(AR_table_filter1[,c("A11B2_ES_d0","SmA3_ES_d0")]))
rm(TN_A11B2_ATAC_ftCounts_filt, AR_table_filter1)

#Allelic Ratio boxplot
boxplot.matrix(as.matrix(TN_A11B2_ATAC_AR_table[7:ncol(TN_A11B2_ATAC_AR_table)]), use.cols=TRUE,
               ylab = 'chrRNA Allelic Ratio (Xi/(Xi+Xa))',
               ylim=c(0,1))
abline(h=0.5)

#Label peaks by YY1 binding
A11B2_ConsensusPeaks_YY1anno_chrX <- A11B2_ConsensusPeaks_YY1anno_chrX[which(A11B2_ConsensusPeaks_YY1anno_chrX$X1 %in% TN_A11B2_ATAC_AR_table$Peakid),]
TN_A11B2_ATAC_AR_table <-  mutate(TN_A11B2_ATAC_AR_table,
                                        YY1target = case_when(
                                          A11B2_ConsensusPeaks_YY1anno_chrX$X5 == 1 ~ "YY1bound",
                                          A11B2_ConsensusPeaks_YY1anno_chrX$X5 == 0 ~ "nonYY1"))

#plot ribbon WT vs SmcHD1 KO
#separate WT from SmA3
TN_A11B2_WT_AR_table <- TN_A11B2_ATAC_AR_table[,c(1:6,17,7:11)]
TN_A11B2_SmA3_AR_table <- TN_A11B2_ATAC_AR_table[,c(1:6,17,12:16)]
  
timepoints <- c(0,3,5,7,15)
colnames(TN_A11B2_WT_AR_table)[8:12] <- as.numeric(timepoints)
colnames(TN_A11B2_SmA3_AR_table)[8:12] <- as.numeric(timepoints)

TN_A11B2_WT_ARsummary <- as.array(apply(TN_A11B2_WT_AR_table[,8:ncol(TN_A11B2_WT_AR_table)], 2, summary))
TN_A11B2_WT_ARsummary_t <- data.frame(timepoints, t(TN_A11B2_WT_ARsummary))
colnames(TN_A11B2_WT_ARsummary_t) <- c("time","Min","Lower","Median","Mean","Upper","Max")

TN_A11B2_SmA3_ARsummary <- as.array(apply(TN_A11B2_SmA3_AR_table[,8:ncol(TN_A11B2_SmA3_AR_table)], 2, summary))
TN_A11B2_SmA3_ARsummary_t <- data.frame(timepoints, t(TN_A11B2_SmA3_ARsummary))
colnames(TN_A11B2_SmA3_ARsummary_t) <- c("time","Min","Lower","Median","Mean","Upper","Max")

A11B2_WTvSmA3_ribbon <- ggplot() +  theme_light() + 
  coord_cartesian(xlim =c(0,15), ylim = c(0,0.8),expand = FALSE) +
  #WT
  geom_ribbon(data=TN_A11B2_WT_ARsummary_t, aes(x=time, y=Median, group=1, ymin=Lower, ymax=Upper), alpha=0.4, fill="darkgrey") +
  geom_point(data=TN_A11B2_WT_ARsummary_t, aes(x=time,y=Median, group=1)) +
  geom_line(data=TN_A11B2_WT_ARsummary_t, aes(x=time,y=Median, group=1), linetype="dotted") +
  #SmA3
  geom_ribbon(data=TN_A11B2_SmA3_ARsummary_t, aes(x=time, y=Median, group=1, ymin=Lower, ymax=Upper), alpha=0.2, fill="purple") +
  geom_point(data=TN_A11B2_SmA3_ARsummary_t, aes(x=time,y=Median, group=1), colour="purple") +
  geom_line(data=TN_A11B2_SmA3_ARsummary_t, aes(x=time,y=Median, group=1), colour="purple", linetype="dotted")
A11B2_WTvSmA3_ribbon

#plot boxplot WT vs mutant
TN_A11B2_ATAC_AR_table_melt <- melt(TN_A11B2_ATAC_AR_table, id=c("Peakid","Chr","Start","End","Strand","Length","YY1target"), 
                                          stringsAsFactors = FALSE)
colnames(TN_A11B2_ATAC_AR_table_melt)[(ncol(TN_A11B2_ATAC_AR_table_melt)-1):ncol(TN_A11B2_ATAC_AR_table_melt)] <- c("sample","ratio")
TN_A11B2_ATAC_AR_table_melt$sample <- as.character(TN_A11B2_ATAC_AR_table_melt$sample)

TN_A11B2_ATAC_AR_YY1target_boxplot <- subset(TN_A11B2_ATAC_AR_table_melt, sample %in% c("A11B2_ES_d0","A11B2_NPC_d3","A11B2_NPC_d5","A11B2_NPC_d7","A11B2_NPC_d15")) %>%
  mutate(YY1target = factor(YY1target, levels=c("nonYY1", "YY1bound"))) %>%
  mutate(sample = factor(sample, levels=c("A11B2_ES_d0","A11B2_NPC_d3","A11B2_NPC_d5","A11B2_NPC_d7","A11B2_NPC_d15"))) %>%
  ggplot(aes(x=sample,y=ratio,fill=YY1target) ) + theme_bw() + theme(axis.text.x=element_blank()) +
  geom_boxplot() +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
  geom_hline(yintercept=0.5, linetype="dashed") + 
  stat_compare_means(method = "t.test", label="p.format",label.y=1, vjust = 2)

TN_SmA3_ATAC_AR_YY1target_boxplot <- subset(TN_A11B2_ATAC_AR_table_melt, sample %in% c("SmA3_ES_d0","SmA3_NPC_d3", "SmA3_NPC_d5","SmA3_NPC_d7","SmA3_NPC_d15")) %>%
  mutate(YY1target = factor(YY1target, levels=c("nonYY1", "YY1bound"))) %>%
  mutate(sample = factor(sample, levels=c("SmA3_ES_d0","SmA3_NPC_d3", "SmA3_NPC_d5","SmA3_NPC_d7","SmA3_NPC_d15"))) %>%
  ggplot(aes(x=sample,y=ratio,fill=YY1target) ) + theme_bw() + theme(axis.text.x=element_blank()) +
  geom_boxplot() +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
  geom_hline(yintercept=0.5, linetype="dashed") + 
  stat_compare_means(method = "t.test", label="p.format",label.y=1, vjust = 2)

grid.arrange(TN_A11B2_ATAC_AR_YY1target_boxplot, TN_SmA3_ATAC_AR_YY1target_boxplot, nrow = 1)

grid.arrange(TN_A11B2_ATAC_AR_YY1target_boxplot, TN_SmA3_ATAC_AR_YY1target_boxplot, nrow = 2)

#only day 3 and day 7
TN_A11B2_ATAC_AR_YY1target_boxplot_day3day7 <- subset(TN_A11B2_ATAC_AR_table_melt, sample %in% c("A11B2_NPC_d3","A11B2_NPC_d7")) %>%
  mutate(YY1target = factor(YY1target, levels=c("nonYY1", "YY1bound"))) %>%
  mutate(sample = factor(sample, levels=c("A11B2_NPC_d3","A11B2_NPC_d7"))) %>%
  ggplot(aes(x=sample,y=ratio,fill=YY1target) ) + theme_bw() + theme(axis.text.x=element_blank()) +
  geom_boxplot() +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
  geom_hline(yintercept=0.5, linetype="dashed") 

TN_SmA3_ATAC_AR_YY1target_boxplot_day3day7 <- subset(TN_A11B2_ATAC_AR_table_melt, sample %in% c("SmA3_NPC_d3","SmA3_NPC_d7")) %>%
  mutate(YY1target = factor(YY1target, levels=c("nonYY1", "YY1bound"))) %>%
  mutate(sample = factor(sample, levels=c("SmA3_NPC_d3","SmA3_NPC_d7"))) %>%
  ggplot(aes(x=sample,y=ratio,fill=YY1target) ) + theme_bw() + theme(axis.text.x=element_blank()) +
  geom_boxplot() +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
  geom_hline(yintercept=0.5, linetype="dashed") 

grid.arrange(TN_A11B2_ATAC_AR_YY1target_boxplot_day3day7, TN_SmA3_ATAC_AR_YY1target_boxplot_day3day7, nrow = 1)

Save output file of ATAC-seq allelic ratios for iXist-ChrX-Dom vs SmA3

#write.table(TN_A11B2_ATAC_AR_table,
#            row.names = FALSE, col.names = TRUE, quote = FALSE,
#            paste0(data_path,"iXistChrX_Dom_WTvsSmA3_AR_table.txt"), sep="\t")

iXist-ChrX-Cast WT vs SmcHD1 KO (SmD5) - plots by allelic ratio

## Subset peaks by allelic filters for ATAC peaks
TN_C7H8_ATAC_ftCounts_filt <- Allelic.Filter.OverFracSamples(TN_C7H8_ATAC_ftCounts_chrX,10,0.8)

#Make AR table and AR filter
TN_C7H8_ATAC_AR_table <- AR.table.reverse(TN_C7H8_ATAC_ftCounts_filt)
colnames(TN_C7H8_ATAC_AR_table) <- c("Peakid","Chr","Start","End","Strand","Length",
                                      "C7H8_ES_d0","C7H8_NPC_d3","C7H8_NPC_d5","C7H8_NPC_d7","C7H8_NPC_d15",
                                      "SmD5_ES_d0","SmD5_NPC_d3", "SmD5_NPC_d5","SmD5_NPC_d7","SmD5_NPC_d15")

#apply initial allelic filter by average of day 0 in C7H8 and SmD5
AR_table_filter1 <- subset(TN_C7H8_ATAC_AR_table, 0.2 <= rowMeans(TN_C7H8_ATAC_AR_table[,c("C7H8_ES_d0","SmD5_ES_d0")]))
TN_C7H8_ATAC_AR_table <- subset(AR_table_filter1, 0.8 >= rowMeans(AR_table_filter1[,c("C7H8_ES_d0","SmD5_ES_d0")]))
rm(TN_C7H8_ATAC_ftCounts_filt, AR_table_filter1)
#remove row with NA
TN_C7H8_ATAC_AR_table <- na.omit(TN_C7H8_ATAC_AR_table)


#Allelic Ratio boxplot
boxplot.matrix(as.matrix(TN_C7H8_ATAC_AR_table[7:ncol(TN_C7H8_ATAC_AR_table)]), use.cols=TRUE,
               ylab = 'chrRNA Allelic Ratio (Xi/(Xi+Xa))',
               ylim=c(0,1))
abline(h=0.5)

#Label peaks by YY1 binding
C7H8_ConsensusPeaks_YY1anno_chrX <- C7H8_ConsensusPeaks_YY1anno_chrX[which(C7H8_ConsensusPeaks_YY1anno_chrX$X1 %in% TN_C7H8_ATAC_AR_table$Peakid),]
TN_C7H8_ATAC_AR_table <-  mutate(TN_C7H8_ATAC_AR_table,
                                  YY1target = case_when(
                                    C7H8_ConsensusPeaks_YY1anno_chrX$X5 == 1 ~ "YY1bound",
                                    C7H8_ConsensusPeaks_YY1anno_chrX$X5 == 0 ~ "nonYY1"))

#plot ribbon WT vs SmcHD1 KO
#separate WT from SmD5
TN_C7H8_WT_AR_table <- TN_C7H8_ATAC_AR_table[,c(1:6,17,7:11)]
TN_C7H8_SmD5_AR_table <- TN_C7H8_ATAC_AR_table[,c(1:6,17,12:16)]

timepoints <- c(0,3,5,7,15)
colnames(TN_C7H8_WT_AR_table)[8:12] <- as.numeric(timepoints)
colnames(TN_C7H8_SmD5_AR_table)[8:12] <- as.numeric(timepoints)

TN_C7H8_WT_ARsummary <- as.array(apply(TN_C7H8_WT_AR_table[,8:ncol(TN_C7H8_WT_AR_table)], 2, summary))
TN_C7H8_WT_ARsummary_t <- data.frame(timepoints, t(TN_C7H8_WT_ARsummary))
colnames(TN_C7H8_WT_ARsummary_t) <- c("time","Min","Lower","Median","Mean","Upper","Max")

TN_C7H8_SmD5_ARsummary <- as.array(apply(TN_C7H8_SmD5_AR_table[,8:ncol(TN_C7H8_SmD5_AR_table)], 2, summary))
TN_C7H8_SmD5_ARsummary_t <- data.frame(timepoints, t(TN_C7H8_SmD5_ARsummary))
colnames(TN_C7H8_SmD5_ARsummary_t) <- c("time","Min","Lower","Median","Mean","Upper","Max")

C7H8_WTvSmD5_ribbon <- ggplot() +  theme_light() + 
  coord_cartesian(xlim =c(0,15), ylim = c(0,0.8),expand = FALSE) +
  #WT
  geom_ribbon(data=TN_C7H8_WT_ARsummary_t, aes(x=time, y=Median, group=1, ymin=Lower, ymax=Upper), alpha=0.4, fill="darkgrey") +
  geom_point(data=TN_C7H8_WT_ARsummary_t, aes(x=time,y=Median, group=1)) +
  geom_line(data=TN_C7H8_WT_ARsummary_t, aes(x=time,y=Median, group=1), linetype="dotted") +
  #SmD5
  geom_ribbon(data=TN_C7H8_SmD5_ARsummary_t, aes(x=time, y=Median, group=1, ymin=Lower, ymax=Upper), alpha=0.2, fill="purple") +
  geom_point(data=TN_C7H8_SmD5_ARsummary_t, aes(x=time,y=Median, group=1), colour="purple") +
  geom_line(data=TN_C7H8_SmD5_ARsummary_t, aes(x=time,y=Median, group=1), colour="purple", linetype="dotted")
C7H8_WTvSmD5_ribbon

#plot boxplot WT vs mutant
TN_C7H8_ATAC_AR_table_melt <- melt(TN_C7H8_ATAC_AR_table, id=c("Peakid","Chr","Start","End","Strand","Length","YY1target"), 
                                    stringsAsFactors = FALSE)
colnames(TN_C7H8_ATAC_AR_table_melt)[(ncol(TN_C7H8_ATAC_AR_table_melt)-1):ncol(TN_C7H8_ATAC_AR_table_melt)] <- c("sample","ratio")
TN_C7H8_ATAC_AR_table_melt$sample <- as.character(TN_C7H8_ATAC_AR_table_melt$sample)

TN_C7H8_ATAC_AR_YY1target_boxplot <- subset(TN_C7H8_ATAC_AR_table_melt, sample %in% c("C7H8_ES_d0","C7H8_NPC_d3","C7H8_NPC_d5","C7H8_NPC_d7","C7H8_NPC_d15")) %>%
  mutate(YY1target = factor(YY1target, levels=c("nonYY1", "YY1bound"))) %>%
  mutate(sample = factor(sample, levels=c("C7H8_ES_d0","C7H8_NPC_d3","C7H8_NPC_d5","C7H8_NPC_d7","C7H8_NPC_d15"))) %>%
  ggplot(aes(x=sample,y=ratio,fill=YY1target) ) + theme_bw() + theme(axis.text.x=element_blank()) +
  geom_boxplot() +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
  geom_hline(yintercept=0.5, linetype="dashed") + 
  stat_compare_means(method = "t.test", label="p.format",label.y=1, vjust = 2)

TN_SmD5_ATAC_AR_YY1target_boxplot <- subset(TN_C7H8_ATAC_AR_table_melt, sample %in% c("SmD5_ES_d0","SmD5_NPC_d3", "SmD5_NPC_d5","SmD5_NPC_d7","SmD5_NPC_d15")) %>%
  mutate(YY1target = factor(YY1target, levels=c("nonYY1", "YY1bound"))) %>%
  mutate(sample = factor(sample, levels=c("SmD5_ES_d0","SmD5_NPC_d3", "SmD5_NPC_d5","SmD5_NPC_d7","SmD5_NPC_d15"))) %>%
  ggplot(aes(x=sample,y=ratio,fill=YY1target) ) + theme_bw() + theme(axis.text.x=element_blank()) +
  geom_boxplot() +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
  geom_hline(yintercept=0.5, linetype="dashed") + 
  stat_compare_means(method = "t.test", label="p.format",label.y=1, vjust = 2)

grid.arrange(TN_C7H8_ATAC_AR_YY1target_boxplot, TN_SmD5_ATAC_AR_YY1target_boxplot, nrow = 1)

grid.arrange(TN_C7H8_ATAC_AR_YY1target_boxplot, TN_SmD5_ATAC_AR_YY1target_boxplot, nrow = 2)

#only day 3 and day 7
TN_C7H8_ATAC_AR_YY1target_boxplot_day3day7 <- subset(TN_C7H8_ATAC_AR_table_melt, sample %in% c("C7H8_NPC_d3","C7H8_NPC_d7","SmD5_NPC_d3","SmD5_NPC_d7")) %>%
  mutate(YY1target = factor(YY1target, levels=c("nonYY1", "YY1bound"))) %>%
  mutate(sample = factor(sample, levels=c("C7H8_NPC_d3","SmD5_NPC_d3","C7H8_NPC_d7","SmD5_NPC_d7"))) %>%
  ggplot(aes(x=sample,y=ratio,fill=YY1target) ) + theme_bw() + theme(axis.text.x=element_blank()) +
  geom_boxplot() +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
  geom_hline(yintercept=0.5, linetype="dashed") 
TN_C7H8_ATAC_AR_YY1target_boxplot_day3day7

Save output file of ATAC-seq allelic ratios for iXist-ChrX-Cast vs SmD5

#write.table(TN_C7H8_ATAC_AR_table,
#            row.names = FALSE, col.names = TRUE, quote = FALSE,
#            paste0(data_path,"iXistChrX_Cast_WTvsSmD5_AR_table.txt"), sep="\t")

Reveiwer Suggestion - analysis by CTCF-binding peaks

iXist-ChrX-Dom WT vs SmcHD1 KO (SmA3) - allelic ratio plots by CTCF binding

#Label peaks by CTCF binding
A11B2_ConsensusPeaks_CTCFanno_chrX <- A11B2_ConsensusPeaks_CTCFanno_chrX[which(A11B2_ConsensusPeaks_CTCFanno_chrX$X1 %in% TN_A11B2_ATAC_AR_table$Peakid),]
TN_A11B2_ATAC_AR_table <-  mutate(TN_A11B2_ATAC_AR_table,
                                        CTCFtarget = case_when(
                                          A11B2_ConsensusPeaks_CTCFanno_chrX$X5 == 1 ~ "CTCFbound",
                                          A11B2_ConsensusPeaks_CTCFanno_chrX$X5 == 0 ~ "nonCTCF"))



#plot boxplot WT vs mutant
TN_A11B2_ATAC_AR_table_melt <- melt(TN_A11B2_ATAC_AR_table, id=c("Peakid","Chr","Start","End","Strand","Length","YY1target","CTCFtarget"), 
                                          stringsAsFactors = FALSE)
colnames(TN_A11B2_ATAC_AR_table_melt)[(ncol(TN_A11B2_ATAC_AR_table_melt)-1):ncol(TN_A11B2_ATAC_AR_table_melt)] <- c("sample","ratio")
TN_A11B2_ATAC_AR_table_melt$sample <- as.character(TN_A11B2_ATAC_AR_table_melt$sample)

TN_A11B2_ATAC_AR_CTCFtarget_boxplot <- subset(TN_A11B2_ATAC_AR_table_melt, sample %in% c("A11B2_ES_d0","A11B2_NPC_d3","A11B2_NPC_d5","A11B2_NPC_d7","A11B2_NPC_d15")) %>%
  mutate(CTCFtarget = factor(CTCFtarget, levels=c("nonCTCF", "CTCFbound"))) %>%
  mutate(sample = factor(sample, levels=c("A11B2_ES_d0","A11B2_NPC_d3","A11B2_NPC_d5","A11B2_NPC_d7","A11B2_NPC_d15"))) %>%
  ggplot(aes(x=sample,y=ratio,fill=CTCFtarget) ) + theme_bw() + theme(axis.text.x=element_blank()) +
  geom_boxplot() +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
  geom_hline(yintercept=0.5, linetype="dashed") + 
  stat_compare_means(method = "wilcox.test", label="p.format",label.y=1, vjust = 2)

TN_SmA3_ATAC_AR_CTCFtarget_boxplot <- subset(TN_A11B2_ATAC_AR_table_melt, sample %in% c("SmA3_ES_d0","SmA3_NPC_d3", "SmA3_NPC_d5","SmA3_NPC_d7","SmA3_NPC_d15")) %>%
  mutate(CTCFtarget = factor(CTCFtarget, levels=c("nonCTCF", "CTCFbound"))) %>%
  mutate(sample = factor(sample, levels=c("SmA3_ES_d0","SmA3_NPC_d3", "SmA3_NPC_d5","SmA3_NPC_d7","SmA3_NPC_d15"))) %>%
  ggplot(aes(x=sample,y=ratio,fill=CTCFtarget) ) + theme_bw() + theme(axis.text.x=element_blank()) +
  geom_boxplot() +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
  geom_hline(yintercept=0.5, linetype="dashed") + 
  stat_compare_means(method = "wilcox.test", label="p.format",label.y=1, vjust = 2)

grid.arrange(TN_A11B2_ATAC_AR_CTCFtarget_boxplot, TN_SmA3_ATAC_AR_CTCFtarget_boxplot, nrow = 1)

grid.arrange(TN_A11B2_ATAC_AR_CTCFtarget_boxplot, TN_SmA3_ATAC_AR_CTCFtarget_boxplot, nrow = 2)

iXist-ChrX-Cast WT vs SmcHD1 KO (SmD5) - allelic ratio plots by CTCF

#Label peaks by YY1 binding
C7H8_ConsensusPeaks_CTCFanno_chrX <- C7H8_ConsensusPeaks_CTCFanno_chrX[which(C7H8_ConsensusPeaks_CTCFanno_chrX$X1 %in% TN_C7H8_ATAC_AR_table$Peakid),]
TN_C7H8_ATAC_AR_table <-  mutate(TN_C7H8_ATAC_AR_table,
                                  CTCFtarget = case_when(
                                    C7H8_ConsensusPeaks_CTCFanno_chrX$X5 == 1 ~ "CTCFbound",
                                    C7H8_ConsensusPeaks_CTCFanno_chrX$X5 == 0 ~ "nonCTCF"))

#plot boxplot WT vs mutant
TN_C7H8_ATAC_AR_table_melt <- melt(TN_C7H8_ATAC_AR_table, id=c("Peakid","Chr","Start","End","Strand","Length","YY1target","CTCFtarget"), 
                                    stringsAsFactors = FALSE)
colnames(TN_C7H8_ATAC_AR_table_melt)[(ncol(TN_C7H8_ATAC_AR_table_melt)-1):ncol(TN_C7H8_ATAC_AR_table_melt)] <- c("sample","ratio")
TN_C7H8_ATAC_AR_table_melt$sample <- as.character(TN_C7H8_ATAC_AR_table_melt$sample)

TN_C7H8_ATAC_AR_CTCFtarget_boxplot <- subset(TN_C7H8_ATAC_AR_table_melt, sample %in% c("C7H8_ES_d0","C7H8_NPC_d3","C7H8_NPC_d5","C7H8_NPC_d7","C7H8_NPC_d15")) %>%
  mutate(CTCFtarget = factor(CTCFtarget, levels=c("nonCTCF", "CTCFbound"))) %>%
  mutate(sample = factor(sample, levels=c("C7H8_ES_d0","C7H8_NPC_d3","C7H8_NPC_d5","C7H8_NPC_d7","C7H8_NPC_d15"))) %>%
  ggplot(aes(x=sample,y=ratio,fill=CTCFtarget) ) + theme_bw() + theme(axis.text.x=element_blank()) +
  geom_boxplot() +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
  geom_hline(yintercept=0.5, linetype="dashed") + 
  stat_compare_means(method = "t.test", label="p.format",label.y=1, vjust = 2)

TN_SmD5_ATAC_AR_CTCFtarget_boxplot <- subset(TN_C7H8_ATAC_AR_table_melt, sample %in% c("SmD5_ES_d0","SmD5_NPC_d3", "SmD5_NPC_d5","SmD5_NPC_d7","SmD5_NPC_d15")) %>%
  mutate(CTCFtarget = factor(CTCFtarget, levels=c("nonCTCF", "CTCFbound"))) %>%
  mutate(sample = factor(sample, levels=c("SmD5_ES_d0","SmD5_NPC_d3", "SmD5_NPC_d5","SmD5_NPC_d7","SmD5_NPC_d15"))) %>%
  ggplot(aes(x=sample,y=ratio,fill=CTCFtarget) ) + theme_bw() + theme(axis.text.x=element_blank()) +
  geom_boxplot() +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
  geom_hline(yintercept=0.5, linetype="dashed") + 
  stat_compare_means(method = "t.test", label="p.format",label.y=1, vjust = 2)

grid.arrange(TN_C7H8_ATAC_AR_CTCFtarget_boxplot, TN_SmD5_ATAC_AR_CTCFtarget_boxplot, nrow = 1)

grid.arrange(TN_C7H8_ATAC_AR_CTCFtarget_boxplot, TN_SmD5_ATAC_AR_CTCFtarget_boxplot, nrow = 2)