Load required packages and functions

#Packages
library(readr)
library(stringr)
library(ggplot2)
library(ggridges)
library(stringr)
library(dplyr)
library(forcats)
library(minpack.lm)
library(mefa4)
library(reshape)
library(gridExtra)
library(ggpubr)

#Functions
Allelic.Filter.OverFracSamples <- function(counts_table,cutoff,fraction){
  counts_table_g1 <- counts_table[,seq(8,ncol(counts_table),3)]
  counts_table_g2 <- counts_table[,seq(9,ncol(counts_table),3)]
  counts_g1g2 <- counts_table_g1 + counts_table_g2
  counts_table_filter <- data.frame()
  for (i in c(1:nrow(counts_g1g2))){
    pass <- length((which(counts_g1g2[i,] > 10) == "TRUE"))
    if (pass >= fraction*(ncol(counts_g1g2))){
      counts_table_filter <- rbind(counts_table_filter,counts_table[i,])
    }
  }
  return(counts_table_filter)
}
Allelic.Ratio <- function(g1_counts,g2_counts){
  Xa <- as.numeric(g1_counts)
  Xi <- as.numeric(g2_counts)
  AllelicRatio <- ( Xi / (Xi + Xa))
  return(AllelicRatio)
}
Allelic.Ratio.Reverse <- function(g1_counts,g2_counts){
  Xi <- as.numeric(g1_counts)
  Xa <- as.numeric(g2_counts)
  AllelicRatio <- ( Xi / (Xi + Xa))
  return(AllelicRatio)
}
chrX1.filter <- function(counts_table){
  chrRNA_counts_table_chrX <- filter(counts_table, !grepl("chrX",counts_table$Chr))
  chrRNA_counts_table_chrX <-  counts_table[which(grepl("chrX",counts_table$Chr) == TRUE), ]
  numextract <- function(string){
    str_extract(string, "\\-*\\d+\\.*\\d*")
  }
  chrRNA_chrX1_filter_End <- cbind(chrRNA_counts_table_chrX[1], as.numeric(numextract(chrRNA_counts_table_chrX$End)))
  chrRNA_chrX1_filter_End <- chrRNA_chrX1_filter_End[which(chrRNA_chrX1_filter_End$`as.numeric(numextract(chrRNA_counts_table_chrX$End))` < 103483233), ]
  chrRNA_filter_chrX1 <- counts_table[
    which(counts_table$Peakid %in% chrRNA_chrX1_filter_End$Peakid ), ]
  return(chrRNA_filter_chrX1)
}
AR.table <- function(counts_table){
  AllelicRatio_table <- counts_table[1:6]
  for (j in seq(8,ncol(counts_table),3)){
    AllelicRatio_table <- cbind(AllelicRatio_table,
                                Allelic.Ratio(as.matrix(counts_table[j]),
                                              as.matrix(counts_table[j+1]) ) )
  }
  return(AllelicRatio_table)
}
AR.table.reverse <- function(counts_table){
  AllelicRatio_table <- counts_table[1:6]
  for (j in seq(8,ncol(counts_table),3)){
    AllelicRatio_table <- cbind(AllelicRatio_table,
                                Allelic.Ratio.Reverse(as.matrix(counts_table[j]),
                                                      as.matrix(counts_table[j+1]) ) )
  }
  return(AllelicRatio_table)
}
initialAR.filter <- function(AR_table,lower,upper){
  AR_table_filter1 <- subset(AR_table,
                             lower <= AR_table[7])
  AR_table_filter2 <- subset(AR_table_filter1,
                             upper >= AR_table_filter1[7])
  return(AR_table_filter2)
}
give.n <- function(x){
  return(c(y = 0, label = length(x)))}

YY1-T7 ChIP

Load in counts tables

ChIP_data_path <- "/path/to/YY1_T7ChIP/ftCounts/output/" 
#Load ftCounts outputs for both aF1 and cC3
aF1_counts_table <- read_table2(paste0(ChIP_data_path,"YY1_ChIP/YY1_counts/aF1_ChIP_ftCounts"),  skip = 1)
colnames(aF1_counts_table) <- c("Peakid", "Chr","Start", "End","Strand","Length",
                                "YY1_T7_aF1_ES_d0","YY1_T7_aF1_ES_d0.genome1","YY1_T7_aF1_ES_d0.genome2",
                                "YY1_T7_aF1_NPC_d3","YY1_T7_aF1_NPC_d3.genome1","YY1_T7_aF1_NPC_d3.genome2",
                                "YY1_T7_aF1_NPC_d5","YY1_T7_aF1_NPC_d5.genome1","YY1_T7_aF1_NPC_d5.genome2",
                                "YY1_T7_aF1_NPC_d7","YY1_T7_aF1_NPC_d7.genome1","YY1_T7_aF1_NPC_d7.genome2",
                                "YY1_T7_aF1_NPC_d18","YY1_T7_aF1_NPC_d18.genome1","YY1_T7_aF1_NPC_d18.genome2",
                                "YY1_T7_aF1_ES_d6","YY1_T7_aF1_ES_d6.genome1","YY1_T7_aF1_ES_d6.genome2")
aF1_names <- c("aF1_d0_ChIP","aF1_d3_ChIP","aF1_d5_ChIP","aF1_d7_ChIP","aF1_d18_ChIP","aF1_ESd6_ChIP")
                                       
cC3_counts_table <- read_table2(paste0(ChIP_data_path,"YY1_ChIP/YY1_counts/cC3_ChIP_ftCounts"),  skip = 1)
colnames(cC3_counts_table) <- c("Peakid", "Chr","Start", "End","Strand","Length",
                                "YY1_T7_cC3_ES_d0","YY1_T7_cC3_ES_d0.genome1","YY1_T7_cC3_ES_d0.genome2",
                                "YY1_T7_cC3_NPC_d3","YY1_T7_cC3_NPC_d3.genome1","YY1_T7_cC3_NPC_d3.genome2",
                                "YY1_T7_cC3_NPC_d5","YY1_T7_cC3_NPC_d5.genome1","YY1_T7_cC3_NPC_d5.genome2",
                                "YY1_T7_cC3_NPC_d7","YY1_T7_cC3_NPC_d7.genome1","YY1_T7_cC3_NPC_d7.genome2",
                                "YY1_T7_cC3_NPC_d28","YY1_T7_cC3_NPC_d28.genome1","YY1_T7_cC3_NPC_d28.genome2",
                                "YY1_T7_cC3_ES_d6","YY1_T7_cC3_ES_d6.genome1","YY1_T7_cC3_ES_d6.genome2")
cC3_names <- c("cC3_d0_ChIP","cC3_d3_ChIP","cC3_d5_ChIP","cC3_d7_ChIP","cC3_d28_ChIP","cC3_ESd6_ChIP")

Filter and make Allelic Ratio tables

#cC3
cC3_allelicFilter <- Allelic.Filter.OverFracSamples(cC3_counts_table[,1:21],10,0.8)
cC3_counts_table <- cC3_counts_table[which(cC3_counts_table$Peakid %in% cC3_allelicFilter$Peakid),]
YY1_T7ChIP_cC3_AR_table <- AR.table.reverse(cC3_counts_table)
colnames(YY1_T7ChIP_cC3_AR_table)[7:ncol(YY1_T7ChIP_cC3_AR_table)] <- cC3_names
YY1_T7ChIP_cC3_AR_table <- initialAR.filter(YY1_T7ChIP_cC3_AR_table,0.2,0.8)

YY1_T7ChIP_cC3_AR_table_melt <- melt.data.frame(YY1_T7ChIP_cC3_AR_table, id=c("Peakid","Chr","Start","End","Strand","Length"))
colnames(YY1_T7ChIP_cC3_AR_table_melt)[7:8] <- c("time", "ratio")

#aF1
aF1_allelicFilter <- Allelic.Filter.OverFracSamples(aF1_counts_table[,1:21],10,0.8)
aF1_counts_table <- aF1_counts_table[which(aF1_counts_table$Peakid %in% aF1_allelicFilter$Peakid),]
aF1_counts_table <- chrX1.filter(aF1_counts_table)
YY1_T7ChIP_aF1_AR_table <- AR.table(aF1_counts_table)
YY1_T7ChIP_aF1_AR_table <- na.omit(YY1_T7ChIP_aF1_AR_table)
colnames(YY1_T7ChIP_aF1_AR_table)[7:ncol(YY1_T7ChIP_aF1_AR_table)] <- aF1_names
YY1_T7ChIP_aF1_AR_table <- initialAR.filter(YY1_T7ChIP_aF1_AR_table,0.2,0.8)

YY1_T7ChIP_aF1_AR_table_melt <- melt.data.frame(YY1_T7ChIP_aF1_AR_table, id=c("Peakid","Chr","Start","End","Strand","Length"))
colnames(YY1_T7ChIP_aF1_AR_table_melt)[7:8] <- c("time", "ratio")

#save AR tables to files
write.table(YY1_T7ChIP_aF1_AR_table, row.names = FALSE, col.names = TRUE, quote = FALSE,
            paste0(ChIP_data_path,"Files/YY1_T7ChIP_aF1_AR_table.txt"), sep="\t")
write.table(YY1_T7ChIP_cC3_AR_table, row.names = FALSE, col.names = TRUE, quote = FALSE,
            paste0(ChIP_data_path,"Files/YY1_T7ChIP_cC3_AR_table.txt"), sep="\t")

Allelic Ratio Boxplots

cC3_box <- mutate(YY1_T7ChIP_cC3_AR_table_melt, 
                  time = factor(time, levels=c("cC3_d0_ChIP", "cC3_d3_ChIP","cC3_d5_ChIP","cC3_d7_ChIP","cC3_d28_ChIP","cC3_ESd6_ChIP"))) %>%
  ggplot(aes(x=time,y=ratio)) + 
  geom_boxplot() + theme_bw() + scale_y_continuous(expand = c(0.0, 0),limits = c(0, 1)) +
  geom_hline(yintercept=0.5, linetype="dashed") + 
  theme(axis.ticks = element_line(colour = "grey27"))
aF1_box <- mutate(YY1_T7ChIP_aF1_AR_table_melt, 
                  time = factor(time, levels=c("aF1_d0_ChIP", "aF1_d3_ChIP","aF1_d5_ChIP","aF1_d7_ChIP","aF1_d18_ChIP","aF1_ESd6_ChIP"))) %>%
  ggplot(aes(x=time,y=ratio)) + 
  geom_boxplot() + theme_bw() + scale_y_continuous(expand = c(0.0, 0),limits = c(0, 1)) +
  geom_hline(yintercept=0.5, linetype="dashed") + 
  theme(axis.ticks = element_line(colour = "grey27"))

#exclude Firre and Dxz4 locii locus 
cC3_exclude_box <- subset(YY1_T7ChIP_cC3_AR_table_melt,
                          YY1_T7ChIP_cC3_AR_table_melt$Start < 50563120 | YY1_T7ChIP_cC3_AR_table_melt$Start > 50635321) %>% #exclude Firre locus
  subset(Start < 75725458 | Start > 75764699) %>% #exclude Dxz4 locus
  mutate(time = factor(time, levels=c("cC3_d0_ChIP", "cC3_d3_ChIP","cC3_d5_ChIP","cC3_d7_ChIP","cC3_d28_ChIP","cC3_ESd6_ChIP"))) %>%
  ggplot(aes(x=time,y=ratio)) + 
  geom_boxplot() + theme_bw() + scale_y_continuous(expand = c(0.0, 0),limits = c(0, 1)) +
  geom_hline(yintercept=0.5, linetype="dashed") + 
  theme(axis.ticks = element_line(colour = "grey27"))

aF1_exclude_box <- subset(YY1_T7ChIP_aF1_AR_table_melt,
       YY1_T7ChIP_aF1_AR_table_melt$Start < 50563120 | YY1_T7ChIP_aF1_AR_table_melt$Start > 50635321) %>% #exclude Firre locus
  subset(Start < 75725458 | Start > 75764699) %>% #exclude Dxz4 locus
  mutate(time = factor(time, levels=c("aF1_d0_ChIP", "aF1_d3_ChIP","aF1_d5_ChIP","aF1_d7_ChIP","aF1_d18_ChIP","aF1_ESd6_ChIP"))) %>%
  ggplot(aes(x=time,y=ratio)) + 
  geom_boxplot() + theme_bw() + scale_y_continuous(expand = c(0.0, 0),limits = c(0, 1)) +
  geom_hline(yintercept=0.5, linetype="dashed") + 
  theme(axis.ticks = element_line(colour = "grey27"))

grid.arrange(aF1_box, cC3_box, nrow = 1)

grid.arrange(aF1_exclude_box, cC3_exclude_box, nrow = 1)

Ribbon plot of chrRNA vs ATAC vs YY1 ChIP

data_path <- "/path/to/folder/with/chrRNA/data/"
ATAC_analysis_path <- "/path/to/your/ATAC/analysis/folder/"
## A11B2
# Load ChrRNA data
A11B2_AR_table_merged <- read_delim(paste0(data_path,"A11B2_AR_table_merged_discreet.txt"), 
                            delim = "\t", escape_double = FALSE, trim_ws = TRUE)

chrRNA_timepoints_merge <- c(0,1,2,3,5,7,18)
colnames(A11B2_AR_table_merged)[7:ncol(A11B2_AR_table_merged)] <- as.numeric(chrRNA_timepoints_merge)
chrRNA_ARsummary <- as.array(apply(A11B2_AR_table_merged[,7:ncol(A11B2_AR_table_merged)], 2, summary))
chrRNA_ARsummary_t <- data.frame(chrRNA_timepoints_merge,
                               t(chrRNA_ARsummary))
colnames(chrRNA_ARsummary_t) <- c("time","Min","Lower","Median","Mean","Upper","Max")

#Load ATAC data
ATAC_NPC_AR_table_merge <- read_table2(paste0(ATAC_analysis_path,"ATAC_NPC_AR_table_merge.txt"))
ATAC_peaks_EDM <- read_delim(paste0(ATAC_analysis_path,"ATAC_peaks_EDM.bed"), 
                             delim = "\t", escape_double = FALSE, col_names = FALSE, trim_ws = TRUE)

colnames(ATAC_peaks_EDM) <-c("Chr","Start","End","Label","Halftime","Peakid")
ATAC_NPC_AR_table_merge <- ATAC_NPC_AR_table_merge[which(ATAC_NPC_AR_table_merge$Peakid %in% ATAC_peaks_EDM$Peakid),]

ATAC_timepoints_merge <- c(0,1,3,6,17)
colnames(ATAC_NPC_AR_table_merge)[7:ncol(ATAC_NPC_AR_table_merge)] <- as.numeric(ATAC_timepoints_merge)
ATAC_ARsummary <- as.array(apply(ATAC_NPC_AR_table_merge[,7:ncol(ATAC_NPC_AR_table_merge)], 2, summary))
ATAC_ARsummary_t <- data.frame(ATAC_timepoints_merge,
                                     t(ATAC_ARsummary))
colnames(ATAC_ARsummary_t) <- c("time","Min","Lower","Median","Mean","Upper","Max")

# aF1 T7-ChIP
YY1ChIP_timepoints_merge <- c(0,3,5,7,18)
YY1ChIP_AR_table <- YY1_T7ChIP_aF1_AR_table[,1:11]
colnames(YY1ChIP_AR_table)[7:ncol(YY1ChIP_AR_table)] <- as.numeric(YY1ChIP_timepoints_merge)
YY1ChIP_ARsummary <- as.array(apply(YY1ChIP_AR_table[,7:ncol(YY1ChIP_AR_table)], 2, summary))
YY1ChIP_ARsummary_t <- data.frame(YY1ChIP_timepoints_merge,
                               t(YY1ChIP_ARsummary))
colnames(YY1ChIP_ARsummary_t) <- c("time","Min","Lower","Median","Mean","Upper","Max")

#plot ribbons
aF1_ribbon <- ggplot() +  theme_light() + 
  coord_cartesian(xlim =c(0,18), ylim = c(0,0.8),expand = FALSE) +
  #chrRNA
  geom_ribbon(data=chrRNA_ARsummary_t, aes(x=time, y=Median, group=1, ymin=Lower, ymax=Upper), alpha=0.4, fill="darkgrey") +
  geom_point(data=chrRNA_ARsummary_t, aes(x=time,y=Median, group=1)) +
  geom_line(data=chrRNA_ARsummary_t, aes(x=time,y=Median, group=1), linetype="dotted") +
  #ATAC
  geom_ribbon(data=ATAC_ARsummary_t, aes(x=time, y=Median, group=1, ymin=Lower, ymax=Upper), alpha=0.2, fill="blue") +
  geom_point(data=ATAC_ARsummary_t, aes(x=time,y=Median, group=1), colour="blue") +
  geom_line(data=ATAC_ARsummary_t, aes(x=time,y=Median, group=1), colour="blue", linetype="dotted") +
  #YY1 ChIP
  geom_ribbon(data=YY1ChIP_ARsummary_t, aes(x=time, y=Median, group=1, ymin=Lower, ymax=Upper), alpha=0.2, fill="orange") +
  geom_point(data=YY1ChIP_ARsummary_t, aes(x=time,y=Median, group=1), colour="orange") +
  geom_line(data=YY1ChIP_ARsummary_t, aes(x=time,y=Median, group=1), colour="orange", linetype="dotted")


## C7H8
# Load ChrRNA data

C7H8_AR_table_merged <- na.omit(read_delim(paste0(data_path,"C7H8_AR_table_merged_discreet.txt"), 
                            delim = "\t", escape_double = FALSE, trim_ws = TRUE)) #this na.omit is important
#colnames(C7H8_AR_table_merged) #check column names
chrRNA_timepoints_merge <- c(0,1,2,3,5,7,28)
colnames(C7H8_AR_table_merged)[7:ncol(C7H8_AR_table_merged)] <- as.numeric(chrRNA_timepoints_merge)
chrRNA_ARsummary <- as.array(apply(C7H8_AR_table_merged[,7:ncol(C7H8_AR_table_merged)], 2, summary))
chrRNA_ARsummary_t <- data.frame(chrRNA_timepoints_merge,
                                 t(chrRNA_ARsummary))
colnames(chrRNA_ARsummary_t) <- c("time","Min","Lower","Median","Mean","Upper","Max")
#cC3 YY1-T7-ChIP
YY1ChIP_timepoints_merge <- c(0,3,5,7,18)
YY1ChIP_AR_table <- YY1_T7ChIP_cC3_AR_table[,1:11]
colnames(YY1ChIP_AR_table)[7:ncol(YY1ChIP_AR_table)] <- as.numeric(YY1ChIP_timepoints_merge)
YY1ChIP_ARsummary <- as.array(apply(YY1ChIP_AR_table[,7:ncol(YY1ChIP_AR_table)], 2, summary))
YY1ChIP_ARsummary_t <- data.frame(YY1ChIP_timepoints_merge,
                                  t(YY1ChIP_ARsummary))
colnames(YY1ChIP_ARsummary_t) <- c("time","Min","Lower","Median","Mean","Upper","Max")

#plot ribbons
cC3_ribbon <- ggplot() +  theme_light() + 
  coord_cartesian(xlim =c(0,15), ylim = c(0,0.8),expand = FALSE) +
  #chrRNA
  geom_ribbon(data=chrRNA_ARsummary_t, aes(x=time, y=Median, group=1, ymin=Lower, ymax=Upper), alpha=0.4, fill="darkgrey") +
  geom_point(data=chrRNA_ARsummary_t, aes(x=time,y=Median, group=1)) +
  geom_line(data=chrRNA_ARsummary_t, aes(x=time,y=Median, group=1), linetype="dotted") +
  #YY1 ChIP
  geom_ribbon(data=YY1ChIP_ARsummary_t, aes(x=time, y=Median, group=1, ymin=Lower, ymax=Upper), alpha=0.2, fill="orange") +
  geom_point(data=YY1ChIP_ARsummary_t, aes(x=time,y=Median, group=1), colour="orange") +
  geom_line(data=YY1ChIP_ARsummary_t, aes(x=time,y=Median, group=1), colour="orange", linetype="dotted")

grid.arrange(aF1_ribbon, cC3_ribbon, nrow = 1)

More YY1-T7 ChIP plots

Print to .bed files of allelically-analysable peaks

aF1_allelic_peaks.bed <- data.frame(YY1_T7ChIP_aF1_AR_table[2:4],YY1_T7ChIP_aF1_AR_table$Peakid)
write.table(aF1_allelic_peaks.bed, row.names = FALSE, col.names = FALSE, quote = FALSE,
            paste0(ChIP_data_path,"YY1_ChIP/YY1_peaks/aF1_allelic_peaks.bed"), sep="\t")
cC3_allelic_peaks.bed <- data.frame(YY1_T7ChIP_cC3_AR_table[2:4],YY1_T7ChIP_cC3_AR_table$Peakid)
write.table(cC3_allelic_peaks.bed, row.names = FALSE, col.names = FALSE, quote = FALSE,
            paste0(ChIP_data_path,"YY1_ChIP/YY1_peaks/cC3_allelic_peaks.bed"), sep="\t")

Plot heatmap of cC3 peaks by allelic ratio

  • Plot heatmaps of allelic ratio of YY1 ChIP-seq over the timecourse of Xist-induction.
  • I also re-plot the ATAC heatmap for visual comparison with YY1-ChIP.
  • One can use the “plotly” package to identify individual peak IDs, and then check their exact genomic locations by loading the bed files (produced above) into a genome browser
#scale each with respect to starting ratio
YY1_T7ChIP_cC3_AR_table_norm <- data.frame(YY1_T7ChIP_cC3_AR_table[,1:6], sweep(YY1_T7ChIP_cC3_AR_table[,7:ncol(YY1_T7ChIP_cC3_AR_table)], 1, YY1_T7ChIP_cC3_AR_table$cC3_d0_ChIP, "/"))
YY1_T7ChIP_cC3_AR_table_norm <- data.frame(YY1_T7ChIP_cC3_AR_table_norm[,1:6], log2(YY1_T7ChIP_cC3_AR_table_norm[7:ncol(YY1_T7ChIP_cC3_AR_table_norm)]))
YY1_T7ChIP_cC3_AR_table_norm_melt <- melt.data.frame(YY1_T7ChIP_cC3_AR_table_norm, id=c("Peakid","Chr","Start","End","Strand","Length"))
colnames(YY1_T7ChIP_cC3_AR_table_norm_melt)[7:8] <- c("time", "ratio")

#plot AR heatmap for cC3 YY1-T7 ChIP
library(viridis)
YY1_T7ChIP_cC3_AR_table_melt <- YY1_T7ChIP_cC3_AR_table_melt %>% arrange(desc(YY1_T7ChIP_cC3_AR_table_melt$Start))
subset(YY1_T7ChIP_cC3_AR_table_melt, 
            time %in% c("cC3_d0_ChIP", "cC3_d3_ChIP","cC3_d5_ChIP","cC3_d7_ChIP","cC3_d28_ChIP")) %>%
  ggplot(aes(rev(time), reorder(Peakid, Start), fill= ratio)) + 
  theme(axis.text.y = element_blank(), panel.background = element_rect(fill = "white")) +  
  geom_tile() + scale_fill_viridis()  +
  theme(panel.background = element_rect(fill = "white"),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank()) + 
  labs(title="YY1 ChIP-seq (cC3) - allelic ratio REs over time course") +
  coord_flip()

#ATAC-seq heat map for comparison
library(reshape2)
ATAC_NPC_AR_table_merge_melt <- melt(ATAC_NPC_AR_table_merge, 
                     id.vars=c("Peakid","Chr","Start","End","Strand","Length"), stringsAsFactors = FALSE)
colnames(ATAC_NPC_AR_table_merge_melt)[7:8] <- c("time", "ratio")
ATAC_NPC_AR_table_merge_melt$time <- as.numeric(as.character(ATAC_NPC_AR_table_merge_melt$time))
mutate(ATAC_NPC_AR_table_merge_melt, time = factor(time, levels=c("17","6","3","1","0"))) %>%
  ggplot(aes(time, reorder(Peakid, Start), fill= ratio)) +
  geom_tile() + scale_fill_viridis() + 
  theme(panel.background = element_rect(fill = "white"),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank()) + 
  labs(title="ATAC-seq (iXist-ChrX-Dom) - allelic ratio REs over time course") +
  coord_flip()

#plot AR heatmap for cC3 YY1-T7 ChIP
YY1_T7ChIP_aF1_AR_table_melt <- YY1_T7ChIP_aF1_AR_table_melt %>% arrange(desc(YY1_T7ChIP_aF1_AR_table_melt$Start))
subset(YY1_T7ChIP_aF1_AR_table_melt, 
       time %in% c("aF1_d0_ChIP", "aF1_d3_ChIP","aF1_d5_ChIP","aF1_d7_ChIP","aF1_d18_ChIP")) %>%
  ggplot(aes(rev(time), reorder(Peakid, Start), fill= ratio)) + 
  theme(axis.text.y = element_blank(), panel.background = element_rect(fill = "white")) +  
  geom_tile() + scale_fill_viridis()  +
  theme(panel.background = element_rect(fill = "white"),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank()) + 
  labs(title="YY1 ChIP-seq (aF1) - allelic ratio REs over time course") +
  coord_flip()

#Rotate axes for aF1 YY1-ChIP
data.table::setorderv(YY1_T7ChIP_aF1_AR_table_melt, cols = c("Start"))
subset(YY1_T7ChIP_aF1_AR_table_melt, 
       time %in% c("aF1_d0_ChIP", "aF1_d3_ChIP","aF1_d5_ChIP","aF1_d7_ChIP","aF1_d18_ChIP")) %>%
  ggplot(aes(time, Peakid, fill= ratio)) + 
  theme(axis.text.y = element_blank(),panel.background = element_rect(fill = "white")) +
  geom_tile() + scale_fill_viridis() +
  theme(panel.background = element_rect(fill = "white"),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank()) +
  labs(title="YY1 ChIP-seq (aF1) - allelic ratio REs over time course")

YY1 vs OCT4 ChIP

Load YY1 d0 and d6 ES sample

YY1_OCT4_path <- "/path/to/ftCounts/output/YY1vOCT4/" 
YY1_counts_table <- read_delim(paste0(YY1_OCT4_path,"YY1_mESC_d6_ftCounts"), 
                                      delim = "\t", escape_double = FALSE, trim_ws = TRUE, skip = 1)
#colnames(YY1_counts_table) #check column names
colnames(YY1_counts_table) <- c("Peakid", "Chr","Start", "End","Strand","Length",
            "C7H8_YY1ChIP_ES_Rep1_d0","C7H8_YY1ChIP_ES_Rep1_d0.genome1","C7H8_YY1ChIP_ES_Rep1_d0.genome2",
            "C7H8_YY1ChIP_ES_Rep1_d6","C7H8_YY1ChIP_ES_Rep1_d6.genome1","C7H8_YY1ChIP_ES_Rep1_d6.genome2",
            "C7H8_YY1ChIP_ES_Rep2_d0","C7H8_YY1ChIP_ES_Rep2_d0.genome1","C7H8_YY1ChIP_ES_Rep2_d0.genome2",
            "C7H8_YY1ChIP_ES_Rep2_d6","C7H8_YY1ChIP_ES_Rep2_d6.genome1","C7H8_YY1ChIP_ES_Rep2_d6.genome2")

YY1_allelicFilter <- Allelic.Filter.OverFracSamples(YY1_counts_table[,1:18],10,0.8)
YY1_counts_table <- YY1_counts_table[which(YY1_counts_table$Peakid %in% YY1_allelicFilter$Peakid),]
YY1_AR_table <- AR.table.reverse(YY1_counts_table)
YY1_AR_table <- initialAR.filter(YY1_AR_table,0.2,0.8)


colnames(YY1_AR_table)[7:10] <- c("C7H8_YY1ChIP_ES_Rep1_d0","C7H8_YY1ChIP_ES_Rep1_d6",
                                  "C7H8_YY1ChIP_ES_Rep2_d0","C7H8_YY1ChIP_ES_Rep2_d6")
library(reshape2)
YY1_AR_table_melt <- melt.data.frame(YY1_AR_table, id=c("Peakid","Chr","Start","End","Strand","Length"))
colnames(YY1_AR_table_melt)[7:8] <- c("time", "ratio")

YY1_box <- mutate(YY1_AR_table_melt, time = factor(time, 
                                                   levels=c("C7H8_YY1ChIP_ES_Rep1_d0","C7H8_YY1ChIP_ES_Rep1_d6",
                                                            "C7H8_YY1ChIP_ES_Rep2_d0","C7H8_YY1ChIP_ES_Rep2_d6"))) %>%
  ggplot(aes(x=time,y=ratio)) + 
  geom_boxplot() + theme_bw() + scale_y_continuous(expand = c(0.0, 0),limits = c(0, 1)) +
  geom_hline(yintercept=0.5, linetype="dashed") + 
  theme(axis.ticks = element_line(colour = "grey27"))
YY1_box

Load OCT4 d0 and d6 ES samples

OCT4_counts_table <- read_delim(paste0(YY1_OCT4_path,"OCT4_ftCounts_chrX_allelic"), 
                                      delim = "\t", escape_double = FALSE, trim_ws = TRUE, skip = 1)
#colnames(OCT4_counts_table) #check column names
OCT4_counts_table <- OCT4_counts_table[,c(1:6,30,28:29,
                                          9,7:8,
                                          12,10:11,
                                          15,13:14)]
colnames(OCT4_counts_table) <- c("Peakid", "Chr","Start", "End","Strand","Length",
              "C7H8_ES_OCT4_rep1","C7H8_ES_OCT4_rep1.genome1","C7H8_ES_OCT4_rep1.genome2",
              "C7H8_ES_6ddox_OCT4_rep1","C7H8_ES_6ddox_OCT4_rep1.genome1","C7H8_ES_6ddox_OCT4_rep1.genome2",
              "C7H8_ES_OCT4_rep2","C7H8_ES_OCT4_rep2.genome1","C7H8_ES_OCT4_rep2.genome2",
              "C7H8_ES_6ddox_OCT4_rep2","C7H8_ES_6ddox_OCT4_rep2.genome1","C7H8_ES_6ddox_OCT4_rep2.genome2")

OCT4_allelicFilter <- Allelic.Filter.OverFracSamples(OCT4_counts_table,10,0.8)
OCT4_counts_table <- OCT4_counts_table[which(OCT4_counts_table$Peakid %in% OCT4_allelicFilter$Peakid),]
OCT4_AR_table <- AR.table.reverse(OCT4_counts_table)
OCT4_AR_table_filt <- OCT4_AR_table
OCT4_AR_table_filt[,7] <- 0.5*(OCT4_AR_table[,7] + OCT4_AR_table[,9])

OCT4_AR_table_filt <- initialAR.filter(OCT4_AR_table_filt,0.2,0.8)
OCT4_AR_table <- OCT4_AR_table[which(OCT4_AR_table$Peakid %in% OCT4_AR_table_filt$Peakid),]
colnames(OCT4_AR_table)[7:10] <- c("C7H8_ES_OCT4_rep1","C7H8_ES_6ddox_OCT4_rep1",
                                   "C7H8_ES_OCT4_rep2","C7H8_ES_6ddox_OCT4_rep2")

OCT4_AR_table_melt <- melt.data.frame(OCT4_AR_table, id=c("Peakid","Chr","Start","End","Strand","Length"))
colnames(OCT4_AR_table_melt)[7:8] <- c("time", "ratio")

OCT4_box <- mutate(OCT4_AR_table_melt, 
                   time = factor(time, levels=c("C7H8_ES_OCT4_rep1","C7H8_ES_6ddox_OCT4_rep1","C7H8_ES_OCT4_rep2","C7H8_ES_6ddox_OCT4_rep2"))) %>%
  ggplot(aes(x=time,y=ratio)) + 
  geom_boxplot() + theme_bw() + scale_y_continuous(expand = c(0.0, 0),limits = c(0, 1)) +
  geom_hline(yintercept=0.5, linetype="dashed") + 
  theme(axis.ticks = element_line(colour = "grey27"))
OCT4_box

grid.arrange(YY1_box, OCT4_box, nrow = 1)

Dox-NoDox

# Merge YY1 and calculate Dox minus NoDox
YY1_AR_table_merge <- data.frame(YY1_AR_table[,1:6], 
                          0.5*(YY1_AR_table$C7H8_YY1ChIP_ES_Rep1_d0 + YY1_AR_table$C7H8_YY1ChIP_ES_Rep2_d0),
                          0.5*(YY1_AR_table$C7H8_YY1ChIP_ES_Rep1_d6 + YY1_AR_table$C7H8_YY1ChIP_ES_Rep1_d6))
colnames(YY1_AR_table_merge)[7:8] <- c("C7H8_YY1_ES_d0","C7H8_YY1_ES_d6")

YY1_AR_table_merge_DoxMinusNoDox <- data.frame(YY1_AR_table_merge[,1:6],
                                   YY1_AR_table_merge$C7H8_YY1_ES_d6 - YY1_AR_table_merge$C7H8_YY1_ES_d0)
colnames(YY1_AR_table_merge_DoxMinusNoDox)[7] <- "C7H8_YY1_d6minusd0"

#plot violin plot
YY1_AR_table_merge_DoxMinusNoDox_melt <- melt.data.frame(YY1_AR_table_merge_DoxMinusNoDox,
                                                         id=c("Peakid","Chr","Start","End","Strand","Length"))
colnames(YY1_AR_table_merge_DoxMinusNoDox_melt)[7:8] <- c("sample", "ratio")

YY1_violin <- ggplot(data=YY1_AR_table_merge_DoxMinusNoDox_melt, aes(x=sample,y=ratio)) +
  coord_flip() + theme_bw() + theme(legend.position="bottom") + 
  geom_violin(position = position_dodge(0.9), scale="width",width=0.8) +  
  geom_boxplot(width=0.1, position = position_dodge(0.9),outlier.shape=NA) +
  scale_y_continuous(expand = c(0, 0), limits = c(-0.5, 0.2)) +
  geom_hline(yintercept=0.0, linetype="dashed") +
  geom_hline(yintercept=median(YY1_AR_table_merge_DoxMinusNoDox_melt$ratio), linetype="dashed")

# Merge OCT4 and calculate Dox minus NoDox
OCT4_AR_table_merge <- data.frame(OCT4_AR_table[,1:6],
                            0.5*(OCT4_AR_table$C7H8_ES_OCT4_rep1 + OCT4_AR_table$C7H8_ES_OCT4_rep2),
                            0.5*(OCT4_AR_table$C7H8_ES_6ddox_OCT4_rep1 + OCT4_AR_table$C7H8_ES_6ddox_OCT4_rep2))
colnames(OCT4_AR_table_merge)[7:8] <- c("C7H8_OCT4","C7H8_OCT4_d6_ES")
OCT4_AR_DoxMinusNoDox_AR <- data.frame(OCT4_AR_table_merge[,1:6],
                                       OCT4_AR_table_merge$C7H8_OCT4_d6_ES - OCT4_AR_table_merge$C7H8_OCT4)
colnames(OCT4_AR_DoxMinusNoDox_AR)[7] <- "OCT4_C7H8_d6minusd0"

#plot violin plot
OCT4_AR_DoxMinusNoDox_AR_melt <- melt.data.frame(OCT4_AR_DoxMinusNoDox_AR,
                                                 id=c("Peakid","Chr","Start","End","Strand","Length"))
colnames(OCT4_AR_DoxMinusNoDox_AR_melt)[7:8] <- c("sample", "ratio")

OCT4_violin <- ggplot(data=OCT4_AR_DoxMinusNoDox_AR_melt, aes(x=sample,y=ratio)) +
  coord_flip() + theme_bw() + theme(legend.position="bottom") + 
  geom_violin(position = position_dodge(0.9), scale="width",width=0.8) +  
  geom_boxplot(width=0.1, position = position_dodge(0.9),outlier.shape=NA) +
  scale_y_continuous(expand = c(0, 0), limits = c(-0.5, 0.2)) +
  geom_hline(yintercept=0.0, linetype="dashed")  +
  geom_hline(yintercept=median(OCT4_AR_DoxMinusNoDox_AR_melt$ratio), linetype="dashed")

grid.arrange(YY1_violin, OCT4_violin, nrow = 2)