Load required packages and functions

#Packages
library(readr)
library(stringr)
library(ggplot2)
library(ggridges)
library(stringr)
library(dplyr)
library(forcats)
library(minpack.lm)
library(mefa4)
library(reshape)
library(gridExtra)
library(ggpubr)

#Functions
LoadingCountFile <- function(file=""){
  Count <- read.table(file, header=T)
  return(Count)
}
Allelic.Ratio <- function(g1_counts,g2_counts){
  Xa <- as.numeric(g1_counts)
  Xi <- as.numeric(g2_counts)
  AllelicRatio <- ( Xi / (Xi + Xa))
  return(AllelicRatio)
}
Allelic.Ratio.Reverse <- function(g1_counts,g2_counts){
  Xi <- as.numeric(g1_counts)
  Xa <- as.numeric(g2_counts)
  AllelicRatio <- ( Xi / (Xi + Xa))
  return(AllelicRatio)
}
AR.table.reverse <- function(counts_table){
  AllelicRatio_table <- counts_table[1:6]
  for (j in seq(8,ncol(counts_table),3)){
    AllelicRatio_table <- cbind(AllelicRatio_table,
                                Allelic.Ratio.Reverse(as.matrix(counts_table[j]),
                                                      as.matrix(counts_table[j+1]) ) )
  }
  return(AllelicRatio_table)
}
AR.table <- function(counts_table){
  AllelicRatio_table <- counts_table[1:6]
  for (j in seq(8,ncol(counts_table),3)){
    AllelicRatio_table <- cbind(AllelicRatio_table,
                                Allelic.Ratio(as.matrix(counts_table[j]),
                                              as.matrix(counts_table[j+1]) ) )
  }
  return(AllelicRatio_table)
}
chrX1.filter <- function(counts_table){
  chrRNA_counts_table_chrX <- filter(counts_table, !grepl("chrX",counts_table$Chr))
  chrRNA_counts_table_chrX <-  counts_table[which(grepl("chrX",counts_table$Chr) == TRUE), ]
  numextract <- function(string){
    str_extract(string, "\\-*\\d+\\.*\\d*")
  }
  chrRNA_chrX1_filter_End <- cbind(chrRNA_counts_table_chrX[1], as.numeric(numextract(chrRNA_counts_table_chrX$End)))
  chrRNA_chrX1_filter_End <- chrRNA_chrX1_filter_End[which(chrRNA_chrX1_filter_End$`as.numeric(numextract(chrRNA_counts_table_chrX$End))` < 103483233), ]
  chrRNA_filter_chrX1 <- counts_table[
    which(counts_table$Geneid %in% chrRNA_chrX1_filter_End$Geneid ), ]
  return(chrRNA_filter_chrX1)
}
chrRNA.RPM <- function(counts_table){
  #Calculate sum of counts for each sample and divide through (per million) to make RPM
  countsSums <- colSums(counts_table[7:ncol(counts_table)])
  GeneNormFactors <- (rep(countsSums[seq(1,length(countsSums),3)], each=3))/1000000
  chrRNA_RPM <- cbind(counts_table[1:6], sweep(counts_table[7:ncol(counts_table)], 2, GeneNormFactors, "/"))
  return(chrRNA_RPM)
}
ARtable.melt <- function(ARtable){
  ARtable_melt <- melt(ARtable[,c(1:ncol(ARtable))], id=c("Geneid","Chr","Start","End","Strand","Length","YY1target"))
  sample_info <- cbind(
    str_extract(ARtable_melt[,(ncol(ARtable_melt)-1)], "[^_]*$"),
    str_extract(ARtable_melt[,(ncol(ARtable_melt)-1)], ".+?(?=_)"),
    str_extract(ARtable_melt[,(ncol(ARtable_melt)-1)], c("ES|NPC")))
  colnames(sample_info) <- c("day","sample","protocol")
  sample_info <- as.data.frame(sample_info)
  ARtable_melt <- cbind(ARtable_melt[1:(ncol(ARtable_melt)-2)],
                        sample_info$day,
                        sample_info$sample,
                        sample_info$protocol,
                        ARtable_melt$value)
  colnames(ARtable_melt)[(ncol(ARtable_melt)-3):ncol(ARtable_melt)] <- c("day","sample","protocol","ratio")
  return(ARtable_melt)
}
RPMtable.melt <- function(RPMtable){
  RPMtable_melt <- melt(RPMtable[,c(1:ncol(RPMtable))], id=c("Geneid","Chr","Start","End","Strand","Length"))
  sample_info <- as.data.frame(
    cbind(
      str_extract(RPMtable_melt[,(ncol(RPMtable_melt)-1)], c("cC3|aF1")),
      str_extract(RPMtable_melt[,(ncol(RPMtable_melt)-1)], c("d0|2dDox|6dDox")),
      str_extract(RPMtable_melt[,(ncol(RPMtable_melt)-1)], c("un|dTag52h")),
      str_extract(RPMtable_melt[,(ncol(RPMtable_melt)-1)], c("genome1|genome2|sorted"))))
  colnames(sample_info) <- c("line","day","treatment","allele")
  sample_info <- as.data.frame(sample_info)
  sample_info[sample_info == "sorted"] <- "total"
  RPMtable_melt <- data.frame(RPMtable_melt[1:(ncol(RPMtable_melt)-2)],
                               sample_info$line,
                               sample_info$day,
                               sample_info$treatment,
                               sample_info$allele,
                              RPMtable_melt$value)
  colnames(RPMtable_melt)[(ncol(RPMtable_melt)-4):ncol(RPMtable_melt)]  <- c("line","day","treatment","allele","RPM")
  return(RPMtable_melt)
}
AllelicFoldChangetable.melt <- function(AllelicFoldChangetable){
  AllelicFoldChangetable_melt <- melt(AllelicFoldChangetable[,c(1:ncol(AllelicFoldChangetable))], id=c("Geneid","Chr","Start","End","Strand","Length","YY1target"))
  sample_info <- as.data.frame(
    cbind(
      str_extract(AllelicFoldChangetable_melt[,(ncol(AllelicFoldChangetable_melt)-1)], c("cC3|aF1")),
      str_extract(AllelicFoldChangetable_melt[,(ncol(AllelicFoldChangetable_melt)-1)], c("d0|2d|6d")),
      str_extract(AllelicFoldChangetable_melt[,(ncol(AllelicFoldChangetable_melt)-1)], c("g1|g2"))) )
  colnames(sample_info) <- c("line","day","allele")
  sample_info <- as.data.frame(sample_info)
  AllelicFoldChangetable_melt <- data.frame(AllelicFoldChangetable_melt[1:(ncol(AllelicFoldChangetable_melt)-2)],
                                            sample_info$line,
                                            sample_info$day,
                                            sample_info$allele,
                                            AllelicFoldChangetable_melt$value)
  colnames(AllelicFoldChangetable_melt)[(ncol(AllelicFoldChangetable_melt)-3):ncol(AllelicFoldChangetable_melt)]  <- c("line","treatment","allele","log2foldchange")
  return(AllelicFoldChangetable_melt)
}

Load data

data_path <- "/path/to/folder/with/chrRNA/data/"
chrRNA_counts_path <- "/path/to/folder/with/iXistChrX_YY1-FKBP_chrRNA/data/"
GeneInfo_A11B2 <- read_delim(paste0(data_path,"GeneInfo_A11B2.txt"),
                             "\t", escape_double = FALSE, col_names = TRUE, trim_ws = TRUE)
colnames(GeneInfo_A11B2) <-  c("Geneid","Halftime","Starts_numeric","Dist_Xist","InitialTPM",
                               "Distance_Xist","Expression_group","SilencingRate","SmcHD1_dependence")

GeneInfo_C7H8 <- read_delim(paste0(data_path,"GeneInfo_C7H8.txt"), 
                            "\t", escape_double = FALSE, col_names = TRUE, trim_ws = TRUE)
colnames(GeneInfo_C7H8) <-  c("Geneid","Halftime","Starts_numeric","Dist_Xist","InitialTPM",
                              "Distance_Xist","Expression_group","SilencingRate","SmcHD1_dependence")

#Load in files of TSS which have YY1 binding at promoter
GeneTSS_YY1prom <- read.delim(paste0(data_path,"GeneTSS_YY1prom.bed"), header=FALSE)

Load counts files for cC3 and aF1

#Load in cC3
cC3_chrRNA_ES_d0_un <- LoadingCountFile(paste0(chrRNA_counts_path,"cC3_chrRNA_ES_d0_un.sortn.count"))
cC3_chrRNA_ES_d0_dTag52h <- LoadingCountFile(paste0(chrRNA_counts_path,"cC3_chrRNA_ES_d0_dTag52h.sortn.count"))
cC3_chrRNA_ES_2dDox_un <- LoadingCountFile(paste0(chrRNA_counts_path,"cC3_chrRNA_ES_2dDox_un.sortn.count"))
cC3_chrRNA_ES_2dDox_dTag52h <- LoadingCountFile(paste0(chrRNA_counts_path,"cC3_chrRNA_ES_2dDox_dTag52h.sortn.count"))
cC3_chrRNA_ES_6dDox_un <- LoadingCountFile(paste0(chrRNA_counts_path,"cC3_chrRNA_ES_6dDox_un.sortn.count"))
cC3_chrRNA_ES_6dDox_dTag52h <- LoadingCountFile(paste0(chrRNA_counts_path,"cC3_chrRNA_ES_6dDox_dTag52h.sortn.count"))

cC3_counts_table <- cbind(cC3_chrRNA_ES_d0_un[1:9], cC3_chrRNA_ES_d0_dTag52h[7:9],
                          cC3_chrRNA_ES_2dDox_un[7:9],cC3_chrRNA_ES_2dDox_dTag52h[7:9],
                          cC3_chrRNA_ES_6dDox_un[7:9],cC3_chrRNA_ES_6dDox_dTag52h[7:9])

aF1_chrRNA_ES_d0_un <- LoadingCountFile(paste0(chrRNA_counts_path,"aF1_chrRNA_ES_d0_un.sortn.count"))
aF1_chrRNA_ES_d0_dTag52h <- LoadingCountFile(paste0(chrRNA_counts_path,"aF1_chrRNA_ES_d0_dTag52h.sortn.count"))
aF1_chrRNA_ES_2dDox_un <- LoadingCountFile(paste0(chrRNA_counts_path,"aF1_chrRNA_ES_2dDox_un.sortn.count"))
aF1_chrRNA_ES_2dDox_dTag52h <- LoadingCountFile(paste0(chrRNA_counts_path,"aF1_chrRNA_ES_2dDox_dTag52h.sortn.count"))
aF1_chrRNA_ES_6dDox_un <- LoadingCountFile(paste0(chrRNA_counts_path,"aF1_chrRNA_ES_6dDox_un.sortn.count"))
aF1_chrRNA_ES_6dDox_dTag52h <- LoadingCountFile(paste0(chrRNA_counts_path,"aF1_chrRNA_ES_6dDox_dTag52h.sortn.count"))

aF1_counts_table <- cbind(aF1_chrRNA_ES_d0_un[1:9], aF1_chrRNA_ES_d0_dTag52h[7:9],
                          aF1_chrRNA_ES_2dDox_un[7:9],aF1_chrRNA_ES_2dDox_dTag52h[7:9],
                          aF1_chrRNA_ES_6dDox_un[7:9],aF1_chrRNA_ES_6dDox_dTag52h[7:9])

Allelic Analysis

#cC3
cC3_counts_table_filt <- cC3_counts_table[which(cC3_counts_table$Geneid %in% GeneInfo_C7H8$Geneid),]
cC3_AR_table <- AR.table.reverse(cC3_counts_table_filt)
colnames(cC3_AR_table)[7:ncol(cC3_AR_table)] <- c("cC3ctrl_ES_d0","cC3dTAG_ES_d0",
                                                  "cC3ctrl_ES_d2","cC3dTAG_ES_d2",
                                                  "cC3ctrl_ES_d6","cC3dTAG_ES_d6")
cC3_AR_table_byYY1 <-  mutate(cC3_AR_table,
                               YY1target = case_when(
                                 cC3_AR_table$Geneid %in% GeneTSS_YY1prom[,4] ~ "YY1",
                                 !(cC3_AR_table$Geneid %in% GeneTSS_YY1prom[,4]) ~ "nonYY1")  )

#aF1
aF1_counts_table_filt <- aF1_counts_table[which(aF1_counts_table$Geneid %in% GeneInfo_A11B2$Geneid),]
aF1_AR_table <- AR.table(aF1_counts_table_filt)
colnames(aF1_AR_table)[7:ncol(aF1_AR_table)] <- c("aF1ctrl_ES_d0","aF1dTAG_ES_d0",
                                                  "aF1ctrl_ES_d2","aF1dTAG_ES_d2",
                                                  "aF1ctrl_ES_d6","aF1dTAG_ES_d6")
aF1_AR_table_byYY1 <-  mutate(aF1_AR_table,
                              YY1target = case_when(
                                aF1_AR_table$Geneid %in% GeneTSS_YY1prom[,4] ~ "YY1",
                                !(aF1_AR_table$Geneid %in% GeneTSS_YY1prom[,4]) ~ "nonYY1")  )

#melt 
cC3_AR_table_byYY1_melt <- ARtable.melt(cC3_AR_table_byYY1)
aF1_AR_table_byYY1_melt <- ARtable.melt(aF1_AR_table_byYY1)

Plots

Allelic ratio boxplots

#plot just +/- dTAG13
cC3_boxplot <- mutate(cC3_AR_table_byYY1_melt, sample = factor(sample, levels=c("cC3ctrl","cC3dTAG"))) %>%
  mutate(day = factor(day, levels=c("d0","d2","d6"))) %>%
  ggplot(aes(x=day,y=ratio,fill=sample)) + theme_bw() + 
  geom_boxplot() +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
  geom_hline(yintercept=0.5, linetype="dashed") +  stat_compare_means(method = "t.test",label.y = 0.9,paired = TRUE)

aF1_boxplot <- mutate(aF1_AR_table_byYY1_melt, sample = factor(sample, levels=c("aF1ctrl","aF1dTAG"))) %>%
  mutate(day = factor(day, levels=c("d0","d2","d6"))) %>%
  ggplot(aes(x=day,y=ratio,fill=sample)) + theme_bw() + 
  geom_boxplot() +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
  geom_hline(yintercept=0.5, linetype="dashed") +  stat_compare_means(method = "t.test",label.y = 0.9,paired = TRUE)
aF1_boxplot

cC3_boxplot

#plot boxplot by Yy1target
cC3_boxplot_byYY1 <- subset(cC3_AR_table_byYY1_melt, sample = factor(sample, levels=c("cC3ctrl","cC3dTAG")))  %>%
  mutate(YY1target = factor(YY1target, levels=c("YY1","nonYY1"))) %>%
  mutate(day = factor(day, levels=c("d0","d2","d6"))) %>%
  ggplot(aes(x=day,y=ratio,fill=sample)) + 
  facet_grid(cols = vars(YY1target)) + theme_bw() + coord_cartesian(ylim=c(0,0.7)) + 
  geom_boxplot() +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
  geom_hline(yintercept=0.5, linetype="dashed") 


#plot boxplot by Yy1target
aF1_boxplot_byYY1 <- subset(aF1_AR_table_byYY1_melt, sample = factor(sample, levels=c("aF1ctrl","aF1dTAG")))  %>%
  mutate(YY1target = factor(YY1target, levels=c("YY1","nonYY1"))) %>%
  mutate(day = factor(day, levels=c("d0","d2","d6"))) %>%
  ggplot(aes(x=day,y=ratio,fill=sample)) + 
  facet_grid(cols = vars(YY1target)) + theme_bw() + coord_cartesian(ylim=c(0,0.7)) + 
  geom_boxplot() +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
  geom_hline(yintercept=0.5, linetype="dashed")

aF1_boxplot_byYY1

cC3_boxplot_byYY1

Reads Per Million - Xist and other genes

#cC3
cC3_RPM_table <- chrRNA.RPM(cC3_counts_table)
cC3_RPM_table <- as.matrix(cC3_RPM_table[,c(seq(7,ncol(cC3_RPM_table),3))])
colnames(cC3_RPM_table) <- colnames(cC3_AR_table)[7:ncol(cC3_AR_table)]
cC3_RPM_table <- as.data.frame(cC3_RPM_table)
cC3_RPM_table <- cbind(cC3_counts_table$Geneid, cC3_RPM_table)
colnames(cC3_RPM_table) <- c("Geneid","cC3_d0_un","cC3_d0_dTAG","cC3_d2_un","cC3_d2_dTAG","cC3_d6_un","cC3_d6_dTAG")
sample <- c("Geneid","cC3_d0_un","cC3_d0_dTAG","cC3_d2_un","cC3_d2_dTAG","cC3_d6_un","cC3_d6_dTAG")
cC3_RPM_table <- as.data.frame(cC3_RPM_table)
cC3_RPM_table_melt <- melt(cC3_RPM_table, id=c("Geneid"))
colnames(cC3_RPM_table_melt)[2:3] <- c("sample","RPM")

cC3_Xist <- ggplot(subset(cC3_RPM_table_melt, Geneid %in% c("Xist")), aes(x=sample, y=RPM)) + ylim(0,8000) +
  geom_bar(stat = "identity", width=0.4) + theme_bw() + facet_wrap(~Geneid, scales="free_y")

#aF1
aF1_RPM_table <- chrRNA.RPM(aF1_counts_table)
aF1_RPM_table <- as.matrix(aF1_RPM_table[,c(seq(7,ncol(aF1_RPM_table),3))])
colnames(aF1_RPM_table) <- colnames(aF1_AR_table)[7:ncol(aF1_AR_table)]
aF1_RPM_table <- as.data.frame(aF1_RPM_table)
aF1_RPM_table <- cbind(aF1_counts_table$Geneid, aF1_RPM_table)
colnames(aF1_RPM_table) <- c("Geneid","aF1_d0_un","aF1_d0_dTAG","aF1_d2_un","aF1_d2_dTAG","aF1_d6_un","aF1_d6_dTAG")
sample <- c("Geneid","aF1_d0_un","aF1_d0_dTAG","aF1_d2_un","aF1_d2_dTAG","aF1_d6_un","aF1_d6_dTAG")
aF1_RPM_table <- as.data.frame(aF1_RPM_table)
aF1_RPM_table_melt <- melt(aF1_RPM_table, id=c("Geneid"))
colnames(aF1_RPM_table_melt)[2:3] <- c("sample","RPM")

aF1_Xist <- ggplot(subset(aF1_RPM_table_melt, Geneid %in% c("Xist")), aes(x=sample, y=RPM)) + ylim(0,8000) +
  geom_bar(stat = "identity", width=0.4) + theme_bw() + facet_wrap(~Geneid, scales="free_y")

grid.arrange(aF1_Xist, cC3_Xist, nrow = 1)

#Check some other genes involved in XCI in case they can explain the greater silencing in YY1 dTAG
cC3_Genes <- ggplot(subset(cC3_RPM_table_melt, Geneid %in% c("Spen","Pcgf3","Hnrnpk","Xist","Zfp42","Rnf12","Rlim")), 
                    aes(x=sample, y=RPM)) +
  geom_bar(stat = "identity", width=0.4) + theme_bw() + facet_wrap(~Geneid, scales="free_y")
cC3_Genes

Allelic RPM

cC3_RPM_table <- as.data.frame(chrRNA.RPM(cC3_counts_table))
aF1_RPM_table <- as.data.frame(chrRNA.RPM(aF1_counts_table))

cC3_RPM_table_all <- cC3_RPM_table[,c(1,7:ncol(cC3_RPM_table))]
RPM_table_all <- merge(cC3_RPM_table,aF1_RPM_table)
RPM_table_melt <- RPMtable.melt(RPM_table_all)

#Ddx3x, Hcfc1, Ogt, Rbmx
cC3_allelic <- subset(RPM_table_melt, Geneid %in% c("Hcfc1","Ddx3x","Ogt","Rbmx","Xist"))  %>%
  subset(allele %in% c("genome1","genome2")) %>%
  mutate(allele = factor(allele, levels=c("genome2","genome1"))) %>%
  subset(line %in% c("cC3")) %>%
  mutate(day = factor(day, levels=c("d0","2dDox","6dDox"))) %>%
  mutate(treatment = factor(treatment, levels=c("un","dTag52h"))) %>%
  ggplot(aes(x=day,y=RPM,fill=treatment)) +
  geom_bar(stat = "identity", width=0.4, position="dodge") + 
  facet_grid(rows=vars(Geneid), cols= vars(allele),scales="free_y") + theme_bw()

cC3_allelic

aF1_allelic <-  subset(RPM_table_melt, Geneid %in% c("Hcfc1","Ddx3x","Ogt","Rbmx","Xist"))  %>%
  subset(allele %in% c("genome1","genome2")) %>%
  mutate(allele = factor(allele, levels=c("genome1","genome2"))) %>%
  subset(line %in% c("aF1")) %>%
  mutate(day = factor(day, levels=c("d0","2dDox","6dDox"))) %>%
  mutate(treatment = factor(treatment, levels=c("un","dTag52h"))) %>%
  ggplot(aes(x=day,y=RPM,fill=treatment)) +
  geom_bar(stat = "identity", width=0.4, position="dodge") + 
  facet_grid(rows=vars(Geneid), cols= vars(allele),scales="free_y") + theme_bw()

aF1_allelic

Allelic Log2 Fold Change plot

library(IDPmisc) #need to remove NAs in logFC tables

#aF1
aF1_RPM_table_filt <- aF1_RPM_table[which(aF1_RPM_table$Geneid %in% GeneInfo_A11B2$Geneid),]
aF1_Allelic_FoldChange <- data.frame(aF1_RPM_table_filt[,c(1:6)],
                                     log2(aF1_RPM_table_filt$aF1_chrRNA_ES_d0_dTag52h.genome1.sorted.bam / 
                                            aF1_RPM_table_filt$aF1_chrRNA_ES_d0_un.genome1.sorted.bam),
                                     log2(aF1_RPM_table_filt$aF1_chrRNA_ES_d0_dTag52h.genome2.sorted.bam / 
                                            aF1_RPM_table_filt$aF1_chrRNA_ES_d0_un.genome2.sorted.bam),
                                     log2(aF1_RPM_table_filt$aF1_chrRNA_ES_2dDox_dTag52h.genome1.sorted.bam /
                                            aF1_RPM_table_filt$aF1_chrRNA_ES_2dDox_un.genome1.sorted.bam),
                                     log2(aF1_RPM_table_filt$aF1_chrRNA_ES_2dDox_dTag52h.genome2.sorted.bam /
                                            aF1_RPM_table_filt$aF1_chrRNA_ES_2dDox_un.genome2.sorted.bam),
                                     log2(aF1_RPM_table_filt$aF1_chrRNA_ES_6dDox_dTag52h.genome1.sorted.bam /
                                            aF1_RPM_table_filt$aF1_chrRNA_ES_6dDox_un.genome1.sorted.bam),
                                     log2(aF1_RPM_table_filt$aF1_chrRNA_ES_6dDox_dTag52h.genome2.sorted.bam /
                                            aF1_RPM_table_filt$aF1_chrRNA_ES_6dDox_un.genome2.sorted.bam))
colnames(aF1_Allelic_FoldChange)[7:ncol(aF1_Allelic_FoldChange)] <- c("aF1_d0_g1","aF1_d0_g2",
                                                                      "aF1_2d_g1","aF1_2d_g2",
                                                                      "aF1_6d_g1","aF1_6d_g2")
aF1_Allelic_FoldChange_filt <- NaRV.omit(aF1_Allelic_FoldChange)
aF1_Allelic_FoldChange_yy1 <- mutate(aF1_Allelic_FoldChange_filt,
                                     YY1target = case_when(
                                       aF1_Allelic_FoldChange_filt$Geneid %in% GeneTSS_YY1prom[,4] ~ "YY1",
                                       !(aF1_Allelic_FoldChange_filt$Geneid %in% GeneTSS_YY1prom[,4]) ~ "nonYY1")  )
aF1_Allelic_FoldChange_nonfilt_yy1 <- mutate(aF1_Allelic_FoldChange,
                                     YY1target = case_when(
                                       aF1_Allelic_FoldChange$Geneid %in% GeneTSS_YY1prom[,4] ~ "YY1",
                                       !(aF1_Allelic_FoldChange$Geneid %in% GeneTSS_YY1prom[,4]) ~ "nonYY1")  )

aF1_Allelic_FoldChange_melt <- AllelicFoldChangetable.melt(aF1_Allelic_FoldChange_yy1)

#cC3
cC3_RPM_table_filt <- cC3_RPM_table[which(cC3_RPM_table$Geneid %in% GeneInfo_C7H8$Geneid),]
cC3_Allelic_FoldChange <- data.frame(cC3_RPM_table_filt[,c(1:6)],
                                     log2(cC3_RPM_table_filt$cC3_chrRNA_ES_d0_dTag52h.genome1.sorted.bam /
                                            cC3_RPM_table_filt$cC3_chrRNA_ES_d0_un.genome1.sorted.bam),
                                     log2(cC3_RPM_table_filt$cC3_chrRNA_ES_d0_dTag52h.genome2.sorted.bam /
                                            cC3_RPM_table_filt$cC3_chrRNA_ES_d0_un.genome2.sorted.bam),
                                     log2(cC3_RPM_table_filt$cC3_chrRNA_ES_2dDox_dTag52h.genome1.sorted.bam /
                                            cC3_RPM_table_filt$cC3_chrRNA_ES_2dDox_un.genome1.sorted.bam),
                                     log2(cC3_RPM_table_filt$cC3_chrRNA_ES_2dDox_dTag52h.genome2.sorted.bam /
                                            cC3_RPM_table_filt$cC3_chrRNA_ES_2dDox_un.genome2.sorted.bam),
                                     log2(cC3_RPM_table_filt$cC3_chrRNA_ES_6dDox_dTag52h.genome1.sorted.bam /
                                            cC3_RPM_table_filt$cC3_chrRNA_ES_6dDox_un.genome1.sorted.bam),
                                     log2(cC3_RPM_table_filt$cC3_chrRNA_ES_6dDox_dTag52h.genome2.sorted.bam /
                                            cC3_RPM_table_filt$cC3_chrRNA_ES_6dDox_un.genome2.sorted.bam))
colnames(cC3_Allelic_FoldChange)[7:ncol(cC3_Allelic_FoldChange)] <- c("cC3_d0_g1","cC3_d0_g2",
                                                                      "cC3_2d_g1","cC3_2d_g2",
                                                                      "cC3_6d_g1","cC3_6d_g2")
cC3_Allelic_FoldChange_filt <- NaRV.omit(cC3_Allelic_FoldChange)
cC3_Allelic_FoldChange_yy1 <- mutate(cC3_Allelic_FoldChange_filt,
                                     YY1target = case_when(
                                       cC3_Allelic_FoldChange_filt$Geneid %in% GeneTSS_YY1prom[,4] ~ "YY1",
                                       !(cC3_Allelic_FoldChange_filt$Geneid %in% GeneTSS_YY1prom[,4]) ~ "nonYY1")  )
cC3_Allelic_FoldChange_nonfilt_yy1 <- mutate(cC3_Allelic_FoldChange,
                                             YY1target = case_when(
                                               cC3_Allelic_FoldChange$Geneid %in% GeneTSS_YY1prom[,4] ~ "YY1",
                                               !(cC3_Allelic_FoldChange$Geneid %in% GeneTSS_YY1prom[,4]) ~ "nonYY1")  )


cC3_Allelic_FoldChange_melt <- AllelicFoldChangetable.melt(cC3_Allelic_FoldChange_yy1)


# plot boxplots of log2foldchange
#aF1
aF1_log2foldchange <- mutate(aF1_Allelic_FoldChange_melt, treatment = factor(treatment, levels=c("d0","2d","6d"))) %>%
  ggplot(aes(x=treatment,y=log2foldchange,fill=allele) ) + theme_bw() + theme(axis.text.x=element_blank()) +
  geom_boxplot() + 
  scale_y_continuous(expand = c(0, 0), limits = c(-4, 4)) +
  geom_hline(yintercept=0, linetype="dashed") 

#cC3
cC3_log2foldchange <- mutate(cC3_Allelic_FoldChange_melt, treatment = factor(treatment, levels=c("d0","2d","6d"))) %>%
  ggplot(aes(x=treatment,y=log2foldchange,fill=allele) ) + theme_bw() + theme(axis.text.x=element_blank()) +
  geom_boxplot() + 
  scale_y_continuous(expand = c(0, 0), limits = c(-4, 4)) +
  geom_hline(yintercept=0, linetype="dashed") # +  stat_compare_means(method = "t.test",label.y = 3,paired = TRUE) # + facet_grid(cols= vars(YY1target))

grid.arrange(aF1_log2foldchange, cC3_log2foldchange, nrow = 1)

# plot boxplots of log2foldchange by YY1, plot all alleles
aF1_Allelic_FoldChange_melt$alleleYY1 <- interaction(aF1_Allelic_FoldChange_melt$YY1target,aF1_Allelic_FoldChange_melt$allele) 
aF1_log2foldchange_byYY1_all <- mutate(aF1_Allelic_FoldChange_melt, treatment = factor(treatment, levels=c("d0","2d","6d"))) %>%
  ggplot(aes(x=treatment,y=log2foldchange,fill=alleleYY1) ) + theme_bw() + theme(axis.text.x=element_blank()) +
  geom_boxplot() + 
  scale_y_continuous(expand = c(0, 0), limits = c(-4, 4)) +
  geom_hline(yintercept=0, linetype="dashed") 

cC3_Allelic_FoldChange_melt$alleleYY1 <- interaction(cC3_Allelic_FoldChange_melt$YY1target,cC3_Allelic_FoldChange_melt$allele) 
cC3_log2foldchange_byYY1_all <- mutate(cC3_Allelic_FoldChange_melt, treatment = factor(treatment, levels=c("d0","2d","6d"))) %>%
  ggplot(aes(x=treatment,y=log2foldchange,fill=alleleYY1) ) + theme_bw() + theme(axis.text.x=element_blank()) +
  geom_boxplot() + 
  scale_y_continuous(expand = c(0, 0), limits = c(-4, 4)) +
  geom_hline(yintercept=0, linetype="dashed") 

grid.arrange(aF1_log2foldchange_byYY1_all, cC3_log2foldchange_byYY1_all, nrow = 1)

# Plot only shows allele containing _iXist_, but with calculations of p values Wilcoxon rank-sum test
cC3_Xi_pvals <- subset(cC3_Allelic_FoldChange_melt, allele %in% c("g1")) %>%
  mutate(treatment = factor(treatment, levels=c("d0","2d","6d"))) %>%
  ggplot(aes(x=treatment,y=log2foldchange,fill=alleleYY1) ) + 
  theme_bw() + theme(axis.text.x=element_blank()) +
  geom_boxplot() + 
  scale_y_continuous(expand = c(0, 0), limits = c(-4, 4)) +
  geom_hline(yintercept=0, linetype="dashed") + 
  stat_compare_means(method = "wilcox.test", label="p.format",label.y=1)
cC3_Xi_pvals

aF1_Xi_pvals <- subset(aF1_Allelic_FoldChange_melt, allele %in% c("g2")) %>%
  mutate(treatment = factor(treatment, levels=c("d0","2d","6d"))) %>%
  ggplot(aes(x=treatment,y=log2foldchange,fill=alleleYY1) ) + 
  theme_bw() + theme(axis.text.x=element_blank()) +
  geom_boxplot() + 
  scale_y_continuous(expand = c(0, 0), limits = c(-4, 4)) +
  geom_hline(yintercept=0, linetype="dashed") + 
  stat_compare_means(method = "wilcox.test", label="p.format",label.y=1)
aF1_Xi_pvals

# p values for Xa differences with a Wilcoxon rank-sum test
cC3_Xi_pvals <- subset(cC3_Allelic_FoldChange_melt, allele %in% c("g2")) %>%
  mutate(treatment = factor(treatment, levels=c("d0","2d","6d"))) %>%
  ggplot(aes(x=treatment,y=log2foldchange,fill=alleleYY1) ) + 
  theme_bw() + theme(axis.text.x=element_blank()) +
  geom_boxplot() + 
  scale_y_continuous(expand = c(0, 0), limits = c(-4, 4)) +
  geom_hline(yintercept=0, linetype="dashed") + 
  stat_compare_means(method = "wilcox.test", label="p.format",label.y=1)
cC3_Xi_pvals

aF1_Xi_pvals <- subset(aF1_Allelic_FoldChange_melt, allele %in% c("g1")) %>%
  mutate(treatment = factor(treatment, levels=c("d0","2d","6d"))) %>%
  ggplot(aes(x=treatment,y=log2foldchange,fill=alleleYY1) ) + 
  theme_bw() + theme(axis.text.x=element_blank()) +
  geom_boxplot() + 
  scale_y_continuous(expand = c(0, 0), limits = c(-4, 4)) +
  geom_hline(yintercept=0, linetype="dashed") + 
  stat_compare_means(method = "wilcox.test", label="p.format",label.y=1)
aF1_Xi_pvals