Load required files for analysis

ATAC_analysis_path <- "/path/to/your/ATAC/analysis/folder/"
path_to_ATAC_data <- paste0(ATAC_analysis_path, "ATAC_ftCounts_chrX1")

Load packages and functions

# Packages
library(readr)
library(stringr)
library(ggplot2)
library(ggpubr) 
library(ggridges)
library(stringr)
library(dplyr)
library(forcats)
library(minpack.lm)
library(mefa4)
library(reshape)
library(gridExtra)

# Functions
Allelic.Filter <- function(counts_table,cutoff){
  counts_table_g1 <- counts_table[,seq(8,ncol(counts_table),3)]
  counts_table_g2 <- counts_table[,seq(9,ncol(counts_table),3)]
  counts_table_g1g2 <- counts_table_g1 + counts_table_g2
  
  counts_table_g1g2_combinedtable <- cbind(counts_table[1:5],counts_table_g1g2)
  for (i in c(6:ncol(counts_table_g1g2_combinedtable))){
    counts_table_g1g2_combinedtable <- subset(counts_table_g1g2_combinedtable,
                                              counts_table_g1g2_combinedtable[i] >= cutoff)}
  counts_table_filter <- counts_table[
    which(counts_table$Start %in% counts_table_g1g2_combinedtable$Start), ]
  rm(counts_table_g1,counts_table_g2,counts_table_g1g2,counts_table_g1g2_combinedtable)
  return(counts_table_filter)
}
Allelic.Filter.OverFracSamples <- function(counts_table,cutoff,fraction){
  counts_table_g1 <- counts_table[,seq(8,ncol(counts_table),3)]
  counts_table_g2 <- counts_table[,seq(9,ncol(counts_table),3)]
  counts_g1g2 <- counts_table_g1 + counts_table_g2
  counts_table_filter <- data.frame()
  for (i in c(1:nrow(counts_g1g2))){
    pass <- length((which(counts_g1g2[i,] > 10) == "TRUE"))
    if (pass >= fraction*(ncol(counts_g1g2))){
      counts_table_filter <- rbind(counts_table_filter,counts_table[i,])
    }
  }
  return(counts_table_filter)
}
Allelic.Ratio <- function(g1_counts,g2_counts){
  Xa <- as.numeric(g1_counts)
  Xi <- as.numeric(g2_counts)
  AllelicRatio <- ( Xi / (Xi + Xa))
  return(AllelicRatio)
}
AR.table <- function(counts_table){
  AllelicRatio_table <- counts_table[1:6]
  for (j in seq(8,ncol(counts_table),3)){
    AllelicRatio_table <- cbind(AllelicRatio_table,
                                Allelic.Ratio(as.matrix(counts_table[j]),
                                              as.matrix(counts_table[j+1]) ) )
  }
  return(AllelicRatio_table)
}
extract.samplenames <- function(counts_table){
  column_names <- colnames(counts_table)[seq(7,ncol(counts_table),3)]
  samplenames <- sub(".sorted.bam", "", column_names)
  return(samplenames)
}
initialAR.filter <- function(AR_table,lower,upper){
  AR_table_filter1 <- subset(AR_table,
                             lower <= AR_table[7])
  AR_table_filter2 <- subset(AR_table_filter1,
                             upper >= AR_table_filter1[7])
  return(AR_table_filter2)
}

Read in the counts matrics (the output from running featurecounts over ATAC-seq peak set)

ATAC_NPC_counts_table <- read_delim(path_to_ATAC_data, "\t", escape_double = FALSE, trim_ws = TRUE,skip = 1)
colnames(ATAC_NPC_counts_table) <- c("Peakid", "Chr","Start", "End","Strand","Length",
                                     "NPC_0d_Rep1","NPC_0d_Rep1.genome1","NPC_0d_Rep1.genome2",
                                     "NPC_0d_Rep2","NPC_0d_Rep2.genome1","NPC_0d_Rep2.genome2",
                                     "NPC_0d_Rep3","NPC_0d_Rep3.genome1","NPC_0d_Rep3.genome2",
                                     "NPC_1d_Rep1","NPC_1d_Rep1.genome1","NPC_1d_Rep1.genome2",
                                     "NPC_1d_Rep2","NPC_1d_Rep2.genome1","NPC_1d_Rep2.genome2",
                                     "NPC_3d_Rep1","NPC_3d_Rep1.genome1","NPC_3d_Rep1.genome2",
                                     "NPC_3d_Rep2","NPC_3d_Rep2.genome1","NPC_3d_Rep2.genome2",
                                     "NPC_6d_Rep1","NPC_6d_Rep1.genome1","NPC_6d_Rep1.genome2",
                                     "NPC_6d_Rep2","NPC_6d_Rep2.genome1","NPC_6d_Rep2.genome2",
                                     "NPC_17d_Rep1","NPC_17d_Rep1.genome1","NPC_17d_Rep1.genome2",
                                     "NPC_17d_Rep2","NPC_17d_Rep2.genome1","NPC_17d_Rep2.genome2")
samplenames <- c("NPC_0d_Rep1","NPC_0d_Rep2","NPC_0d_Rep3",
                 "NPC_1d_Rep1","NPC_1d_Rep2",
                 "NPC_3d_Rep1","NPC_3d_Rep2",
                 "NPC_6d_Rep1","NPC_6d_Rep2",
                 "NPC_17d_Rep1","NPC_17d_Rep2")
samples_merged <- c("NPC_0d","NPC_1d","NPC_3d","NPC_6d","NPC_17d")

Apply allelic filters and make allelic ratio table:

#filter to only keep peaks with at least 10 allelic reads in >80% of samples
ATAC_NPC_counts_table_prefilter <- ATAC_NPC_counts_table
ATAC_NPC_counts_table <- Allelic.Filter.OverFracSamples(ATAC_NPC_counts_table,10,0.8)
#calculate allelic ratio for each peak
ATAC_NPC_AR_table <- AR.table(ATAC_NPC_counts_table)
colnames(ATAC_NPC_AR_table)[7:ncol(ATAC_NPC_AR_table)] <- samplenames

#merge d0 replicates to apply "initial allelic ratio filter" and remove constitutive monoallelic peaks, which are likely artifacts of SNP misannotation
ATAC_0h_AR_table_merge0d <- cbind(ATAC_NPC_AR_table[1:6],
                                (1/3)*(ATAC_NPC_AR_table$NPC_0d_Rep1 + ATAC_NPC_AR_table$NPC_0d_Rep2 + ATAC_NPC_AR_table$NPC_0d_Rep3))
ATAC_0h_AR_table_merge0d <- initialAR.filter(ATAC_0h_AR_table_merge0d,0.2,0.8)
ATAC_NPC_AR_table <- ATAC_NPC_AR_table[which(
  ATAC_NPC_AR_table$Peakid %in% ATAC_0h_AR_table_merge0d$Peakid),]
rm(ATAC_0h_AR_table_merge0d)

Plots of Allelic Ratio and agreement between replicates

#simple allelic ratio boxplot for all individual samples
boxplot.matrix(as.matrix(ATAC_NPC_AR_table[7:ncol(ATAC_NPC_AR_table)]), use.cols=TRUE,
               ylab = 'chrRNA Allelic Ratio (Xi/(Xi+Xa))',
               ylim=c(0,1))
abline(h=0.5)

ATAC_NPC_AR_table_merge <- cbind(ATAC_NPC_AR_table[1:6],
                                     (1/3)*(ATAC_NPC_AR_table$NPC_0d_Rep1 + ATAC_NPC_AR_table$NPC_0d_Rep2 + ATAC_NPC_AR_table$NPC_0d_Rep3),
                                     0.5*(ATAC_NPC_AR_table$NPC_1d_Rep1 + ATAC_NPC_AR_table$NPC_1d_Rep2),
                                     0.5*(ATAC_NPC_AR_table$NPC_3d_Rep1 + ATAC_NPC_AR_table$NPC_3d_Rep2),
                                     0.5*(ATAC_NPC_AR_table$NPC_6d_Rep1 + ATAC_NPC_AR_table$NPC_6d_Rep2),
                                     0.5*(ATAC_NPC_AR_table$NPC_17d_Rep1 + ATAC_NPC_AR_table$NPC_17d_Rep2))
colnames(ATAC_NPC_AR_table_merge)[7:ncol(ATAC_NPC_AR_table_merge)] <- samples_merged

ATAC_NPC_AR_table_merge_melt <- melt(ATAC_NPC_AR_table_merge, id=c("Peakid","Chr","Start","End","Strand","Length"), stringsAsFactors = FALSE)
colnames(ATAC_NPC_AR_table_merge_melt)[7:8] <- c("time", "ratio")
ATAC_NPC_AR_table_merge_melt$time <- as.character(ATAC_NPC_AR_table_merge_melt$time)

#allelic ratio boxplot with replicates for each timepoint merged 
data <- ATAC_NPC_AR_table_merge_melt
p <- ggplot(data = data, aes(x=time,y=ratio,group=Peakid) )
p + theme_bw() + theme(axis.text.x=element_blank()) +
  geom_boxplot(aes(x=as.character(time[order(time)]), y=ratio, group=as.character(time))) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) +
  geom_hline(yintercept=0.5, linetype="dashed") 

# plots of correlations between replicates
p1 <- ggplot(ATAC_NPC_AR_table, aes(x=NPC_1d_Rep1, y=NPC_1d_Rep2)) + 
  geom_point(shape=16) + theme_bw() +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1))  + scale_x_continuous(expand = c(0, 0), limits = c(0, 1)) + 
  geom_abline(intercept=0,slope=1, linetype="dashed")
p2 <- ggplot(ATAC_NPC_AR_table, aes(x=NPC_3d_Rep1, y=NPC_3d_Rep2)) + 
  geom_point(shape=16) + theme_bw() +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1))  + scale_x_continuous(expand = c(0, 0), limits = c(0, 1)) + 
  geom_abline(intercept=0,slope=1, linetype="dashed") 
p3 <- ggplot(ATAC_NPC_AR_table, aes(x=NPC_6d_Rep1, y=NPC_6d_Rep2)) + 
  geom_point(shape=16) + theme_bw() +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1))  + scale_x_continuous(expand = c(0, 0), limits = c(0, 1)) + 
  geom_abline(intercept=0,slope=1, linetype="dashed") 
grid.arrange(p1, p2, p3,nrow = 1)

cor(x=ATAC_NPC_AR_table$NPC_1d_Rep1,y=ATAC_NPC_AR_table$NPC_1d_Rep2, method=c("pearson"))
## [1] 0.6593499
cor(x=ATAC_NPC_AR_table$NPC_1d_Rep1,y=ATAC_NPC_AR_table$NPC_1d_Rep2, method=c("spearman"))
## [1] 0.6138063
cor(ATAC_NPC_AR_table$NPC_3d_Rep1,ATAC_NPC_AR_table$NPC_3d_Rep2, method=c("pearson"))
## [1] 0.721086
cor(ATAC_NPC_AR_table$NPC_3d_Rep1,ATAC_NPC_AR_table$NPC_3d_Rep2, method=c("spearman"))
## [1] 0.6862435
cor(ATAC_NPC_AR_table$NPC_6d_Rep1,ATAC_NPC_AR_table$NPC_6d_Rep2, method=c("pearson"))
## [1] 0.6827913
cor(ATAC_NPC_AR_table$NPC_6d_Rep1,ATAC_NPC_AR_table$NPC_6d_Rep2, method=c("spearman"))
## [1] 0.6323546

Define peaks biallelically-accessible in NPCs - by AR > 0.2 in the average of the the two d17 NPC replicates

biaccessible_threshold <- 0.2

ATAC_d17_AR <- ATAC_NPC_AR_table[,c(1:6,16,17)]
biaccessible_peaks <- subset(ATAC_d17_AR, 
                            (1/2)*(ATAC_d17_AR$NPC_17d_Rep1 + ATAC_d17_AR$NPC_17d_Rep2) >= biaccessible_threshold)
ATAC_NPC_biacc <- ATAC_NPC_AR_table[which(ATAC_NPC_AR_table$Peakid %in% biaccessible_peaks$Peakid),]
ATAC_NPC_silenced <- ATAC_NPC_AR_table[which(ATAC_NPC_AR_table$Peakid %notin% ATAC_NPC_biacc$Peakid),]


NPC_scatter <- ggplot(ATAC_NPC_silenced, aes(x=NPC_17d_Rep1, y=NPC_17d_Rep2)) + 
  geom_point(shape=16) + theme_bw() +
  scale_y_continuous(expand = c(0,0), limits = c(0,1)) + scale_x_continuous(expand = c(0,0), limits = c(0,1)) + 
  geom_point(data=ATAC_NPC_biacc, aes(x=NPC_17d_Rep1, y=NPC_17d_Rep2), shape=16, color="darkred") +
  geom_hline(yintercept=biaccessible_threshold, linetype="dashed") + geom_vline(xintercept=biaccessible_threshold, linetype="dashed") +
  geom_abline(intercept=0,slope=1, linetype="dashed") 

NPC_scatter

cor(ATAC_NPC_AR_table$NPC_17d_Rep1,ATAC_NPC_AR_table$NPC_17d_Rep2, method=c("pearson"))
## [1] 0.7811577

Save allelic ratio tables and list of biallelically accessible peaks in NPCs

write.table(ATAC_NPC_biacc$Peakid,
            row.names = FALSE, col.names = FALSE, quote = FALSE,
            paste0(ATAC_analysis_path,"ATAC_NPCbiacc.txt"), sep="\t")
write.table(ATAC_NPC_AR_table,
            row.names = FALSE, col.names = TRUE, quote = FALSE,
            paste0(ATAC_analysis_path,"ATAC_NPC_AR_table.txt"), sep="\t")
write.table(ATAC_NPC_AR_table_merge,
            row.names = FALSE, col.names = TRUE, quote = FALSE,
            paste0(ATAC_analysis_path,"ATAC_NPC_AR_table_merge.txt"), sep="\t")