Load files produced by first analysis script:

#directory for allelic ATAC-seq analysis
ATAC_analysis_path <- "/path/to/your/ATAC/analysis/folder/"
#paths to input files
path_to_ATAC_AR_table <- paste0(ATAC_analysis_path,"ATAC_NPC_AR_table.txt")
path_to_ATAC_AR_table_merge <- paste0(ATAC_analysis_path,"ATAC_NPC_AR_table_merge.txt")
path_to_NPCbiacc <- paste0(ATAC_analysis_path,"ATAC_NPCbiacc.txt")
ATAC_NPC_AR_table <- read_delim(path_to_ATAC_AR_table, delim = "\t", escape_double = FALSE, trim_ws = TRUE) #input ATAC_NPC table
ATAC_NPC_AR_table_merge <- read_delim(path_to_ATAC_AR_table_merge, delim = "\t", escape_double = FALSE, trim_ws = TRUE) #merged ATAC_NPC table (used later)
ATAC_biaccpeaks <- data.frame(read_csv(path_to_NPCbiacc, col_names = FALSE)) #biallelic peaks in NPC (used later)
colnames(ATAC_biaccpeaks) <- "Peakid"

Set up of model

AR_table_forEDM <- data.frame(ATAC_NPC_AR_table)

#check samples and timepoints
colnames(AR_table_forEDM)
##  [1] "Peakid"       "Chr"          "Start"        "End"          "Strand"      
##  [6] "Length"       "NPC_0d_Rep1"  "NPC_0d_Rep2"  "NPC_0d_Rep3"  "NPC_1d_Rep1" 
## [11] "NPC_1d_Rep2"  "NPC_3d_Rep1"  "NPC_3d_Rep2"  "NPC_6d_Rep1"  "NPC_6d_Rep2" 
## [16] "NPC_17d_Rep1" "NPC_17d_Rep2"
timepoints <- c(0,0,0,1,1,3,3,6,6,17,17)
y_f <- 0.1
y_0 <- 0.4
theta <- 2
TimeFrac <- 0.5 # set to calculate to halftime

#estimate initial parameters from running whole dataset with model
colnames(AR_table_forEDM)[7:ncol(AR_table_forEDM)] <- timepoints
AR_table_forEDM_melt <- melt(AR_table_forEDM, id=c("Peakid","Chr","Start","End","Strand","Length"), stringsAsFactors = FALSE)
colnames(AR_table_forEDM_melt)[7:8] <- c("time", "ratio")
AR_table_forEDM_melt$time <- as.numeric(as.character(AR_table_forEDM_melt$time))
plot(AR_table_forEDM_melt$time,AR_table_forEDM_melt$ratio, ylim=c(0,0.8))
overall_lm <- nlsLM(ratio ~ yf + y0*exp(-time/th),
                    data = AR_table_forEDM_melt[,7:8], start = list(y0 = y_0, yf = y_f, th = theta))
coef(overall_lm)
##        y0        yf        th 
## 0.3274329 0.1443578 5.3073134
y_0 <- coef(overall_lm)[1]
y_f <- coef(overall_lm)[2]
theta <- coef(overall_lm)[3]
curve((y_f + y_0*exp(-x/theta)), add=TRUE, col="red" )

AR_matrix_forEDM <- as.matrix(AR_table_forEDM[7:ncol(AR_table_forEDM)])
par(mfrow=c(1,1))
boxplot.matrix(AR_matrix_forEDM, use.cols=TRUE,
               ylab = 'chrRNA Allelic Ratio (Xi/(Xi+Xa))',
               ylim=c(0,1),
               names = colnames(AR_matrix_forEDM))
abline(h=0.5)

Apply model to individual peak

This loops through each peak in the input table (AR_matrix_forEDM) to fit a simple exponential decay model of the form: \[y = y_f + y_0 \cdot e^{-tk}\] where: allelic ratio = \(y\), time = \(t\), final allelic value = \(y_f\), initial allelic ratio = \(y_0 + y_f\)

The “nlsLM” function from the “minpack.lm” R package is used for model fitting. NAs are produced if it is not possible to fit the model or calculate a halftime. Each is peak is plotted showing the fitted decay curve (solid red line) and halftime (dashed vertical line)

par(mfrow=c(2,4))
OutTableEDM <- data.frame()
data_2D <- data.frame() # for plotting overall data in 2D
res_table <- data.frame() #for testing output of overall fit
EDM_FitValues <- data.frame() #for testing output of overall fit
for (row in 1:nrow(AR_matrix_forEDM)){
  #set up for single Peaks, plot data points 
  Peakid <- AR_table_forEDM[row,1]
  Start <- AR_table_forEDM[row,3]
  End <- AR_table_forEDM[row,3]
  Peak <- as.data.frame(cbind(timepoints,AR_matrix_forEDM[row,]))
  colnames(Peak) <- c('time','ratio')
  ratio <- Peak$ratio
  time <- Peak$time
  #data_2D <- rbind(data_2D,Peak)
  #plot(log(time),ratio) #plot log-log
  plot(time,ratio, ylim=c(0,1),xlim=c(--0.1,17), xlab = Peakid, axes=FALSE,xaxs="i",yaxs="i")
  box(which = "plot", lty = "solid", col="grey27")
  axis(1,c(0,1,3,6,17),tck=-0.01,col="grey27")
  axis(2,c(0,0.25,0.5,0.75,1),tck=-0.01,col="grey27")
  #do model to non-zero asymptote if escapees 
    cc <- try(nlsLM(ratio ~ yf + y0*exp(-time/th),
                    data = Peak, start = list(y0 = y_0, yf = y_f, th = theta)))
    if (is(cc,"try-error")) {
      EDM_start <- NA
      EDM_final <- NA
      EDM_rate <- NA
      ResStError <- NA
      EDM_halftime <- NA
    } 
    else {
      #perform model fitting
      EDMfit <- nlsLM(ratio ~ (yf + y0*exp(-time/th)),
                      data = Peak, start = list(y0 = y_0, yf = y_f, th = theta))
      Peak_y0 <- coef(EDMfit)[1]
      Peak_yf <- coef(EDMfit)[2]
      Peak_th <- coef(EDMfit)[3]
      curve((Peak_yf + Peak_y0*exp(-x/Peak_th)), add=TRUE, col="red" )
      #Compute model information
      residuals <- residuals(EDMfit)
      fitval <- fitted.values(EDMfit)
      res_add <- cbind(fitval,residuals)
      res_table <- rbind(res_table,res_add)
      #points(fitval,residuals)
      ResStError <- sqrt(sum((residuals(EDMfit)*(residuals(EDMfit)))))/2
      #Rate parameter
      EDM_start <- as.numeric(Peak_y0) + as.numeric(Peak_yf)   #initial AR
      abline(h=EDM_start, lty=5)
      abline(h=EDM_start/2, lty=5)
      EDM_final <- as.numeric(Peak_yf)
      abline(h=EDM_final, lty=5)
      EDM_halftime <- -Peak_th*log((TimeFrac*(Peak_y0+Peak_yf)-Peak_yf)/Peak_y0)
      EDM_halftime[is.nan(EDM_halftime)] <- NA
      abline(v = EDM_halftime, lty=5)
      if (EDM_final < 0){EDM_final <- 0} #set final AR to 0 if the asymptote is below zero
      EDM_rate <- as.numeric(Peak_th) #rate constant
      #make NA if slope reversed or AR increases
      if (EDM_rate < 0 | EDM_final > EDM_start){EDM_start <- EDM_final <- EDM_rate <- ResStError <- EDM_halftime <- NA}
      #make everything NA if poor model fit
      #if (ResStError > 0.1){ EDM_start <- EDM_final <- EDM_rate <- ResStError <- EDM_halftime <- NA} 
      #make NA if can't produce halftime
      if (is.na(EDM_halftime) == TRUE | EDM_halftime > 17){EDM_halftime <- NA} 
    }
  EDMinfo <- cbind(Peakid, Start, EDM_start, EDM_final, EDM_rate, EDM_halftime, ResStError)
  #EDMpeaklist <- rbind(EDMpeaklist,peakname,stringsAsFactors=FALSE)
  OutTableEDM <- rbind(OutTableEDM,EDMinfo)
  OutTableEDM[,3:7] <- as.numeric(unlist(OutTableEDM[,3:7]))
  EDM_FitValues <- rbind(EDM_FitValues, fitval)
}

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates
## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates
## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates
## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates
## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates
## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates
## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates
## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates
## Error in nlsModel(formula, mf, start, wts) : 
##   singular gradient matrix at initial parameter estimates

Investigate output of model

#band plot of residuals
par(mfrow=c(1,1))
library(gplots)
bandplot(res_table$fitval,res_table$residuals,cex=0.6)
abline(h=0,col="green") 
#plot vertical line at average alleic ratio d0
abline(v=median(ATAC_NPC_AR_table_merge$NPC_0d),col="green")
abline(v=0.5*(median(ATAC_NPC_AR_table_merge$NPC_0d)),col="green")

#summary values and density of halftimes
summary(OutTableEDM$EDM_halftime)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
##  0.09269  1.69460  3.85830  5.02628  7.69859 16.94135      164
par(mfrow=c(1,1))
plot(density(OutTableEDM$EDM_halftime, na.rm = TRUE,adjust = 0.6), xlim=c(-0.5,17), ylim=c(0,0.2), xaxs="i",yaxs="i") 

Define two binary groups of “depleted” and “persistent” peaks

#include in EDM output table all peaks with fitted halftimes plus biallelic-accessible peaks 
OutTableEDM_subset <- subset(OutTableEDM,
                             !is.na(OutTableEDM$EDM_halftime) | OutTableEDM$Peakid %in% ATAC_biaccpeaks$Peakid )
#set threshold to have equal numbers of depleted and persistent peaks
depleted_threshold <- 5.4
OutTableEDM_persistent <- subset(OutTableEDM_subset, OutTableEDM_subset$EDM_halftime > depleted_threshold | OutTableEDM_subset$Peakid %in% ATAC_biaccpeaks$Peakid)
OutTableEDM_depleted <- subset(OutTableEDM_subset, !OutTableEDM_subset$Peakid %in% OutTableEDM_persistent$Peakid)

#plot this threshold on density plot of halftimes
plot(density(OutTableEDM_subset$EDM_halftime, na.rm = TRUE, adjust = 0.6), xlim=c(-1,18), ylim=c(0,0.2), xaxs="i",yaxs="i") 
abline(v=(depleted_threshold))

#make bed file of "depleted" peaks
depleted_peaks.bed <- ATAC_NPC_AR_table[which(ATAC_NPC_AR_table$Peakid %in% OutTableEDM_depleted$Peakid),]
depleted_peaks.bed <- data.frame(depleted_peaks.bed[2:4],"depleted_peak", OutTableEDM_depleted$EDM_halftime,depleted_peaks.bed$Peakid)

#make bed file of "persistent" peaks
persistent_peaks.bed <- ATAC_NPC_AR_table[which(ATAC_NPC_AR_table$Peakid %in% OutTableEDM_persistent$Peakid),]
persistent_peaks.bed <- data.frame(persistent_peaks.bed[2:4],"persistent_peak", OutTableEDM_persistent$EDM_halftime,persistent_peaks.bed$Peakid)

#bed file  with both annotated
colnames(persistent_peaks.bed)[4] <- colnames(depleted_peaks.bed)[4] <- "label"
colnames(persistent_peaks.bed)[5] <- colnames(depleted_peaks.bed)[5] <- "halftime"
colnames(persistent_peaks.bed)[6] <- colnames(depleted_peaks.bed)[6] <- "Peakid"
all_peaks_EDM.bed <- bind_rows(depleted_peaks.bed,persistent_peaks.bed)

Write outputs to files

#EDM table of all depleted or persistent peaks
write.table(OutTableEDM_subset, row.names = FALSE, col.names = TRUE, quote = FALSE,
            paste0(ATAC_analysis_path,"OutTableEDM_subset.txt"), sep="\t")

#bed file of "depleted" peaks
write.table(depleted_peaks.bed, row.names = FALSE, col.names = FALSE, quote = FALSE,
            paste0(ATAC_analysis_path,"depleted_peaks.bed"), sep="\t")

#bed file of "persistent" peaks
write.table(persistent_peaks.bed, row.names = FALSE, col.names = FALSE, quote = FALSE,
            paste0(ATAC_analysis_path,"persistent_peaks.bed"), sep="\t")

#bed file of of all EDM peaks with labels
write.table(all_peaks_EDM.bed, row.names = FALSE, col.names = FALSE, quote = FALSE,
            paste0(ATAC_analysis_path,"ATAC_peaks_EDM.bed"), sep="\t")