-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathPBC.R
More file actions
60 lines (49 loc) · 1.88 KB
/
PBC.R
File metadata and controls
60 lines (49 loc) · 1.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
##### ================= usage manual ================= #####
# Rscript PBC.R input.bam out.res.tmp
# arguments:
# 1st: the input bam file.
# 2nd: the output result file. The 1st line is PBC1, and 2nd line is PBC2
### get parameters
if (T) {
args = commandArgs(trailingOnly=TRUE)
input_file = args[1]
output_name = args[2]
}
### define the function of PBC1 and PBC2
if (T) {
PBC <- function(IP) {
# load ChIP sample if necessary
if (is.character(IP)) {
if (!file.exists(IP))
stop(paste("File", IP, "does NOT exist."))
else
aln <- GenomicAlignments::readGAlignments(IP)
} else if (class(IP) == "GAlignments") {
aln <- IP
} else {
stop("IP must be a file path or a GAlignments object.")
}
# convert GAlignments object to data.table for fast aggregation
aln <- data.table::data.table(
strand=as.factor(BiocGenerics::as.vector(GenomicAlignments::strand(aln))),
seqnames=as.factor(BiocGenerics::as.vector(GenomicAlignments::seqnames(aln))),
pos=ifelse(GenomicAlignments::strand(aln) == "+", start(aln), end(aln))
)
# aggregate reads by position and count them
readsPerPosition <- aln[,list(count=.N), by=list(strand, seqnames, pos)]$count
# PBC = positions with exactly 1 read / positions with at least 1 read
PBC1 <- sum(readsPerPosition == 1) / length(readsPerPosition)
PBC2 <- sum(readsPerPosition == 1) / sum(readsPerPosition == 2)
PBC <- c(PBC1,PBC2)
return(PBC)
}
}
### get PBC value result and output the result
if (T) {
results=PBC(input_file)
print(paste0(basename(input_file),": [PBC1:",results[1],"];"," [PBC2:",results[2],"]"))
}
### write output
if (T) {
write.table(results, output_name, quote=F, sep='\t', col.names=F, row.names=F)
}