Bioinformatics Asked by Exhausted on August 22, 2021
There is a function does need an input as selected genomic site across genomic regions. But my genomic region bed file already is a subset of interesting site (like promoters). I want this function to work not complaining about genomic site
However when I leave genomic site says
> prepare_elements_regions_tf <- prepare_elements("regions.bed",
+ window_size = 50000)
Successfully read elements
Error in utils::read.delim(sites_file, stringsAsFactors = FALSE, header = FALSE) :
argument "sites_file" is missing, with no default
The whole function is here
https://github.com/wenmm/ActiveDriverWGS/blob/master/R/ActiveDriverWGS.R
But part of function that nags is this
prepare_elements = function(elements_file, sites_file, window_size = 50000, mc.cores = 1, recovery_dir = NA) {
try_error = "try-error"
element_coords_filename = paste0(recovery_dir, "/ADWGS_elements_coords_recovery_file.rsav")
element_coords_1 = NULL
load_result = suppressWarnings(try(load(element_coords_filename), silent = TRUE))
if (class(load_result) == try_error) {
element_coords_1 = prepare_element_coords_from_BED(elements_file)
if (!is.na(recovery_dir)) {
save(element_coords_1, file = element_coords_filename)
}
cat("Successfully read elementsn")
} else {
cat("Recovered elementsn")
}
element_coords = coords_sanity(element_coords_1)
element_3n_filename = paste0(recovery_dir, "/ADWGS_element_3n_recovery_file.rsav")
element_3n = NULL
load_result = suppressWarnings(try(load(element_3n_filename), silent = TRUE))
if (class(load_result) == try_error) {
element_3n = add_trinucs_by_elements(get_3n(element_coords))
if (!is.na(recovery_dir)) {
save(element_3n, file = element_3n_filename)
}
}
window_coords_filename = paste0(recovery_dir, "/ADWGS_window_coords_recovery_file.rsav")
window_coords = NULL
load_result = suppressWarnings(try(load(window_coords_filename), silent = TRUE))
if (class(load_result) == try_error) {
window_coords = coords_sanity(get_window_coords(element_coords, window_size))
if (!is.na(recovery_dir)) {
save(window_coords, file = window_coords_filename)
}
}
window_3n_filename = paste0(recovery_dir, "/ADWGS_window_3n_recovery_file.rsav")
window_3n = NULL
load_result = suppressWarnings(try(load(window_3n_filename), silent = TRUE))
if (class(load_result) == try_error) {
window_3n = add_trinucs_by_elements(get_3n(window_coords))
if (!is.na(recovery_dir)) {
save(window_3n, file = window_3n_filename)
}
}
raw_site_coords_filename = paste0(recovery_dir, "/ADWGS_raw_site_coords_recovery_file.rsav")
site_coords_filename = paste0(recovery_dir, "/ADWGS_site_coords_recovery_file.rsav")
raw_site_coords = NULL
site_coords = NULL
sites_load_result = suppressWarnings(try(load(site_coords_filename), silent = TRUE))
if (class(sites_load_result) == try_error) {
load_result = suppressWarnings(try(load(raw_site_coords_filename), silent = TRUE))
if (class(load_result) == try_error) {
raw_site_coords = utils::read.delim(sites_file, stringsAsFactors = FALSE, header = FALSE)
colnames(raw_site_coords) = c("chr", "starts", "ends", "site_info")
raw_site_coords$chr = gsub("chr", "", raw_site_coords$chr, ignore.case = TRUE)
raw_site_coords$chr = paste0("chr", raw_site_coords$chr)
if (!is.na(recovery_dir)) {
save(raw_site_coords, file = raw_site_coords_filename)
}
cat("Successfully read active sitesn")
} else {
cat("Recovered active sitesn")
}
if (!is.null(raw_site_coords[[1]])) {
# first only keep sites that overlap with element coordinates
site_gr = GenomicRanges::GRanges(raw_site_coords$chr,
IRanges::IRanges(start=raw_site_coords$starts, end=raw_site_coords$ends))
element_gr = GenomicRanges::GRanges(element_coords$chr,
IRanges::IRanges(start=element_coords$starts, end=element_coords$ends))
rm(raw_site_coords)
overlaps_found = GenomicRanges::findOverlaps(element_gr, site_gr)
site_here_gr = site_gr[S4Vectors::subjectHits(overlaps_found)]
rm(site_gr, element_gr, overlaps_found)
site_coords = do.call(rbind, parallel::mclapply(1:nrow(element_coords),
get_sites_per_element, element_coords, site_here_gr, mc.cores=mc.cores))
rm(site_here_gr)
site_coords = coords_sanity(site_coords)
}
if (!is.na(recovery_dir)) {
save(site_coords, file = site_coords_filename)
}
} else {
cat("Recovered active sitesn")
}
rm(sites_load_result, element_coords)
site_3n_filename = paste0(recovery_dir, "/ADWGS_site_3n_recovery_file.rsav")
site_3n = NULL
load_result = suppressWarnings(try(load(site_3n_filename), silent = TRUE))
if (class(load_result) == try_error) {
site_3n = get_3n(site_coords)
if (!is.na(recovery_dir)) {
save(site_3n, file = site_3n_filename)
}
}
list(element_coords=element_coords_1, element_3n=element_3n, window_coords=window_coords, window_3n=window_3n, site_coords=site_coords, site_3n=site_3n)
}
How I get rid of nagging part?
Think about it this way - your elements_file
already has only the sites of interest and hence does not need to be subject to a subset operation. In other words, if you were to have a sites_file
, it would have the same content as the elements_file
.
Can you see where I am going with this?
Answered by Ram RS on August 22, 2021
Get help from others!
Recent Answers
Recent Questions
© 2024 TransWikia.com. All rights reserved. Sites we Love: PCI Database, UKBizDB, Menu Kuliner, Sharing RPP