Extract subranges that match pattern

extract_matchranges(gr, bsgenome, pattern, plot = FALSE)

Arguments

gr

GRanges-class

bsgenome

BSgenome-class (from the BSgenome package)

pattern

string: search pattern in extended IUPAC alphabet

plot

TRUE or FALSE (default): whether to plot the result

Value

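GRanges-class: the subranges of gr that match pattern. As the examples show, each match carries the metadata columns targetname, targetstart and targetend, which identify the range it was extracted from.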

Examples

# PE example
#------------
require(magrittr)
bsgenome <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
gr <- char_to_granges(c(PRNP = 'chr20:4699600:+',             # snp
                        HBB  = 'chr11:5227002:-',             # snp
                        HEXA = 'chr15:72346580-72346583:-',   # del
                        CFTR = 'chr7:117559593-117559595:+'), # ins
                      bsgenome)
gr %<>% extend_for_pe()
pattern <- strrep('N',20) %>% paste0('NGG')
extract_matchranges(gr, bsgenome, pattern, plot = TRUE)
#> GRanges object with 10 ranges and 3 metadata columns:
#>            seqnames              ranges strand |  targetname targetstart
#>               <Rle>           <IRanges>  <Rle> | <character>   <integer>
#>     CFTR_f     chr7 117559575-117559597      + |        CFTR   117559593
#>     CFTR_r     chr7 117559603-117559625      - |        CFTR   117559593
#>      HBB_f    chr11     5226984-5227006      + |         HBB     5227002
#>      HBB_r    chr11     5227000-5227022      - |         HBB     5227002
#>   HEXA_f_1    chr15   72346551-72346573      + |        HEXA    72346580
#>   HEXA_f_2    chr15   72346558-72346580      + |        HEXA    72346580
#>   PRNP_f_1    chr20     4699568-4699590      + |        PRNP     4699600
#>   PRNP_f_2    chr20     4699569-4699591      + |        PRNP     4699600
#>   PRNP_f_3    chr20     4699575-4699597      + |        PRNP     4699600
#>   PRNP_f_4    chr20     4699578-4699600      + |        PRNP     4699600
#>            targetend
#>            <integer>
#>     CFTR_f 117559595
#>     CFTR_r 117559595
#>      HBB_f   5227002
#>      HBB_r   5227002
#>   HEXA_f_1  72346583
#>   HEXA_f_2  72346583
#>   PRNP_f_1   4699600
#>   PRNP_f_2   4699600
#>   PRNP_f_3   4699600
#>   PRNP_f_4   4699600
#>   -------
#>   seqinfo: 595 sequences (1 circular) from hg38 genome
# TFBS examples
#--------------
bsgenome <- BSgenome.Mmusculus.UCSC.mm10::BSgenome.Mmusculus.UCSC.mm10
bedfile <- system.file('extdata/SRF.bed', package='multicrispr')
gr <- bed_to_granges(bedfile, 'mm10') %>% extend()
#> Read SRF.bed into GRanges
#> 1974 ranges on 21 chromosomes
extract_matchranges(gr, bsgenome, pattern = strrep('N',20) %>% paste0('NGG'))
#> GRanges object with 6616 ranges and 5 metadata columns:
#>                              seqnames            ranges strand |         name
#>                                 <Rle>         <IRanges>  <Rle> |  <character>
#>     chr1:4712628-4712643:-_5     chr1   4712616-4712638      - | SRF_MA0083.3
#>     chr1:4712628-4712643:-_4     chr1   4712617-4712639      - | SRF_MA0083.3
#>     chr1:4712628-4712643:-_3     chr1   4712624-4712646      - | SRF_MA0083.3
#>     chr1:4712628-4712643:-_2     chr1   4712630-4712652      - | SRF_MA0083.3
#>     chr1:4712628-4712643:-_1     chr1   4712631-4712653      - | SRF_MA0083.3
#>                          ...      ...               ...    ... .          ...
#>   chrY:79048176-79048191:-_2     chrY 79048172-79048194      - | SRF_MA0083.3
#>   chrY:79048176-79048191:-_1     chrY 79048179-79048201      - | SRF_MA0083.3
#>   chrY:89126494-89126509:-_3     chrY 89126489-89126511      - | SRF_MA0083.3
#>   chrY:89126494-89126509:-_2     chrY 89126496-89126518      - | SRF_MA0083.3
#>   chrY:89126494-89126509:-_1     chrY 89126497-89126519      - | SRF_MA0083.3
#>                                  score               targetname targetstart
#>                              <numeric>              <character>   <integer>
#>     chr1:4712628-4712643:-_5   10.4954   chr1:4712628-4712643:-     4712628
#>     chr1:4712628-4712643:-_4   10.4954   chr1:4712628-4712643:-     4712628
#>     chr1:4712628-4712643:-_3   10.4954   chr1:4712628-4712643:-     4712628
#>     chr1:4712628-4712643:-_2   10.4954   chr1:4712628-4712643:-     4712628
#>     chr1:4712628-4712643:-_1   10.4954   chr1:4712628-4712643:-     4712628
#>                          ...       ...                      ...         ...
#>   chrY:79048176-79048191:-_2   9.73780 chrY:79048176-79048191:-    79048176
#>   chrY:79048176-79048191:-_1   9.73780 chrY:79048176-79048191:-    79048176
#>   chrY:89126494-89126509:-_3   4.54393 chrY:89126494-89126509:-    89126494
#>   chrY:89126494-89126509:-_2   4.54393 chrY:89126494-89126509:-    89126494
#>   chrY:89126494-89126509:-_1   4.54393 chrY:89126494-89126509:-    89126494
#>                              targetend
#>                              <integer>
#>     chr1:4712628-4712643:-_5   4712643
#>     chr1:4712628-4712643:-_4   4712643
#>     chr1:4712628-4712643:-_3   4712643
#>     chr1:4712628-4712643:-_2   4712643
#>     chr1:4712628-4712643:-_1   4712643
#>                          ...       ...
#>   chrY:79048176-79048191:-_2  79048191
#>   chrY:79048176-79048191:-_1  79048191
#>   chrY:89126494-89126509:-_3  89126509
#>   chrY:89126494-89126509:-_2  89126509
#>   chrY:89126494-89126509:-_1  89126509
#>   -------
#>   seqinfo: 66 sequences (1 circular) from mm10 genome