UCSC_GC_percent_HOWTO

2209 days ago by macieksk

%r library(rtracklayer) session <- browserSession("UCSC") gene.query <- ucscTableQuery(session) tnames<-trackNames(gene.query) #tnames zawiera nazwy wszystkich trackow wraz z krotkimi opisami head(tnames,30) 
       
                5% Lowest S               AceView Genes            
Affy Exon Array 
               "ntSssTop5p"                   "acembly"            
"affyExonArray" 
                 Affy GNF1H                Affy RNA Loc             
Affy U133 
                "affyGnf1h"       "wgEncodeAffyRnaChip"             
"affyU133" 
             Affy U133Plus2                    Affy U95             
All SNPs(132) 
            "affyU133Plus2"                   "affyU95"             
"snp132" 
              All SNPs(135)                 Allen Brain             
Alt Events 
                   "snp135"             "allenBrainAli"             
"knownAlt" 
                     Arrays                    Assembly             
BAC End Pairs 
           "genotypeArrays"                      "gold"             
"bacEndPairs" 
                  BU ORChID              Broad ChromHMM             
Broad Histone 
         "wgEncodeBuOrchid"          "wgEncodeBroadHmm"     
"wgEncodeBroadHistone" 
              Burge RNA-seq                        CCDS             
CD34 DnaseI 
"burgeRnaSeqGemMapperAlign"                  "ccdsGene"             
"eioJcviNAS" 
                  CGAP SAGE                      COSMIC          
CSHL Long RNA-seq 
                 "cgapSage"                    "cosmic"   
"wgEncodeCshlLongRnaSeq" 
            CSHL Sm RNA-seq             Caltech RNA-seq            
Cand. Gene Flow 
  "wgEncodeCshlShortRnaSeq"     "wgEncodeCaltechRnaSeq"             
"ntOoaHaplo" 
            Chromosome Band  Chromosome Band (Ideogram)           
Common SNPs(132) 
                 "cytoBand"              "cytoBandIdeo"             
"snp132Common" 
#Szukamy tracku z GC w opisie (track nie nazywa sie "GC Percent"!!!) tnames[grep("GC",names(tnames))] 
       
   GC Percent     MGC Genes 
    "gc5Base" "mgcFullMrna" 
gene.query <- ucscTableQuery(session, "gc5Base") tableNames(gene.query) 
       
[1] "gc5Base"
GCper<-getTable(gene.query) 
       
Error in .local(object, ...) : tabular output format not available
#Aha, tak nie za dziala, bo dane sa w innym formacie (wig), nie tabelkowym (o takiej mozliwosci jest wspomniane w pomocy) #Ale wystarczy przeczytac ?getTable #...a wlasciwie tylko przyklady z tego helpa, by sciagnac dane w nast sposob: 
       
library(IRanges) #Sciagamy dane tylko dla konkretnego przedzialu z chr12 GC.query <- ucscTableQuery(session, "gc5Base", GRangesForUCSCGenome("hg19", "chr12",IRanges(57795963, 57815592))) GC.query 
       
Get track 'gc5Base' within hg19:chr12:57795963-57815592
GCper<-track(GC.query) #downloading... 
       
class(GCper) GCper 
       
[1] "UCSCData"
attr(,"package")
[1] "rtracklayer"
UCSC track 'GC Percent'
UCSCData with 3925 rows and 1 value column across 1 space
        space               ranges   |     score
     <factor>            <IRanges>   | <numeric>
1       chr12 [57795966, 57795970]   |   39.3701
2       chr12 [57795971, 57795975]   |   59.8425
3       chr12 [57795976, 57795980]   |   39.3701
4       chr12 [57795981, 57795985]   |   79.5276
5       chr12 [57795986, 57795990]   |   39.3701
6       chr12 [57795991, 57795995]   |   59.8425
7       chr12 [57795996, 57796000]   |   19.6850
8       chr12 [57796001, 57796005]   |   79.5276
9       chr12 [57796006, 57796010]   |   59.8425
...       ...                  ... ...       ...
3917    chr12 [57815546, 57815550]   |   59.8425
3918    chr12 [57815551, 57815555]   |   39.3701
3919    chr12 [57815556, 57815560]   |   59.8425
3920    chr12 [57815561, 57815565]   |    0.0000
3921    chr12 [57815566, 57815570]   |   59.8425
3922    chr12 [57815571, 57815575]   |   59.8425
3923    chr12 [57815576, 57815580]   |   39.3701
3924    chr12 [57815581, 57815585]   |   59.8425
3925    chr12 [57815586, 57815590]   |   59.8425
GCper.df<-as.data.frame(GCper) dim(GCper.df) head(GCper.df 
       
[1] 3925    5
  space    start      end width   score
1 chr12 57795966 57795970     5 39.3701
2 chr12 57795971 57795975     5 59.8425
3 chr12 57795976 57795980     5 39.3701
4 chr12 57795981 57795985     5 79.5276
5 chr12 57795986 57795990     5 39.3701
6 chr12 57795991 57795995     5 59.8425
#Podsumowujac - musicie sciagac dane dla kazdego genu/rejonu oddzielnie, gdyz nie da sie sciagnac calej tabeli (zreszta byla by bardzo duza)