sadII_who_data

2333 days ago by macieksk

%r
library(lattice)

# Keep printed output from wrapping: after every top-level task the callback
# below sets the `width` option to 10000 again.
.adjustWidth <- function(...) {
  options(width = 10000)
  TRUE
}
.adjustWidthCallBack <- addTaskCallback(.adjustWidth)
       
# Download the WHO data set from the web into ./data/who.csv.
# The target directory is created first (no-op when it already exists) so the
# download does not fail on a fresh working directory.
dir.create("./data", showWarnings = FALSE, recursive = TRUE)
download.file(
  url = "http://www.exploredata.net/ftp/WHO.csv",
  destfile = "./data/who.csv",
  method = "wget"
)
       
--2012-02-26 22:26:43--  http://www.exploredata.net/ftp/WHO.csv
Resolving www.exploredata.net... 212.143.17.175
Connecting to www.exploredata.net|212.143.17.175|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 260096 (254K) [application/octet-stream]
Saving to: `./data/who.csv'

 0% [                                       ] 0           --.-K/s   
42% [===============>                       ] 110,209      528K/s
86% [================================>      ] 224,897      531K/s
100%[======================================>] 260,096      511K/s
in 0.5s    

2012-02-26 22:26:45 (511 KB/s) - `./data/who.csv' saved
[260096/260096]
# List the contents of the data directory to confirm the download.
list.files("./data")
       
[1] "who.csv"
# Load the downloaded CSV into a data frame.
who.data <- read.csv(file = "./data/who.csv")
       
# Preview the first rows of the first ten columns.
# (In the notebook: click the left edge of the result to switch to a more
# readable display mode.)
head(who.data[, seq_len(10)])
       
              Country CountryID Continent
Adolescent.fertility.rate.... Adult.literacy.rate....
Gross.national.income.per.capita..PPP.international...
Net.primary.school.enrolment.ratio.female....
Net.primary.school.enrolment.ratio.male....
Population..in.thousands..total Population.annual.growth.rate....
1         Afghanistan         1         1                          
151                    28.0                                         
NA                                            NA                    
NA                           26088                               4.0
2             Albania         2         2                           
27                    98.7                                          
6000                                            93                  
94                            3172                               0.6
3             Algeria         3         3                           
6                    69.9                                           
5940                                            94                  
96                           33351                               1.5
4             Andorra         4         2                           
NA                      NA                                          
NA                                            83                    
83                              74                               1.0
5              Angola         5         3                          
146                    67.4                                         
3890                                            49                  
51                           16557                               2.8
6 Antigua and Barbuda         6         4                           
NA                      NA                                          
15130                                            NA                 
NA                              84                               1.3
# Clean up the ugly column names.
# less.dots() replaces each pair of consecutive dots with a single dot.
# Run ?gsub to find out what gsub does.
less.dots <- function(name.vector) {
  gsub("..", ".", name.vector, fixed = TRUE)
}

# One pass only halves a run of dots, so repeat until the names stop changing.
# The pass number is printed before each rewrite.
i <- 1
repeat {
  shortened <- less.dots(colnames(who.data))
  if (all(colnames(who.data) == shortened)) {
    break
  }
  print(i)
  i <- i + 1
  colnames(who.data) <- shortened
}
       
[1] 1
[1] 2
[1] 3
# Show the column names of the data frame.
names(who.data)
       
# Number of rows and columns of the data set.
c(nrow(who.data), ncol(who.data))
# summary(who.data)
       
[1] 202 358
# Histogram of gross national income per capita, shown as counts in 50 bins.
# Variant with lattice defaults:
# histogram(~ Gross.national.income.per.capita.PPP.international., data = who.data)
histogram(
  ~ Gross.national.income.per.capita.PPP.international.,
  data = who.data,
  type = "count",
  breaks = 50
)
dev.off()
       
null device 
          1 
# Income histogram with one panel per continent code.
png(height = 500, width = 1000)
histogram(
  ~ Gross.national.income.per.capita.PPP.international. | factor(Continent),
  data = who.data,
  breaks = 10
)
dev.off()
       
null device 
          1 
# Population histogram per continent code, drawn on a log10 x axis.
png(height = 500, width = 1000)
# Variant on a linear axis:
# histogram(~ Population.in.thousands.total | factor(Continent), data = who.data)
histogram(
  ~ Population.in.thousands.total | factor(Continent),
  data = who.data,
  breaks = 15,
  scales = list(x = list(log = 10))
)
dev.off()
       
null device 
          1 
# Difference between data of type "numeric" and a factor: the same column is
# previewed and summarised first as stored, then converted with factor().
print("------numeric---")
head(who.data[["Continent"]])
summary(who.data[["Continent"]])

print("------factor---")
head(factor(who.data[["Continent"]]))
summary(factor(who.data[["Continent"]]))
       
[1] "------numeric---"
[1] 1 2 3 2 3 4
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  1.000   2.000   3.000   3.579   5.000   7.000 
[1] "------factor---"
[1] 1 2 3 2 3 4
Levels: 1 2 3 4 5 6 7
 1  2  3  4  5  6  7 
21 51 48  7 31 35  9 
# Labelled copy of the continent code. Only the second level gets a real name
# ("Europe"); the remaining levels are single-letter placeholders.
who.data$Continent.name <- factor(
  who.data$Continent,
  labels = c("A", "Europe", "B", "C", "D", "E", "F")
)
summary(who.data$Continent.name)
       
     A Europe      B      C      D      E      F 
    21     51     48      7     31     35      9 
# Find the columns whose name mentions fertility.
colnames(who.data)[grepl("fertility", colnames(who.data))]
       
[1] "Adolescent.fertility.rate."     
"Total.fertility.rate.per.woman." "Adolescent_fertility_rate"      
# Scatter plot of total fertility rate against income per capita, one colour
# per continent, with a log10 x axis and a log2 y axis.
png(width = 800, height = 800)
xyplot(
  Total.fertility.rate.per.woman. ~
    Gross.national.income.per.capita.PPP.international.,
  data = who.data,
  # Spelled out in full: the original `group =` only worked through partial
  # argument matching.
  groups = Continent.name,
  auto.key = TRUE,
  scales = list(x = list(log = 10), y = list(log = 2))
)
dev.off()
       
null device 
          1 
#Narysuj ten wykres w rozbiciu na kontynenty 
       
%html Przeczytaj <a href="http://www.ats.ucla.edu/stat/r/faq/overlay_trellis.htm">przyklad</a> i dodaj krzywe regresji do powyzszych wykresow 
       
Przeczytaj przyklad i dodaj krzywe regresji do powyzszych wykresow
# # Jakie proponujesz rozwiazanie problemu przeludnienia na Ziemi? # # Jakich danych,analiz brakuje by ocenic sensownosc Twojego rozwiazania? # ( <?-- --?> ) 
       
# Histogram of 8th-grade maths achievement, shown as counts in 20 bins.
png(width = 1000, height = 500)
histogram(
  ~ Math_achievement_8th_grade,
  data = who.data,
  breaks = 20,
  # Written out in full (was the abbreviation "c") to match the income
  # histogram and avoid relying on partial matching of the type name.
  type = "count"
)
dev.off()
       
#Podreczniki do pakietu lattice: # http://lmdvr.r-forge.r-project.org/figures/figures.html # http://cran.r-project.org/web/packages/lattice/index.html 
       
%html Przykład ze strony <a href="http://lmdvr.r-forge.r-project.org/figures/figures.html">http://lmdvr.r-forge.r-project.org/figures/figures.html</a> 
       
# Draw the world outline and keep the returned map object for later use.
# install.packages("maps")
library("maps")
png(height = 1000, width = 1000)
w.map <- map(database = "world", fill = FALSE, plot = TRUE)
dev.off()
       
# Draw a single country. try() keeps the notebook going when the lookup
# fails; class(p.map) then shows whether a map or an error came back.
# `silent = TRUE` replaces `silent = T`, because `T` is an ordinary variable
# that can be reassigned.
p.map <- try(map("world", "Poland", plot = TRUE, fill = FALSE), silent = TRUE)
class(p.map)
dev.off()
       
# Inspect the range component of the map object.
p.map[["range"]]
       
# Approximate a map position for every country: the midpoint of the `range`
# of its outline in the "world" database. Countries that map() cannot resolve
# get NA coordinates.
#
# Result: data frame with one row per row of who.data and two numeric columns,
# mp.x and mp.y.
#
# Changes from the previous version:
#  * vapply() builds a numeric matrix, so the columns are plain numerics
#    rather than list columns (rbind() of lists gives a list matrix);
#  * country names are converted with as.character() so a factor column is
#    not handed to map() as-is;
#  * the result of map() is checked with inherits() instead of class() ==;
#  * the unused `idx` lookup is gone;
#  * an empty input gives a 0-row frame that still has both columns.
who.map.data <- as.data.frame(t(vapply(
  as.character(who.data$Country),
  function(cntry) {
    mp <- tryCatch(
      map("world", cntry, plot = FALSE, fill = FALSE),
      error = function(e) NULL
    )
    if (!inherits(mp, "map")) {
      return(c(mp.x = NA_real_, mp.y = NA_real_))
    }
    c(mp.x = mean(mp$range[1:2]), mp.y = mean(mp$range[3:4]))
  },
  FUN.VALUE = c(mp.x = 0, mp.y = 0),
  USE.NAMES = FALSE
)))
head(who.map.data)
       
# Plot the country positions on top of the world outline, coloured by
# continent. Axis limits and aspect ratio are taken from the map's range.
png(width = 800, height = 800)
library(lattice)
xyplot(
  mp.y ~ mp.x,
  data = cbind(who.data, who.map.data),
  # Spelled out in full: the original `group =` only worked through partial
  # argument matching.
  groups = Continent.name,
  panel = function(...) {
    # Grey outline first, then the country points on top of it.
    panel.xyplot(w.map$x, w.map$y, type = "l", pch = 1, col = "grey76")
    panel.xyplot(...)
  },
  auto.key = TRUE,
  xlim = w.map$range[1:2],
  ylim = w.map$range[3:4],
  xlab = NULL,
  ylab = NULL,
  # NOTE(review): zlab and box.3d look like leftovers from a 3D example;
  # kept unchanged -- confirm whether they can be dropped.
  zlab = NULL,
  aspect = diff(w.map$range[3:4]) / diff(w.map$range[1:2]),
  par.settings = list(
    axis.line = list(col = "transparent"),
    box.3d = list(col = "transparent", alpha = 0)
  )
)
dev.off()
       
#############333 
       
# Pick out the columns that hold numeric data; num.cols ends up as a character
# vector of their names.
# vapply() always returns a logical vector (also for a frame without
# columns), and is.numeric is applied to the columns directly instead of
# looking each one up again by name.
num.cols <- colnames(who.data)[vapply(who.data, is.numeric, logical(1))]
       
# First PCA attempt on all numeric columns. It stops with
# "infinite or missing values in 'x'"; the cells that follow look into the
# missing values.
pc <- prcomp(x = who.data[, num.cols])
       
Error in svd(x, nu = 0) : infinite or missing values in 'x'
# Numeric-only subset of the data.
who.ndata <- who.data[, num.cols]
       
# Fraction of missing values in each numeric column (unnamed vector, in the
# same order as num.cols).
mean.na <- unname(colMeans(is.na(who.data[, num.cols])))

# Columns in which more than 80% of the values are missing.
num.cols[mean.na > 0.80]
       
 [1]
"Antiretroviral.therapy.coverage.among.HIV.infected.pregt.women.for.\
PMTCT."     
"Children.aged.6.59.months.who.received.vitamin.A.supplementation." 
"Children.aged.lt.5.years.with.ARI.symptoms.taken.to.facility."     
"Children.aged.lt.5.years.with.diarrhoea.receiving.ORT."            
"Community.and.traditional.health.workers.density.per.10.000.populat\
ion."       
"Environment.and.public.health.workers.density.per.10.000.population\
."          
"Laboratory.health.workers.density.per.10.000.population."          
"Prevalence.of.condom.use.by.young.people.15.24.years.at.higher.risk\
.sex.female"
"Prevalence.of.condom.use.by.young.people.15.24.years.at.higher.risk\
.sex.male"   "Aid_given"                                            
"Coal_production"                                                   
"Coal_production_per_person"                                        
"Math_achievement_4th_grade"                                        
"Math_achievement_8th_grade"                                        
"Nuclear_consumption"                                               
"Nuclear_consumption_per_person"                                    
# PCA on the better-populated numeric columns, dropping incomplete rows.
#
# Two fixes relative to the previous call, which still failed with
# "infinite or missing values in 'x'":
#  * prcomp() only honours `na.action` in its formula method; the data-frame
#    call ignored it, so the formula interface is used here.
#  * the column filter was `mean.na > 0.50`, which kept exactly the columns
#    with the MOST missing values; it now keeps columns in which at most half
#    of the values are missing.
# NOTE(review): assumes the intent was to keep well-populated columns --
# confirm. If no row is complete across the kept columns the PCA still
# cannot run; lower the 0.50 threshold in that case.
pc <- prcomp(
  ~ .,
  data = who.data[, num.cols[mean.na <= 0.50], drop = FALSE],
  na.action = na.omit
)
       
Error in svd(x, nu = 0) : infinite or missing values in 'x'