sadII_lab13_mixedmodels_EM

2247 days ago by macieksk

library(flexmix) # http://cran.r-project.org/web/packages/flexmix/vignettes/flexmix-intro.pdf # http://en.wikibooks.org/wiki/Data_Mining_Algorithms_In_R/Clustering/Expectation_Maximization_(EM) 
       
Loading required package: lattice
Loading required package: modeltools
Loading required package: stats4
Loading required package: multcomp
Loading required package: mvtnorm
Loading required package: survival
Loading required package: splines
data("NPreg") head(NPreg) 
       
         x        yn yp yb class id1 id2
1 4.176633 22.380379  4  0     1   1   1
2 1.201631  5.111575  3  0     1   1   1
3 2.295006 13.251058  9  0     1   2   1
4 5.965868 30.285240  3  1     1   2   1
5 2.358083 14.764508  4  0     1   3   2
6 7.637061 42.833760  1  1     1   3   2
png(w=1200) xyplot(yn+yp+yb~x,data=NPreg,groups=class, scale=list(y="free")) dev.off() 
       
null device 
          1 
m1 <- flexmix(yn ~ x + I(x^2), data = NPreg,k=2) m1 
       
Call:
flexmix(formula = yn ~ x + I(x^2), data = NPreg, k = 2)

Cluster sizes:
  1   2 
100 100 

convergence after 15 iterations
summary(m1) 
       
Call:
flexmix(formula = yn ~ x + I(x^2), data = NPreg, k = 2)

       prior size post>0 ratio
Comp.1 0.506  100    141 0.709
Comp.2 0.494  100    145 0.690

'log Lik.' -642.5453 (df=9)
AIC: 1303.091   BIC: 1332.775 
names(attributes(m1)) 
       
 [1] "group"       "formula"     "posterior"   "weights"     "iter" 
"cluster"    
 [7] "logLik"      "df"          "control"     "size"       
"converged"   "k0"         
[13] "model"       "prior"       "components"  "concomitant" "call" 
"k"          
[19] "class"      
m1@components 
       
$Comp.1
$Comp.1[[1]]
$coef
(Intercept)           x      I(x^2) 
 14.7169997   9.8467881  -0.9683692 

$sigma
[1] 3.479486



$Comp.2
$Comp.2[[1]]
$coef
(Intercept)           x      I(x^2) 
 -0.2097691   4.8179839   0.0361377 

$sigma
[1] 3.476714
xyplot(yn~x,data=NPreg,groups=m1@cluster) xyplot(yn~x,data=NPreg,groups=cut(m1@posterior$scaled[,1],5),auto.key=TRUE) dev.off() 
       
null device 
          1 

# Sprawdzamy statystyczna istotnosc parametrow rm1 <- refit(m1) summary(rm1) 
       
$Comp.1
            Estimate Std. Error z value  Pr(>|z|)    
(Intercept) 14.71317    1.32377  11.115 < 2.2e-16 ***
x            9.84833    0.59158  16.647 < 2.2e-16 ***
I(x^2)      -0.96852    0.05526 -17.526 < 2.2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 

$Comp.2
             Estimate Std. Error z value Pr(>|z|)    
(Intercept) -0.208527   1.009206 -0.2066   0.8363    
x            4.814469   0.509610  9.4474   <2e-16 ***
I(x^2)       0.036540   0.049755  0.7344   0.4627    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 
# Teraz inny model rozproszenia - poisson m1p <- flexmix(yp ~ x, data = NPreg, k = 2, model = FLXMRglm(family = "poisson")) summary(m1p) 
       
Call:
flexmix(formula = yp ~ x, data = NPreg, k = 2, model =
FLXMRglm(family = "poisson"))

       prior size post>0 ratio
Comp.1 0.468   88    200 0.440
Comp.2 0.532  112    197 0.569

'log Lik.' -440.6426 (df=5)
AIC: 891.2853   BIC: 907.7769 
lapply(1:2,function(com) parameters(m1p, component = com)) 
       
[[1]]
                     Comp.1
coef.(Intercept) 1.24857482
coef.x           0.06390134

[[2]]
                    Comp.2
coef.(Intercept)  1.870674
coef.x           -0.164047
# Narysuj wynik jakosci dopasowania modelu Poissona 
       
############## Grzebiemy w silniku EM # http://cran.r-project.org/web/packages/flexmix/vignettes/flexmix-intro.pdf # str. 13 
       
mymclust <- function ( formula = .~. , diagonal = TRUE ) { require ("mvtnorm") retval <- new ("FLXMC", weighted = TRUE ,formula = formula , dist = "mvnorm", name = "my model - based clustering") retval@defineComponent <- expression ({ logLik <- function (x , y) { #Multivariate Normal distribution density dmvnorm (y , mean = center , sigma = cov , log = TRUE ) } predict <- function (x) { matrix ( center , nrow = nrow (x), ncol = length ( center ), byrow = TRUE ) } new ("FLXcomponent", parameters = list ( center = center , cov = cov ), df = df , logLik = logLik , predict = predict ) }) retval@fit <- function (x , y , w , ...) { para <- cov.wt (y , wt = w )[ c ("center", "cov")] #Weighted covariance df <- (3 * ncol (y) + ncol (y )^2)/2 if ( diagonal ) { para$cov <- diag ( diag ( para$cov )) df <- 2 * ncol (y) } with ( para , eval ( retval@defineComponent )) } retval } 
       
?dmvnorm 
       
Mvnorm                 package:mvtnorm                 R
Documentation

_M_u_l_t_i_v_a_r_i_a_t_e _N_o_r_m_a_l
_D_e_n_s_i_t_y _a_n_d _R_a_n_d_o_m
_D_e_v_i_a_t_e_s

_D_e_s_c_r_i_p_t_i_o_n:

     These functions provide the density function and a random
number
     generator for the multivariate normal distribution with mean
equal
     to ‘mean’ and covariance matrix ‘sigma’.

_U_s_a_g_e:

     dmvnorm(x, mean, sigma, log=FALSE)
     rmvnorm(n, mean = rep(0, nrow(sigma)), sigma =
diag(length(mean)),
             method=c("eigen", "svd", "chol"))
     
_A_r_g_u_m_e_n_t_s:

       x: Vector or matrix of quantiles. If ‘x’ is a matrix, each
row
          is taken to be a quantile.

       n: Number of observations.

    mean: Mean vector, default is ‘rep(0, length = ncol(x))’.

   sigma: Covariance matrix, default is ‘diag(ncol(x))’.

     log: Logical; if ‘TRUE’, densities d are given as log(d).

  method: Matrix decomposition used to determine the matrix root of
          ‘sigma’, possible methods are eigenvalue decomposition
          (‘"eigen"’, default), singular value decomposition
(‘"svd"’),
          and Cholesky decomposition (‘"chol"’).

_A_u_t_h_o_r(_s):

     Friedrich Leisch and Fabian Scheipl

_S_e_e _A_l_s_o:

     ‘pmvnorm’, ‘rnorm’, ‘qmvnorm’

_E_x_a_m_p_l_e_s:

     dmvnorm(x=c(0,0))
     dmvnorm(x=c(0,0), mean=c(1,1))
     
     sigma <- matrix(c(4,2,2,3), ncol=2)
     x <- rmvnorm(n=500, mean=c(1,2), sigma=sigma)
     colMeans(x)
     var(x)
     
     x <- rmvnorm(n=500, mean=c(1,2), sigma=sigma, method="chol")
     colMeans(x)
     var(x)
     
     plot(x)
     
data("Nclus") Nclus<-as.data.frame(Nclus) xyplot(V1~V2,data=Nclus) dev.off() 
       
null device 
          1 
m2 <- flexmix(as.matrix(Nclus) ~ 1, k = 4, # Modelujemy dane jako zbior 4ech klastrow o stalych centrach data=Nclus, model = mymclust(diagonal=TRUE)) # Diagonal==TRUE --> model rozproszen jest symetryczny wokol centrow m2 summary(m2) 
       
Warning message:
closing unused connection 4
(/srv/nbuser/.sage//temp/sage/18269//interface//tmp18270) 

Call:
flexmix(formula = as.matrix(Nclus) ~ 1, data = Nclus, k = 4, model =
mymclust(diagonal = TRUE))

Cluster sizes:
  1   2   3   4 
213 149  92  96 

convergence after 175 iterations

Call:
flexmix(formula = as.matrix(Nclus) ~ 1, data = Nclus, k = 4, model =
mymclust(diagonal = TRUE))

       prior size post>0 ratio
Comp.1 0.397  213    488 0.436
Comp.2 0.269  149    174 0.856
Comp.3 0.159   92    165 0.558
Comp.4 0.175   96    172 0.558

'log Lik.' -2447.299 (df=19)
AIC: 4932.599   BIC: 5014.487 
xyplot(V2~V1,data=Nclus,groups=m2@cluster) dev.off() 
       
null device 
          1 
m3 <- flexmix(as.matrix(Nclus) ~ 1, k = 4, # Modelujemy dane jako zbior 4ech klastrow o stalych centrach data=Nclus, model = mymclust(diagonal=FALSE)) summary(m3) 
       
Call:
flexmix(formula = as.matrix(Nclus) ~ 1, data = Nclus, k = 4, model =
mymclust(diagonal = FALSE))

       prior size post>0 ratio
Comp.1 0.177   96    132 0.727
Comp.2 0.368  204    247 0.826
Comp.3 0.182  100    112 0.893
Comp.4 0.272  150    176 0.852

'log Lik.' -2223.677 (df=23)
AIC: 4493.355   BIC: 4592.483 
xyplot(V2 ~ V1, data=Nclus, groups=m3@cluster) dev.off() 
       
null device 
          1 
# Zadanie: Przerob funkcje mymclust tak aby dopasowywala model rozproszenia z rozkladu t-studenta (funkcja gestosci: dmvt) # *: niech df rozkladu t-studenta takze dopasowuje sie podczas ewaluacji modelu. - sprawdz jaki to ma wplyw na wyniki # Podpowiedz: do estymacji parametrow chmury wielowymiarowej rozkladu t (zamiast "cov.wt") uzyj funkcji library(sn) ?mst.mle 
       
WARNING: Output truncated!  
full_output.txt







mst.mle                   package:sn                   R
Documentation

_M_a_x_i_m_u_m _l_i_k_e_l_i_h_o_o_d
_e_s_t_i_m_a_t_i_o_n _f_o_r _a
(_m_u_l_t_i_v_a_r_i_a_t_e) _s_k_e_w-_t
_d_i_s_t_r_i_b_u_t_i_o_n

_D_e_s_c_r_i_p_t_i_o_n:

     Fits a skew-t (ST) or multivariate skew-t (MST) distribution to
     data, or fits a linear regression model with (multivariate)
skew-t
     errors, using maximum likelihood estimation.

_U_s_a_g_e:

     mst.mle(X, y, freq, start, fixed.df=NA, trace=FALSE,
        algorithm = c("nlminb","Nelder-Mead", "BFGS", "CG", "SANN"),
control=list())
     st.mle(X, y, freq, start, fixed.df=NA, trace=FALSE,
        algorithm = c("nlminb","Nelder-Mead", "BFGS", "CG", "SANN"),
control=list())
     
_A_r_g_u_m_e_n_t_s:

       y: a matrix (for ‘mst.mle’) or a vector (for ‘st.mle’).  If
‘y’
          is a matrix, rows refer to observations, and columns to
          components of the multivariate distribution.

       X: a matrix of covariate values.  If missing, a one-column
          matrix of 1's is created; otherwise, it must have the same
          number of rows of ‘y’.  If ‘X’ is supplied, then it must
          include a column of 1's.

    freq: a vector of weights. If missing, a vector of 1's is
created;
          otherwise it must have length equal to the number of rows
of
          ‘y’.

   start: for ‘mst.mle’, a list contaning the components
          ‘beta’,‘Omega’, ‘alpha’, ‘df’ of the type described below;
          for ‘st.mle’, a vector whose components contain analogous
          ingredients as before, with the exception that the scale
          parameter is the square root of ‘Omega’.  In both cases,
the
          ‘dp’ component of the returned list from a previous call
has
          the required format and it can be used as a new ‘start’.
If
          the ‘start’ parameter is missing, initial values are
selected
          by the function.

fixed.df: a scalar value containing the degrees of freedom (df), if
          these must be taked as fixed, or ‘NA’ (default value) if
‘df’
          is a parameter to be estimated.

   trace: logical value which controls printing of the algorithm
          convergence. If ‘trace=TRUE’, details are printed. Default
          value is ‘FALSE’.

algorithm: a character string which selects the numerical
optimization
          procedure used to maximize the loglikelihood function. If
          this string is set equal to ‘"nlminb"’, then this function
is
          called; in all other cases, ‘optim’ is called, with
‘method’
          set equal to the given string. Default value is
‘"nlminb"’.

...

    call: a string containing the calling statement.

      dp: for ‘mst.mle’, this is a list containing the direct
          parameters ‘beta’, ‘Omega’, ‘alpha’. Here, ‘beta’ is a
matrix
          of regression coefficients with
          ‘dim(beta)=c(ncol(X),ncol(y))’, ‘Omega’ is a covariance
          matrix of order ‘ncol(y)’, ‘alpha’ is a vector of shape
          parameters of length ‘ncol(y)’.  For ‘st.mle’, ‘dp’ is a
          vector of length ‘ncol(X)+3’, containing ‘c(beta, omega,
          alpha, df)’, where ‘omega’ is the square root of ‘Omega’.

      se: a list containing the components ‘beta’, ‘alpha’, ‘info’.
          Here, ‘beta’ and ‘alpha’ are the standard errors for the
          corresponding point estimates; ‘info’ is the observed
          information matrix for the working parameter, as explained
          below.

algorithm: the list returned by the chose optimizer, either ‘nlminb’
or
          ‘optim’, plus an item with the ‘name’ of the selected
          algorithm; see the documentation of either ‘nlminb’ or
          ‘optim’ for explanation of the other components.

_B_a_c_k_g_r_o_u_n_d:

     The family of multivariate skew-t distributions is an extension
of
     the multivariate Student's t family, via the introduction of a
     ‘shape’ parameter which regulates skewness; when ‘shape=0’, the
     skew-t distribution reduces to the usual t distribution.  When
     ‘df=Inf’ the distribution reduces to the multivariate
skew-normal
     one; see ‘dmsn’. See the reference below for additional
     information.

_R_e_f_e_r_e_n_c_e_s:

     Azzalini, A. and Capitanio, A. (2003).  Distributions generated
by
     perturbation of symmetry with emphasis on a multivariate skew
_t_
     distribution.  The full version of the paper published in
abriged
     form in _J.Roy. Statist. Soc. B_ *65*, 367-389, is available at
     <URL: http://azzalini.stat.unipd.it/SN/se-ext.ps>

_S_e_e _A_l_s_o:

     ‘dmst’,‘msn.mle’,‘mst.fit’, ‘nlminb’, ‘optim’

_E_x_a_m_p_l_e_s:

     data(ais, package="sn")
     attach(ais)
     X.mat <- model.matrix(~lbm+sex)
     b <- sn.mle(X.mat, bmi)
     # 
     b <- mst.mle(y=cbind(Ht,Wt))
     #
     # a multivariate regression case:
     a <- mst.mle(X=cbind(1,Ht,Wt), y=bmi,
control=list(x.tol=1e-6))
     #
     # refine the previous outcome
     a1 <- mst.mle(X=cbind(1,Ht,Wt), y=bmi,
control=list(x.tol=1e-9), start=a$dp)
     
# Wykonajcie cwiczenia z #http://cran.r-project.org/web/packages/flexmix/vignettes/regression-examples.pdf # Beta blockers rozdzial 3.2, mozna przeczytac 3.1 dla wprowadzenia