#delimit ;
version 6;
clear;
set trace off ;
set more off ;
set memory 4m;
/* Window-forwarding shortcuts are defined on Windows only. On other
   systems both macros stay empty, so later uses expand to nothing. */
if "$S_OS"=="Windows" { ;
    local wmfr window manage forward results ;
    local wmfg window manage forward graph ;
} ;
`wmfr';
/* title banner */
noi di _n(4) in whi
    _col(12) "Regression and so on" _n
    _col(18) "Stanislav Kolenikov" _n
    _col(24) "skolenik@recep.glasnet.ru" _n
    _d(48) "-" _n(2) ;
/* With the semicolon delimiter a star comment extends to the next
   semicolon. The star comments below are therefore terminated
   explicitly, otherwise they would swallow the command that follows. */
* tutorial version: 2.1 ;
global F9 `"tutorial aboutreg;"';
* change the line above if you rename this file ;
/* how to interrupt, continue and restart the tutorial */
noi di in gre _n(2)
    "This tutorial can be discontinued at any time by pressing "
    in wh "Break" in gr " (Ctrl-Break at" _n
    "the keyboard or Break icon at the toolbar). Press "
    in whi "Enter" in gre " or " in whi "Space" in gre
    " when you see " _n in blu "--more--" in gre " message." _n(2)
    "The tutorial can be restarted by pressing" in whi " F9" in gre "."
    _n(4)
    ;
cap pro drop More ;
/* More: pause the tutorial until the user answers the more prompt.
   The program switches more on, issues the pause commands, and then
   switches more off again. Any previous definition is dropped first
   so the file can be re-run. The three star-commented lines at the
   end are a disabled Windows-only wait step.
   NOTE(review): %_pause is run under version 2.1, presumably because
   it is an old internal command -- confirm against the Stata release
   in use. */
pro def More ;
set more on ;
version 2.1 ;
%_pause ;
more ;
set more off ;
* if "$S_OS"=="Windows" { ;
* tut_wait ;
* } ;
end ;
/* SEPAR holds display directives for a white rule 79 dashes wide that
   leaves the colour set to green. It is expanded inside di statements. */
global SEPAR `" _n in whi _d(79) "-" _n in gre"';
More;
/* Section menu. The answer typed by the user is stored in the global
   macro regtut by the _req directive. */
noi di _n $SEPAR in gre
    "In this tutorial we shall discuss some regression diagnostic techniques" _n
    "and some regression remedies such as heteroskedasticity correction and" _n
    "transformation of the dependent variable towards normality." _n(2)
    "If you are continuing or repeating this tutorial, input the number of the" _n
    "part you stopped last time, or just press " in whi "Enter" in gre
    " to continue:" _n(2)
    "1. From the very beginning" _n
    "2. First regression -- naive and primitive OLS" _n
    "3. Regression diagnostics commands" _n
    "4. Tests for heteroskedasticity" _n
    "5. Tests for nonlinearity" _n
    "6. Tests for multicollinearity" _n
    "7. Normality of residuals" _n
    "8. Heteroskedasticiy correction" _n
    "9. Transformation towards normality for the dependent variable" _n(2)
    "Now, please press the number of the section or "
    in whi "Enter" in gre " to continue:" _n(2)
    _req(regtut);
global regtut=ltrim("$regtut");
/* Validate the answer. An empty answer means section 1. Anything that
   is not an integer, or an integer outside 1 to 9, restarts the
   tutorial. Without the range check an answer such as 0 would skip
   the data loading and fail at the first regression. */
if "$regtut"~="" { ;
    cap confirm integer number $regtut ;
    local badsel = _rc ;
    /* the range is tested only once the answer is known to be numeric */
    if `badsel'==0 { ;
        if $regtut<1 | $regtut>9 { ;
            local badsel 1 ;
        } ;
    } ;
    if `badsel'~=0 { ;
        noi di in red "Oh, I am crashing!.." _n in whi "Actually, restarting." _n(3) ;
        More ;
        noi tutorial aboutreg ;
        exit ;
    } ;
} ;
else { ;
    global regtut=1 ;
} ;
/* Section 1: load the auto data and look at the variables that the
   rest of the tutorial works with. */
if "$regtut" == "" | $regtut==1 { ;
    noi di in gre
        "We shall be using Stata's auto data for our purpose to demonstrate what" _n
        "can be done with the real world data." $SEPAR;
    local stadir: sysdir STATA ;
    noi di in whi `". use "`stadir'auto" "';
    push use `"`stadir'auto"' ;
    noi use `"`stadir'auto"' ;
    /* the rest of the script refers to this variable as hdroom */
    cap rename headroom hdroom;
    More;
    noi di $SEPAR in gre
        "Now, we want to investigate how the price of a car is determined. Our" _n
        "basic hypothesis (in economic sense of the word, rather than in terms of" _n
        "statistical hypotheses) would be that larger cars are likely to be more" _n
        "expensive -- they are more comfortable, etc. Also, less fuel consuming" _n
        "cars can be more attractive to customers. Finally, foreign cars might be" _n
        "different in pricing due to tariffs or dumping.";
    More;
    noi di in gre _n
        "We shall be using the following variables in our analysis:" $SEPAR
        _n(2) in whi ". describe price weight mpg foreign";
    push describe price weight mpg foreign;
    More;
    noi describe price weight mpg foreign;
    /* one codebook step per analysis variable, in the order listed */
    tokenize price weight mpg foreign ;
    while "`1'"~="" { ;
        noi di _n in whi ". codebook `1'";
        push codebook `1';
        More;
        noi codebook `1';
        mac shift ;
    } ;
} /* end of section 1 */ ;
/* Section 2: the first OLS regression, referred to later as
   regression (1). */
if "$regtut"=="" | $regtut<=2 { ;
    if $regtut==2 { ;
        /* entered directly at this section: load the data first.
           The path is quoted so that a system directory containing
           spaces is handled, as in the use command of section 1. */
        local stadir: sysdir STATA ;
        use `"`stadir'auto"' ;
        cap rename headroom hdroom;
    } ;
    noi di _n(2) $SEPAR in gre "So, our first regression would be:" $SEPAR
        _n(2) in whi ". regress price weight mpg foreign";
    push regress price weight mpg foreign;
    More;
    noi regress price weight mpg foreign;
    noi di _n(2) $SEPAR in gre
        "We shall refer to it as regression (1) " _n(2)
        "Now, what about the quality of the regression? We know that there are lots" _n
        "of things that might influence the validity of statistical inference. E.g.," _n
        "heteroskedasticity results in inefficient estimates (i.e. those with too" _n
        "large standard errors); multicollinearity also increase the variance of" _n
        "estimates; and latent nonlinearity can make the whole picture questionable" _n
        "due to strong dependencies in the data, in particular, between the residuals" _n
        "and predicted values." _n;
    More;
} /* end of if 2 */ ;
/* Section 3: overview of the regression diagnostic commands and the
   residual versus fitted plot. */
if "$regtut"=="" | $regtut<=3 { ;
    if $regtut==3 { ;
        /* entered directly at this section: rebuild the data and
           regression (1). The path is quoted so that a system
           directory containing spaces is handled. */
        local stadir: sysdir STATA ;
        use `"`stadir'auto"' ;
        cap rename headroom hdroom;
        regress price weight mpg foreign;
        noi di $SEPAR
            "Apparently, you skipped the early part of the presentation. In that part," _n
            "we loaded the auto data and run our first regression which is supposed to" _n
            "be in Stata's memory." $SEPAR _n(3);
        More;
    } ;
    noi di $SEPAR in gre
        "Stata has a lot of regression diagnostic tools summarized in " in whi
        "help regdiag" in gre _n
        "section of Stata help. Take a minute to have a look at them." $SEPAR _n
        in whi ". help regdiag";
    push help regdiag;
    /* more is switched on only while the help text is shown */
    set more on;
    noi help regdiag;
    More;
    set more off;
    noi di $SEPAR in gre
        "With the results of the previuos estimation in memory, we can run a battery" _n
        "of diagnostic tests about our regression (1). First of all, let's get a " _n
        "visual impression of how things are going on. " in whi
        "rvfplot" in gre
        " plots residuals" _n
        "vs. fitted values. This graph may show heteroskedasticity and some other" _n
        "defects of the regression."
        $SEPAR _n(2) in whi ". rvfplot, xlab ylab yline(0)";
    push rvfplot, xlab ylab yline(0);
    More;
    noi rvfplot, xlab ylab yline(0);
    version 2.1 ;
    %_pause ;
    version 6 ;
    `wmfg' ;
    More;
    `wmfr' ;
    noi di $SEPAR in gre
        "The picture is far from satisfactory. The residuals are correlated with" _n
        "the fitted values for small and medium prices (or, rather, fitted prices)" _n
        "while at high prices, the variance of the residuals increases substantially" _n
        "thus reassuring that residuals sum up to zero. So, we could suspect that" _n
        "heteroskedasticity test would reject the null of homoskedasticity, and," _n
        "possibly, some other regression tests would also show something bad." _n(2);
} /* end of if 3 */ ;
/* Section 4: heteroskedasticity tests on regression (1). */
if "$regtut"=="" | $regtut<=4 { ;
    if $regtut==4 { ;
        /* entered directly at this section: rebuild the data and
           regression (1). The path is quoted so that a system
           directory containing spaces is handled. */
        local stadir: sysdir STATA ;
        use `"`stadir'auto"' ;
        cap rename headroom hdroom;
        regress price weight mpg foreign;
        noi di $SEPAR
            "Apparently, you skipped the early part of the presentation. In that part," _n
            "we loaded the auto data and run our first regression which is supposed to" _n
            "be in Stata's memory." $SEPAR _n(3);
        More;
        noi di $SEPAR;
    } ;
    noi di in gre
        "Let's first check heteroskedasticity with " in whi "hettest" in gre
        " command." _n
        "Essentially, it runs regression for log of squared residuals and performs" _n
        "a test for this auxilary regression significance."
        $SEPAR _n(2) in whi ". hettest";
    push hettest;
    More;
    noi hettest;
    /* the echoed command now matches the one pushed and executed */
    noi di _n in whi ". hettest mpg rep78 hdroom trunk weight length turn displ";
    push hettest mpg rep78 hdroom trunk weight length turn displ;
    noi hettest mpg rep78 hdroom trunk weight length turn displ;
    More;
    noi di $SEPAR in gre
        "Notice the two forms of " in whi "hettest" in gre " command. The first one, without arguments," _n
        "is to use the fitted values in the auxilary regression, and second syntax" _n
        "uses the arbitrary variables specified by the user." _n(2)
        "Yes, we see that the null hypothesis of homoskedasticity is rejected at 1.2%" _n
        "level. What else one should know about quality of the regression?" _n(2);
} /* end of if 4 */ ;
/* Section 5: nonlinearity. Runs the RESET test, plots actual against
   fitted values, and, when kernreg is available or can be installed,
   shows kernel regressions with several bandwidths. The local macro
   skipnp is set when kernreg cannot be obtained, and the kernel
   regression part is then skipped. */
if "$regtut"=="" | $regtut<=5 { ;
    if $regtut==5 { ;
        /* entered directly at this section: rebuild the data and
           regression (1). The path is quoted so that a system
           directory containing spaces is handled. */
        local stadir: sysdir STATA ;
        use `"`stadir'auto"' ;
        cap rename headroom hdroom;
        regress price weight mpg foreign;
        noi di $SEPAR
            "Apparently, you skipped the early part of the presentation. In that part," _n
            "we loaded the auto data and run our first regression which is supposed to" _n
            "be in Stata's memory." $SEPAR _n(3);
        More;
    } ;
    noi di $SEPAR in gre
        "Our next point would be non-linearity. We shall use Ramsey RESET test for" _n
        "this purpose. The underlying regression has the same dependent variable and" _n
        "a set of fitted values and their degrees." $SEPAR _n(2) in whi ". ovtest";
    push ovtest;
    More;
    noi ovtest;
    noi di in whi ". ovtest, rhs";
    push ovtest, rhs;
    noi ovtest, rhs;
    More;
    noi di $SEPAR in gre
        "Things are bad in this respect, too. There is also a graphical way to " _n
        "check for nonlinearity. The simplest way is to " in whi
        "predict" in gre
        " fitted values" _n
        "and plot the actual values vs. fitted." $SEPAR
        _n(2) in whi ". predict xb, xb" _n;
    push predict xb, xb;
    More;
    noi predict xb, xb;
    noi di in whi ". graph price xb xb, s(o.) c(.l)";
    push graph price xb xb, s(o.) c(.l);
    noi graph price xb xb, s(o.) c(.l);
    `wmfg' ;
    More;
    `wmfr' ;
    noi di $SEPAR in gre
        "I would say that non-linearity is more or less evident: the data are more" _n
        "likely to lie on a parabole rather than a straight line at 45 degrees." _n(2)
        "A more advanced way to present underlying dependencies is to use non-" _n
        "parametric regression. Stata has" in whi " ksm " in gre
        "command which performs one" _n
        "version of non-parametric regression, LOWESS." _n
        "We shall however use another command available in Stata official addition,"
        _n "Stata Technical Bulletin." $SEPAR _n(2) in whi ". search nonparametric regression";
    push search nonparametric regression;
    More;
    noi search nonparametric regression;
    More;
    noi di $SEPAR in gre
        "We are interested in the latter; let's check whether the program is" _n
        "installed in your system." $SEPAR
        _n(2) in whi ". which kernreg";
    push which kernreg;
    More;
    noi cap noi which kernreg;
    if _rc==111 { ;
        noi di $SEPAR in gre
            "You does not seem to have one. We shall try to install it over Internet"
            $SEPAR;
        noi di in whi ". net from http://www.stata.com" _n;
        /* the pushed command now matches the one echoed and executed */
        push net from http://www.stata.com;
        More;
        noi cap noi net from http://www.stata.com;
        if _rc==631 { ;
            /* The display statement is now terminated before the
               local command. Previously the local command was part of
               the display statement and skipnp was never set. */
            noi di $SEPAR in gre
                "Hm... no Internet access... then we, unfortunately, would have to skip" _n
                "piece with non-parametric regression." _n ;
            local skipnp 1;
            More;
        } /* no internet access */ ;
        else { ;
            noi di in whi ". net cd stb" _n;
            push net cd stb;
            noi net cd stb;
            noi di in whi ". net cd stb30";
            push net cd stb30;
            noi net cd stb30;
            noi di in whi ". net install snp10";
            push net install snp10;
            noi net install snp10;
            More;
        } /* end of else internet access */ ;
    } /* no kernreg */ ;
    if "`skipnp'"=="" { ;
        noi di $SEPAR in gre
            "OK, what can we do with this non-parametric tool at hand?" _n(2)
            "Essentially, non-parametric Nadaraya-Watson regression provides estimates" _n
            "of the conditional mean which does not rely on any parametric assumptions." _n
            "For each value of the dependent variable, it looks at adjacent points and" _n
            "calculates the estimate as the weighted sum of those adjacent values." _n
            "See W.Ha:rdle, Applied Nonparametric Regression, 1989." _n(2)
            "In applying " in whi
            "kernreg" in gre
            " as it is designed for Stata, one needs to supply" _n
            "the width of the kernel (i.e. the main smoothing parameter) which regulates" _n
            "how many adjacent points would be taken into account, the shape of the " _n
            "weighting function, and the number of points where the estimate is to be" _n
            "calculated (the estimation is rather time consuming!)." $SEPAR _n(2)
            in whi ". kernreg price xb, b(500) k(3) np(50)";
        push kernreg price xb, b(500) k(3) np(50);
        More;
        noi kernreg price xb, b(500) k(3) np(50);
        `wmfg' ;
        More;
        `wmfr' ;
        noi di $SEPAR in gre
            "It seems that we undersmoothed the data -- there are too many peaks and" _n
            "troughs on the graph for our 74 data points. Let us inflate the kernel" _n
            "width (" in whi "bandwidth" in gre " parameter of " in whi "kernreg"
            in gre ")." $SEPAR _n(2) in whi ". kernreg price xb, b(5000) k(3) np(50)";
        push kernreg price xb, b(5000) k(3) np(50);
        More;
        noi kernreg price xb, b(5000) k(3) np(50);
        `wmfg' ;
        More;
        `wmfr' ;
        noi di $SEPAR in gre
            "Now, we rather oversmoothed things and got almost a straight line. Let's" _n
            "take something in between." $SEPAR _n(2)
            in whi ". kernreg price xb, b(1500) k(3) np(50)";
        push kernreg price xb, b(1500) k(3) np(50);
        More;
        noi kernreg price xb, b(1500) k(3) np(50);
        `wmfg' ;
        More;
        `wmfr' ;
        noi di $SEPAR in gre
            "OK, now we see something parabolic; let's also try to add the actual values" _n
            "to assess the goodness of non-parametric fit. It is rather boring from " _n
            "technical point of view, so we shall rather skip the details (though they" _n
            "will be reflected in the command history)." $SEPAR; More;
        /* The data are preserved here because stack with the clear
           option replaces the data in memory. The matching restore is
           issued at the start of section 6. */
        push preserve;
        preserve;
        push g mv=.;
        g mv=.;
        push kernreg price xb, b(1500) k(3) np(50) nograph g(npred grid);
        kernreg price xb, b(1500) k(3) np(50) nograph g(npred grid);
        push stack xb price mv grid mv npred, clear into(fitted price kernel);
        stack xb price mv grid mv npred, clear into(fitted price kernel);
        push gra price kernel fitted fitted, s(o..) c(.ll) xlab ylab;
        gra price kernel fitted fitted, s(o..) c(.ll) xlab ylab;
        `wmfg' ;
        More;
        `wmfr' ;
        noi di $SEPAR in gre
            "Now we have the whole picture: actual, fitted prices, and the non-parametric" _n
            "kernel estimate." $SEPAR _n(2)
            ;
    } /* did not skip non-parametric piece */ ;
} /* end of if 5 */ ;
/* Section 6: multicollinearity, checked with pairwise correlation
   and variance inflation factors. */
if "$regtut"=="" | $regtut<=6 { ;
    if $regtut==6 { ;
        /* entered directly at this section: rebuild the data and
           regression (1). The path is quoted so that a system
           directory containing spaces is handled. */
        local stadir: sysdir STATA ;
        use `"`stadir'auto"' ;
        cap rename headroom hdroom;
        regress price weight mpg foreign;
        noi di $SEPAR
            "Apparently, you skipped the early part of the presentation. In that part," _n
            "we loaded the auto data and run our first regression which is supposed to" _n
            "be in Stata's memory." $SEPAR _n(3);
        More;
        preserve;
    } ;
    noi di _n(5) $SEPAR in gre
        "Now, our next point is multicollinearity. It can be a problem when the" _n
        "regressors are dependent (at least, statistically). In our case, we might" _n
        "suspect that heavier cars consume more fuel (in absolute terms), and thus" _n
        in whi "mpg" in gre " is likely to be correlated with " in whi "weight"
        in gre "." $SEPAR;
    /* This restore undoes the preserve issued in section 5. That
       preserve is skipped when the non-parametric part could not be
       run, so restore is captured to keep the tutorial going when
       there is nothing to restore. */
    push restore;
    cap restore;
    noi di _n in whi ". pwcorr weight mpg, sig";
    push pwcorr weight mpg, sig; More;
    noi pwcorr weight mpg, sig;
    More;
    noi di $SEPAR in gre
        "That's it! The correlation is significant at... at a really seriuos level." _n(2)
        "Let us remind you the regression (1) we are analysing. As long as we have" _n
        "not run any other estimation command so far, we can just type " in whi
        "regress" _n in gre "to reproduce our regression." $SEPAR;
    noi di in whi ". regress";
    push regress; More;
    noi regress;
    noi di $SEPAR in gre
        "The influence of collinearity is assessed with " in whi
        "vif" in gre
        " command, which is" _n
        "an abbreviation for Variance Inflation Factor. Let's try it now." $SEPAR;
    noi di in whi ". vif";
    push vif; More;
    noi vif;
    More;
    noi di $SEPAR in gre
        "Now, we see that the variance of" in whi " weight " in gre
        "regression coefficient is" _n
        "increased almost fourfold because of the collinearity. (This does not mean" _n
        "that in order to get a better estimate, one simply needs to divide the" _n
        "variance from the regression output or the estimate itself by this number," _n
        "or, rather, square root of it, as VIF relates to variance!) Still," in whi
        " weight" _n in gre
        "is significant in our regression (1), so we need not bother too much about" _n
        "effect of collinearity to the" in whi " weight " in gre
        "coefficient estimate. The second" _n
        "largest VIF is the one for" in whi " mpg" in gre
        ", and due to collinearity, the s.e. of" _n
        "the respective estimate is inflated 1.7 times. It is usually said that" _n
        "multicollinearity should be a concern when some VIFs are greater than 4," _n
        "i.e. the standard error of the estimate is effectively doubled. So, "_n
        "we shall conclude that we do not have to worry too much about" _n
        "multicollinearity here." $SEPAR _n(3);
    More;
    noi di;
} /* end of if 6 */ ;
/* Section 7: normality of the residuals of regression (1), checked
   with a histogram, kernel density, quantile plot and sktest. */
if "$regtut"=="" | $regtut<=7 { ;
    if $regtut==7 { ;
        /* entered directly at this section: rebuild the data and
           regression (1). The path is quoted so that a system
           directory containing spaces is handled. */
        local stadir: sysdir STATA ;
        use `"`stadir'auto"' ;
        cap rename headroom hdroom;
        regress price weight mpg foreign;
        noi di $SEPAR
            "Apparently, you skipped the early part of the presentation. In that part," _n
            "we loaded the auto data and run our first regression which is supposed to" _n
            "be in Stata's memory." $SEPAR _n(3);
        More;
    } ;
    noi di $SEPAR in gre
        "Let us now pay some attention to the issue of residuals normality." _n
        "In applied statistical analysis, one must care about normality, since" _n
        "to get the correct p-value from F or chi2 statistics, one need to be" _n
        "sure that the underlying distribution is normal (or at least close to" _n
        "normal, in some sense)." _n(2)
        "Non-normal residuals with tolerable variance can indicate that methods" _n
        "other than OLS may be suitable to obtain efficient estimates." _n(2)
        "We shall start with obtaining residuals."
        $SEPAR;
    More;
    noi di _n in whi ". predict res, residuals";
    push predict res, residuals; More;
    noi predict res, residuals;
    noi di $SEPAR in gre
        "Note that Stata took care to label the new variable." _n(2)
        "Stata has a number of tools to check the normality, and, as often, we" _n
        "have analytical and graphical tools at our disposal." _n(2)
        "The simplest graphical tool is a histogram with overlaid normal curve."
        $SEPAR;
    noi di in whi ". graph res, bin(15) norm xlab ylab freq";
    push graph res, bin(15) norm xlab ylab freq;
    More;
    /* the graph is drawn first and its window brought forward
       afterwards, as in every other graph step of this file */
    noi graph res, bin(15) norm xlab ylab freq;
    `wmfg' ;
    More;
    `wmfr' ;
    noi di $SEPAR
        "Well, the results are not really satisfactorily; the data do not seem to" _n
        "follow the normal pattern. Another way to obtain even nicer graph is to" _n
        "use kernel density estimates." $SEPAR;
    noi di in whi ". kdensity res, norm xlab(-4000 (2000) 8000) ylab(0 (5e-5) 0.00025)";
    push kdensity res, norm xlab(-4000 (2000) 8000) ylab(0 (5e-5) 0.00025);
    More;
    noi kdensity res, norm xlab(-4000 (2000) 8000) ylab(0 (5e-5) 0.00025);
    `wmfg' ;
    More;
    `wmfr' ;
    noi di $SEPAR
        "Finally, a popular way to check for normality is to use normal paper," _n
        "quantile plots (" in whi "qnorm" in gre
        "), or standardized normal probability plots (" in whi "pnorm" in gre ")." _n
        "These plots show how the data should have behaved were they distributed" _n
        "normally with the same mean and variance." $SEPAR;
    noi di in whi ". qnorm res";
    push qnorm res;
    More;
    noi qnorm res;
    `wmfg' ;
    More;
    `wmfr' ;
    noi di $SEPAR
        "If the tails are curled clockwise from the line, it means that the tails" _n
        "of the distribution are lighter than those for the normal (as with uniform)." _n
        "If the tails are bent counterclockwise, they are heavier, as with Student" _n
        "distribution. If they are bent in different directions, the distribution" _n
        "is skewed, as it is our case." _n(3);
    More;
    noi di in gre
        "The simplest analytical tool is skewness-kurtosis test,"
        in whi " sktest" in gre "." _n
        "It calculates the third and the fourth central moments and compares them" _n
        "with those for normal distribution. Stata can also perform Kolmogorov-" _n
        "Smirnov test to compare two distributions (see" in whi
        " help ksmirnov " in gre "), but"
        _n "its power is not that great when one has to estimate the parameters" _n
        "of the distribution."
        $SEPAR;
    noi di in whi ". sktest res";
    push sktest res; More;
    noi sktest res;
    noi di $SEPAR
        "We see that analytical results confirm our suggestions of non-normality" _n
        "based on the above plots." $SEPAR
        _n(5);
    More;
} /* end of if 7 */ ;
/* Section 8: heteroskedasticity correction. Regression (2) uses the
   robust option. Regression (3) uses weights built from smoothed
   squared residuals of regression (1). Estimation results are moved
   in and out of memory with estimates hold and unhold. */
if "$regtut"=="" | $regtut<=8 { ;
    if $regtut==8 { ;
        /* entered directly at this section: rebuild the data and
           regression (1). The path is quoted so that a system
           directory containing spaces is handled. */
        local stadir: sysdir STATA ;
        use `"`stadir'auto"' ;
        cap rename headroom hdroom;
        regress price weight mpg foreign;
        /* The variable res is normally created in section 7 and is
           needed below to build res2, so it is recreated here. */
        predict res, residuals;
        noi di $SEPAR
            "Apparently, you skipped the early part of the presentation. In that part," _n
            "we loaded the auto data and run our first regression which is supposed to" _n
            "be in Stata's memory." $SEPAR _n(3);
        More;
    } ;
    /* the full stop after option is now inside the string literal */
    noi di $SEPAR in gre
        "What shall we do about all that?" _n(2)
        "As we started our diagnostics with heteroskedasticity, let us now proceed" _n
        "with the corrections for it." _n(2)
        "Stata has a built-in means for heteroskedasticity consistent estimation" _n
        "of variance for " in whi "regress" in gre " command. It is referred to as"
        in whi " robust " in gre "option." _n
        "This shortcut is not very convenient, as it can be mixed with robust" _n
        "regression (" in whi "rreg" in gre
        " in Stata) which is an absolutely different thing." _n
        "The underlying estimator of variance is also known in econometrics as" _n
        "White estimator, and in some other sciences, as Huber estimator, after" _n
        "another author of the method, or as sandwich estimator, due to its" _n
        "structure. This is how it works (we shall also remind the first estimation)." $SEPAR;
    noi di in whi ". regress";
    push regress;
    noi regress;
    noi di $SEPAR in gre
        "Let us save the estimation results for furteher reference..." $SEPAR;
    noi di in whi ". estimates hold reg1";
    push estimates hold reg1; More;
    noi estimates hold reg1;
    noi di $SEPAR in gre
        "... and do something to improve the quality of our regression." $SEPAR;
    noi di _n in whi ". regress price weight mpg foreign, robust";
    push regress price weight mpg foreign, robust; More;
    noi regress price weight mpg foreign, robust;
    More;
    noi di $SEPAR in gre
        "This would be called regression (2). The only things that changes is the" _n
        "variance of the regression coefficient estimates, so almost all the" _n
        "reservations made about the quality of the regression (1) also apply to" _n
        "regression (2). Note also the absense of the ANOVA table for regression," _n
        "as we no longer have residuals that can be treated equivalently for all" _n
        "observations. We shall also save these results." $SEPAR _n(2);
    noi di in whi ". estimates hold reg2";
    push estimates hold reg2;
    noi estimates hold reg2;
    More;
    noi di $SEPAR in gre
        "Another way to deal with heteroskedasticity is to use FGLS estimator." _n
        "There is no built-in option for that in Stata, but it does have a way" _n
        "to specify observation weights (see " in whi "help weights" in gre
        "). So, besides" _n
        "a good model for variance, some data manipulation would be required." _n(2)
        "First of all, we shall use the results of our regression (1), without" _n
        in whi "robust" in gre " option, to get residuals and fitted values." _n
        "Stata provides an easy way to do that via " in whi "predict" in gre
        " command:"
        $SEPAR;
    noi di in whi ". estimates unhold reg1";
    push estimates unhold reg1; More;
    noi estimates unhold reg1;
    noi di _n in whi ". predict fitted";
    push predict fitted; More;
    noi predict fitted;
    noi di $SEPAR in gre
        "Now, we shall be using squared residuals to approximate variance at each" _n
        "point, and " in whi "ksm" in gre " command to smooth them out." $SEPAR;
    noi di in whi ". g res2=res*res";
    push g res2=res*res; More;
    noi g res2=res*res;
    noi di in whi _n `". lab var res2 "Squared residuals in regression(1)""';
    push lab var res2 "Squared residuals in regression(1)";
    noi lab var res2 "Squared residuals in regression(1)";
    push estimates hold reg1;
    estimates hold reg1;
    noi di in whi _n ". ksm res2 fitted, lowess gen(predvar)";
    push ksm res2 fitted, lowess gen(predvar); More;
    noi ksm res2 fitted, lowess gen(predvar);
    `wmfg' ;
    More;
    `wmfr' ;
    noi di in whi _n `". lab var predvar "Variance for regression (1) predicted by ksm""';
    /* The label text is now complete and its quote is closed, so it
       matches the command echoed just above. */
    push lab var predvar "Variance for regression (1) predicted by ksm";
    noi lab var predvar "Variance for regression (1) predicted by ksm";
    More;
    noi di $SEPAR in gre
        "We are now prepared for the regression with weights. This will be" _n
        "regression (3)." $SEPAR;
    noi di in whi ". regress price weight mpg foreign [aw=1/predvar]";
    push regress price weight mpg foreign [aw=1/predvar]; More;
    noi regress price weight mpg foreign [aw=1/predvar];
    More;
    noi di $SEPAR in gre
        "Let us again remind regression (1) results here." $SEPAR;
    noi di in whi ". estimates hold reg3";
    push estimates hold reg3;
    noi estimates hold reg3;
    noi di in whi _n ". estimates unhold reg1";
    push estimates unhold reg1;
    noi estimates unhold reg1;
    noi di in whi _n ". regress";
    push regress; More;
    noi regress;
    noi di $SEPAR in gre
        "(Of course we could have just typed " in whi
        "regress price weight mpg foreign" in gre "," _n
        "but then what did we save our results for?) In this case, it is not clear" _n
        "whether weights helped or not. The sign of mpg changed, but the variable" _n
        "is not significant, anyway. Overall R2 is liklely to be misleading here," _n
        "as long as the residuals were reweighted (and, hence it is not clear what" _n
        "is the meaning of the sum of squares)." $SEPAR;
    push estimates hold reg1;
    estimates hold reg1;
    More;
} /* end of if 8 */ ;
/* Section 9: Box-Cox transformation of the dependent variable and a
   short battery of diagnostics on the transformed regression,
   referred to as regression (4). */
if "$regtut"=="" | $regtut<=9 { ;
    if $regtut==9 { ;
        /* entered directly at this section: rebuild the data and
           regression (1). The path is quoted so that a system
           directory containing spaces is handled. */
        local stadir: sysdir STATA ;
        use `"`stadir'auto"' ;
        cap rename headroom hdroom;
        regress price weight mpg foreign;
        noi di $SEPAR
            "Apparently, you skipped the early part of the presentation. In that part," _n
            "we loaded the auto data and run our first regression which is supposed to" _n
            "be in Stata's memory." $SEPAR;
        More;
    } ;
    noi di _n(4) $SEPAR in gre
        "Now, we shall try to deal, in some way, with nonnormality. Our dependent" _n
        "variable, price, is rather far from being normal, which can be visualized" _n
        "on the graph..." $SEPAR;
    noi di in whi ". graph price, normal bin(15) xlab ylab freq" ;
    push graph price, normal bin(15) xlab ylab freq; More;
    noi graph price, normal bin(15) xlab ylab freq;
    `wmfg' ;
    More;
    `wmfr' ;
    noi di $SEPAR in gre
        "or confirmed analytically." $SEPAR;
    noi di in whi ". sktest price";
    push sktest price; More;
    noi sktest price;
    More;
    noi di $SEPAR in gre
        "Surprisingly, the method proposed against non-normality, known as Box-Cox" _n
        "transformation, may also cure some other regression ills such as" _n
        "nonlinearity or heteroskedasticity, but, on the other hand, its" _n
        "results are lacking transparent interpretation." _n(2) "Essentially, " in whi
        "boxcox" in gre " Stata command performs (generalized) power" _n
        "transformation of the dependent variable, and chooses automatically, with" _n
        "maximum likelihood procedure, the necessary parameter.";
    More;
    noi di in gre _n
        "In its simplest form, " in whi "boxcox" in gre " transforms a variable" _n
        "towards something resemlbing normality:" $SEPAR;
    noi di in whi ". boxcox price";
    push boxcox price; More;
    noi boxcox price;
    noi di $SEPAR in gre
        "With some detalization, we can have a look at the likelihood function" _n
        "and calculate confidence intervals. Also, we would tell " in whi "boxcox"
        in gre "to predict" _n "the values of the transformed variable." $SEPAR;
    noi di in whi ". boxcox price, graph level(95) nolog generate(bcprice)";
    push boxcox price, graph level(95) nolog generate(bcprice); More;
    noi boxcox price, graph level(95) nolog generate(bcprice);
    `wmfg' ;
    More;
    `wmfr' ;
    noi di $SEPAR in gre
        "Let's have a look whether the situation improved -- run our simplified" _n
        "visual and analytical normality check." $SEPAR _n;
    noi di in whi ". graph bcprice, normal bin(15) xlab ylab freq" ;
    push graph bcprice, normal bin(15) xlab ylab freq; More;
    noi graph bcprice, normal bin(15) xlab ylab freq;
    `wmfg' ;
    More;
    `wmfr' ;
    noi di _n in whi ". sktest bcprice";
    push sktest bcprice; More;
    noi sktest bcprice;
    noi di $SEPAR in gre
        "Now, things are going better: the graph can hardly be said to represent" _n
        "the normal distribution, but the skewness-curtosis test for normality" _n
        "based on the first four empirical moments does not reject the hypothesis" _n
        "of normality." _n; More;
    noi di in gre "We can now run a regression with normal LHS, but " in whi
        "boxcox" in gre " can also be used" _n
        "in regresssion context, if one specifies the list of the regressors."
        $SEPAR;
    noi di in whi ". boxcox price weight mpg foreign, g(bcpricer) level(95) nolog";
    push boxcox price weight mpg foreign, g(bcpricer) level(95) nolog; More;
    noi boxcox price weight mpg foreign, g(bcpricer) level(95) nolog;
    noi di in whi ". regress";
    push regress; More;
    noi regress;
    noi di $SEPAR in gre
        "The results for the Box-Cox transformation with regression are not too much" _n
        "different from what we had without the regression. As for regression itself," _n
        "the first order results are not too much different from what we had before." _n
        "mpg is again not significant, and R2 improves somewhat. To run regression" _n
        "diagnostics tests, however, we would need to honestly run the regression" _n
        "that we would call regression (4)." $SEPAR;
    noi di _n in whi ". regress bcpricer weight mpg foreign";
    push regress bcpricer weight mpg foreign; More;
    noi regress bcpricer weight mpg foreign;
    noi di $SEPAR in gre
        "The results are, of course, identical. Now we shall run a short battery" _n
        "of regression diagnostic tests that we are already familiar with." $SEPAR;
    noi di in whi ". hettest";
    push hettest; More;
    noi hettest;
    noi di _n in whi ". ovtest";
    push ovtest; More;
    noi ovtest;
    noi di _n in whi ". predict bcres, res";
    push predict bcres, res;
    noi predict bcres, res;
    noi di _n in whi `". lab var bcres "Residuals from regression (4)""';
    push lab var bcres "Residuals from regression (4)";
    noi lab var bcres "Residuals from regression (4)";
    noi di _n in whi ". predict bcfit, xb";
    push predict bcfit, xb;
    noi predict bcfit, xb;
    noi di in whi _n `". lab var bcfit "Fitted values from regression (4)""';
    push lab var bcfit "Fitted values from regression (4)";
    noi lab var bcfit "Fitted values from regression (4)";
    noi di in whi _n ". ksm bcpricer bcfit, lowess";
    push ksm bcpricer bcfit, lowess; More;
    noi ksm bcpricer bcfit, lowess;
    `wmfg' ;
    More;
    `wmfr' ;
    noi di in whi _n ". sktest bcres";
    push sktest bcres; More;
    noi sktest bcres;
    noi di $SEPAR in gre
        "So, we see that the residuals in this regression can be considered as" _n
        "normal and homoskedastic. That is not bad at all!" $SEPAR; More;
/* The closing brace is now terminated. Without the delimiter the
   statement that follows this block was read as part of this line. */
} /* end of if 9 */ ;
/* Closing summary. The colour is switched back to green after the
   last highlighted help reference so the final line is not white. */
noi di _n(8) in gre
    "In this tutorial, some basic concepts related to regression diagnoistics" _n
    "were considered. Identification of and measures against such regression" _n
    "ills as heteroskedasticity, nonlinearity, and non-normality, were proposed." _n
    "Do not forget to have a look at " in whi
    "help regress" in gre " and " in whi "help regdiag" in gre _n
    "to know more about implemenation of regression related commands in Stata!" _n;
More;
noi di in gre _n(6)
    "The presentation ends here. If you have any comments, send them to" _n
    in blue "skolenik@recep.glasnet.ru" in gre". Thank you." _n(2)
    ;
/* The star comment below is terminated explicitly. With the semicolon
   delimiter it would otherwise swallow the command that clears the
   SEPAR global macro. */
* the end of the tutorial ;
global SEPAR;
exit;