quadstat-8.x-1.x-dev/quadstat.install
<?php

use Drupal\node\Entity\Node;
use Drupal\file\Entity\File;
use Drupal\comment\Plugin\Field\FieldType\CommentItemInterface;

/**
 * @file
 * Install, update and uninstall hooks for the Quadstat install profile.
 */

/**
 * Implements hook_install().
 *
 * Perform actions to set up the site for this profile.
 *
 * @see system_install()
 */
function quadstat_install() {
  // First, do everything that is done in the standard profile.
  include_once DRUPAL_ROOT . '/core/profiles/standard/standard.install';
  standard_install();

  // Use the Seven administration theme as the site default.
  \Drupal::configFactory()->getEditable('system.theme')->set('default', 'seven')->save();

  // Create the welcome article that will serve as the front page.
  $node = Node::create([
    'type' => 'article',
    'title' => 'Thank you for installing Quadstat',
    'uid' => 1,
    'body' => [
      'value' => '<p>Remember to <a href="https://www.drupal.org/docs/8/modules/quadstat/configuring-quadstat">configure Quadstat</a> before using.</p><p>Please <a href="https://www.drupal.org/project/issues/quadstat">report</a> any issues.</p><p>Help support <a href="https://www.r-project.org/foundation/donations.html">R</a>.</p>',
      'format' => 'full_html',
    ],
    'comment' => [
      'status' => CommentItemInterface::CLOSED,
    ],
  ]);
  $node->save();

  // Point the front page at the welcome node created above.
  \Drupal::configFactory()->getEditable('system.site')->set('page.front', '/node/1')->save();

  // Generate five sample datasets.
  for ($i = 0; $i < 5; $i++) {
    _quadstat_default_content();
  }

  // Assign ownership of the generated dataset files to user 1.
  $query = \Drupal::database()->update('file_managed');
  $query->fields(['uid' => 1]);
  $query->execute();

  // Create one Application node per bundled webform.
  $applications = [
    'rnorm' => [
      'summary' => 'Generates random samples from a normal distribution',
      'help' => _quadstat_rnorm_help(),
    ],
    'plot' => [
      'summary' => 'Plot two dataset vectors',
      'help' => _quadstat_plot_help(),
    ],
    'stem' => [
      'summary' => 'Stem-and-leaf plot',
      'help' => _quadstat_stem_help(),
    ],
    'cor' => [
      'summary' => 'Compute the correlation coefficient between two variables',
      'help' => _quadstat_cor_help(),
    ],
    'lm' => [
      'summary' => 'Compute regression slope and y-intercept',
      'help' => _quadstat_lm_help(),
    ],
  ];
  foreach ($applications as $name => $info) {
    $node = Node::create([
      'type' => 'application',
      'title' => $name,
      'uid' => 1,
      'webform' => [
        'target_id' => $name,
        'status' => 'open',
      ],
      'field_application_help' => [
        'value' => $info['help'],
        'summary' => $info['summary'],
        'format' => 'full_html',
      ],
    ]);
    $node->save();
  }
}

/**
 * Creates a sample dataset node backed by a randomly generated CSV file.
 */
function _quadstat_default_content() {
  $user = \Drupal\user\Entity\User::load(1);
  $data = '';
  // Populate a 10x10 grid of random integers, one comma-separated row per line.
  for ($i = 0; $i < 10; $i++) {
    for ($j = 0; $j < 10; $j++) {
      $data .= rand(0, 100) . ",";
    }
    // Strip the trailing comma and terminate the row.
    $data = rtrim($data, ',') .
"\n"; } // Add header column to beginning $header = ''; for($i = 1; $i <= 10; $i++) { $header .= '"' . $i . '",'; } $data = rtrim($header, ',') . "\n" . $data; // Save the newly created CSV dataset $dir = 'public://datasets/admin'; if(file_prepare_directory($dir, FILE_CREATE_DIRECTORY)) { $datafile = file_save_data($data, $dir . '/' . rand(10000,99999) . '.csv'); } // Create node object with attached file. $node = Node::create([ 'type' => 'dataset', 'title' => 'Random Dataset', 'field_dataset_file' => [ 'target_id' => $datafile->get('fid')->get(0)->value, 'format' => 'full_html' ], 'field_dataset_buttons' => '<div id="quadstat-slickgrid-buttons"><input type="button" value="Add Row" id="quadstat-slickgrid-add-row" class="button" /><input type="button" value="Add Column" id="quadstat-slickgrid-add-col" class="button" /></div>', 'field_dataset_grid' => [ 'value' => '<div id="quadstat-slickgrid"></div>', 'format' => 'full_html' ], 'uid' => 1 ]); $node->save(); } function _quadstat_rnorm_help() { $help = <<<EOT <h2>The (non-central) Chi-Squared Distribution</h2> <h3>Description</h3> <p>Density, distribution function, quantile function and random generation for the chi-squared (<i>chi^2</i>) distribution with <code>df</code> degrees of freedom and optional non-centrality parameter <code>ncp</code>. </p> <h3>Usage</h3> <pre>dchisq(x, df, ncp = 0, log = FALSE) pchisq(q, df, ncp = 0, lower.tail = TRUE, log.p = FALSE) qchisq(p, df, ncp = 0, lower.tail = TRUE, log.p = FALSE) rchisq(n, df, ncp = 0) </pre> <h3>Arguments</h3> <table summary="R argblock"> <tbody><tr valign="top"><td><code>x, q</code></td> <td> <p>vector of quantiles.</p> </td></tr> <tr valign="top"><td><code>p</code></td> <td> <p>vector of probabilities.</p> </td></tr> <tr valign="top"><td><code>n</code></td> <td> <p>number of observations. If <code>length(n) > 1</code>, the length is taken to be the number required.</p> </td></tr> <tr valign="top"><td><code>df</code></td> <td> <p>degrees of freedom (non-negative, but can be non-integer).</p> </td></tr> <tr valign="top"><td><code>ncp</code></td> <td> <p>non-centrality parameter (non-negative).</p> </td></tr> <tr valign="top"><td><code>log, log.p</code></td> <td> <p>logical; if TRUE, probabilities p are given as log(p).</p> </td></tr> <tr valign="top"><td><code>lower.tail</code></td> <td> <p>logical; if TRUE (default), probabilities are <i>P[X ≤ x]</i>, otherwise, <i>P[X > x]</i>.</p> </td></tr> </tbody></table> <h3>Details</h3> <p>The chi-squared distribution with <code>df</code><i>= n ≥ 0</i> degrees of freedom has density </p> <p style="text-align: center;"><i>f_n(x) = 1 / (2^(n/2) Γ(n/2)) x^(n/2-1) e^(-x/2)</i></p> <p>for <i>x > 0</i>. The mean and variance are <i>n</i> and <i>2n</i>. </p> <p>The non-central chi-squared distribution with <code>df</code><i>= n</i> degrees of freedom and non-centrality parameter <code>ncp</code> <i>= λ</i> has density </p> <p style="text-align: center;"><i>f(x) = exp(-λ/2) SUM_{r=0}^∞ ((λ/2)^r / r!) dchisq(x, df + 2r) </i></p> <p>for <i>x ≥ 0</i>. For integer <i>n</i>, this is the distribution of the sum of squares of <i>n</i> normals each with variance one, <i>λ</i> being the sum of squares of the normal means; further, <br> <i>E(X) = n + λ</i>, <i>Var(X) = 2(n + 2*λ)</i>, and <i>E((X - E(X))^3) = 8(n + 3*λ)</i>. </p> <p>Note that the degrees of freedom <code>df</code><i>= n</i>, can be non-integer, and also <i>n = 0</i> which is relevant for non-centrality <i>λ > 0</i>, see Johnson <em>et al</em> (1995, chapter 29). 
In that (noncentral, zero df) case, the distribution is a mixture of a point mass at <i>x = 0</i> (of size <code>pchisq(0, df=0, ncp=ncp)</code> and a continuous part, and <code>dchisq()</code> is <em>not</em> a density with respect to that mixture measure but rather the limit of the density for <i>df -> 0</i>. </p> <p>Note that <code>ncp</code> values larger than about 1e5 may give inaccurate results with many warnings for <code>pchisq</code> and <code>qchisq</code>. </p> <h3>Value</h3> <p><code>dchisq</code> gives the density, <code>pchisq</code> gives the distribution function, <code>qchisq</code> gives the quantile function, and <code>rchisq</code> generates random deviates. </p> <p>Invalid arguments will result in return value <code>NaN</code>, with a warning. </p> <p>The length of the result is determined by <code>n</code> for <code>rchisq</code>, and is the maximum of the lengths of the numerical arguments for the other functions. </p> <p>The numerical arguments other than <code>n</code> are recycled to the length of the result. Only the first elements of the logical arguments are used. </p> <h3>Note</h3> <p>Supplying <code>ncp = 0</code> uses the algorithm for the non-central distribution, which is not the same algorithm used if <code>ncp</code> is omitted. This is to give consistent behaviour in extreme cases with values of <code>ncp</code> very near zero. </p> <p>The code for non-zero <code>ncp</code> is principally intended to be used for moderate values of <code>ncp</code>: it will not be highly accurate, especially in the tails, for large values. </p> <h3>Source</h3> <p>The central cases are computed via the gamma distribution. </p> <p>The non-central <code>dchisq</code> and <code>rchisq</code> are computed as a Poisson mixture central of chi-squares (Johnson <em>et al</em>, 1995, p.436). </p> <p>The non-central <code>pchisq</code> is for <code>ncp < 80</code> computed from the Poisson mixture of central chi-squares and for larger <code>ncp</code> <em>via</em> a C translation of </p> <p>Ding, C. G. (1992) Algorithm AS275: Computing the non-central chi-squared distribution function. <em>Appl.Statist.</em>, <b>41</b> 478–482. </p> <p>which computes the lower tail only (so the upper tail suffers from cancellation and a warning will be given when this is likely to be significant). </p> <p>The non-central <code>qchisq</code> is based on inversion of <code>pchisq</code>. </p> <h3>References</h3> <p>Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) <em>The New S Language</em>. Wadsworth & Brooks/Cole. </p> <p>Johnson, N. L., Kotz, S. and Balakrishnan, N. (1995) <em>Continuous Univariate Distributions</em>, chapters 18 (volume 1) and 29 (volume 2). Wiley, New York. </p> <h3>See Also</h3> <p><a href="Distributions.html">Distributions</a> for other standard distributions. </p> <p>A central chi-squared distribution with <i>n</i> degrees of freedom is the same as a Gamma distribution with <code>shape</code> <i>a = n/2</i> and <code>scale</code> <i>s = 2</i>. Hence, see <code><a href="GammaDist.html">dgamma</a></code> for the Gamma distribution. </p> <h3>Examples</h3> <pre>require(graphics) dchisq(1, df = 1:3) pchisq(1, df = 3) pchisq(1, df = 3, ncp = 0:4) # includes the above x <- 1:10 ## Chi-squared(df = 2) is a special exponential distribution all.equal(dchisq(x, df = 2), dexp(x, 1/2)) all.equal(pchisq(x, df = 2), pexp(x, 1/2)) ## non-central RNG -- df = 0 with ncp > 0: Z0 has point mass at 0! Z0 <- rchisq(100, df = 0, ncp = 2.) 
graphics::stem(Z0) ## visual testing ## do P-P plots for 1000 points at various degrees of freedom L <- 1.2; n <- 1000; pp <- ppoints(n) op <- par(mfrow = c(3,3), mar = c(3,3,1,1)+.1, mgp = c(1.5,.6,0), oma = c(0,0,3,0)) for(df in 2^(4*rnorm(9))) { plot(pp, sort(pchisq(rr <- rchisq(n, df = df, ncp = L), df = df, ncp = L)), ylab = "pchisq(rchisq(.),.)", pch = ".") mtext(paste("df = ", formatC(df, digits = 4)), line = -2, adj = 0.05) abline(0, 1, col = 2) } mtext(expression("P-P plots : Noncentral "* chi^2 *"(n=1000, df=X, ncp= 1.2)"), cex = 1.5, font = 2, outer = TRUE) par(op) ## "analytical" test lam <- seq(0, 100, by = .25) p00 <- pchisq(0, df = 0, ncp = lam) p.0 <- pchisq(1e-300, df = 0, ncp = lam) stopifnot(all.equal(p00, exp(-lam/2)), all.equal(p.0, exp(-lam/2))) </pre> EOT; return $help; } function _quadstat_plot_help() { $help = <<<EOT <h2>Generic X-Y Plotting</h2> <h3>Description</h3> <p>Generic function for plotting of <span style="font-family: Courier New, Courier; color: #666666;"><b>R</b></span> objects. For more details about the graphical parameter arguments, see <code><a href="par.html">par</a></code>. </p> <p>For simple scatter plots, <code><a href="plot.default.html">plot.default</a></code> will be used. However, there are <code>plot</code> methods for many <span style="font-family: Courier New, Courier; color: #666666;"><b>R</b></span> objects, including <code><a href="../../base/html/function.html">function</a></code>s, <code><a href="../../base/html/data.frame.html">data.frame</a></code>s, <code><a href="../../stats/html/density.html">density</a></code> objects, etc. Use <code>methods(plot)</code> and the documentation for these. </p> <h3>Usage</h3> <pre>plot(x, y, ...) </pre> <h3>Arguments</h3> <table summary="R argblock"> <tbody><tr valign="top"><td><code>x</code></td> <td> <p>the coordinates of points in the plot. Alternatively, a single plotting structure, function or <em>any <span style="font-family: Courier New, Courier; color: #666666;"><b>R</b></span> object with a <code>plot</code> method</em> can be provided.</p> </td></tr> <tr valign="top"><td><code>y</code></td> <td> <p>the y coordinates of points in the plot, <em>optional</em> if <code>x</code> is an appropriate structure.</p> </td></tr> <tr valign="top"><td><code>...</code></td> <td> <p>Arguments to be passed to methods, such as <a href="par.html">graphical parameters</a> (see <code><a href="par.html">par</a></code>). Many methods will accept the following arguments: </p> <dl> <dt><code>type</code></dt><dd><p>what type of plot should be drawn. Possible types are </p> <ul> <li> <p><code>"p"</code> for <b>p</b>oints, </p> </li> <li> <p><code>"l"</code> for <b>l</b>ines, </p> </li> <li> <p><code>"b"</code> for <b>b</b>oth, </p> </li> <li> <p><code>"c"</code> for the lines part alone of <code>"b"</code>, </p> </li> <li> <p><code>"o"</code> for both ‘<b>o</b>verplotted’, </p> </li> <li> <p><code>"h"</code> for ‘<b>h</b>istogram’ like (or ‘high-density’) vertical lines, </p> </li> <li> <p><code>"s"</code> for stair <b>s</b>teps, </p> </li> <li> <p><code>"S"</code> for other <b>s</b>teps, see ‘Details’ below, </p> </li> <li> <p><code>"n"</code> for no plotting. </p> </li></ul> <p>All other <code>type</code>s give a warning or an error; using, e.g., <code>type = "punkte"</code> being equivalent to <code>type = "p"</code> for S compatibility. Note that some methods, e.g. <code><a href="plot.factor.html">plot.factor</a></code>, do not accept this. 
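</p>
<p>A brief added illustration of a few of these <code>type</code> values drawn on the
same x-y data (the vectors here are arbitrary sample values):</p>
<pre>x <- 1:10
y <- x^2
plot(x, y, type = "p")  # points only
plot(x, y, type = "l")  # connected lines
plot(x, y, type = "b")  # both points and lines
plot(x, y, type = "h")  # 'histogram'-like vertical lines
plot(x, y, type = "s")  # stair steps: horizontal first, then vertical
</pre>
<p>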
</p> </dd> <dt><code>main</code></dt><dd><p>an overall title for the plot: see <code><a href="title.html">title</a></code>.</p> </dd> <dt><code>sub</code></dt><dd><p>a sub title for the plot: see <code><a href="title.html">title</a></code>.</p> </dd> <dt><code>xlab</code></dt><dd><p>a title for the x axis: see <code><a href="title.html">title</a></code>.</p> </dd> <dt><code>ylab</code></dt><dd><p>a title for the y axis: see <code><a href="title.html">title</a></code>.</p> </dd> <dt><code>asp</code></dt><dd><p>the <i>y/x</i> aspect ratio, see <code><a href="plot.window.html">plot.window</a></code>.</p> </dd> </dl> </td></tr> </tbody></table> <h3>Details</h3> <p>The two step types differ in their x-y preference: Going from <i>(x1,y1)</i> to <i>(x2,y2)</i> with <i>x1 < x2</i>, <code>type = "s"</code> moves first horizontal, then vertical, whereas <code>type = "S"</code> moves the other way around. </p> <h3>See Also</h3> <p><code><a href="plot.default.html">plot.default</a></code>, <code><a href="plot.formula.html">plot.formula</a></code> and other methods; <code><a href="points.html">points</a></code>, <code><a href="lines.html">lines</a></code>, <code><a href="par.html">par</a></code>. For thousands of points, consider using <code><a href="smoothScatter.html">smoothScatter</a>()</code> instead of <code>plot()</code>. </p> <p>For X-Y-Z plotting see <code><a href="contour.html">contour</a></code>, <code><a href="persp.html">persp</a></code> and <code><a href="image.html">image</a></code>. </p> <h3>Examples</h3> <pre>require(stats) # for lowess, rpois, rnorm plot(cars) lines(lowess(cars)) plot(sin, -pi, 2*pi) # see ?plot.function ## Discrete Distribution Plot: plot(table(rpois(100, 5)), type = "h", col = "red", lwd = 10, main = "rpois(100, lambda = 5)") ## Simple quantiles/ECDF, see ecdf() {library(stats)} for a better one: plot(x <- sort(rnorm(47)), type = "s", main = "plot(x, type = \"s\")") points(x, cex = .5, col = "dark red") </pre> EOT; return $help; } function _quadstat_lm_help() { $help = <<<EOT <h2>Fitting Linear Models</h2> <h3>Description</h3> <p><code>lm</code> is used to fit linear models. It can be used to carry out regression, single stratum analysis of variance and analysis of covariance (although <code><a href="aov.html">aov</a></code> may provide a more convenient interface for these). </p> <h3>Usage</h3> <pre>lm(formula, data, subset, weights, na.action, method = "qr", model = TRUE, x = FALSE, y = FALSE, qr = TRUE, singular.ok = TRUE, contrasts = NULL, offset, ...) </pre> <h3>Arguments</h3> <table summary="R argblock"> <tbody><tr valign="top"><td><code>formula</code></td> <td> <p>an object of class <code>"<a href="formula.html">formula</a>"</code> (or one that can be coerced to that class): a symbolic description of the model to be fitted. The details of model specification are given under ‘Details’.</p> </td></tr> <tr valign="top"><td><code>data</code></td> <td> <p>an optional data frame, list or environment (or object coercible by <code><a href="../../base/html/as.data.frame.html">as.data.frame</a></code> to a data frame) containing the variables in the model. 
If not found in <code>data</code>, the variables are taken from <code>environment(formula)</code>, typically the environment from which <code>lm</code> is called.</p> </td></tr> <tr valign="top"><td><code>subset</code></td> <td> <p>an optional vector specifying a subset of observations to be used in the fitting process.</p> </td></tr> <tr valign="top"><td><code>weights</code></td> <td> <p>an optional vector of weights to be used in the fitting process. Should be <code>NULL</code> or a numeric vector. If non-NULL, weighted least squares is used with weights <code>weights</code> (that is, minimizing <code>sum(w*e^2)</code>); otherwise ordinary least squares is used. See also ‘Details’,</p> </td></tr> <tr valign="top"><td><code>na.action</code></td> <td> <p>a function which indicates what should happen when the data contain <code>NA</code>s. The default is set by the <code>na.action</code> setting of <code><a href="../../base/html/options.html">options</a></code>, and is <code><a href="na.fail.html">na.fail</a></code> if that is unset. The ‘factory-fresh’ default is <code><a href="na.fail.html">na.omit</a></code>. Another possible value is <code>NULL</code>, no action. Value <code><a href="na.fail.html">na.exclude</a></code> can be useful.</p> </td></tr> <tr valign="top"><td><code>method</code></td> <td> <p>the method to be used; for fitting, currently only <code>method = "qr"</code> is supported; <code>method = "model.frame"</code> returns the model frame (the same as with <code>model = TRUE</code>, see below).</p> </td></tr> <tr valign="top"><td><code>model, x, y, qr</code></td> <td> <p>logicals. If <code>TRUE</code> the corresponding components of the fit (the model frame, the model matrix, the response, the QR decomposition) are returned. </p> </td></tr> <tr valign="top"><td><code>singular.ok</code></td> <td> <p>logical. If <code>FALSE</code> (the default in S but not in <span style="font-family: Courier New, Courier; color: #666666;"><b>R</b></span>) a singular fit is an error.</p> </td></tr> <tr valign="top"><td><code>contrasts</code></td> <td> <p>an optional list. See the <code>contrasts.arg</code> of <code><a href="model.matrix.html">model.matrix.default</a></code>.</p> </td></tr> <tr valign="top"><td><code>offset</code></td> <td> <p>this can be used to specify an <em>a priori</em> known component to be included in the linear predictor during fitting. This should be <code>NULL</code> or a numeric vector of length equal to the number of cases. One or more <code><a href="offset.html">offset</a></code> terms can be included in the formula instead or as well, and if more than one are specified their sum is used. See <code><a href="model.extract.html">model.offset</a></code>.</p> </td></tr> <tr valign="top"><td><code>...</code></td> <td> <p>additional arguments to be passed to the low level regression fitting functions (see below).</p> </td></tr> </tbody></table> <h3>Details</h3> <p>Models for <code>lm</code> are specified symbolically. A typical model has the form <code>response ~ terms</code> where <code>response</code> is the (numeric) response vector and <code>terms</code> is a series of terms which specifies a linear predictor for <code>response</code>. A terms specification of the form <code>first + second</code> indicates all the terms in <code>first</code> together with all the terms in <code>second</code> with duplicates removed. 
A specification of the form <code>first:second</code> indicates the set of terms obtained by taking the interactions of all terms in <code>first</code> with all terms in <code>second</code>. The specification <code>first*second</code> indicates the <em>cross</em> of <code>first</code> and <code>second</code>. This is the same as <code>first + second + first:second</code>. </p> <p>If the formula includes an <code><a href="offset.html">offset</a></code>, this is evaluated and subtracted from the response. </p> <p>If <code>response</code> is a matrix a linear model is fitted separately by least-squares to each column of the matrix. </p> <p>See <code><a href="model.matrix.html">model.matrix</a></code> for some further details. The terms in the formula will be re-ordered so that main effects come first, followed by the interactions, all second-order, all third-order and so on: to avoid this pass a <code>terms</code> object as the formula (see <code><a href="aov.html">aov</a></code> and <code>demo(glm.vr)</code> for an example). </p> <p>A formula has an implied intercept term. To remove this use either <code>y ~ x - 1</code> or <code>y ~ 0 + x</code>. See <code><a href="formula.html">formula</a></code> for more details of allowed formulae. </p> <p>Non-<code>NULL</code> <code>weights</code> can be used to indicate that different observations have different variances (with the values in <code>weights</code> being inversely proportional to the variances); or equivalently, when the elements of <code>weights</code> are positive integers <i>w_i</i>, that each response <i>y_i</i> is the mean of <i>w_i</i> unit-weight observations (including the case that there are <i>w_i</i> observations equal to <i>y_i</i> and the data have been summarized). </p> <p><code>lm</code> calls the lower level functions <code><a href="lmfit.html">lm.fit</a></code>, etc, see below, for the actual numerical computations. For programming only, you may consider doing likewise. </p> <p>All of <code>weights</code>, <code>subset</code> and <code>offset</code> are evaluated in the same way as variables in <code>formula</code>, that is first in <code>data</code> and then in the environment of <code>formula</code>. </p> <h3>Value</h3> <p><code>lm</code> returns an object of <code><a href="../../base/html/class.html">class</a></code> <code>"lm"</code> or for multiple responses of class <code>c("mlm", "lm")</code>. </p> <p>The functions <code>summary</code> and <code><a href="anova.html">anova</a></code> are used to obtain and print a summary and analysis of variance table of the results. The generic accessor functions <code>coefficients</code>, <code>effects</code>, <code>fitted.values</code> and <code>residuals</code> extract various useful features of the value returned by <code>lm</code>. 
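</p>
<p>A brief added illustration of these accessor functions using the built-in
<code>cars</code> data set (an arbitrary choice of example data):</p>
<pre>fit <- lm(dist ~ speed, data = cars)
coef(fit)             # named vector of coefficients
head(residuals(fit))  # response minus fitted values
head(fitted(fit))     # fitted mean values
</pre>
<p>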
</p> <p>An object of class <code>"lm"</code> is a list containing at least the following components: </p> <table summary="R valueblock"> <tbody><tr valign="top"><td><code>coefficients</code></td> <td> <p>a named vector of coefficients</p> </td></tr> <tr valign="top"><td><code>residuals</code></td> <td> <p>the residuals, that is response minus fitted values.</p> </td></tr> <tr valign="top"><td><code>fitted.values</code></td> <td> <p>the fitted mean values.</p> </td></tr> <tr valign="top"><td><code>rank</code></td> <td> <p>the numeric rank of the fitted linear model.</p> </td></tr> <tr valign="top"><td><code>weights</code></td> <td> <p>(only for weighted fits) the specified weights.</p> </td></tr> <tr valign="top"><td><code>df.residual</code></td> <td> <p>the residual degrees of freedom.</p> </td></tr> <tr valign="top"><td><code>call</code></td> <td> <p>the matched call.</p> </td></tr> <tr valign="top"><td><code>terms</code></td> <td> <p>the <code><a href="terms.html">terms</a></code> object used.</p> </td></tr> <tr valign="top"><td><code>contrasts</code></td> <td> <p>(only where relevant) the contrasts used.</p> </td></tr> <tr valign="top"><td><code>xlevels</code></td> <td> <p>(only where relevant) a record of the levels of the factors used in fitting.</p> </td></tr> <tr valign="top"><td><code>offset</code></td> <td> <p>the offset used (missing if none were used).</p> </td></tr> <tr valign="top"><td><code>y</code></td> <td> <p>if requested, the response used.</p> </td></tr> <tr valign="top"><td><code>x</code></td> <td> <p>if requested, the model matrix used.</p> </td></tr> <tr valign="top"><td><code>model</code></td> <td> <p>if requested (the default), the model frame used.</p> </td></tr> <tr valign="top"><td><code>na.action</code></td> <td> <p>(where relevant) information returned by <code><a href="model.frame.html">model.frame</a></code> on the special handling of <code>NA</code>s.</p> </td></tr> </tbody></table> <p>In addition, non-null fits will have components <code>assign</code>, <code>effects</code> and (unless not requested) <code>qr</code> relating to the linear fit, for use by extractor functions such as <code>summary</code> and <code><a href="effects.html">effects</a></code>. </p> <h3>Using time series</h3> <p>Considerable care is needed when using <code>lm</code> with time series. </p> <p>Unless <code>na.action = NULL</code>, the time series attributes are stripped from the variables before the regression is done. (This is necessary as omitting <code>NA</code>s would invalidate the time series attributes, and if <code>NA</code>s are omitted in the middle of the series the result would no longer be a regular time series.) </p> <p>Even if the time series attributes are retained, they are not used to line up series, so that the time shift of a lagged or differenced regressor would be ignored. It is good practice to prepare a <code>data</code> argument by <code><a href="ts.union.html">ts.intersect</a>(..., dframe = TRUE)</code>, then apply a suitable <code>na.action</code> to that data frame and call <code>lm</code> with <code>na.action = NULL</code> so that residuals and fitted values are time series. </p> <h3>Note</h3> <p>Offsets specified by <code>offset</code> will not be included in predictions by <code><a href="predict.lm.html">predict.lm</a></code>, whereas those specified by an offset term in the formula will be. </p> <h3>Author(s)</h3> <p>The design was inspired by the S function of the same name described in Chambers (1992). 
The implementation of model formula by Ross Ihaka was based on Wilkinson & Rogers (1973). </p> <h3>References</h3> <p>Chambers, J. M. (1992) <em>Linear models.</em> Chapter 4 of <em>Statistical Models in S</em> eds J. M. Chambers and T. J. Hastie, Wadsworth & Brooks/Cole. </p> <p>Wilkinson, G. N. and Rogers, C. E. (1973) Symbolic descriptions of factorial models for analysis of variance. <em>Applied Statistics</em>, <b>22</b>, 392–9. </p> <h3>See Also</h3> <p><code><a href="summary.lm.html">summary.lm</a></code> for summaries and <code><a href="anova.lm.html">anova.lm</a></code> for the ANOVA table; <code><a href="aov.html">aov</a></code> for a different interface. </p> <p>The generic functions <code><a href="coef.html">coef</a></code>, <code><a href="effects.html">effects</a></code>, <code><a href="residuals.html">residuals</a></code>, <code><a href="fitted.values.html">fitted</a></code>, <code><a href="vcov.html">vcov</a></code>. </p> <p><code><a href="predict.lm.html">predict.lm</a></code> (via <code><a href="predict.html">predict</a></code>) for prediction, including confidence and prediction intervals; <code><a href="confint.html">confint</a></code> for confidence intervals of <em>parameters</em>. </p> <p><code><a href="lm.influence.html">lm.influence</a></code> for regression diagnostics, and <code><a href="glm.html">glm</a></code> for <b>generalized</b> linear models. </p> <p>The underlying low level functions, <code><a href="lmfit.html">lm.fit</a></code> for plain, and <code><a href="lmfit.html">lm.wfit</a></code> for weighted regression fitting. </p> <p>More <code>lm()</code> examples are available e.g., in <code><a href="../../datasets/html/anscombe.html">anscombe</a></code>, <code><a href="../../datasets/html/attitude.html">attitude</a></code>, <code><a href="../../datasets/html/freeny.html">freeny</a></code>, <code><a href="../../datasets/html/LifeCycleSavings.html">LifeCycleSavings</a></code>, <code><a href="../../datasets/html/longley.html">longley</a></code>, <code><a href="../../datasets/html/stackloss.html">stackloss</a></code>, <code><a href="../../datasets/html/swiss.html">swiss</a></code>. </p> <p><code>biglm</code> in package <a href="https://CRAN.R-project.org/package=biglm"><span class="pkg">biglm</span></a> for an alternative way to fit linear models to large datasets (especially those with many cases). </p> <h3>Examples</h3> <pre>require(graphics) ## Annette Dobson (1990) "An Introduction to Generalized Linear Models". ## Page 9: Plant Weight Data. ctl <- c(4.17,5.58,5.18,6.11,4.50,4.61,5.17,4.53,5.33,5.14) trt <- c(4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69) group <- gl(2, 10, 20, labels = c("Ctl","Trt")) weight <- c(ctl, trt) lm.D9 <- lm(weight ~ group) lm.D90 <- lm(weight ~ group - 1) # omitting intercept anova(lm.D9) summary(lm.D90) opar <- par(mfrow = c(2,2), oma = c(0, 0, 1.1, 0)) plot(lm.D9, las = 1) # Residuals, Fitted, ... par(opar) ### less simple examples in "See Also" above </pre> EOT; return $help; } function _quadstat_stem_help() { $help = <<<EOT <h2>Stem-and-Leaf Plots</h2> <h3>Description</h3> <p><code>stem</code> produces a stem-and-leaf plot of the values in <code>x</code>. The parameter <code>scale</code> can be used to expand the scale of the plot. A value of <code>scale = 2</code> will cause the plot to be roughly twice as long as the default. 
</p>

<h3>Usage</h3>

<pre>stem(x, scale = 1, width = 80, atom = 1e-08)
</pre>

<h3>Arguments</h3>

<table summary="R argblock">
<tbody>
<tr valign="top"><td><code>x</code></td>
<td><p>a numeric vector.</p></td></tr>
<tr valign="top"><td><code>scale</code></td>
<td><p>This controls the plot length.</p></td></tr>
<tr valign="top"><td><code>width</code></td>
<td><p>The desired width of plot.</p></td></tr>
<tr valign="top"><td><code>atom</code></td>
<td><p>a tolerance.</p></td></tr>
</tbody>
</table>

<h3>Details</h3>

<p>Infinite and missing values in <code>x</code> are discarded.
</p>

<h3>References</h3>

<p>Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988)
<em>The New S Language</em>.
Wadsworth & Brooks/Cole.
</p>

<h3>Examples</h3>

<pre>stem(islands)
stem(log10(islands))
</pre>
EOT;
  return $help;
}

function _quadstat_cor_help() {
  $help = <<<EOT
<h2>Correlation, Variance and Covariance (Matrices)</h2>

<h3>Description</h3>

<p><code>var</code>, <code>cov</code> and <code>cor</code> compute the variance of <code>x</code>
and the covariance or correlation of <code>x</code> and <code>y</code> if these
are vectors. If <code>x</code> and <code>y</code> are matrices then the
covariances (or correlations) between the columns of <code>x</code> and the
columns of <code>y</code> are computed.
</p>
<p><code>cov2cor</code> scales a covariance matrix into the corresponding
correlation matrix <em>efficiently</em>.
</p>

<h3>Usage</h3>

<pre>var(x, y = NULL, na.rm = FALSE, use)

cov(x, y = NULL, use = "everything",
    method = c("pearson", "kendall", "spearman"))

cor(x, y = NULL, use = "everything",
    method = c("pearson", "kendall", "spearman"))

cov2cor(V)
</pre>

<h3>Arguments</h3>

<table summary="R argblock">
<tbody>
<tr valign="top"><td><code>x</code></td>
<td><p>a numeric vector, matrix or data frame.</p></td></tr>
<tr valign="top"><td><code>y</code></td>
<td><p><code>NULL</code> (default) or a vector, matrix or data frame with
compatible dimensions to <code>x</code>. The default is equivalent to
<code>y = x</code> (but more efficient).</p></td></tr>
<tr valign="top"><td><code>na.rm</code></td>
<td><p>logical. Should missing values be removed?</p></td></tr>
<tr valign="top"><td><code>use</code></td>
<td><p>an optional character string giving a method for computing
covariances in the presence of missing values. This must be
(an abbreviation of) one of the strings <code>"everything"</code>,
<code>"all.obs"</code>, <code>"complete.obs"</code>, <code>"na.or.complete"</code>, or
<code>"pairwise.complete.obs"</code>.</p></td></tr>
<tr valign="top"><td><code>method</code></td>
<td><p>a character string indicating which correlation coefficient
(or covariance) is to be computed. One of <code>"pearson"</code> (default),
<code>"kendall"</code>, or <code>"spearman"</code>: can be abbreviated.</p></td></tr>
<tr valign="top"><td><code>V</code></td>
<td><p>symmetric numeric matrix, usually positive definite such as a
covariance matrix.</p></td></tr>
</tbody>
</table>

<h3>Details</h3>

<p>For <code>cov</code> and <code>cor</code> one must <em>either</em> give a matrix or
data frame for <code>x</code> <em>or</em> give both <code>x</code> and <code>y</code>.
</p> <p>The inputs must be numeric (as determined by <code><a href="../../base/html/numeric.html">is.numeric</a></code>: logical values are also allowed for historical compatibility): the <code>"kendall"</code> and <code>"spearman"</code> methods make sense for ordered inputs but <code><a href="../../base/html/xtfrm.html">xtfrm</a></code> can be used to find a suitable prior transformation to numbers. </p> <p><code>var</code> is just another interface to <code>cov</code>, where <code>na.rm</code> is used to determine the default for <code>use</code> when that is unspecified. If <code>na.rm</code> is <code>TRUE</code> then the complete observations (rows) are used (<code>use = "na.or.complete"</code>) to compute the variance. Otherwise, by default <code>use = "everything"</code>. </p> <p>If <code>use</code> is <code>"everything"</code>, <code><a href="../../base/html/NA.html">NA</a></code>s will propagate conceptually, i.e., a resulting value will be <code>NA</code> whenever one of its contributing observations is <code>NA</code>.<br> If <code>use</code> is <code>"all.obs"</code>, then the presence of missing observations will produce an error. If <code>use</code> is <code>"complete.obs"</code> then missing values are handled by casewise deletion (and if there are no complete cases, that gives an error). <br> <code>"na.or.complete"</code> is the same unless there are no complete cases, that gives <code>NA</code>. Finally, if <code>use</code> has the value <code>"pairwise.complete.obs"</code> then the correlation or covariance between each pair of variables is computed using all complete pairs of observations on those variables. This can result in covariance or correlation matrices which are not positive semi-definite, as well as <code>NA</code> entries if there are no complete pairs for that pair of variables. For <code>cov</code> and <code>var</code>, <code>"pairwise.complete.obs"</code> only works with the <code>"pearson"</code> method. Note that (the equivalent of) <code>var(double(0), use = *)</code> gives <code>NA</code> for <code>use = "everything"</code> and <code>"na.or.complete"</code>, and gives an error in the other cases. </p> <p>The denominator <i>n - 1</i> is used which gives an unbiased estimator of the (co)variance for i.i.d. observations. These functions return <code><a href="../../base/html/NA.html">NA</a></code> when there is only one observation (whereas S-PLUS has been returning <code>NaN</code>), and fail if <code>x</code> has length zero. </p> <p>For <code>cor()</code>, if <code>method</code> is <code>"kendall"</code> or <code>"spearman"</code>, Kendall's <i>tau</i> or Spearman's <i>rho</i> statistic is used to estimate a rank-based measure of association. These are more robust and have been recommended if the data do not necessarily come from a bivariate normal distribution.<br> For <code>cov()</code>, a non-Pearson method is unusual but available for the sake of completeness. Note that <code>"spearman"</code> basically computes <code>cor(R(x), R(y))</code> (or <code>cov(., .)</code>) where <code>R(u) := rank(u, na.last = "keep")</code>. In the case of missing values, the ranks are calculated depending on the value of <code>use</code>, either based on complete observations, or based on pairwise completeness with reranking for each pair. </p> <p>When there are ties, Kendall's <i>tau_b</i> is computed, as proposed by Kendall (1945). 
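</p>
<p>A brief added illustration of how the <code>use</code> argument treats a
single missing value (the vectors here are arbitrary sample values):</p>
<pre>x <- c(1, 2, 3, 4, NA)
y <- c(2, 4, 6, 8, 10)
cor(x, y)                        # NA: the default use = "everything" propagates the NA
cor(x, y, use = "complete.obs")  # 1: computed from the four complete pairs
try(cor(x, y, use = "all.obs"))  # error: missing observations are not allowed
</pre>
<p>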
</p>
<p>Scaling a covariance matrix into a correlation one can be achieved in many
ways, mathematically most appealing by multiplication with a diagonal matrix
from left and right, or more efficiently by using
<code><a href="../../base/html/sweep.html">sweep</a>(.., FUN = "/")</code> twice.
The <code>cov2cor</code> function is even a bit more efficient, and provided
mostly for didactical reasons.
</p>

<h3>Value</h3>

<p>For <code>r <- cor(*, use = "all.obs")</code>, it is now guaranteed that
<code>all(abs(r) <= 1)</code>.
</p>

<h3>Note</h3>

<p>Some people have noted that the code for Kendall's tau is slow for very
large datasets (many more than 1000 cases). It rarely makes sense to do such
a computation, but see function <code><a href="../../pcaPP/html/cor.fk.html">cor.fk</a></code>
in package <a href="https://CRAN.R-project.org/package=pcaPP"><span class="pkg">pcaPP</span></a>.
</p>

<h3>References</h3>

<p>Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988)
<em>The New S Language</em>.
Wadsworth & Brooks/Cole.
</p>
<p>Kendall, M. G. (1938) A new measure of rank correlation,
<em>Biometrika</em> <b>30</b>, 81–93.
<a href="https://dx.doi.org/10.1093/biomet/30.1-2.81">https://dx.doi.org/10.1093/biomet/30.1-2.81</a>
</p>
<p>Kendall, M. G. (1945) The treatment of ties in rank problems.
<em>Biometrika</em> <b>33</b>, 239–251.
<a href="https://dx.doi.org/10.1093/biomet/33.3.239">https://dx.doi.org/10.1093/biomet/33.3.239</a>
</p>

<h3>See Also</h3>

<p><code><a href="cor.test.html">cor.test</a></code> for confidence intervals (and tests).
</p>
<p><code><a href="cov.wt.html">cov.wt</a></code> for <em>weighted</em> covariance computation.
</p>
<p><code><a href="sd.html">sd</a></code> for standard deviation (vectors).
</p>

<h3>Examples</h3>

<pre>var(1:10)  # 9.166667

var(1:5, 1:5) # 2.5

## Two simple vectors
cor(1:10, 2:11) # == 1

## Correlation Matrix of Multivariate sample:
(Cl <- cor(longley))
## Graphical Correlation Matrix:
symnum(Cl) # highly correlated

## Spearman's rho and Kendall's tau
symnum(clS <- cor(longley, method = "spearman"))
symnum(clK <- cor(longley, method = "kendall"))
## How much do they differ?
i <- lower.tri(Cl)
cor(cbind(P = Cl[i], S = clS[i], K = clK[i]))

## cov2cor() scales a covariance matrix by its diagonal
## to become the correlation matrix.
cov2cor # see the function definition {and learn ..}
stopifnot(all.equal(Cl, cov2cor(cov(longley))),
          all.equal(cor(longley, method = "kendall"),
                    cov2cor(cov(longley, method = "kendall"))))

##--- Missing value treatment:
C1 <- cov(swiss)
range(eigen(C1, only.values = TRUE)\$values) # 6.19  1921

## swM := "swiss" with 3 "missing"s :
swM <- swiss
colnames(swM) <- abbreviate(colnames(swiss), min=6)
swM[1,2] <- swM[7,3] <- swM[25,5] <- NA # create 3 "missing"

## Consider all 5 "use" cases :
(C. <- cov(swM)) # use="everything"  quite a few NA's in cov.matrix
try(cov(swM, use = "all")) # Error: missing obs...
C2 <- cov(swM, use = "complete")
stopifnot(identical(C2, cov(swM, use = "na.or.complete")))
range(eigen(C2, only.values = TRUE)\$values) # 6.46  1930
C3 <- cov(swM, use = "pairwise")
range(eigen(C3, only.values = TRUE)\$values) # 6.19  1938

## Kendall's tau doesn't change much:
symnum(Rc <- cor(swM, method = "kendall", use = "complete"))
symnum(Rp <- cor(swM, method = "kendall", use = "pairwise"))
symnum(R. <- cor(swiss, method = "kendall"))

## "pairwise" is closer componentwise,
summary(abs(c(1 - Rp/R.)))
summary(abs(c(1 - Rc/R.)))

## but "complete" is closer in Eigen space:
EV <- function(m) eigen(m, only.values=TRUE)\$values
summary(abs(1 - EV(Rp)/EV(R.)) / abs(1 - EV(Rc)/EV(R.)))
</pre>
EOT;
  return $help;
}