quadstat-8.x-1.x-dev/quadstat.install
<?php

use Drupal\node\Entity\Node;
use Drupal\file\Entity\File;
use Drupal\comment\Plugin\Field\FieldType\CommentItemInterface;

/**
 * @file
 * Install, update and uninstall hooks for the Quadstat install profile.
 */

/**
 * Implements hook_install().
 *
 * Perform actions to set up the site for this profile.
 *
 * @see system_install()
 */
function quadstat_install() {
  // First, do everything that is done in the standard profile.
  include_once DRUPAL_ROOT . '/core/profiles/standard/standard.install';
  standard_install();

  // Use the Seven administration theme as the site default.
  \Drupal::configFactory()->getEditable('system.theme')->set('default', 'seven')->save();

  // Create the welcome article that will serve as the front page.
  $node = Node::create([
    'type' => 'article',
    'title' => 'Thank you for installing Quadstat',
    'uid' => 1,
    'body' => [
      'value' => '<p>Remember to <a href="https://www.drupal.org/docs/8/modules/quadstat/configuring-quadstat">configure Quadstat</a> before using.</p><p>Please <a href="https://www.drupal.org/project/issues/quadstat">report</a> any issues.</p><p>Help support <a href="https://www.r-project.org/foundation/donations.html">R</a>.</p>',
      'format' => 'full_html',
    ],
    'comment' => [
      'status' => CommentItemInterface::CLOSED,
    ],
  ]);
  $node->save();

  // Point the front page at the welcome node created above.
  \Drupal::configFactory()->getEditable('system.site')->set('page.front', '/node/1')->save();

  // Generate five sample datasets.
  for ($i = 0; $i < 5; $i++) {
    _quadstat_default_content();
  }

  // Assign ownership of the generated dataset files to user 1.
  $query = \Drupal::database()->update('file_managed');
  $query->fields(['uid' => 1]);
  $query->execute();

  // Create one Application node per bundled webform.
  $applications = [
    'rnorm' => [
      'summary' => 'Generates random samples from a normal distribution',
      'help' => _quadstat_rnorm_help(),
    ],
    'plot' => [
      'summary' => 'Plot two dataset vectors',
      'help' => _quadstat_plot_help(),
    ],
    'stem' => [
      'summary' => 'Stem-and-leaf plot',
      'help' => _quadstat_stem_help(),
    ],
    'cor' => [
      'summary' => 'Compute the correlation coefficient between two variables',
      'help' => _quadstat_cor_help(),
    ],
    'lm' => [
      'summary' => 'Compute regression slope and y-intercept',
      'help' => _quadstat_lm_help(),
    ],
  ];
  foreach ($applications as $name => $info) {
    $node = Node::create([
      'type' => 'application',
      'title' => $name,
      'uid' => 1,
      'webform' => [
        'target_id' => $name,
        'status' => 'open',
      ],
      'field_application_help' => [
        'value' => $info['help'],
        'summary' => $info['summary'],
        'format' => 'full_html',
      ],
    ]);
    $node->save();
  }
}

/**
 * Creates a sample dataset node backed by a randomly generated CSV file.
 */
function _quadstat_default_content() {
  $user = \Drupal\user\Entity\User::load(1);
  $data = '';
  // Populate a 10x10 grid of random integers, one comma-separated row per line.
  for ($i = 0; $i < 10; $i++) {
    for ($j = 0; $j < 10; $j++) {
      $data .= rand(0, 100) . ",";
    }
    // Strip the trailing comma and terminate the row.
    $data = rtrim($data, ',') .
"\n"; } // Add header column to beginning $header = ''; for($i = 1; $i <= 10; $i++) { $header .= '"' . $i . '",'; } $data = rtrim($header, ',') . "\n" . $data; // Save the newly created CSV dataset $dir = 'public://datasets/admin'; if(file_prepare_directory($dir, FILE_CREATE_DIRECTORY)) { $datafile = file_save_data($data, $dir . '/' . rand(10000,99999) . '.csv'); } // Create node object with attached file. $node = Node::create([ 'type' => 'dataset', 'title' => 'Random Dataset', 'field_dataset_file' => [ 'target_id' => $datafile->get('fid')->get(0)->value, 'format' => 'full_html' ], 'field_dataset_buttons' => '<div id="quadstat-slickgrid-buttons"><input type="button" value="Add Row" id="quadstat-slickgrid-add-row" class="button" /><input type="button" value="Add Column" id="quadstat-slickgrid-add-col" class="button" /></div>', 'field_dataset_grid' => [ 'value' => '<div id="quadstat-slickgrid"></div>', 'format' => 'full_html' ], 'uid' => 1 ]); $node->save(); } function _quadstat_rnorm_help() { $help = <<<EOT <h2>The (non-central) Chi-Squared Distribution</h2> <h3>Description</h3> <p>Density, distribution function, quantile function and random generation for the chi-squared (<i>chi^2</i>) distribution with <code>df</code> degrees of freedom and optional non-centrality parameter <code>ncp</code>. </p> <h3>Usage</h3> <pre>dchisq(x, df, ncp = 0, log = FALSE) pchisq(q, df, ncp = 0, lower.tail = TRUE, log.p = FALSE) qchisq(p, df, ncp = 0, lower.tail = TRUE, log.p = FALSE) rchisq(n, df, ncp = 0) </pre> <h3>Arguments</h3> <table summary="R argblock"> <tbody><tr valign="top"><td><code>x, q</code></td> <td> <p>vector of quantiles.</p> </td></tr> <tr valign="top"><td><code>p</code></td> <td> <p>vector of probabilities.</p> </td></tr> <tr valign="top"><td><code>n</code></td> <td> <p>number of observations. If <code>length(n) > 1</code>, the length is taken to be the number required.</p> </td></tr> <tr valign="top"><td><code>df</code></td> <td> <p>degrees of freedom (non-negative, but can be non-integer).</p> </td></tr> <tr valign="top"><td><code>ncp</code></td> <td> <p>non-centrality parameter (non-negative).</p> </td></tr> <tr valign="top"><td><code>log, log.p</code></td> <td> <p>logical; if TRUE, probabilities p are given as log(p).</p> </td></tr> <tr valign="top"><td><code>lower.tail</code></td> <td> <p>logical; if TRUE (default), probabilities are <i>P[X ≤ x]</i>, otherwise, <i>P[X > x]</i>.</p> </td></tr> </tbody></table> <h3>Details</h3> <p>The chi-squared distribution with <code>df</code><i>= n ≥ 0</i> degrees of freedom has density </p> <p style="text-align: center;"><i>f_n(x) = 1 / (2^(n/2) Γ(n/2)) x^(n/2-1) e^(-x/2)</i></p> <p>for <i>x > 0</i>. The mean and variance are <i>n</i> and <i>2n</i>. </p> <p>The non-central chi-squared distribution with <code>df</code><i>= n</i> degrees of freedom and non-centrality parameter <code>ncp</code> <i>= λ</i> has density </p> <p style="text-align: center;"><i>f(x) = exp(-λ/2) SUM_{r=0}^∞ ((λ/2)^r / r!) dchisq(x, df + 2r) </i></p> <p>for <i>x ≥ 0</i>. For integer <i>n</i>, this is the distribution of the sum of squares of <i>n</i> normals each with variance one, <i>λ</i> being the sum of squares of the normal means; further, <br> <i>E(X) = n + λ</i>, <i>Var(X) = 2(n + 2*λ)</i>, and <i>E((X - E(X))^3) = 8(n + 3*λ)</i>. </p> <p>Note that the degrees of freedom <code>df</code><i>= n</i>, can be non-integer, and also <i>n = 0</i> which is relevant for non-centrality <i>λ > 0</i>, see Johnson <em>et al</em> (1995, chapter 29). 
In that (noncentral, zero df) case, the distribution is a mixture of a point mass at <i>x = 0</i> (of size <code>pchisq(0, df=0, ncp=ncp)</code> and a continuous part, and <code>dchisq()</code> is <em>not</em> a density with respect to that mixture measure but rather the limit of the density for <i>df -> 0</i>. </p> <p>Note that <code>ncp</code> values larger than about 1e5 may give inaccurate results with many warnings for <code>pchisq</code> and <code>qchisq</code>. </p> <h3>Value</h3> <p><code>dchisq</code> gives the density, <code>pchisq</code> gives the distribution function, <code>qchisq</code> gives the quantile function, and <code>rchisq</code> generates random deviates. </p> <p>Invalid arguments will result in return value <code>NaN</code>, with a warning. </p> <p>The length of the result is determined by <code>n</code> for <code>rchisq</code>, and is the maximum of the lengths of the numerical arguments for the other functions. </p> <p>The numerical arguments other than <code>n</code> are recycled to the length of the result. Only the first elements of the logical arguments are used. </p> <h3>Note</h3> <p>Supplying <code>ncp = 0</code> uses the algorithm for the non-central distribution, which is not the same algorithm used if <code>ncp</code> is omitted. This is to give consistent behaviour in extreme cases with values of <code>ncp</code> very near zero. </p> <p>The code for non-zero <code>ncp</code> is principally intended to be used for moderate values of <code>ncp</code>: it will not be highly accurate, especially in the tails, for large values. </p> <h3>Source</h3> <p>The central cases are computed via the gamma distribution. </p> <p>The non-central <code>dchisq</code> and <code>rchisq</code> are computed as a Poisson mixture central of chi-squares (Johnson <em>et al</em>, 1995, p.436). </p> <p>The non-central <code>pchisq</code> is for <code>ncp < 80</code> computed from the Poisson mixture of central chi-squares and for larger <code>ncp</code> <em>via</em> a C translation of </p> <p>Ding, C. G. (1992) Algorithm AS275: Computing the non-central chi-squared distribution function. <em>Appl.Statist.</em>, <b>41</b> 478–482. </p> <p>which computes the lower tail only (so the upper tail suffers from cancellation and a warning will be given when this is likely to be significant). </p> <p>The non-central <code>qchisq</code> is based on inversion of <code>pchisq</code>. </p> <h3>References</h3> <p>Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988) <em>The New S Language</em>. Wadsworth & Brooks/Cole. </p> <p>Johnson, N. L., Kotz, S. and Balakrishnan, N. (1995) <em>Continuous Univariate Distributions</em>, chapters 18 (volume 1) and 29 (volume 2). Wiley, New York. </p> <h3>See Also</h3> <p><a href="Distributions.html">Distributions</a> for other standard distributions. </p> <p>A central chi-squared distribution with <i>n</i> degrees of freedom is the same as a Gamma distribution with <code>shape</code> <i>a = n/2</i> and <code>scale</code> <i>s = 2</i>. Hence, see <code><a href="GammaDist.html">dgamma</a></code> for the Gamma distribution. </p> <h3>Examples</h3> <pre>require(graphics) dchisq(1, df = 1:3) pchisq(1, df = 3) pchisq(1, df = 3, ncp = 0:4) # includes the above x <- 1:10 ## Chi-squared(df = 2) is a special exponential distribution all.equal(dchisq(x, df = 2), dexp(x, 1/2)) all.equal(pchisq(x, df = 2), pexp(x, 1/2)) ## non-central RNG -- df = 0 with ncp > 0: Z0 has point mass at 0! Z0 <- rchisq(100, df = 0, ncp = 2.) 
graphics::stem(Z0) ## visual testing ## do P-P plots for 1000 points at various degrees of freedom L <- 1.2; n <- 1000; pp <- ppoints(n) op <- par(mfrow = c(3,3), mar = c(3,3,1,1)+.1, mgp = c(1.5,.6,0), oma = c(0,0,3,0)) for(df in 2^(4*rnorm(9))) { plot(pp, sort(pchisq(rr <- rchisq(n, df = df, ncp = L), df = df, ncp = L)), ylab = "pchisq(rchisq(.),.)", pch = ".") mtext(paste("df = ", formatC(df, digits = 4)), line = -2, adj = 0.05) abline(0, 1, col = 2) } mtext(expression("P-P plots : Noncentral "* chi^2 *"(n=1000, df=X, ncp= 1.2)"), cex = 1.5, font = 2, outer = TRUE) par(op) ## "analytical" test lam <- seq(0, 100, by = .25) p00 <- pchisq(0, df = 0, ncp = lam) p.0 <- pchisq(1e-300, df = 0, ncp = lam) stopifnot(all.equal(p00, exp(-lam/2)), all.equal(p.0, exp(-lam/2))) </pre> EOT; return $help; } function _quadstat_plot_help() { $help = <<<EOT <h2>Generic X-Y Plotting</h2> <h3>Description</h3> <p>Generic function for plotting of <span style="font-family: Courier New, Courier; color: #666666;"><b>R</b></span> objects. For more details about the graphical parameter arguments, see <code><a href="par.html">par</a></code>. </p> <p>For simple scatter plots, <code><a href="plot.default.html">plot.default</a></code> will be used. However, there are <code>plot</code> methods for many <span style="font-family: Courier New, Courier; color: #666666;"><b>R</b></span> objects, including <code><a href="../../base/html/function.html">function</a></code>s, <code><a href="../../base/html/data.frame.html">data.frame</a></code>s, <code><a href="../../stats/html/density.html">density</a></code> objects, etc. Use <code>methods(plot)</code> and the documentation for these. </p> <h3>Usage</h3> <pre>plot(x, y, ...) </pre> <h3>Arguments</h3> <table summary="R argblock"> <tbody><tr valign="top"><td><code>x</code></td> <td> <p>the coordinates of points in the plot. Alternatively, a single plotting structure, function or <em>any <span style="font-family: Courier New, Courier; color: #666666;"><b>R</b></span> object with a <code>plot</code> method</em> can be provided.</p> </td></tr> <tr valign="top"><td><code>y</code></td> <td> <p>the y coordinates of points in the plot, <em>optional</em> if <code>x</code> is an appropriate structure.</p> </td></tr> <tr valign="top"><td><code>...</code></td> <td> <p>Arguments to be passed to methods, such as <a href="par.html">graphical parameters</a> (see <code><a href="par.html">par</a></code>). Many methods will accept the following arguments: </p> <dl> <dt><code>type</code></dt><dd><p>what type of plot should be drawn. Possible types are </p> <ul> <li> <p><code>"p"</code> for <b>p</b>oints, </p> </li> <li> <p><code>"l"</code> for <b>l</b>ines, </p> </li> <li> <p><code>"b"</code> for <b>b</b>oth, </p> </li> <li> <p><code>"c"</code> for the lines part alone of <code>"b"</code>, </p> </li> <li> <p><code>"o"</code> for both ‘<b>o</b>verplotted’, </p> </li> <li> <p><code>"h"</code> for ‘<b>h</b>istogram’ like (or ‘high-density’) vertical lines, </p> </li> <li> <p><code>"s"</code> for stair <b>s</b>teps, </p> </li> <li> <p><code>"S"</code> for other <b>s</b>teps, see ‘Details’ below, </p> </li> <li> <p><code>"n"</code> for no plotting. </p> </li></ul> <p>All other <code>type</code>s give a warning or an error; using, e.g., <code>type = "punkte"</code> being equivalent to <code>type = "p"</code> for S compatibility. Note that some methods, e.g. <code><a href="plot.factor.html">plot.factor</a></code>, do not accept this. 
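</p>
<p>A brief added illustration of a few of these <code>type</code> values drawn on the
same x-y data (the vectors here are arbitrary sample values):</p>
<pre>x <- 1:10
y <- x^2
plot(x, y, type = "p")  # points only
plot(x, y, type = "l")  # connected lines
plot(x, y, type = "b")  # both points and lines
plot(x, y, type = "h")  # 'histogram'-like vertical lines
plot(x, y, type = "s")  # stair steps: horizontal first, then vertical
</pre>
<p>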
</p> </dd> <dt><code>main</code></dt><dd><p>an overall title for the plot: see <code><a href="title.html">title</a></code>.</p> </dd> <dt><code>sub</code></dt><dd><p>a sub title for the plot: see <code><a href="title.html">title</a></code>.</p> </dd> <dt><code>xlab</code></dt><dd><p>a title for the x axis: see <code><a href="title.html">title</a></code>.</p> </dd> <dt><code>ylab</code></dt><dd><p>a title for the y axis: see <code><a href="title.html">title</a></code>.</p> </dd> <dt><code>asp</code></dt><dd><p>the <i>y/x</i> aspect ratio, see <code><a href="plot.window.html">plot.window</a></code>.</p> </dd> </dl> </td></tr> </tbody></table> <h3>Details</h3> <p>The two step types differ in their x-y preference: Going from <i>(x1,y1)</i> to <i>(x2,y2)</i> with <i>x1 < x2</i>, <code>type = "s"</code> moves first horizontal, then vertical, whereas <code>type = "S"</code> moves the other way around. </p> <h3>See Also</h3> <p><code><a href="plot.default.html">plot.default</a></code>, <code><a href="plot.formula.html">plot.formula</a></code> and other methods; <code><a href="points.html">points</a></code>, <code><a href="lines.html">lines</a></code>, <code><a href="par.html">par</a></code>. For thousands of points, consider using <code><a href="smoothScatter.html">smoothScatter</a>()</code> instead of <code>plot()</code>. </p> <p>For X-Y-Z plotting see <code><a href="contour.html">contour</a></code>, <code><a href="persp.html">persp</a></code> and <code><a href="image.html">image</a></code>. </p> <h3>Examples</h3> <pre>require(stats) # for lowess, rpois, rnorm plot(cars) lines(lowess(cars)) plot(sin, -pi, 2*pi) # see ?plot.function ## Discrete Distribution Plot: plot(table(rpois(100, 5)), type = "h", col = "red", lwd = 10, main = "rpois(100, lambda = 5)") ## Simple quantiles/ECDF, see ecdf() {library(stats)} for a better one: plot(x <- sort(rnorm(47)), type = "s", main = "plot(x, type = \"s\")") points(x, cex = .5, col = "dark red") </pre> EOT; return $help; } function _quadstat_lm_help() { $help = <<<EOT <h2>Fitting Linear Models</h2> <h3>Description</h3> <p><code>lm</code> is used to fit linear models. It can be used to carry out regression, single stratum analysis of variance and analysis of covariance (although <code><a href="aov.html">aov</a></code> may provide a more convenient interface for these). </p> <h3>Usage</h3> <pre>lm(formula, data, subset, weights, na.action, method = "qr", model = TRUE, x = FALSE, y = FALSE, qr = TRUE, singular.ok = TRUE, contrasts = NULL, offset, ...) </pre> <h3>Arguments</h3> <table summary="R argblock"> <tbody><tr valign="top"><td><code>formula</code></td> <td> <p>an object of class <code>"<a href="formula.html">formula</a>"</code> (or one that can be coerced to that class): a symbolic description of the model to be fitted. The details of model specification are given under ‘Details’.</p> </td></tr> <tr valign="top"><td><code>data</code></td> <td> <p>an optional data frame, list or environment (or object coercible by <code><a href="../../base/html/as.data.frame.html">as.data.frame</a></code> to a data frame) containing the variables in the model. 
If not found in <code>data</code>, the variables are taken from <code>environment(formula)</code>, typically the environment from which <code>lm</code> is called.</p> </td></tr> <tr valign="top"><td><code>subset</code></td> <td> <p>an optional vector specifying a subset of observations to be used in the fitting process.</p> </td></tr> <tr valign="top"><td><code>weights</code></td> <td> <p>an optional vector of weights to be used in the fitting process. Should be <code>NULL</code> or a numeric vector. If non-NULL, weighted least squares is used with weights <code>weights</code> (that is, minimizing <code>sum(w*e^2)</code>); otherwise ordinary least squares is used. See also ‘Details’,</p> </td></tr> <tr valign="top"><td><code>na.action</code></td> <td> <p>a function which indicates what should happen when the data contain <code>NA</code>s. The default is set by the <code>na.action</code> setting of <code><a href="../../base/html/options.html">options</a></code>, and is <code><a href="na.fail.html">na.fail</a></code> if that is unset. The ‘factory-fresh’ default is <code><a href="na.fail.html">na.omit</a></code>. Another possible value is <code>NULL</code>, no action. Value <code><a href="na.fail.html">na.exclude</a></code> can be useful.</p> </td></tr> <tr valign="top"><td><code>method</code></td> <td> <p>the method to be used; for fitting, currently only <code>method = "qr"</code> is supported; <code>method = "model.frame"</code> returns the model frame (the same as with <code>model = TRUE</code>, see below).</p> </td></tr> <tr valign="top"><td><code>model, x, y, qr</code></td> <td> <p>logicals. If <code>TRUE</code> the corresponding components of the fit (the model frame, the model matrix, the response, the QR decomposition) are returned. </p> </td></tr> <tr valign="top"><td><code>singular.ok</code></td> <td> <p>logical. If <code>FALSE</code> (the default in S but not in <span style="font-family: Courier New, Courier; color: #666666;"><b>R</b></span>) a singular fit is an error.</p> </td></tr> <tr valign="top"><td><code>contrasts</code></td> <td> <p>an optional list. See the <code>contrasts.arg</code> of <code><a href="model.matrix.html">model.matrix.default</a></code>.</p> </td></tr> <tr valign="top"><td><code>offset</code></td> <td> <p>this can be used to specify an <em>a priori</em> known component to be included in the linear predictor during fitting. This should be <code>NULL</code> or a numeric vector of length equal to the number of cases. One or more <code><a href="offset.html">offset</a></code> terms can be included in the formula instead or as well, and if more than one are specified their sum is used. See <code><a href="model.extract.html">model.offset</a></code>.</p> </td></tr> <tr valign="top"><td><code>...</code></td> <td> <p>additional arguments to be passed to the low level regression fitting functions (see below).</p> </td></tr> </tbody></table> <h3>Details</h3> <p>Models for <code>lm</code> are specified symbolically. A typical model has the form <code>response ~ terms</code> where <code>response</code> is the (numeric) response vector and <code>terms</code> is a series of terms which specifies a linear predictor for <code>response</code>. A terms specification of the form <code>first + second</code> indicates all the terms in <code>first</code> together with all the terms in <code>second</code> with duplicates removed. 
A specification of the form <code>first:second</code> indicates the set of terms obtained by taking the interactions of all terms in <code>first</code> with all terms in <code>second</code>. The specification <code>first*second</code> indicates the <em>cross</em> of <code>first</code> and <code>second</code>. This is the same as <code>first + second + first:second</code>. </p> <p>If the formula includes an <code><a href="offset.html">offset</a></code>, this is evaluated and subtracted from the response. </p> <p>If <code>response</code> is a matrix a linear model is fitted separately by least-squares to each column of the matrix. </p> <p>See <code><a href="model.matrix.html">model.matrix</a></code> for some further details. The terms in the formula will be re-ordered so that main effects come first, followed by the interactions, all second-order, all third-order and so on: to avoid this pass a <code>terms</code> object as the formula (see <code><a href="aov.html">aov</a></code> and <code>demo(glm.vr)</code> for an example). </p> <p>A formula has an implied intercept term. To remove this use either <code>y ~ x - 1</code> or <code>y ~ 0 + x</code>. See <code><a href="formula.html">formula</a></code> for more details of allowed formulae. </p> <p>Non-<code>NULL</code> <code>weights</code> can be used to indicate that different observations have different variances (with the values in <code>weights</code> being inversely proportional to the variances); or equivalently, when the elements of <code>weights</code> are positive integers <i>w_i</i>, that each response <i>y_i</i> is the mean of <i>w_i</i> unit-weight observations (including the case that there are <i>w_i</i> observations equal to <i>y_i</i> and the data have been summarized). </p> <p><code>lm</code> calls the lower level functions <code><a href="lmfit.html">lm.fit</a></code>, etc, see below, for the actual numerical computations. For programming only, you may consider doing likewise. </p> <p>All of <code>weights</code>, <code>subset</code> and <code>offset</code> are evaluated in the same way as variables in <code>formula</code>, that is first in <code>data</code> and then in the environment of <code>formula</code>. </p> <h3>Value</h3> <p><code>lm</code> returns an object of <code><a href="../../base/html/class.html">class</a></code> <code>"lm"</code> or for multiple responses of class <code>c("mlm", "lm")</code>. </p> <p>The functions <code>summary</code> and <code><a href="anova.html">anova</a></code> are used to obtain and print a summary and analysis of variance table of the results. The generic accessor functions <code>coefficients</code>, <code>effects</code>, <code>fitted.values</code> and <code>residuals</code> extract various useful features of the value returned by <code>lm</code>. 
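</p>
<p>A brief added illustration of these accessor functions using the built-in
<code>cars</code> data set (an arbitrary choice of example data):</p>
<pre>fit <- lm(dist ~ speed, data = cars)
coef(fit)             # named vector of coefficients
head(residuals(fit))  # response minus fitted values
head(fitted(fit))     # fitted mean values
</pre>
<p>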
</p> <p>An object of class <code>"lm"</code> is a list containing at least the following components: </p> <table summary="R valueblock"> <tbody><tr valign="top"><td><code>coefficients</code></td> <td> <p>a named vector of coefficients</p> </td></tr> <tr valign="top"><td><code>residuals</code></td> <td> <p>the residuals, that is response minus fitted values.</p> </td></tr> <tr valign="top"><td><code>fitted.values</code></td> <td> <p>the fitted mean values.</p> </td></tr> <tr valign="top"><td><code>rank</code></td> <td> <p>the numeric rank of the fitted linear model.</p> </td></tr> <tr valign="top"><td><code>weights</code></td> <td> <p>(only for weighted fits) the specified weights.</p> </td></tr> <tr valign="top"><td><code>df.residual</code></td> <td> <p>the residual degrees of freedom.</p> </td></tr> <tr valign="top"><td><code>call</code></td> <td> <p>the matched call.</p> </td></tr> <tr valign="top"><td><code>terms</code></td> <td> <p>the <code><a href="terms.html">terms</a></code> object used.</p> </td></tr> <tr valign="top"><td><code>contrasts</code></td> <td> <p>(only where relevant) the contrasts used.</p> </td></tr> <tr valign="top"><td><code>xlevels</code></td> <td> <p>(only where relevant) a record of the levels of the factors used in fitting.</p> </td></tr> <tr valign="top"><td><code>offset</code></td> <td> <p>the offset used (missing if none were used).</p> </td></tr> <tr valign="top"><td><code>y</code></td> <td> <p>if requested, the response used.</p> </td></tr> <tr valign="top"><td><code>x</code></td> <td> <p>if requested, the model matrix used.</p> </td></tr> <tr valign="top"><td><code>model</code></td> <td> <p>if requested (the default), the model frame used.</p> </td></tr> <tr valign="top"><td><code>na.action</code></td> <td> <p>(where relevant) information returned by <code><a href="model.frame.html">model.frame</a></code> on the special handling of <code>NA</code>s.</p> </td></tr> </tbody></table> <p>In addition, non-null fits will have components <code>assign</code>, <code>effects</code> and (unless not requested) <code>qr</code> relating to the linear fit, for use by extractor functions such as <code>summary</code> and <code><a href="effects.html">effects</a></code>. </p> <h3>Using time series</h3> <p>Considerable care is needed when using <code>lm</code> with time series. </p> <p>Unless <code>na.action = NULL</code>, the time series attributes are stripped from the variables before the regression is done. (This is necessary as omitting <code>NA</code>s would invalidate the time series attributes, and if <code>NA</code>s are omitted in the middle of the series the result would no longer be a regular time series.) </p> <p>Even if the time series attributes are retained, they are not used to line up series, so that the time shift of a lagged or differenced regressor would be ignored. It is good practice to prepare a <code>data</code> argument by <code><a href="ts.union.html">ts.intersect</a>(..., dframe = TRUE)</code>, then apply a suitable <code>na.action</code> to that data frame and call <code>lm</code> with <code>na.action = NULL</code> so that residuals and fitted values are time series. </p> <h3>Note</h3> <p>Offsets specified by <code>offset</code> will not be included in predictions by <code><a href="predict.lm.html">predict.lm</a></code>, whereas those specified by an offset term in the formula will be. </p> <h3>Author(s)</h3> <p>The design was inspired by the S function of the same name described in Chambers (1992). 
The implementation of model formula by Ross Ihaka was based on Wilkinson & Rogers (1973). </p> <h3>References</h3> <p>Chambers, J. M. (1992) <em>Linear models.</em> Chapter 4 of <em>Statistical Models in S</em> eds J. M. Chambers and T. J. Hastie, Wadsworth & Brooks/Cole. </p> <p>Wilkinson, G. N. and Rogers, C. E. (1973) Symbolic descriptions of factorial models for analysis of variance. <em>Applied Statistics</em>, <b>22</b>, 392–9. </p> <h3>See Also</h3> <p><code><a href="summary.lm.html">summary.lm</a></code> for summaries and <code><a href="anova.lm.html">anova.lm</a></code> for the ANOVA table; <code><a href="aov.html">aov</a></code> for a different interface. </p> <p>The generic functions <code><a href="coef.html">coef</a></code>, <code><a href="effects.html">effects</a></code>, <code><a href="residuals.html">residuals</a></code>, <code><a href="fitted.values.html">fitted</a></code>, <code><a href="vcov.html">vcov</a></code>. </p> <p><code><a href="predict.lm.html">predict.lm</a></code> (via <code><a href="predict.html">predict</a></code>) for prediction, including confidence and prediction intervals; <code><a href="confint.html">confint</a></code> for confidence intervals of <em>parameters</em>. </p> <p><code><a href="lm.influence.html">lm.influence</a></code> for regression diagnostics, and <code><a href="glm.html">glm</a></code> for <b>generalized</b> linear models. </p> <p>The underlying low level functions, <code><a href="lmfit.html">lm.fit</a></code> for plain, and <code><a href="lmfit.html">lm.wfit</a></code> for weighted regression fitting. </p> <p>More <code>lm()</code> examples are available e.g., in <code><a href="../../datasets/html/anscombe.html">anscombe</a></code>, <code><a href="../../datasets/html/attitude.html">attitude</a></code>, <code><a href="../../datasets/html/freeny.html">freeny</a></code>, <code><a href="../../datasets/html/LifeCycleSavings.html">LifeCycleSavings</a></code>, <code><a href="../../datasets/html/longley.html">longley</a></code>, <code><a href="../../datasets/html/stackloss.html">stackloss</a></code>, <code><a href="../../datasets/html/swiss.html">swiss</a></code>. </p> <p><code>biglm</code> in package <a href="https://CRAN.R-project.org/package=biglm"><span class="pkg">biglm</span></a> for an alternative way to fit linear models to large datasets (especially those with many cases). </p> <h3>Examples</h3> <pre>require(graphics) ## Annette Dobson (1990) "An Introduction to Generalized Linear Models". ## Page 9: Plant Weight Data. ctl <- c(4.17,5.58,5.18,6.11,4.50,4.61,5.17,4.53,5.33,5.14) trt <- c(4.81,4.17,4.41,3.59,5.87,3.83,6.03,4.89,4.32,4.69) group <- gl(2, 10, 20, labels = c("Ctl","Trt")) weight <- c(ctl, trt) lm.D9 <- lm(weight ~ group) lm.D90 <- lm(weight ~ group - 1) # omitting intercept anova(lm.D9) summary(lm.D90) opar <- par(mfrow = c(2,2), oma = c(0, 0, 1.1, 0)) plot(lm.D9, las = 1) # Residuals, Fitted, ... par(opar) ### less simple examples in "See Also" above </pre> EOT; return $help; } function _quadstat_stem_help() { $help = <<<EOT <h2>Stem-and-Leaf Plots</h2> <h3>Description</h3> <p><code>stem</code> produces a stem-and-leaf plot of the values in <code>x</code>. The parameter <code>scale</code> can be used to expand the scale of the plot. A value of <code>scale = 2</code> will cause the plot to be roughly twice as long as the default. 
</p>

<h3>Usage</h3>

<pre>stem(x, scale = 1, width = 80, atom = 1e-08)
</pre>

<h3>Arguments</h3>

<table summary="R argblock">
<tbody>
<tr valign="top"><td><code>x</code></td>
<td><p>a numeric vector.</p></td></tr>
<tr valign="top"><td><code>scale</code></td>
<td><p>This controls the plot length.</p></td></tr>
<tr valign="top"><td><code>width</code></td>
<td><p>The desired width of plot.</p></td></tr>
<tr valign="top"><td><code>atom</code></td>
<td><p>a tolerance.</p></td></tr>
</tbody>
</table>

<h3>Details</h3>

<p>Infinite and missing values in <code>x</code> are discarded.
</p>

<h3>References</h3>

<p>Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988)
<em>The New S Language</em>.
Wadsworth & Brooks/Cole.
</p>

<h3>Examples</h3>

<pre>stem(islands)
stem(log10(islands))
</pre>
EOT;
  return $help;
}

function _quadstat_cor_help() {
  $help = <<<EOT
<h2>Correlation, Variance and Covariance (Matrices)</h2>

<h3>Description</h3>

<p><code>var</code>, <code>cov</code> and <code>cor</code> compute the variance of <code>x</code>
and the covariance or correlation of <code>x</code> and <code>y</code> if these
are vectors. If <code>x</code> and <code>y</code> are matrices then the
covariances (or correlations) between the columns of <code>x</code> and the
columns of <code>y</code> are computed.
</p>
<p><code>cov2cor</code> scales a covariance matrix into the corresponding
correlation matrix <em>efficiently</em>.
</p>

<h3>Usage</h3>

<pre>var(x, y = NULL, na.rm = FALSE, use)

cov(x, y = NULL, use = "everything",
    method = c("pearson", "kendall", "spearman"))

cor(x, y = NULL, use = "everything",
    method = c("pearson", "kendall", "spearman"))

cov2cor(V)
</pre>

<h3>Arguments</h3>

<table summary="R argblock">
<tbody>
<tr valign="top"><td><code>x</code></td>
<td><p>a numeric vector, matrix or data frame.</p></td></tr>
<tr valign="top"><td><code>y</code></td>
<td><p><code>NULL</code> (default) or a vector, matrix or data frame with
compatible dimensions to <code>x</code>. The default is equivalent to
<code>y = x</code> (but more efficient).</p></td></tr>
<tr valign="top"><td><code>na.rm</code></td>
<td><p>logical. Should missing values be removed?</p></td></tr>
<tr valign="top"><td><code>use</code></td>
<td><p>an optional character string giving a method for computing
covariances in the presence of missing values. This must be
(an abbreviation of) one of the strings <code>"everything"</code>,
<code>"all.obs"</code>, <code>"complete.obs"</code>, <code>"na.or.complete"</code>, or
<code>"pairwise.complete.obs"</code>.</p></td></tr>
<tr valign="top"><td><code>method</code></td>
<td><p>a character string indicating which correlation coefficient
(or covariance) is to be computed. One of <code>"pearson"</code> (default),
<code>"kendall"</code>, or <code>"spearman"</code>: can be abbreviated.</p></td></tr>
<tr valign="top"><td><code>V</code></td>
<td><p>symmetric numeric matrix, usually positive definite such as a
covariance matrix.</p></td></tr>
</tbody>
</table>

<h3>Details</h3>

<p>For <code>cov</code> and <code>cor</code> one must <em>either</em> give a matrix or
data frame for <code>x</code> <em>or</em> give both <code>x</code> and <code>y</code>.
</p> <p>The inputs must be numeric (as determined by <code><a href="../../base/html/numeric.html">is.numeric</a></code>: logical values are also allowed for historical compatibility): the <code>"kendall"</code> and <code>"spearman"</code> methods make sense for ordered inputs but <code><a href="../../base/html/xtfrm.html">xtfrm</a></code> can be used to find a suitable prior transformation to numbers. </p> <p><code>var</code> is just another interface to <code>cov</code>, where <code>na.rm</code> is used to determine the default for <code>use</code> when that is unspecified. If <code>na.rm</code> is <code>TRUE</code> then the complete observations (rows) are used (<code>use = "na.or.complete"</code>) to compute the variance. Otherwise, by default <code>use = "everything"</code>. </p> <p>If <code>use</code> is <code>"everything"</code>, <code><a href="../../base/html/NA.html">NA</a></code>s will propagate conceptually, i.e., a resulting value will be <code>NA</code> whenever one of its contributing observations is <code>NA</code>.<br> If <code>use</code> is <code>"all.obs"</code>, then the presence of missing observations will produce an error. If <code>use</code> is <code>"complete.obs"</code> then missing values are handled by casewise deletion (and if there are no complete cases, that gives an error). <br> <code>"na.or.complete"</code> is the same unless there are no complete cases, that gives <code>NA</code>. Finally, if <code>use</code> has the value <code>"pairwise.complete.obs"</code> then the correlation or covariance between each pair of variables is computed using all complete pairs of observations on those variables. This can result in covariance or correlation matrices which are not positive semi-definite, as well as <code>NA</code> entries if there are no complete pairs for that pair of variables. For <code>cov</code> and <code>var</code>, <code>"pairwise.complete.obs"</code> only works with the <code>"pearson"</code> method. Note that (the equivalent of) <code>var(double(0), use = *)</code> gives <code>NA</code> for <code>use = "everything"</code> and <code>"na.or.complete"</code>, and gives an error in the other cases. </p> <p>The denominator <i>n - 1</i> is used which gives an unbiased estimator of the (co)variance for i.i.d. observations. These functions return <code><a href="../../base/html/NA.html">NA</a></code> when there is only one observation (whereas S-PLUS has been returning <code>NaN</code>), and fail if <code>x</code> has length zero. </p> <p>For <code>cor()</code>, if <code>method</code> is <code>"kendall"</code> or <code>"spearman"</code>, Kendall's <i>tau</i> or Spearman's <i>rho</i> statistic is used to estimate a rank-based measure of association. These are more robust and have been recommended if the data do not necessarily come from a bivariate normal distribution.<br> For <code>cov()</code>, a non-Pearson method is unusual but available for the sake of completeness. Note that <code>"spearman"</code> basically computes <code>cor(R(x), R(y))</code> (or <code>cov(., .)</code>) where <code>R(u) := rank(u, na.last = "keep")</code>. In the case of missing values, the ranks are calculated depending on the value of <code>use</code>, either based on complete observations, or based on pairwise completeness with reranking for each pair. </p> <p>When there are ties, Kendall's <i>tau_b</i> is computed, as proposed by Kendall (1945). 
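</p>
<p>A brief added illustration of how the <code>use</code> argument treats a
single missing value (the vectors here are arbitrary sample values):</p>
<pre>x <- c(1, 2, 3, 4, NA)
y <- c(2, 4, 6, 8, 10)
cor(x, y)                        # NA: the default use = "everything" propagates the NA
cor(x, y, use = "complete.obs")  # 1: computed from the four complete pairs
try(cor(x, y, use = "all.obs"))  # error: missing observations are not allowed
</pre>
<p>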
</p>
<p>Scaling a covariance matrix into a correlation one can be achieved in many
ways, mathematically most appealing by multiplication with a diagonal matrix
from left and right, or more efficiently by using
<code><a href="../../base/html/sweep.html">sweep</a>(.., FUN = "/")</code> twice.
The <code>cov2cor</code> function is even a bit more efficient, and provided
mostly for didactical reasons.
</p>

<h3>Value</h3>

<p>For <code>r <- cor(*, use = "all.obs")</code>, it is now guaranteed that
<code>all(abs(r) <= 1)</code>.
</p>

<h3>Note</h3>

<p>Some people have noted that the code for Kendall's tau is slow for very
large datasets (many more than 1000 cases). It rarely makes sense to do such
a computation, but see function <code><a href="../../pcaPP/html/cor.fk.html">cor.fk</a></code>
in package <a href="https://CRAN.R-project.org/package=pcaPP"><span class="pkg">pcaPP</span></a>.
</p>

<h3>References</h3>

<p>Becker, R. A., Chambers, J. M. and Wilks, A. R. (1988)
<em>The New S Language</em>.
Wadsworth & Brooks/Cole.
</p>
<p>Kendall, M. G. (1938) A new measure of rank correlation,
<em>Biometrika</em> <b>30</b>, 81–93.
<a href="https://dx.doi.org/10.1093/biomet/30.1-2.81">https://dx.doi.org/10.1093/biomet/30.1-2.81</a>
</p>
<p>Kendall, M. G. (1945) The treatment of ties in rank problems.
<em>Biometrika</em> <b>33</b>, 239–251.
<a href="https://dx.doi.org/10.1093/biomet/33.3.239">https://dx.doi.org/10.1093/biomet/33.3.239</a>
</p>

<h3>See Also</h3>

<p><code><a href="cor.test.html">cor.test</a></code> for confidence intervals (and tests).
</p>
<p><code><a href="cov.wt.html">cov.wt</a></code> for <em>weighted</em> covariance computation.
</p>
<p><code><a href="sd.html">sd</a></code> for standard deviation (vectors).
</p>

<h3>Examples</h3>

<pre>var(1:10)  # 9.166667

var(1:5, 1:5) # 2.5

## Two simple vectors
cor(1:10, 2:11) # == 1

## Correlation Matrix of Multivariate sample:
(Cl <- cor(longley))
## Graphical Correlation Matrix:
symnum(Cl) # highly correlated

## Spearman's rho and Kendall's tau
symnum(clS <- cor(longley, method = "spearman"))
symnum(clK <- cor(longley, method = "kendall"))
## How much do they differ?
i <- lower.tri(Cl)
cor(cbind(P = Cl[i], S = clS[i], K = clK[i]))

## cov2cor() scales a covariance matrix by its diagonal
## to become the correlation matrix.
cov2cor # see the function definition {and learn ..}
stopifnot(all.equal(Cl, cov2cor(cov(longley))),
          all.equal(cor(longley, method = "kendall"),
                    cov2cor(cov(longley, method = "kendall"))))

##--- Missing value treatment:
C1 <- cov(swiss)
range(eigen(C1, only.values = TRUE)\$values) # 6.19  1921

## swM := "swiss" with 3 "missing"s :
swM <- swiss
colnames(swM) <- abbreviate(colnames(swiss), min=6)
swM[1,2] <- swM[7,3] <- swM[25,5] <- NA # create 3 "missing"

## Consider all 5 "use" cases :
(C. <- cov(swM)) # use="everything"  quite a few NA's in cov.matrix
try(cov(swM, use = "all")) # Error: missing obs...
C2 <- cov(swM, use = "complete")
stopifnot(identical(C2, cov(swM, use = "na.or.complete")))
range(eigen(C2, only.values = TRUE)\$values) # 6.46  1930
C3 <- cov(swM, use = "pairwise")
range(eigen(C3, only.values = TRUE)\$values) # 6.19  1938

## Kendall's tau doesn't change much:
symnum(Rc <- cor(swM, method = "kendall", use = "complete"))
symnum(Rp <- cor(swM, method = "kendall", use = "pairwise"))
symnum(R. <- cor(swiss, method = "kendall"))

## "pairwise" is closer componentwise,
summary(abs(c(1 - Rp/R.)))
summary(abs(c(1 - Rc/R.)))

## but "complete" is closer in Eigen space:
EV <- function(m) eigen(m, only.values=TRUE)\$values
summary(abs(1 - EV(Rp)/EV(R.)) / abs(1 - EV(Rc)/EV(R.)))
</pre>
EOT;
  return $help;
}