aggregator2-4.6.x-1.x-dev/aggregator2.module
aggregator2.module
<?php
/**
* @file
* Used to aggregate syndicated content (RSS, RDF, Atom).
* Sponsored by Sandro Feuillet.
* Sponsored by John Bransford.
*/
/*
Copyright (C) 2005 by Marcin Konicki <ahwayakchih@gmail.com> and Sandro Feuillet <feuillet aat fastmail ddot fm>
Based on parts of Node Aggregator module by Bèr Kessels <ber aat webschuur ddot com>,
and Aggregator module by Drupal team - http://www.drupal.org
Also depends on other modules from Drupal basic distribution and, in some cases, contains parts of their code.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY.
See the LICENSE file for more details.
*/
/**
 * Shared constants: permission names, item date sources, link visibility
 * modes and item deletion flags used by the hooks in this module.
 */

// Permission strings (returned by aggregator2_perm()).
define('AGGREGATOR2_PERM_CREATE_FEED', 'create feeds');
define('AGGREGATOR2_PERM_EDIT_OWN_FEED', 'edit own feeds');
define('AGGREGATOR2_PERM_EDIT_OWN_ITEM', 'edit own feed items');
define('AGGREGATOR2_PERM_REFRESH_OWN_FEED', 'refresh own feed items');
define('AGGREGATOR2_PERM_EDIT_OWN_FEED_TAXONOMY', 'edit own feed taxonomy');
define('AGGREGATOR2_PERM_EDIT_OWN_ITEM_TAXONOMY', 'edit own item taxonomy');
define('AGGREGATOR2_PERM_ACCESS_VOCABS', 'see all vocabularies on edit page');
define('AGGREGATOR2_PERM_ACCESS_FEED', 'access feeds');
define('AGGREGATOR2_PERM_ACCESS_ITEM', 'access feed items');

// Which date an aggregated item receives.
define('AGGREGATOR2_ITEM_DATE_SNIFFED', 0);
define('AGGREGATOR2_ITEM_DATE_CURRENT', 1);

// Where the "full article" / "visit site" link is displayed.
define('AGGREGATOR2_SHOW_LINK_ALWAYS', 0);
define('AGGREGATOR2_SHOW_LINK_NEVER', 1);
define('AGGREGATOR2_SHOW_LINK_TEASER_ONLY', 2);
define('AGGREGATOR2_SHOW_LINK_PAGE_ONLY', 3);

// Item deletion mode; the values are bit flags that get OR'ed together.
define('AGGREGATOR2_ITEM_DELETE_ANY', 0);
define('AGGREGATOR2_ITEM_DELETE_UNPUBLISHED', 1);

// Flag the hooks below read through `global` to detect a running feed refresh.
// NOTE(review): this is declared `static` at file scope - confirm it really
// initialises the same variable that the `global` statements refer to.
static $AGGREGATOR2_REFRESH_FEED_RUNNING = FALSE;
/**
 * Implementation of hook_help().
 *
 * Returns the help text for the given section path. Sections this module
 * does not document fall through and return nothing.
 */
function aggregator2_help($section) {
  if ($section == 'admin/help#aggregator2') {
    // Placeholders substituted into the long help page below.
    $replacements = array('%admin' => l(t('Administer'), 'admin'), '%access' => l(t('access control'), 'admin/access'), '%categories' => l(t('categories'), 'admin/taxonomy'), '%feed' => l(t('aggregator2 news feed'), 'node/add/aggregator2-feed'), '%cron' => l('cron.php', 'admin/help/system#cron'), '%content' => l(t('content'), 'admin/node'));
    return t('
<h3>Background</h3>
Thousands of sites (particularly news sites and weblogs) publish their latest headlines and/or stories in a machine-readable format so that other sites can easily link to them. This content is usually in the form of an <a href="http://blogs.law.harvard.edu/tech/rss">RSS</a> feed (which is an XML-based syndication standard). Aggregator2 module can download such feeds, and add news from them to Your site.<br />
<h3>Setting up Aggregator2</h3>
<p><b>1.</b> First You need to setup permissions for aggregator2 module on %admin->%access page:
<ul>
<li><b>Access news</b> - User can view news.</li>
<li><b>Administer News Feed</b> - User can contribute/administer Aggregator2 News feed.</li>
<li><b>Administer News Items</b> - User can contribute/adminster News Items.</li>
</ul>
</p>
<p><b>2.</b> Next, if You want to associate categories with aggregator2 content, go to %admin->%categories page and edit one of already exiting vocabularies or create new one, and make sure under "types" that aggregator2 news feed and aggregator2 news feed item are checked.</p>
<h3>Creating Content</h3>
<p>After setting up Aggregator2 module, You can start generating content with it. For that You have to create %feed.
<ul>
<li><b>Title</b> - Title of the News Feed.</li>
<li><b>Categories</b> - Select categories that you want the feed itself to be categorized in.</li>
<li><b>URL</b> - url of RSS feed.</li>
<li><b>Update interval</b> - Amount of time before news feed is updated.</li>
<li><b>Discard Feed Items older than</b> - Whether or not you want the feed items to be discarded after a certain time interval.</li>
<li><b>Item Categories</b> - Category or Categories you want the aggregated feed items to be associated with.</li>
</ul>
For the feed to update automatically you must run %cron on a regular basis.
</p>
<h3>Administrating Content</h3>
<p>You can administer aggregator2 feeds and aggregator2 items as You administer any other drupal content. Just go to %admin->%content and use "edit" link at node You want to edit. Or You can use "edit" tab, when viewing specific node.</p>
', $replacements);
  }

  if ($section == 'admin/modules#description') {
    return t('Aggregates syndicated content (RSS and ATOM formats) as regular Drupal content.');
  }

  if ($section == 'admin/aggregator2') {
    // Intro paragraph followed by a shortcut link for creating a feed.
    $intro = t('Thousands of sites (particularly news sites and weblogs) publish their latest headlines and/or stories in a machine-readable format so that other sites can easily link to them. This content is usually in the form of an %rssurl feed (which is an XML-based syndication standard).', array('%rssurl' => '<a href="http://blogs.law.harvard.edu/tech/rss">RSS</a>'));
    $create_link = l(t('create news feed'), 'node/add/aggregator2-feed');
    return '<p>' . $intro . '</p><p>' . $create_link . '</p>';
  }

  if ($section == 'node/add#aggregator2-feed') {
    return t('A news feed is a source of news from other site(s). If you add one from this page aggregator2 module will automatically add news feed items (nodes) in configured intervals. You will also be able to edit those items later. The URL is the full path to the RSS feed file. For the feed to update automatically you must run "cron.php" on a regular basis. If you already have a feed with the URL you are planning to use, the system will not accept another feed with the same URL.');
  }

  if ($section == 'node/add#aggregator2-item') {
    return t('A news feed item is an item that is part of a feed. They are added automatically when feed is updated.');
  }
}
/**
 * Implementation of hook_perm().
 *
 * Lists every permission string this module defines, in the order they are
 * declared in the AGGREGATOR2_PERM_* constants used below.
 */
function aggregator2_perm() {
  $permissions = array(
    AGGREGATOR2_PERM_CREATE_FEED,
    AGGREGATOR2_PERM_EDIT_OWN_FEED,
    AGGREGATOR2_PERM_EDIT_OWN_ITEM,
    AGGREGATOR2_PERM_REFRESH_OWN_FEED,
    AGGREGATOR2_PERM_ACCESS_FEED,
    AGGREGATOR2_PERM_ACCESS_ITEM,
    AGGREGATOR2_PERM_EDIT_OWN_FEED_TAXONOMY,
    AGGREGATOR2_PERM_EDIT_OWN_ITEM_TAXONOMY,
    AGGREGATOR2_PERM_ACCESS_VOCABS
  );
  return $permissions;
}
/**
 * Implementation of hook_node_name().
 *
 * Accepts either a node type string or a node object and returns the
 * translated human-readable name for the two aggregator2 node types.
 * Any other type yields no return value.
 */
function aggregator2_node_name($node) {
  $type = is_string($node) ? $node : $node->type;

  if ($type == 'aggregator2-feed') {
    return t('aggregator2 news feed');
  }
  if ($type == 'aggregator2-item') {
    return t('aggregator2 news feed item');
  }
}
/**
 * Implementation of hook_access().
 *
 * @param $op
 *   The operation being checked: 'create', 'update', 'delete' or 'view'.
 * @param $node
 *   Either a node object or a node type string (the 'create' branch
 *   explicitly handles both forms).
 *
 * @return
 *   TRUE when access is granted by this module, the result of user_access()
 *   for 'view' and feed creation, or nothing when this module has no opinion.
 */
function aggregator2_access($op, $node) {
  global $AGGREGATOR2_REFRESH_FEED_RUNNING;
  global $user;

  // Resolve the node type once, the same way aggregator2_node_name() does,
  // so a property is never read from a plain type string.
  $type = is_string($node) ? $node : $node->type;

  if ($op == 'create') {
    if ($type == 'aggregator2-item') {
      // Items are only ever created by the module itself during a refresh.
      if ($AGGREGATOR2_REFRESH_FEED_RUNNING) {
        return TRUE;
      }
    }
    else if ($type == 'aggregator2-feed') {
      return user_access(AGGREGATOR2_PERM_CREATE_FEED);
    }
  }

  if ($op == 'update' || $op == 'delete') {
    if ($type == 'aggregator2-feed') {
      if ($AGGREGATOR2_REFRESH_FEED_RUNNING || (user_access(AGGREGATOR2_PERM_EDIT_OWN_FEED) && ($user->uid == $node->uid))) {
        return TRUE;
      }
    }
    else if ($AGGREGATOR2_REFRESH_FEED_RUNNING || (user_access(AGGREGATOR2_PERM_EDIT_OWN_ITEM) && ($user->uid == $node->uid))) {
      return TRUE;
    }
  }

  if ($op == 'view') {
    if ($type == 'aggregator2-feed') {
      return user_access(AGGREGATOR2_PERM_ACCESS_FEED);
    }
    else {
      return user_access(AGGREGATOR2_PERM_ACCESS_ITEM);
    }
  }
}
/**
 * Implementation of hook_node_types().
 *
 * Declares the two node types provided by this module: feed items and feeds.
 */
function aggregator2_node_types() {
  $types = array();
  $types[] = 'aggregator2-item';
  $types[] = 'aggregator2-feed';
  return $types;
}
/**
 * Implementation of hook_link().
 *
 * Adds module-specific links to aggregator2 nodes:
 * - the themed "full article" (items) or "visit site" (feeds) link, subject
 *   to the node's item_show_link setting and whether a teaser is shown;
 * - "refresh items" / "remove items" for feed owners with the matching
 *   permission, or for users with 'administer nodes';
 * - optional "source" and "view items" links, controlled by the
 *   aggregator2_show_feed_link and aggregator2_show_item_link variables.
 *
 * @param $type
 *   The kind of link requested; only 'node' is handled.
 * @param $node
 *   The node the links are built for.
 * @param $teaser
 *   TRUE when the node is displayed as a teaser.
 *
 * @return
 *   An array of link strings (possibly empty).
 */
function aggregator2_link($type, $node = NULL, $teaser = FALSE) {
  global $user;
  $links = array();

  if ($type == 'node' && $node != NULL) {
    if (($node->item_show_link == AGGREGATOR2_SHOW_LINK_ALWAYS) ||
        ($teaser && $node->item_show_link == AGGREGATOR2_SHOW_LINK_TEASER_ONLY) ||
        (!$teaser && $node->item_show_link == AGGREGATOR2_SHOW_LINK_PAGE_ONLY)) {
      // Node types are hyphenated everywhere else in this module; the
      // underscore spelling used here before could never match a node.
      if ($node->type == 'aggregator2-item') {
        $links[] = theme('aggregator2_link_full_article', $node);
      }
      else if ($node->type == 'aggregator2-feed') {
        $links[] = theme('aggregator2_link_visit_site', $node);
      }
    }

    if ($node->type == 'aggregator2-feed') {
      if ((user_access(AGGREGATOR2_PERM_REFRESH_OWN_FEED) && ($user->uid == $node->uid)) || user_access('administer nodes')) {
        $links[] = l(t('refresh items'), "admin/aggregator2/refresh/{$node->nid}");
      }
      if ((user_access(AGGREGATOR2_PERM_EDIT_OWN_ITEM) && ($user->uid == $node->uid)) || user_access('administer nodes')) {
        $links[] = l(t('remove items'), "admin/aggregator2/remove/{$node->nid}");
      }
    }

    if ($node->type == 'aggregator2-item' && variable_get('aggregator2_show_feed_link', 0)) {
      $links[] = l(t('source'), "aggregator2/sources/{$node->fid}");
    }
    if ($node->type == 'aggregator2-feed' && variable_get('aggregator2_show_item_link', 0)) {
      $links[] = l(t('view items'), "aggregator2/sources/{$node->nid}");
    }
  }

  return $links;
}
/**
 * Implementation of hook_validate().
 *
 * For feeds: checks that the URL is non-empty, not already used by another
 * feed and accepted by aggregator2_is_valid_url(); strips empty entries from
 * the item taxonomy selection; and, for non-administrators or brand new
 * nodes without a URL, overwrites the feed options with the site defaults
 * stored in the aggregator2_feed_defs variable.
 *
 * For items: checks the item link with aggregator2_is_valid_url().
 *
 * @param $node
 *   The node being validated; modified in place.
 */
function aggregator2_validate(&$node) {
  if ($node->type == 'aggregator2-feed') {
    if (isset($node->url)) {
      if (trim($node->url) == '') {
        form_set_error('url', t('URL field may not be empty, without it aggregator2 will not know from where to aggregate items.'));
      }
      // Reject a URL that another feed node already uses.
      $result = db_query("SELECT af.nid, n.title FROM {node} n, {aggregator2_feed} af WHERE af.url = '%s' AND af.nid = n.nid", $node->url);
      while ($feed = db_fetch_object($result)) {
        if ($feed->nid != $node->nid) {
          $link = l($feed->title, "node/{$feed->nid}");
          form_set_error('url', t('Duplicated URL: %link already uses that URL.', array('%link' => $link)));
          break;
        }
      }
      if (!aggregator2_is_valid_url($node->url)) {
        form_set_error('url', t('That URL is not allowed.'));
      }
    }

    // Remove "empty" terms
    $temp = array();
    if (is_array($node->feed_item_taxonomy)) {
      foreach ($node->feed_item_taxonomy as $tid) {
        if ($tid) {
          $temp[] = $tid;
        }
      }
    }
    if (count($temp) > 0) {
      $node->feed_item_taxonomy = $temp;
    }

    // Overwrite only if it's not saved from cron run, so it not gets freezed after each update :)
    // TODO: if there will be some other module saving nodes, it will trigger overwriting values. Find way to workaround that?
    // TODO: it uses url field as a way to check if it was edited already or not. I didn't want to add special hidden field for that, but maybe that would be a better way?
    global $AGGREGATOR2_REFRESH_FEED_RUNNING;
    if (!$AGGREGATOR2_REFRESH_FEED_RUNNING && (!user_access('administer nodes') || (!$node->nid && !isset($node->url)))) {
      $options = variable_get('aggregator2_feed_defs', array('refresh' => 3600, 'freezed' => 0, 'enable_block' => 0, 'update_items' => 1, 'item_status' => 1));
      $node->refresh = $options['refresh'];
      $node->update_items = $options['update_items'];
      $node->item_status = $options['item_status'];
      $node->item_delete_mode = $options['item_delete_mode'];
      $node->clear_items = $options['clear_items'];
      $node->promoted_items = $options['promoted_items'];
      $node->freezed = $options['freezed'];
      $node->enable_block = $options['enable_block'];
      $node->change_existing_items = $options['change_existing_items'];
      $node->guid_items = $options['guid_items'];
      $node->item_date_source = $options['item_date_source'];
      $node->item_show_link = $options['item_show_link'];
    }
  }
  else if ($node->type == 'aggregator2-item') {
    if ($node->link && !aggregator2_is_valid_url($node->link)) {
      // The item form field is named 'link' (see aggregator2_form()), so
      // the error is attached to that field rather than the feed-only 'url'.
      form_set_error('link', t('That URL is not allowed.'));
    }
  }
}
/**
 * Implementation of hook_insert().
 *
 * Writes the module-specific row that accompanies a newly created feed or
 * feed item node. For feeds, the submitted delete-mode checkbox bits are
 * first OR'ed into a single item_delete_mode value, and the cached
 * "sources" block is cleared afterwards.
 */
function aggregator2_insert($node) {
  switch ($node->type) {
    case 'aggregator2-feed':
      // Collapse the per-bit form values into one integer flag field.
      if (is_array($node->item_delete_mode_bits)) {
        $mode = 0;
        foreach ($node->item_delete_mode_bits as $flag) {
          $mode |= $flag;
        }
        $node->item_delete_mode = $mode;
      }

      $feed_sql = "INSERT INTO {aggregator2_feed} (nid, author, url, image, freezed, enable_block, refresh, clear_items, update_items, guid_items, promoted_items, item_status, item_taxonomy, item_date_source, item_show_link, item_delete_mode) VALUES (%d, '%s', '%s', '%s', %d, %d, %d, %d, %d, %d, %d, %d, '%s', %d, %d, %d)";
      db_query(
        $feed_sql,
        $node->nid,
        $node->author,
        $node->url,
        $node->image,
        $node->freezed,
        $node->enable_block,
        $node->refresh,
        $node->clear_items,
        $node->update_items,
        $node->guid_items,
        $node->promoted_items,
        $node->item_status,
        serialize($node->feed_item_taxonomy),
        $node->item_date_source,
        $node->item_show_link,
        $node->item_delete_mode
      );
      cache_clear_all('aggregator2:block:sources');
      break;

    case 'aggregator2-item':
      $item_sql = "INSERT INTO {aggregator2_item} (nid, fid, author, link, guid, source_link, source_xml, source_title) VALUES (%d, %d, '%s', '%s', '%s', '%s', '%s', '%s')";
      db_query(
        $item_sql,
        $node->nid,
        $node->fid,
        $node->author,
        $node->link,
        $node->guid,
        $node->source_link,
        $node->source_xml,
        $node->source_title
      );
      break;
  }
}
/**
 * Implementation of hook_update().
 *
 * Saves the module-specific fields of an edited feed or feed item. For
 * feeds it also, when "change_existing_items" is set and the taxonomy module
 * is available, re-applies the feed's item categories and input format to
 * every item already aggregated from the feed, then clears the cached
 * blocks belonging to the feed.
 */
function aggregator2_update($node) {
  if ($node->type == 'aggregator2-feed') {
    // Collapse the per-bit form values into one integer flag field.
    if (is_array($node->item_delete_mode_bits)) {
      $mode = 0;
      foreach ($node->item_delete_mode_bits as $flag) {
        $mode |= $flag;
      }
      $node->item_delete_mode = $mode;
    }

    $feed_sql = "UPDATE {aggregator2_feed} SET author = '%s', url = '%s', freezed = %d, enable_block = %d, refresh = %d, clear_items = %d, update_items = %d, guid_items = %d, promoted_items = %d, checked = %d, link = '%s', image = '%s', etag = '%s', modified = %d, item_status = %d, item_taxonomy = '%s', item_date_source = %d, item_show_link = %d, item_delete_mode = %d WHERE nid = %d";
    db_query(
      $feed_sql,
      $node->author,
      $node->url,
      $node->freezed,
      $node->enable_block,
      $node->refresh,
      $node->clear_items,
      $node->update_items,
      $node->guid_items,
      $node->promoted_items,
      $node->checked,
      $node->link,
      $node->image,
      $node->etag,
      $node->modified,
      $node->item_status,
      serialize($node->feed_item_taxonomy),
      $node->item_date_source,
      $node->item_show_link,
      $node->item_delete_mode,
      $node->nid
    );

    // Update taxonomy for already existing nodes; this may take a while.
    // TODO: find a way to split the work across cron runs, e.g. store the
    // pending item list in a Drupal variable (aggregator2_update_items_[nid])
    // and process a limited number of items each time the feed is updated.
    if (function_exists('taxonomy_node_save') && $node->change_existing_items == 1) {
      $item_rows = db_query("SELECT ai.nid FROM {aggregator2_item} ai WHERE ai.fid = '%d'", $node->nid);
      $item_nids = array();
      while ($row = db_fetch_object($item_rows)) {
        $item_nids[] = $row->nid;
        // TODO: this removes previous categories, including those from
        // autotaxonomy. Find a way to remove only unwanted terms, perhaps
        // a setting for which vocabularies may be changed.
        taxonomy_node_save($row->nid, $node->feed_item_taxonomy);
      }

      // Apply the feed's input format to all of its items in one query.
      if (count($item_nids) > 0) {
        db_query('UPDATE {node} SET format = %d WHERE nid IN(%s)', $node->format, implode(',', $item_nids));
        drupal_set_message(t('Updated existing items'));
      }
    }

    // Drop the cached blocks that depend on this feed.
    cache_clear_all('aggregator2:block:sources');
    cache_clear_all('aggregator2:block:'.$node->nid);
  }
  else if ($node->type == 'aggregator2-item') {
    $item_sql = "UPDATE {aggregator2_item} SET link = '%s', author = '%s', fid = %d WHERE nid = %d";
    db_query($item_sql, $node->link, $node->author, $node->fid, $node->nid);
  }
}
/**
 * Implementation of hook_delete().
 *
 * Removes the module-specific row of a deleted feed or feed item node.
 * Deleting a feed also clears the cached blocks tied to it.
 */
function aggregator2_delete(&$node) {
  $is_feed = ($node->type == 'aggregator2-feed');
  $is_item = ($node->type == 'aggregator2-item');

  if ($is_feed) {
    db_query('DELETE FROM {aggregator2_feed} WHERE nid = %d', $node->nid);
    // Invalidate the per-feed block and the shared "sources" block.
    cache_clear_all('aggregator2:block:'.$node->nid);
    cache_clear_all('aggregator2:block:sources');
  }

  if ($is_item) {
    db_query('DELETE FROM {aggregator2_item} WHERE nid = %d', $node->nid);
  }
}
/**
 * Implementation of hook_load().
 *
 * Fetches the module-specific fields for a feed or feed item node.
 *
 * For feeds, the serialized item_taxonomy column is exposed as the
 * feed_item_taxonomy property instead. For items, the row is joined with
 * the parent feed to add feed_title, feed_url, feed_link and item_show_link.
 *
 * @param $node
 *   The node being loaded; only its type and nid are read.
 *
 * @return
 *   An object holding the extra fields, FALSE when no matching row exists,
 *   or nothing for node types this module does not own.
 */
function aggregator2_load($node) {
  if ($node->type == 'aggregator2-feed') {
    $feed = db_fetch_object(db_query('SELECT * FROM {aggregator2_feed} WHERE nid = %d', $node->nid));
    if (!$feed) {
      // No companion row: bail out instead of assigning properties on a
      // non-object, which is a fatal error on current PHP versions.
      return FALSE;
    }
    // The column holds data written by serialize() in the insert/update hooks.
    $feed->feed_item_taxonomy = unserialize($feed->item_taxonomy);
    unset($feed->item_taxonomy);
    return $feed;
  }

  if ($node->type == 'aggregator2-item') {
    $item = db_fetch_object(db_query('SELECT ai.fid, ai.link, ai.source_link, ai.source_xml, ai.source_title, ai.author AS author, n.title AS feed_title, af.url AS feed_url, af.link AS feed_link, af.item_show_link AS item_show_link FROM {aggregator2_item} ai LEFT JOIN {aggregator2_feed} af ON af.nid = ai.fid LEFT JOIN {node} n ON n.nid = ai.fid WHERE ai.nid = %d', $node->nid));
    return $item;
  }
}
/**
 * Implementation of hook_menu().
 *
 * Registers the module's cacheable menu entries: the "add feed" page, the
 * public sources and OPML pages, the admin overview, and the refresh/remove
 * callbacks. Nothing is registered when $may_cache is FALSE.
 */
function aggregator2_menu($may_cache) {
  if (!$may_cache) {
    return array();
  }

  $items = array();

  $items[] = array(
    'path' => 'node/add/aggregator2-feed',
    'title' => t('aggregator2 news feed'),
    'access' => user_access(AGGREGATOR2_PERM_CREATE_FEED)
  );

  $items[] = array(
    'path' => 'aggregator2/sources',
    'title' => t('aggregator2'),
    'callback' => 'aggregator2_page_default',
    'access' => user_access(AGGREGATOR2_PERM_ACCESS_FEED),
    'type' => MENU_CALLBACK
  );

  $items[] = array(
    'path' => 'admin/aggregator2',
    'title' => t('aggregator2'),
    'callback' => 'aggregator2_admin_overview',
    'access' => user_access(AGGREGATOR2_PERM_CREATE_FEED)
  );

  // TODO: find a nice way to allow refresh only to feed owner
  $items[] = array(
    'path' => 'admin/aggregator2/refresh',
    'title' => t('aggregator2'),
    'callback' => 'aggregator2_admin_refresh_feed',
    'access' => user_access(AGGREGATOR2_PERM_REFRESH_OWN_FEED),
    'type' => MENU_CALLBACK
  );

  // TODO: find a nice way to allow remove only to items owner?
  $items[] = array(
    'path' => 'admin/aggregator2/remove',
    'title' => t('remove items'),
    'callback' => 'aggregator2_admin_remove_feed_items',
    'access' => user_access(AGGREGATOR2_PERM_EDIT_OWN_ITEM),
    'type' => MENU_CALLBACK
  );

  $items[] = array(
    'path' => 'aggregator2/opml',
    'title' => t('opml'),
    'callback' => 'aggregator2_page_opml',
    'access' => user_access(AGGREGATOR2_PERM_ACCESS_FEED),
    'type' => MENU_CALLBACK
  );

  return $items;
}
/**
 * Implementation of hook_view().
 *
 * For feed nodes, appends a small statistics table to the body (feed URL,
 * number of aggregated items, time since the last check and time until the
 * next refresh), then runs the node through node_prepare().
 *
 * @param $node
 *   The node being viewed; replaced by the result of node_prepare().
 * @param $teaser
 *   Passed on to node_prepare().
 * @param $page
 *   Unused by this implementation.
 */
function aggregator2_view(&$node, $teaser = FALSE, $page = FALSE) {
  // Provide some statistics for feed nodes
  if ($node->type == 'aggregator2-feed') {
    $items_count = db_result(db_query("SELECT COUNT(ai.nid) FROM {aggregator2_item} ai WHERE ai.fid = '%d'", $node->nid));

    $rows = array();
    $rows[] = array(t('Feed hosted at'), $node->url);
    $rows[] = array(t('Feed currently contains'), format_plural($items_count, '1 item', '%count items'));
    // A "checked" value of zero means the feed has not been fetched yet.
    $rows[] = array(t('Last checked feed host'), ($node->checked ? t('%time ago', array('%time' => format_interval(time() - $node->checked))) : t('never')) );
    $rows[] = array(t('Time until next refresh'), ($node->checked ? t('%time left', array('%time' => format_interval($node->checked + $node->refresh - time()))) : t('never')) );

    // Append the table directly; there is no earlier output to extend.
    $node->body .= theme('table', NULL, $rows);
  }

  $node = node_prepare($node, $teaser);
}
/**
 * Implementation of hook_form().
 *
 * Builds the node edit form for feeds and feed items. Users with
 * 'administer nodes' get the full set of feed options; for everybody else
 * the current values travel along as hidden fields. Category selectors are
 * built by hand (instead of through taxonomy_node_form()) so that
 * vocabularies listed in the aggregator2_hidden_vocabs variable can be
 * withheld from users lacking the "see all vocabularies" permission while
 * the terms already assigned from those vocabularies are preserved as
 * hidden fields.
 *
 * @param $node
 *   The node being edited. Its taxonomy property is reloaded from the
 *   taxonomy module when the category selector is shown.
 *
 * @return
 *   The HTML of the module-specific form fields.
 */
function aggregator2_form(&$node) {
  // Start from an empty string so every later append has something to extend.
  $output = '';
  $type = ($node->type ? $node->type : arg(2));

  if ($type == 'aggregator2-feed') {
    $output .= form_textfield(t('URL'), 'url', $node->url, 60, 250, NULL, NULL, TRUE);
  }

  if (user_access('administer nodes') && $type == 'aggregator2-feed') {
    $period = drupal_map_assoc(array(900, 1800, 3600, 7200, 10800, 21600, 32400, 43200, 64800, 86400, 172800, 259200, 604800, 1209600, 2419200), 'format_interval');
    $output .= form_select(t('Update interval'), 'refresh', $node->refresh, $period, t('The refresh interval indicating how often you want to update this feed. Requires crontab.'));
    $output .= form_textfield(t('Original author'), 'author', $node->author, 60, 60, NULL, NULL, NULL);
    $output .= form_checkbox(t('Freeze'), 'freezed', 1, $node->freezed, t('If set, aggragator2 will not create new items, or update old ones, for this feed.'));
    $output .= form_checkbox(t('Enable Block'), 'enable_block', 1, $node->enable_block, t('If set, aggragator2 will generate a Drupal block for this feed.'));
    if ($node->nid) {
      // "change_existing_items" is not saved anywhere, it's temporary for the time of editing, and saving time
      $output .= form_checkbox(t('Apply changes to already existing items'), 'change_existing_items', 1, $node->change_existing_items, t('If set, changes to input format and item categories will be applied also to already existing items.'));
    }
  }
  else {
    $output .= form_hidden('refresh', $node->refresh);
    $output .= form_hidden('author', $node->author);
    $output .= form_hidden('freezed', $node->freezed);
    $output .= form_hidden('enable_block', $node->enable_block);
  }

  if (user_access(AGGREGATOR2_PERM_EDIT_OWN_FEED_TAXONOMY) && function_exists('taxonomy_node_form')) {
    // use hack (same as below for item categories) so we can hide specific vocabularies
    //$temp = module_invoke('taxonomy', 'node_form', $type, $node);
    /*
    ** Taxonomy module doesn't add taxonomy terms at load time... so we have to do it by hand :((
    */
    $terms = module_invoke('taxonomy', 'node_get_terms', $node->nid, 'tid');
    $node->taxonomy = array();
    foreach ($terms as $tid => $term) {
      if ($term->tid) {
        $node->taxonomy[] = $term->tid;
      }
    }

    $block = '';
    // hide vocabularies
    if (!user_access(AGGREGATOR2_PERM_ACCESS_VOCABS)) {
      $hidden = variable_get('aggregator2_hidden_vocabs', array());
      // Only touch the database when something is actually hidden; an empty
      // list would otherwise produce an invalid "IN ()" clause.
      if (is_array($hidden) && count($hidden) > 0) {
        // The ids are spliced into the SQL text, so force them to integers.
        $hidden_vids = implode(',', array_map('intval', $hidden));
        $block = ' AND v.vid NOT IN ('. $hidden_vids .') ';
        if (is_array($node->taxonomy) && count($node->taxonomy) > 0) {
          // Keep already assigned terms from hidden vocabularies.
          $c = db_query('SELECT tid FROM {term_data} WHERE vid IN ('. $hidden_vids .') AND tid IN('. implode(',', array_map('intval', $node->taxonomy)) .')');
          while ($tid = db_fetch_object($c)) {
            $output .= form_hidden('taxonomy][', $tid->tid);
          }
        }
      }
    }

    // NOTE(review): this always lists the vocabularies of the feed node type,
    // even when an item is being edited - confirm that this is intended.
    $temp = NULL;
    $c = db_query("SELECT v.*, n.type FROM {vocabulary} v INNER JOIN {vocabulary_node_types} n ON v.vid = n.vid WHERE n.type = '%s' $block ORDER BY v.weight, v.name", 'aggregator2-feed');
    while ($vocabulary = db_fetch_object($c)) {
      $temp[] = taxonomy_form($vocabulary->vid, $node->taxonomy, '', 'taxonomy');
    }
    if (is_array($temp) && count($temp) > 0) {
      $output .= form_group(t('Categories'), implode('', $temp), t('Select categories to be associated with this feed'));
    }
  }
  else if (is_array($node->taxonomy)) {
    // No selector shown: carry the current terms along unchanged.
    foreach ($node->taxonomy as $tid) {
      if ($tid > 0) {
        $output .= form_hidden('taxonomy][', $tid);
      }
    }
  }

  if ($type == 'aggregator2-feed') {
    if (user_access(AGGREGATOR2_PERM_EDIT_OWN_ITEM_TAXONOMY) && function_exists('taxonomy_node_form')) {
      /*
       * workaround "bug" in taxonomy module - taxonomy_node_form function doesn't care about different name for values
       * so it always checks $node->taxonomy instead of $node->'name' :(
       * so just copy part of taxonomy_node_form function here...
       */
      $block = '';
      // hide vocabularies
      if (!user_access(AGGREGATOR2_PERM_ACCESS_VOCABS)) {
        $hidden = variable_get('aggregator2_hidden_vocabs', array());
        // Same guard as above: skip the lookup when nothing is hidden.
        if (is_array($hidden) && count($hidden) > 0) {
          $hidden_vids = implode(',', array_map('intval', $hidden));
          $block = ' AND v.vid NOT IN ('. $hidden_vids .') ';
          if (is_array($node->feed_item_taxonomy) && count($node->feed_item_taxonomy) > 0) {
            // feed_item_taxonomy can come straight from submitted form data,
            // so the term ids are cast to integers before use in SQL.
            $c = db_query('SELECT tid FROM {term_data} WHERE vid IN ('. $hidden_vids .') AND tid IN('. implode(',', array_map('intval', $node->feed_item_taxonomy)) .')');
            while ($tid = db_fetch_object($c)) {
              $output .= form_hidden('feed_item_taxonomy][', $tid->tid);
            }
          }
        }
      }

      $temp = NULL;
      $c = db_query("SELECT v.*, n.type FROM {vocabulary} v INNER JOIN {vocabulary_node_types} n ON v.vid = n.vid WHERE n.type = '%s' $block ORDER BY v.weight, v.name", 'aggregator2-item');
      while ($vocabulary = db_fetch_object($c)) {
        $temp[] = taxonomy_form($vocabulary->vid, $node->feed_item_taxonomy, '', 'feed_item_taxonomy');
      }
      if (is_array($temp) && count($temp) > 0) {
        $output .= form_group(t('Item categories'), implode('', $temp), t('Select categories to be associated with aggregated items'));
      }
    }
    else if (is_array($node->feed_item_taxonomy)) {
      foreach ($node->feed_item_taxonomy as $tid) {
        if ($tid > 0) {
          $output .= form_hidden('feed_item_taxonomy][', $tid);
        }
      }
    }

    $output .= form_textfield(t('Image/Logo link'), 'image', $node->image, 60, 1024, t('Use only full URL (including "http://" part too) to image so it does not break RSS/ATOM feed compatibility.'), NULL, FALSE);

    if (user_access('administer nodes')) {
      $output .= form_checkbox(t('Create GUID for items'), 'guid_items', 1, $node->guid_items, t('If enabled, aggragator2 will try to generate GUID for each item. Use this ONLY if aggregated items do not contain GUID tag and their LINK is not unique (ie. more than one item has the same link).'));
      $output .= form_checkbox(t('Update existing items'), 'update_items', 1, $node->update_items, t('If enabled, aggragator2 will update already existing items, overwriting any changes done between cron runs.'));
      $output .= form_checkbox(t('Publish new items'), 'item_status', 1, $node->item_status, t('If enabled, aggragator2 will mark each new item as published.'));

      $temp = array();
      $period = drupal_map_assoc(array(3600, 10800, 21600, 32400, 43200, 86400, 172800, 259200, 604800, 1209600, 2419200, 3628800, 4838400, 7257600, 15724800, 31536000), 'format_interval');
      $period['1000000000'] = t('Never');
      $temp[] = form_select(t('Discard feed items older than'), 'clear_items', $node->clear_items, $period, t('The time feed items should be kept. Older items will be automatically discarded. Requires crontab.'));
      $temp[] = form_checkbox(t('Discard only items not published currently'), 'item_delete_mode_bits][1', AGGREGATOR2_ITEM_DELETE_UNPUBLISHED, $node->item_delete_mode & AGGREGATOR2_ITEM_DELETE_UNPUBLISHED);
      $output .= form_group(t('Discarding items'), implode('', $temp));

      $promoted_count = drupal_map_assoc(array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 25, 30));
      $promoted_count['0'] = t('None');
      $promoted_count['1000000000'] = t('All');
      $output .= form_select(t('Promote items'), 'promoted_items', $node->promoted_items, $promoted_count, t('Select how many of aggregated items should be promoted to front page. When new items are created old ones will be taken out from front page.'));
      $output .= form_select(t('Item date source'), 'item_date_source', $node->item_date_source, array(AGGREGATOR2_ITEM_DATE_SNIFFED => t('Feed'), AGGREGATOR2_ITEM_DATE_CURRENT => t('Current')), t('Select which date will be used for aggregated items. If "Feed" is selected, Aggregator2 module will try to find date in feed, and if not found, current date will be used. If "Current" is selected, date of creation of items will always be set to the one at which items are aggregated.'));
      $output .= form_select(t('Show "full article"/"visit site" link'), 'item_show_link', $node->item_show_link, array(AGGREGATOR2_SHOW_LINK_ALWAYS => t('Always'), AGGREGATOR2_SHOW_LINK_NEVER => t('Do not display'), AGGREGATOR2_SHOW_LINK_TEASER_ONLY => t('Only with teaser'), AGGREGATOR2_SHOW_LINK_PAGE_ONLY => t('Only on full page')), t('Select place(s) where link to full article (for news items) or visit site (for news feeds) will be shown.'));
    }
    else {
      // Non-administrators cannot change these, so pass them through as-is.
      $output .= form_hidden('guid_items', $node->guid_items);
      $output .= form_hidden('update_items', $node->update_items);
      $output .= form_hidden('item_status', $node->item_status);
      $output .= form_hidden('clear_items', $node->clear_items);
      $output .= form_hidden('item_delete_mode_bits][1', $node->item_delete_mode & AGGREGATOR2_ITEM_DELETE_UNPUBLISHED);
      $output .= form_hidden('promoted_items', $node->promoted_items);
      $output .= form_hidden('item_date_source', $node->item_date_source);
      $output .= form_hidden('item_show_link', $node->item_show_link);
    }
  }

  // Link and parent feed are only editable on items that already exist.
  if ($node->nid) {
    if ($type == 'aggregator2-item') {
      $output .= form_textfield(t('Link'), 'link', $node->link, 60, 250, NULL, NULL, TRUE);
      $output .= form_select(t('Feed Name'), 'fid', $node->fid, aggregator2_node_list('aggregator2-feed'), t('The RSS feed which this item belongs to.'), 0, FALSE, TRUE);
    }
  }

  $output .= form_textarea(t('Description'), 'body', $node->body, 60, 10, t('Leave it blank to allow aggregator2 to use aggregated content for description.'), NULL, NULL);
  $output .= filter_form('format', $node->format);

  return $output;
}
/**
 * Implementation of hook_settings().
 *
 * Assembles the module settings form: display toggles, the list of
 * vocabularies to hide, cron throttling options, the URL blacklist, a
 * one-shot "change all feeds" selector and the time-debugging switch.
 *
 * Side effect: when the aggregator2_clear_items variable holds a non-zero
 * value, it is applied to every row of {aggregator2_feed} and the variable
 * is reset to 0.
 */
function aggregator2_settings() {
  // Form fragments are collected in order and joined at the end.
  $form = array();

  $form[] = form_checkbox(t('Create drupal blocks for each feed'), 'aggregator2_create_feed_blocks', 1, variable_get('aggregator2_create_feed_blocks', 0), t('If enabled, aggragator2 will create block for each feed. Such block still needs to be enabled on '. l('admin/block', 'admin/block') .' page.'));
  $form[] = form_checkbox(t('Show link to feed with each item'), 'aggregator2_show_feed_link', 1, variable_get('aggregator2_show_feed_link', 0), t('If enabled, aggragator2 will show "source" link with each item. It will point to item\'s feed node.'));
  $form[] = form_checkbox(t('Show link to items with each feed'), 'aggregator2_show_item_link', 1, variable_get('aggregator2_show_item_link', 0), t('If enabled, aggragator2 will show "items" link with each feed. It will point to feed node list of all items.'));
  $form[] = form_checkbox(t('Use link to item source whenever possible'), 'agg2_original_links', 1, variable_get('agg2_original_links', 0), t('If enabled, aggragator2 will use data from "source" tags instead of "link" tags. That will make "full article" link point to site which first published article, instead to site from which article was aggregated. Unfortunetly many sites do not use "source" tags, so often links will still point to site from which feeed was aggregated.'));
  $form[] = form_checkbox(t('Use javascript redirect when manually refreshing items'), 'agg2_js_redirect', 1, variable_get('agg2_js_redirect', 1), t('If enabled, aggragator2 output temporary page after refresh items is called. After refresh is done it will outpus javascript code and links to redirect browser after short time.'));
  $form[] = form_checkbox(t('Ignore RSS/ATOM teasers'), 'aggregator2_ignore_teasers', 1, variable_get('aggregator2_ignore_teasers', 0), t('If enabled, aggragator2 will ignore teasers set by RSS/ATOM data, and let Drupal auto-generate teasers.'));

  if (function_exists('taxonomy_node_form')) {
    // Vocabularies that can be withheld from users lacking the
    // "see all vocabularies on edit page" permission.
    $vocab_options = NULL;
    $vocab_rows = db_query("SELECT v.*, n.type FROM {vocabulary} v INNER JOIN {vocabulary_node_types} n ON v.vid = n.vid WHERE n.type = '%s' ORDER BY v.weight, v.name", 'aggregator2-item');
    while ($vocabulary = db_fetch_object($vocab_rows)) {
      $vocab_options[$vocabulary->vid] = $vocabulary->name;
    }
    if (is_array($vocab_options) && count($vocab_options) > 0) {
      $form[] = form_checkboxes(t('Hide vocabularies'), 'aggregator2_hidden_vocabs', variable_get('aggregator2_hidden_vocabs', array()), $vocab_options, t('Users without the <em>see all vocabularies on edit page</em> permission will not be able to see selected vocabularies on feed and item edit pages.'));
    }
  }

  // How many feeds to update at one cron run.
  $feed_count = drupal_map_assoc(array(0, 1, 2, 3, 4, 5, 10, 15, 20, 25, 50, 100));
  $feed_count['9999999'] = t('All');
  $form[] = form_select(t('Number of feeds to update at a time'), 'aggregator2_cron_feed_count', variable_get('aggregator2_cron_feed_count', 10), $feed_count, t('Select how many feeds can be updated at one cron run.'));

  // Pause length between node_save()/node_delete() calls.
  $sleep_interval = drupal_map_assoc(array(0, 1, 2, 3, 4, 5));
  $form[] = form_select(t('Interval between node updates'), 'aggregator2_sleep_interval', variable_get('aggregator2_sleep_interval', 3), $sleep_interval, t('Select how many seconds aggregator2 should wait before trying to save/delete next node.'));

  $blacklist_help = t('One entry per line. You can enter full URLs or domain names only. You can also enter regular expression (find out more about what it is at %link. more examples can be found also at %link2). For example "http://some.url.com/some/page.html" will blacklist that specific URL. ".url.com" will blacklist all URLs from url.com domain, and all it\'s subdomains. "some.url.com" will blacklist all URLs from "some" subdomain. "/^ftp\:\/\//" will blacklist any ftp:// URL. Feed which has URL which matches any of the rules on blacklist will be blocked. Items which have link pointing to URL which matches any of the rules from blacklist will not be created.', array('%link' => l('http://www.php.net/manual/en/reference.pcre.pattern.syntax.php', 'http://www.php.net/manual/en/reference.pcre.pattern.syntax.php'), '%link2' => l('http://www.php.net/manual/en/function.preg-match.php', 'http://www.php.net/manual/en/function.preg-match.php')));
  $form[] = form_textarea(t('Blacklist URLs'), 'aggregator2_blacklist_url', variable_get('aggregator2_blacklist_url', ''), 60, 10, $blacklist_help, NULL, NULL);

  // Globally change settings for all feeds - useful when a setting should
  // change without editing each feed by hand.
  $clear_items = variable_get('aggregator2_clear_items', 0);
  if ($clear_items) {
    db_query("UPDATE {aggregator2_feed} SET clear_items = %d", $clear_items);
    variable_set('aggregator2_clear_items', 0);
  }

  $period = drupal_map_assoc(array(3600, 10800, 21600, 32400, 43200, 86400, 172800, 259200, 604800, 1209600, 2419200, 3628800, 4838400, 7257600, 15724800, 31536000), 'format_interval');
  $period['0'] = t('Do not change');
  $period['1000000000'] = t('Never');
  $discard_select = form_select(t('Discard feed items older than'), 'aggregator2_clear_items', 0, $period, t('The time feed items should be kept. Older items will be automatically discarded. Requires crontab.'));
  $form[] = form_group(t('Change all news feeds with one click'), $discard_select);

  $form[] = form_checkbox(t('Time debugging'), 'agg2_dbg_time', 1, variable_get('agg2_dbg_time', 0), t('If enabled, aggragator2 will log beginning end ending of feed aggregation. This may slow down whole process, but also can give You information about which feeds are slow or even timing out.'));

  return implode('', $form);
}
/**
 * Implementation of hook_block().
 *
 * Generates news feed blocks for display: one block per feed that has its
 * block enabled, plus a "Latest sources" block. Blocks that had to be built
 * are stored with cache_set() under 'aggregator2:block:<delta>' and are served
 * from cache_get() on later calls.
 *
 * @param $op
 *   'list' to enumerate the blocks, 'view' to build a single block.
 * @param $delta
 *   Node ID of the feed to show, or 'sources' for the latest sources block.
 * @return
 *   For 'list', an array of block descriptions keyed by delta. For 'view', an
 *   array with 'subject' and 'content' keys. Nothing is returned when feed
 *   blocks are switched off, for any other $op, or when the feed is not found.
 */
function aggregator2_block($op = 'list', $delta = 0) {
  // Feed blocks are an opt-in feature.
  if (variable_get('aggregator2_create_feed_blocks', 0) != 1) {
    return;
  }

  if ($op == 'list') {
    $feeds = db_query(db_rewrite_sql("SELECT n.nid, n.title, af.enable_block FROM {node} n INNER JOIN {aggregator2_feed} af ON n.nid = af.nid WHERE n.type = 'aggregator2-feed' AND af.enable_block = 1"));
    while ($row = db_fetch_object($feeds)) {
      $blocks[$row->nid]['info'] = $row->title . ' (feed block)';
    }
    $blocks['sources']['info'] = t('Latest sources');
    return $blocks;
  }

  if ($op != 'view') {
    return;
  }

  // Serve the stored copy when there is one.
  $cache_id = 'aggregator2:block:'.$delta;
  $cached = cache_get($cache_id);
  if ($cached) {
    return unserialize($cached->data);
  }

  if ($delta == 'sources') {
    // Links to the ten most recently changed, published feeds.
    $links = array();
    $subject = t('Latest sources');
    $sources = db_query("SELECT n.nid, n.title FROM {node} n INNER JOIN {aggregator2_feed} af ON n.nid = af.nid WHERE n.type = 'aggregator2-feed' AND n.status = 1 AND af.enable_block = 1 ORDER BY n.changed DESC LIMIT 10");
    while ($source = db_fetch_object($sources)) {
      $links[] = l($source->title, 'aggregator2/sources/'.$source->nid);
    }
    $content = theme('node_list', $links, NULL);
    $content .= l(t('all sources'), 'aggregator2/sources');
    $block = array('subject' => $subject, 'content' => $content);
    cache_set($cache_id, serialize($block));
    return $block;
  }

  // Any other delta is treated as the node ID of a feed.
  $feed = db_fetch_object(db_query('SELECT n.nid, n.title FROM {node} n WHERE n.nid = %d', $delta));
  if ($feed->nid) {
    // Titles of the ten newest items of that feed.
    $items = db_query('SELECT n.nid, n.title FROM {node} n, {aggregator2_item} a WHERE n.nid = a.nid AND a.fid = %d ORDER BY n.created DESC, n.title LIMIT 10', $feed->nid);
    $block = array('subject' => $feed->title, 'content' => node_title_list($items));
    cache_set($cache_id, serialize($block));
    return $block;
  }
}
/**
 * Implementation of hook_nodeapi().
 *
 * Handles two operations:
 * - 'settings': for the 'aggregator2-feed' node type, returns the form with
 *   the default feed options stored in the 'aggregator2_feed_defs' variable.
 * - 'rss item': for feed and feed item nodes, returns the extra <source> and
 *   <dc:source> elements to add to the node's RSS item.
 *
 * @param $node
 *   The node (or node type placeholder) being acted upon.
 * @param $op
 *   The operation being performed.
 * @param $teaser
 *   Unused.
 * @param $page
 *   Unused.
 * @return
 *   Form HTML for 'settings', an array of RSS elements for 'rss item',
 *   nothing otherwise.
 */
function aggregator2_nodeapi(&$node, $op, $teaser = NULL, $page = NULL) {
  switch ($op) {
    case 'settings':
      if ($node->type == 'aggregator2-feed') {
        $options = variable_get('aggregator2_feed_defs', array('refresh' => 3600, 'freezed' => 0, 'enable_block' => 0, 'update_items' => 1, 'item_status' => 1));
        // Not every option is part of the stored defaults. Fill the gaps with
        // NULL so the form elements below never read a missing key; NULL is
        // what an unset key evaluated to before, so the form stays the same.
        $options += array(
          'refresh' => NULL,
          'freezed' => NULL,
          'enable_block' => NULL,
          'change_existing_items' => NULL,
          'guid_items' => NULL,
          'update_items' => NULL,
          'item_status' => NULL,
          'item_delete_mode' => NULL,
          'promoted_items' => NULL,
          'clear_items' => NULL,
          'item_date_source' => NULL,
          'item_show_link' => NULL,
        );

        $temp = form_checkbox(t('Freeze'), 'aggregator2_feed_defs][freezed', 1, $options['freezed']);
        // Append: a plain assignment here used to throw the "Freeze" checkbox away.
        $temp .= form_checkbox(t('Enable Block'), 'aggregator2_feed_defs][enable_block', 1, $options['enable_block']);
        // "change_existing_items" is not saved anywhere, it's temporary for the time of editing, and saving time
        $temp .= form_checkbox(t('Apply changes to already existing items after feed is re-edited'), 'aggregator2_feed_defs][change_existing_items', 1, $options['change_existing_items']);
        $temp .= form_checkbox(t('Create GUID for items'), 'aggregator2_feed_defs][guid_items', 1, $options['guid_items']);
        $temp .= form_checkbox(t('Update existing items'), 'aggregator2_feed_defs][update_items', 1, $options['update_items']);
        $temp .= form_checkbox(t('Publish new items'), 'aggregator2_feed_defs][item_status', 1, $options['item_status']);
        $temp .= form_checkbox(t('Discard only items not published currently'), 'aggregator2_feed_defs][item_delete_mode', 1, $options['item_delete_mode']);
        $output = form_item(t('Default feed options'), $temp);

        $period = drupal_map_assoc(array(900, 1800, 3600, 7200, 10800, 21600, 32400, 43200, 64800, 86400, 172800, 259200, 604800, 1209600, 2419200), 'format_interval');
        $output .= form_select(t('Default update interval'), 'aggregator2_feed_defs][refresh', $options['refresh'], $period, t('The refresh interval indicating how often feed should be updated. Requires crontab.'));

        $promoted_count = drupal_map_assoc(array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 25, 30));
        $promoted_count['0'] = t('None');
        $promoted_count['1000000000'] = t('All');
        $output .= form_select(t('By default promote items'), 'aggregator2_feed_defs][promoted_items', $options['promoted_items'], $promoted_count, t('Select how many of aggregated items should be promoted to front page. When new items are created old ones will be taken out from front page.'));

        $period = drupal_map_assoc(array(3600, 10800, 21600, 32400, 43200, 86400, 172800, 259200, 604800, 1209600, 2419200, 3628800, 4838400, 7257600, 15724800, 31536000), 'format_interval');
        $period['1000000000'] = t('Never');
        $output .= form_select(t('By default discard feed items older than'), 'aggregator2_feed_defs][clear_items', $options['clear_items'], $period, t('The time feed items should be kept. Older items will be automatically discarded. Requires crontab.'));

        $output .= form_select(t('Item date source'), 'aggregator2_feed_defs][item_date_source', $options['item_date_source'], array(AGGREGATOR2_ITEM_DATE_SNIFFED => t('Feed'), AGGREGATOR2_ITEM_DATE_CURRENT => t('Current')), t('Select which date will be used for aggregated items. If "Feed" is selected, Aggregator2 module will try to find date in feed, and if not found, current date will be used. If "Current" is selected, date of creation of items will always be set to the one at which items are aggregated.'));
        $output .= form_select(t('Show "full article"/"visit site" link'), 'aggregator2_feed_defs][item_show_link', $options['item_show_link'], array(AGGREGATOR2_SHOW_LINK_ALWAYS => t('Always'), AGGREGATOR2_SHOW_LINK_NEVER => t('Do not display'), AGGREGATOR2_SHOW_LINK_TEASER_ONLY => t('Only with teaser'), AGGREGATOR2_SHOW_LINK_PAGE_ONLY => t('Only on full page')), t('Select place(s) where link to full article (for news items) or visit site (for news feeds) will be shown.'));
        return $output;
      }
      break;

    case 'rss item':
      if ($node->type == 'aggregator2-item') {
        // Prefer the item's own source data, fall back to the data of its feed.
        return array(array('key' => 'source',
                           'attributes' => array('url' => ($node->source_xml ? $node->source_xml : $node->feed_url)),
                           'value' => check_plain(($node->source_title ? $node->source_title : $node->feed_title))),
                     array('key' => 'dc:source',
                           'value' => ($node->source_link ? $node->source_link : $node->link)));
      }
      if ($node->type == 'aggregator2-feed') {
        return array(array('key' => 'source',
                           'attributes' => array('url' => $node->url),
                           'value' => check_plain($node->title)),
                     array('key' => 'dc:source',
                           'value' => $node->link));
      }
      break;
  }
}
/**
 * Implementation of hook_taxonomy().
 *
 * When a term is deleted, rewrites the serialized 'item_taxonomy' list of
 * every feed so that it no longer contains the deleted term, nor any other
 * term that cannot be found in the {term_data} table.
 *
 * @param $op
 *   Operation being performed; only 'delete' is acted upon.
 * @param $type
 *   Kind of object; only 'term' is acted upon.
 * @param $object
 *   The object the operation is performed on.
 */
function aggregator2_taxonomy($op, $type, $object) {
  // Term lookups are remembered for the rest of the request.
  static $cache_tids = array();

  if ($type != 'term' || $op != 'delete') {
    return;
  }

  // Shared placeholder for terms that do not (or no longer) exist.
  $missing = new StdClass();
  $missing->vid = 0;
  $missing->tid = 0;

  // The term being deleted must be dropped even if an earlier call in this
  // request cached it as existing.
  // NOTE(review): assumes $object carries the term's 'tid', either as an
  // object property or as an array key - confirm against the hook caller.
  $deleted_tid = 0;
  if (is_object($object) && isset($object->tid)) {
    $deleted_tid = $object->tid;
  }
  else if (is_array($object) && isset($object['tid'])) {
    $deleted_tid = $object['tid'];
  }
  if ($deleted_tid) {
    $cache_tids[$deleted_tid] = $missing;
  }

  // Table names are wrapped in {} so that database prefixes are applied.
  $feeds = db_query('SELECT nid, item_taxonomy FROM {aggregator2_feed}');
  while ($feed = db_fetch_object($feeds)) {
    $terms = $feed->item_taxonomy ? unserialize($feed->item_taxonomy) : array();
    if (!is_array($terms)) {
      // Nothing usable stored for this feed.
      continue;
    }

    $removed = 0;
    $terms_refreshed = array();
    foreach ($terms as $tid) {
      if (!isset($cache_tids[$tid])) {
        // A query that matches no rows still returns a result handle, so the
        // fetched row - not the handle - tells whether the term exists.
        $term = db_fetch_object(db_query('SELECT vid, tid FROM {term_data} WHERE tid = %d', $tid));
        $cache_tids[$tid] = is_object($term) ? $term : $missing;
      }
      if (!$cache_tids[$tid]->tid || !$cache_tids[$tid]->vid) {
        $removed++;
      }
      else {
        $terms_refreshed[] = $tid;
      }
    }

    // Only touch feeds that actually lost a term.
    if ($removed > 0) {
      db_query("UPDATE {aggregator2_feed} SET item_taxonomy = '%s' WHERE nid = %d", serialize($terms_refreshed), $feed->nid);
    }
  }
}
/**
 * Implementation of hook_cron().
 *
 * Checks news feeds for updates once their refresh interval has elapsed.
 * Each feed is refreshed while the global $user is switched to the owner of
 * the feed; afterwards old items of the refreshed feeds are removed as user 1
 * and the original user is put back.
 */
function aggregator2_cron() {
  global $user;
  $old_user = $user;

  // Check how many feed nodes we can update at a time.
  $feed_count = variable_get('aggregator2_cron_feed_count', 10);
  if ($feed_count == 0) {
    return;
  }
  // 9999999 is the value of the "All" option: no LIMIT clause then.
  $limit = ($feed_count < 9999999) ? 'LIMIT '. $feed_count : '';

  // Feeds refreshed in this run, keyed by nid, with the arguments needed to
  // discard their old items later on.
  $updated_feeds = array();
  $due = db_query('SELECT nid FROM {aggregator2_feed} WHERE freezed = 0 AND checked + refresh < %d ORDER BY checked ASC '. $limit, time());
  while ($row = db_fetch_array($due)) {
    $feed = node_load(array('nid' => $row['nid']));

    // Fake login as the owner of the feed.
    if ($feed->uid != $user->uid) {
      $user = user_load(array('uid' => $feed->uid, 'status' => 1));
    }

    // Refresh only if we really ended up being the owner.
    if ($feed->uid != $user->uid) {
      continue;
    }
    aggregator2_refresh($feed);
    $updated_feeds[$feed->nid] = array($feed->clear_items, $feed->item_delete_mode);
  }

  // Now delete old items as admin (to make it faster - we don't really need to delete node as owner)
  if ($user->uid != 1) {
    $user = user_load(array('uid' => 1));
  }
  foreach ($updated_feeds as $nid => $settings) {
    list($clear_items, $delete_mode) = $settings;
    aggregator2_remove_old_items($nid, $clear_items, $delete_mode);
  }

  // Now "logout".
  if ($user->uid != $old_user->uid) {
    $user = $old_user;
  }
}
/**
 * Menu callback; prints a themed page with the help text that
 * aggregator2_help() returns for the 'admin/help#aggregator' section.
 */
function aggregator2_help_page() {
  $help = aggregator2_help('admin/help#aggregator');
  print theme('page', $help);
}
/**
 * Menu callback; displays the aggregator administration page.
 *
 * Prints a table of feeds with their item count, the time of the last and of
 * the next update, and the operations the current user may perform. Users
 * without the 'administer nodes' permission only get to see their own feeds.
 */
function aggregator2_admin_overview() {
  global $user;

  if (user_access('administer nodes')) {
    // Administrators see every feed and may do everything.
    $uid = '';
    $can_edit = TRUE;
    $can_remove = TRUE;
    $can_refresh = TRUE;
  }
  else {
    // Everybody else is limited to own feeds and to the granted permissions.
    $uid = ' WHERE n.uid = '. $user->uid .' ';
    $can_edit = user_access(AGGREGATOR2_PERM_EDIT_OWN_FEED);
    $can_remove = user_access(AGGREGATOR2_PERM_EDIT_OWN_ITEM);
    $can_refresh = user_access(AGGREGATOR2_PERM_REFRESH_OWN_FEED);
  }

  $result = db_query('SELECT n.nid, n.title, af.checked, af.refresh, af.freezed FROM {node} n INNER JOIN {aggregator2_feed} af ON n.nid = af.nid '. $uid .' ORDER BY n.title ASC');

  $output = '<h3>'. t('Feed overview') .'</h3>';
  $header = array(t('Title'), t('Items'), t('Last update'), t('Next update'), array('data' => t('Operations'), 'colspan' => '3'));
  $rows = array();
  while ($feed = db_fetch_object($result)) {
    $items_count = db_result(db_query("SELECT COUNT(ai.nid) FROM {aggregator2_item} ai WHERE ai.fid = '%d'", $feed->nid));

    $row = array();
    $row[] = l($feed->title, 'node/'. $feed->nid);
    $row[] = format_plural($items_count, '1 item', '%count items');

    // Last update.
    if ($feed->checked) {
      $row[] = t('%time ago', array('%time' => format_interval(time() - $feed->checked)));
    }
    else {
      $row[] = t('never');
    }

    // Next update.
    if ($feed->freezed) {
      $row[] = t('freezed');
    }
    else {
      $row[] = t('%time left', array('%time' => format_interval($feed->checked + $feed->refresh - time())));
    }

    // Operations; an empty cell keeps the columns aligned.
    $row[] = $can_edit ? l(t('edit'), 'node/'. $feed->nid .'/edit') : '';
    $row[] = $can_remove ? l(t('remove items'), 'admin/aggregator2/remove/'. $feed->nid) : '';
    $row[] = $can_refresh ? l(t('refresh items'), 'admin/aggregator2/refresh/'. $feed->nid) : '';

    $rows[] = $row;
  }

  $output .= theme('table', $header, $rows);
  print theme('page', $output);
}
/**
 * Menu callback; refresh feed.
 *
 * Refreshes the given feed and discards its old items. The owner of the feed
 * needs the "refresh own feed items" permission; anybody else needs
 * 'administer nodes', in which case the refresh runs as user 1.
 *
 * Depending on the 'agg2_js_redirect' variable, either a minimal progress
 * page is echoed (ending with a JavaScript redirect to the feed node), or the
 * browser is redirected with drupal_goto().
 *
 * @param $nid
 *   Node ID of the feed. When empty, a numeric second path element is used.
 */
function aggregator2_admin_refresh_feed($nid = NULL) {
  if ($nid == NULL && is_numeric(arg(1))) {
    $nid = arg(1);
  }
  $feed = node_load(array('nid'=>$nid));
  if ($feed->type == 'aggregator2-feed') {
    global $user;
    $old_user = NULL;
    if (!user_access(AGGREGATOR2_PERM_REFRESH_OWN_FEED) || $feed->uid != $user->uid) {
      if (!user_access('administer nodes')) {
        drupal_access_denied();
        return;
      }
      else {
        // Fake admin user
        $old_user = $user;
        $user = user_load(array('uid' => 1));
      }
    }

    $show_progress = variable_get('agg2_js_redirect', 1);
    // $nid comes straight from the request, so it must never be echoed. Links
    // are built from the ID of the loaded node and go through url(), which
    // also takes care of the base path of the site.
    $feed_url = url('node/'. $feed->nid);

    if ($show_progress) {
      echo '<html><head></head><body>';
      echo 'refreshing items...<br />';
    }
    aggregator2_refresh($feed);
    if ($show_progress) {
      echo 'checking for too old items...<br />';
    }
    aggregator2_remove_old_items($feed->nid, $feed->clear_items, $feed->item_delete_mode);

    // If needed, back to "real" user
    if ($old_user) {
      $user = $old_user;
    }

    if ($show_progress) {
      echo 'done.<br />';
      echo 'You can go to <a href="'. $feed_url .'">feed node</a> or <a href="'. url('admin/aggregator2') .'">admin/aggregator2</a>.<br />';
      echo '<script type="text/javascript">document.write(\'You will be redirected to feed node page in 10 seconds\'); setTimeout("document.location=\''. $feed_url .'\'", 10000);</script>';
      echo '</body></html>';
      return;
    }
    drupal_goto('node/'. $feed->nid);
  }
}
/**
 * Menu callback; Remove all items belonging to an aggregator2 feed.
 *
 * Without a confirmation in the posted form data, prints a confirmation page
 * that lists the items of the feed. With it, deletes the posted nodes - but
 * only those that really are items of this feed - and resets the feed so that
 * it is fetched from scratch at the next refresh.
 *
 * The owner of the feed needs the "edit own feed items" permission; anybody
 * else needs 'administer nodes'.
 *
 * @param $nid
 *   Node ID of the feed. When empty, a numeric second path element is used.
 */
function aggregator2_admin_remove_feed_items($nid = NULL) {
  if ($nid == NULL && is_numeric(arg(1))) {
    $nid = arg(1);
  }
  // Posted form values; absent when the page is merely being viewed.
  $edit = (isset($_POST['edit']) && is_array($_POST['edit'])) ? $_POST['edit'] : array();

  $feed = node_load(array('nid'=>$nid));
  if ($feed->type == 'aggregator2-feed') {
    global $user;
    if (!user_access(AGGREGATOR2_PERM_EDIT_OWN_ITEM) || ($user->uid != $feed->uid)) {
      if (!user_access('administer nodes')) {
        drupal_access_denied();
        return;
      }
      // We don't need to fake user here as we don't really need nodes to be deleted by owner
    }

    // Mass delete
    if (!empty($edit['confirm'])) {
      global $AGGREGATOR2_REFRESH_FEED_RUNNING;
      // Stop Timeouts Whilst Processing Feed
      set_time_limit(0);

      // The list of nodes is supplied by the client, so it cannot be trusted:
      // collect the real items of this feed and delete nothing else.
      $feed_items = array();
      $result = db_query('SELECT nid FROM {aggregator2_item} WHERE fid = %d', $feed->nid);
      while ($item = db_fetch_object($result)) {
        $feed_items[$item->nid] = TRUE;
      }

      $AGGREGATOR2_REFRESH_FEED_RUNNING = TRUE;
      // Remove each feed item node
      if (isset($edit['nodes']) && is_array($edit['nodes'])) {
        foreach ($edit['nodes'] as $item_nid => $value) {
          if (isset($feed_items[$item_nid])) {
            node_delete(array('nid' => $item_nid, 'confirm' => 1));
          }
        }
      }
      $AGGREGATOR2_REFRESH_FEED_RUNNING = FALSE;

      // Reset the last checked time for feed
      db_query("UPDATE {aggregator2_feed} SET checked = 0, etag = '', modified = 0 WHERE nid = %d", $feed->nid);
      // Clear cache
      cache_clear_all('aggregator2:block:'.$feed->nid);
      drupal_set_message(t('The items have been deleted.'));
      drupal_goto('admin/aggregator2');
    }
    else {
      // Confirmation page: every item is listed and carried along as a
      // hidden form value.
      $extra = '<ul>';
      $result = db_query('SELECT n.title, n.nid FROM {node} n LEFT JOIN {aggregator2_item} ai ON ai.nid = n.nid WHERE ai.fid = %d ORDER BY n.title', $feed->nid);
      $item_count = 0;
      while ($item = db_fetch_object($result)) {
        $extra .= '<li>'. form_hidden('nodes]['. $item->nid, 1) . l($item->title, "node/{$item->nid}") .'</li>';
        $item_count++;
      }
      $extra .= '</ul>';
      $extra .= form_hidden('operation', 'delete');
      $output = theme('confirm', t('Delete all items belonging to the %title feed', array('%title' => $feed->title)), 'admin/node', t('Are you sure you want to delete %count? This action cannot be undone.', array('%count' => $item_count . ' ' . format_plural($item_count, t('node'), t('nodes') ) )), t('Delete all'), t('Cancel'), $extra);
      print theme('page', $output);
    }
  }
}
/**
 * Menu callback; Generate a listing of feed items.
 *
 * With a feed node ID, prints the feed followed by a pager-driven list of its
 * published items. Without one, prints a pager-driven list of published feed
 * nodes. An ID that does not belong to a feed is redirected to 'node/<nid>'.
 *
 * @param $nid
 *   Optional node ID of a feed.
 */
function aggregator2_page_default($nid = NULL) {
  $output = '';
  // Set once a node listing has been queried and has to be rendered.
  $show_list = FALSE;

  if (!$nid) {
    // If there's no nid given, then show page with feed nodes
    $result = pager_query(db_rewrite_sql('SELECT n.nid, n.sticky, n.created FROM {node} n INNER JOIN {aggregator2_feed} af ON af.nid = n.nid WHERE n.status = 1 ORDER BY n.sticky DESC, n.created DESC'), variable_get('default_nodes_main', 10), 0, NULL);
    $show_list = TRUE;
  }
  else {
    $feed = node_load(array('nid' => $nid));
    if (!$feed || $feed->type != 'aggregator2-feed') {
      drupal_goto('node/'. $nid);
    }
    else {
      // The feed itself comes first, its items follow.
      $output .= node_view($feed, 1);
      $result = pager_query(db_rewrite_sql('SELECT n.nid, n.sticky, n.created FROM {node} n INNER JOIN {aggregator2_item} ai ON ai.nid = n.nid WHERE ai.fid = %d AND n.status = 1 ORDER BY n.sticky DESC, n.created DESC'), variable_get('default_nodes_main', 10), 0, NULL, $feed->nid);
      $show_list = TRUE;
    }
  }

  if ($show_list) {
    while ($row = db_fetch_object($result)) {
      $output .= node_view(node_load(array('nid' => $row->nid)), 1);
    }
    $output .= theme('pager', NULL, variable_get('default_nodes_main', 10));
  }

  print theme('page', $output);
}
/**
 * Menu callback; Generates an OPML representation of all feeds.
 *
 * Prints an OPML 1.1 document with one <outline> element per feed, ordered
 * by title, and sets a text/xml content type header.
 */
function aggregator2_page_opml() {
  $result = db_query(db_rewrite_sql('SELECT n.nid, n.title, a.url FROM {node} n, {aggregator2_feed} a WHERE a.nid = n.nid ORDER BY n.title ASC'));

  // Site name and slogan are free text: escape them like the feed data below,
  // otherwise a "&" or "<" in either of them breaks the document.
  $title = check_plain(variable_get('site_name', 'drupal') .' - '. variable_get('site_slogan', ''));

  $output = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n";
  $output .= "<opml version=\"1.1\">\n";
  $output .= "<head>\n";
  $output .= '<title>'. $title ."</title>\n";
  $output .= '<dateModified>'. gmdate('r') ."</dateModified>\n";
  $output .= "</head>\n";
  $output .= "<body>\n";
  while ($feed = db_fetch_object($result)) {
    $output .= '<outline text="'. check_plain($feed->title) .'" xmlUrl="'. check_plain($feed->url) .'" />'."\n";
  }
  $output .= "</body>\n";
  $output .= "</opml>\n";

  drupal_set_header('Content-Type: text/xml; charset=utf-8');
  print $output;
}
/**
 * Themeable 'original article' link of a feed item.
 *
 * When the 'agg2_original_links' variable is set and the node has a
 * source_link, the link points there; in every other case it points to the
 * node's own link.
 *
 * @param $node
 *   Node providing the link and source_link properties.
 * @return
 *   HTML of the link.
 */
function theme_aggregator2_link_full_article($node) {
  $prefer_source = variable_get('agg2_original_links', 0);
  if ($prefer_source && $node->source_link) {
    $href = $node->source_link;
  }
  else {
    $href = $node->link;
  }

  $output = '<a href="'. $href .'"';
  $output .= ' title="'. t('Read original article.') .'">';
  $output .= t('original article');
  $output .= '</a>';
  return $output;
}
/**
 * Themeable 'visit site' link, pointing to the node's link.
 *
 * @param $node
 *   Node providing the link property.
 * @return
 *   HTML of the link.
 */
function theme_aggregator2_link_visit_site($node) {
  $output = '<a href="'. $node->link .'"';
  $output .= ' title="'. t('Visit the site where this news was first published.') .'">';
  $output .= t('visit site');
  $output .= '</a>';
  return $output;
}
/**
 * Private function; Checks a URL against the blacklist.
 *
 * The blacklist is read from the 'aggregator2_blacklist_url' variable, one
 * rule per line:
 * - a full URL ("http://some.url.com/some/page.html") blocks exactly that URL,
 * - ".url.com" blocks the domain and all of its subdomains,
 * - "some.url.com" blocks URLs whose host part starts with that name,
 * - "/pattern/" is used as a regular expression.
 * Blank lines, and regular expressions that do not compile, are ignored so
 * that one bad line cannot change the meaning of the others.
 *
 * @param $url
 *   The URL to check.
 * @return
 *   A true value (TRUE, or 1 when there are no usable rules) if the URL is
 *   not blacklisted, FALSE if it is.
 */
function aggregator2_is_valid_url($url) {
  // Combined pattern, built once per request; '' means "no usable rules".
  static $blacklist = NULL;

  if ($blacklist === NULL) {
    $blacklist = '';
    $patterns = array();
    foreach (explode("\n", variable_get('aggregator2_blacklist_url', '')) as $line) {
      // Lines coming from a textarea end with "\r"; that must not become
      // part of a pattern. An empty line would otherwise turn into a rule
      // that matches every URL.
      $line = trim($line);
      if ($line === '') {
        continue;
      }

      // Simple comparison
      if (preg_match('/^\w+:\/\//', $line)) {
        $patterns[] = '^'. preg_quote($line, '/') .'$';
      }
      // Check by domain and all of its subdomains
      else if ($line[0] == '.') {
        $domain = (string) substr($line, 1);
        if ($domain !== '') {
          // Host labels may contain hyphens, which \w alone does not match.
          $patterns[] = '^\w+:\/\/(?:[\w\-]+\.|\.)*'. preg_quote($domain, '/') .'.*?$';
        }
      }
      // Check by (sub)domain
      else if ($line[0] != '/') {
        $patterns[] = '^\w+:\/\/'. preg_quote($line, '/') .'.*$';
      }
      // Use pattern: strip the enclosing delimiters.
      else {
        $pattern = (string) substr($line, 1, -1);
        // Try the expression on its own first; a broken one is skipped.
        if ($pattern !== '' && @preg_match('/'. $pattern .'/', '') !== FALSE) {
          $patterns[] = $pattern;
        }
      }
    }

    if (count($patterns) > 0) {
      // Each rule gets its own group so that rules cannot affect each other.
      $blacklist = '/(?:'. implode(')|(?:', $patterns) .')/';
    }
  }

  if ($blacklist) {
    return !preg_match($blacklist, $url);
  }
  return 1;
}
/**
 * Private function; Create list of titles of nodes of given type.
 *
 * @param $type
 *   Node type to list.
 * @return
 *   Array of node titles keyed by node ID, in title order.
 */
function aggregator2_node_list($type) {
  $titles = array();
  $nodes = db_query("SELECT n.nid, n.title FROM {node} n WHERE n.type = '%s' ORDER BY n.title", $type);
  for ($row = db_fetch_object($nodes); $row; $row = db_fetch_object($nodes)) {
    $titles[$row->nid] = $row->title;
  }
  return $titles;
}
/**
 * Private function; Parse HTTP headers from data retrieved with cURL
 * from: http://pl2.php.net/manual/en/function.curl-setopt.php#42009
 *
 * Interim (1xx) responses in front of the real one are skipped.
 *
 * @param $response
 *   Raw response: status line and headers, an empty line, then the body.
 * @return
 *   Array of:
 *   [0] => the HTTP response code such as 404, or "Error" when the status
 *          line could not be understood
 *   [1] => array of headers, keyed by header name as sent by the server:
 *          (
 *            [Server] => Microsoft-IIS/5.0
 *            [Date] => Wed, 28 Apr 2004 23:29:20 GMT
 *            [Content-Type] => text/html
 *            [Content-Length] => 4040
 *          )
 *   [2] => response body (string)
 *   [3] => the status line itself
 */
function parse_response($response){
  do {
    // A response without the empty separator line has no body.
    list($response_headers, $response) = array_pad(explode("\r\n\r\n", (string) $response, 2), 2, '');
    $response_header_lines = explode("\r\n", $response_headers);

    // first line of headers is the HTTP response code
    $http_response_line = array_shift($response_header_lines);
    // The minor version is optional: "HTTP/1.1 200 OK" as well as "HTTP/2 200".
    if (preg_match('@^HTTP/[0-9](?:\.[0-9])? ([0-9]{3})@', $http_response_line, $matches)) {
      $response_code = $matches[1];
    }
    else {
      $response_code = "Error";
    }
  }
  // 1xx responses are followed by the actual response: parse once more.
  while (substr($response_code, 0, 1) == "1");
  $response_body = $response;

  // put the rest of the headers in an array
  $response_header_array = array();
  foreach ($response_header_lines as $header_line) {
    // A line without a colon ends up as a header with an empty value.
    list($header, $value) = array_pad(explode(':', $header_line, 2), 2, '');
    $response_header_array[$header] = trim($value);
  }

  return array($response_code, $response_header_array, $response_body, $http_response_line);
}
/**
 * Private function; Gets data from given URL :)
 *
 * Uses cURL when available and hands over to drupal_http_request() when not.
 * The result object is shaped like the one of drupal_http_request().
 *
 * @param $url
 *   URL to fetch.
 * @param $headers
 *   Request headers as array('Header-Name' => 'value').
 * @param $timeout
 *   Timeout in seconds. Only used with cURL.
 * @param $method
 *   Request method. Only passed on to drupal_http_request(); no request
 *   method is configured on the cURL handle.
 * @param $data
 *   Request body. Only passed on to drupal_http_request(); no request body
 *   is configured on the cURL handle.
 * @param $follow
 *   How many redirects to follow.
 * @return
 *   Object with 'code', and depending on the outcome 'headers', 'data',
 *   'error', 'redirect_code' and 'redirect_url'. When cURL itself fails,
 *   'code' is the cURL error number and 'error' the cURL error message.
 */
function aggregator2_http_request($url, $headers = array(), $timeout = 15, $method = 'GET', $data = NULL, $follow = 3) {
  if (!function_exists('curl_init')) {
    return drupal_http_request($url, $headers, $method, $data, $follow);
  }

  // convert headers array to format used by cURL. $headers itself is left
  // alone: it is needed again, in its original form, when following a redirect.
  $curl_headers = array();
  foreach ($headers as $header => $value) {
    $curl_headers[] = $header .': '. $value;
  }

  $result = new StdClass();
  $ch = curl_init();
  curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
  curl_setopt($ch, CURLOPT_URL, $url);
  curl_setopt($ch, CURLOPT_HEADER, 1);
  curl_setopt($ch, CURLOPT_HTTPHEADER, $curl_headers);
  curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 0);
  curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
  $raw = curl_exec($ch);
  if (($result->code = curl_errno($ch)) != 0) {
    $result->error = curl_error($ch);
  }
  // TODO: currently we don't use it to keep compatibility with drupal_http_request.
  // We could use it and depend on cURL's handling of redirections, so we knew URL was redirected and still get data.
  // cURL returns last redirected URL (if there were more than one :) while drupal's function returns 1st.
  // $info = curl_getinfo($ch);
  curl_close($ch);
  unset($ch);
  if ($result->code != 0) {
    return $result;
  }

  $response = parse_response($raw);
  $result->code = $response[0];
  $result->headers = $response[1];
  $result->data = $response[2];
  $error = $response[3];

  switch ($result->code) {
    case 200: // OK
    case 304: // Not modified
      break;
    case 301: // Moved permanently
    case 302: // Moved temporarily
    case 307: // Moved temporarily
      $location = isset($result->headers['Location']) ? $result->headers['Location'] : '';
      // Without a Location header there is nowhere to go.
      if ($follow && $location) {
        $result = aggregator2_http_request($location, $headers, $timeout, $method, $data, $follow - 1);
        $result->redirect_code = $result->code;
      }
      $result->redirect_url = $location;
      break;
    default:
      $result->error = $error;
      break;
  }

  // As with drupal_http_request(): the code reported is the one of the first
  // response, even when the data comes from the target of a redirect.
  $result->code = $response[0];
  return $result;
}
/**
 * Private function; Checks a news feed for new items.
 *
 * Requests the feed with conditional GET headers, records the time of the
 * check, and - when content was received - parses it, hands it to
 * aggregator2_parse_items() / aggregator2_parse_opml(), updates the feed node
 * with the data found in the feed and saves it. What happened is reported
 * with watchdog() and drupal_set_message().
 *
 * While items are being processed the global
 * $AGGREGATOR2_REFRESH_FEED_RUNNING flag is TRUE.
 *
 * @param $feed
 *   The feed node; modified in place.
 */
function aggregator2_refresh(&$feed) {
  global $AGGREGATOR2_REFRESH_FEED_RUNNING;

  // Debugging
  if (variable_get('agg2_dbg_time', 0) == 1) {
    watchdog('agg2-debug', t('Debug: start %title.', array('%title' => '<em>'. $feed->title .'</em>')), WATCHDOG_NOTICE, l(t('view'), 'node/'. $feed->nid));
    $dbg_time_start = microtime();
  }

  // Generate conditional GET headers.
  $headers = array();
  if ($feed->etag) {
    $headers['If-None-Match'] = $feed->etag;
  }
  if ($feed->modified) {
    $headers['If-Modified-Since'] = gmdate('D, d M Y H:i:s', $feed->modified) .' GMT';
  }

  // Request feed.
  $result = aggregator2_http_request($feed->url, $headers, 15);

  // Update checked time, so it won't be checked each cron run, and thus block other feeds (outside of limit count) to be checked
  db_query('UPDATE {aggregator2_feed} SET checked = %d WHERE nid = %d', time(), $feed->nid);

  // Process HTTP response code.
  switch ($result->code) {
    case 304:
      drupal_set_message(t('No new content syndicated from %site.', array('%site' => '<em>'. $feed->title .'</em>')));
      break;

    case 301:
      // Permanent redirect: remember the new address.
      if ($result->redirect_url) {
        $feed->url = $result->redirect_url;
        watchdog('aggregator2', t('Updated URL for feed %title to %url.', array('%title' => '<em>'. $feed->title .'</em>', '%url' => '<em>'. $feed->url .'</em>')), WATCHDOG_NOTICE, l(t('view'), 'node/'.$feed->nid));
        db_query("UPDATE {aggregator2_feed} SET url = '%s' WHERE nid = %d", $feed->url, $feed->nid);
      }
      break;

    case 200:
    case 302:
    case 307:
    {
      // Filter the input data:
      $xml_tree = aggregator2_parse_xml($result->data);
      if ($xml_tree['parser_error']) {
        watchdog('aggregator2', t('Failed to parse RSS feed %site: %error at line %line.', array('%site' => '<em>'. $feed->title .'</em>', '%error' => $xml_tree['parser_error'], '%line' => $xml_tree['parser_line'])), WATCHDOG_ERROR);
        drupal_set_message(t('Failed to parse RSS feed %site: %error at line %line.', array('%site' => '<em>'. $feed->title .'</em>', '%error' => $xml_tree['parser_error'], '%line' => $xml_tree['parser_line'])), 'error');
        break;
      }
      else {
        drupal_set_message(t('Parsing feed %title took %time seconds.', array('%title' => $feed->title, '%time' => $xml_tree['parser_time'])));
      }

      $AGGREGATOR2_REFRESH_FEED_RUNNING = TRUE;
      if (aggregator2_parse_items($xml_tree, $feed) !== false || aggregator2_parse_opml($xml_tree, $feed) !== false) {
        // Not every server sends Last-Modified; 0 stands for "unknown".
        $modified = 0;
        if (!empty($result->headers['Last-Modified'])) {
          $modified = strtotime($result->headers['Last-Modified']);
        }

        /*
        ** Prepare data:
        */
        if ($xml_tree['RSS']) { // RSS 0.91, 0.92, 2.0
          $root = &$xml_tree['RSS'][0];
          $channel = &$root['CHANNEL'][0];
          $image = &$channel['IMAGE'][0];
          $description = &$channel['DESCRIPTION'][0]['VALUE'];
          $link = &$channel['LINK'][0]['VALUE'];
        }
        else if ($xml_tree['RDF:RDF']) {
          $root = &$xml_tree['RDF:RDF'][0];
          $channel = &$root['CHANNEL'][0];
          $image = &$root['IMAGE'][0];
          $description = &$channel['DESCRIPTION'][0]['VALUE'];
          $link = &$channel['LINK'][0]['VALUE'];
        }
        else if ($xml_tree['FEED']) { // Atom 0.3, 1.0
          $root = &$xml_tree['FEED'][0];
          $channel = &$root;
          $image = &$channel['LOGO'][0]['VALUE'];
          $description = ($channel['TAGLINE'][0]['VALUE'] ? $channel['TAGLINE'][0]['VALUE'] : '');
          // TODO: remove this Atom hack when we have field mapping or at least specialized parsers in place
          if (count($channel['LINK']) > 1) {
            $link = $feed->link;
            foreach ($channel['LINK'] as $l) {
              if ($l['REL'] == 'alternate') {
                $link = $l['HREF'];
              }
            }
          }
          else {
            $link = $channel['LINK'][0]['HREF'];
          }
        }
        else if ($xml_tree['CHANNEL']) { // RSS 1.1
          $root = &$xml_tree['CHANNEL'][0];
          $channel = &$root;
          $image = &$channel['IMAGE'][0];
          $description = &$channel['DESCRIPTION'][0]['VALUE'];
          $link = &$channel['LINK'][0]['VALUE'];
        }
        else if ($xml_tree['OPML']) {
          $root = &$xml_tree['OPML'][0];
          $channel = &$root;
          $image = NULL;
          $description = NULL;
          $link = NULL;
        }
        else {
          // unsupported format. This break leaves the whole switch, so the
          // "running" flag has to be put back here.
          $AGGREGATOR2_REFRESH_FEED_RUNNING = FALSE;
          break;
        }

        // Author: plain <author> value, then <author><name>, then <dc:creator>.
        if (!$feed->author) {
          if ($channel['AUTHOR'][0]['VALUE']) {
            $feed->author = $channel['AUTHOR'][0]['VALUE'];
          }
          else if ($channel['AUTHOR'][0]['NAME'][0]['VALUE']) {
            $feed->author = $channel['AUTHOR'][0]['NAME'][0]['VALUE'];
          }
          else if ($channel['DC:CREATOR']) {
            $feed->author = $channel['DC:CREATOR'][0]['VALUE'];
          }
          else {
            $feed->author = '';
          }
        }

        /*
        ** Generate image link
        */
        // $image is an element array for the RSS flavours, but the plain value
        // of <logo> for Atom; only the former has LINK, URL and TITLE children.
        // NOTE(review): the values below come from the feed and are put into
        // HTML as they are - confirm that the XML parser sanitizes them.
        if (!$feed->image && is_array($image) && !empty($image['LINK']) && !empty($image['URL']) && !empty($image['TITLE'])) {
          if (strlen($image['TITLE'][0]['VALUE']) > 250) {
            $image['TITLE'][0]['VALUE'] = trim(substr($image['TITLE'][0]['VALUE'], 0, 250)).'...';
          }
          $feed->image = '<a href="'. $image['LINK'][0]['VALUE'] .'" class="aggregator2_logo_link"><img src="'. $image['URL'][0]['VALUE'] .'" class="aggregator2_logo" alt="'. $image['TITLE'][0]['VALUE'] .'" /></a>';
        }
        else if (!$feed->image) {
          $feed->image = '';
        }

        /*
        ** Update the feed data:
        */
        $feed->checked = time();
        $feed->link = $link;
        $feed->etag = isset($result->headers['ETag']) ? $result->headers['ETag'] : '';
        $feed->modified = $modified;
        if ($feed->body == '' && $description/* && valid_input_data($description)*/) {
          $feed->body = $feed->teaser = $description;
        }
        $feed->rss_data = &$xml_tree;

        /*
        ** Taxonomy module doesn't add taxonomy terms at load time... so we have to do it by hand :((
        */
        $terms = module_invoke('taxonomy', 'node_get_terms', $feed->nid, 'tid');
        $feed->taxonomy = array();
        foreach ($terms as $tid => $term) {
          if ($term->tid) {
            $feed->taxonomy[] = $term->tid;
          }
        }

        /*
        ** Save it! :)
        */
        // Keep original creator - node module changes uid to current user, so if other user runs refresh, node will be "stolen"
        // TODO: validation temporary removed because it brings back default settings handled by node.module - ie. put's feed back to moderation queue :(
        //       probably best way would be to run refresh as admin user and just not forget to set item's user to feed owner :(
        // $feed = node_validate($feed);
        $feed->validated = TRUE;
        if (!($errors = form_get_errors()) && aggregator2_is_valid_url($feed->url)) {
          node_save($feed);
          flush();
          sleep(variable_get('aggregator2_sleep_interval', 3));
        }
        else {
          // There are no form errors when only the URL check failed.
          $error_text = implode("\n", (array) $errors);
          watchdog('aggregator2', t('Failed to validate aggregator2-feed for %site: %error.', array('%site' => '<em>'. $feed->title .'</em>', '%error' => '<em>'. $error_text .'</em>')), WATCHDOG_ERROR, l(t('view'), 'node/'.$feed->nid));
          drupal_set_message(t('Failed to validate aggregator2-feed for %site: %error.', array('%site' => '<em>'. $feed->title .'</em>', '%error' => '<em>'. $error_text .'</em>')));
        }

        watchdog('aggregator2', t('Syndicated content from %site.', array('%site' => '<em>'. $feed->title .'</em>')), WATCHDOG_NOTICE, l(t('view'), 'node/'.$feed->nid));
        drupal_set_message(t('Syndicated content from %site.', array('%site' => '<em>'. $feed->title .'</em>')));
      }
      $AGGREGATOR2_REFRESH_FEED_RUNNING = FALSE;
      break;
    }

    default:
      watchdog('aggregator2', t('Failed to parse RSS feed %site: %error.', array('%site' => '<em>'. $feed->title .'</em>', '%error' => "<em>$result->code $result->error</em>")), WATCHDOG_ERROR, l(t('view'), 'node/'.$feed->nid));
      drupal_set_message(t('Failed to parse RSS feed %site: %error.', array('%site' => '<em>'. $feed->title .'</em>', '%error' => "<em>$result->code $result->error</em>")));
  }

  // Debugging
  if (variable_get('agg2_dbg_time', 0) == 1) {
    $dbg_time_end = microtime();
    // microtime() returns two numbers separated by a space; their sum is the
    // time in seconds.
    list($sec, $usec) = explode(' ', $dbg_time_start);
    $dbg_time_start = $sec + $usec;
    list($sec, $usec) = explode(' ', $dbg_time_end);
    $dbg_time = ($sec + $usec) - $dbg_time_start;
    watchdog('agg2-debug', t('Debug: end %title. Time: %time', array('%title' => '<em>'. $feed->title .'</em>', '%time' => $dbg_time)), WATCHDOG_NOTICE, l(t('view'), 'node/'. $feed->nid));
  }
}
/**
 * Private function;
 * Parse the W3C date/time format, a subset of ISO 8601.
 * See http://www.w3.org/TR/NOTE-datetime for more information.
 * Originally from MagpieRSS (http://magpierss.sourceforge.net/).
 *
 * Understood are dates with a time part: YYYY-MM-DDThh:mm, optionally
 * followed by :ss, a decimal fraction of a second (which is ignored), and
 * either "Z" or an offset such as +01:00 or -0500. Without "Z" or an offset
 * the time is taken to be GMT.
 *
 * @param $date_str A string with a potentially W3C DTF date.
 * @return A timestamp if parsed successfully or -1 if not.
 */
function aggregator2_parse_w3cdtf($date_str) {
  // Groups: 1 year, 2 month, 3 day, 4 hours, 5 minutes, 6 seconds,
  // 7 sign of the offset, 8 offset hours, 9 offset minutes, 10 "Z".
  $pattern = '/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(?::(\d{2})(?:\.\d+)?)?(?:([-+])(\d{2}):?(\d{2})|(Z))?/';
  if (!preg_match($pattern, $date_str, $match)) {
    return -1;
  }
  // Trailing groups that took no part in the match are left out by
  // preg_match(); fill them in so that every group can be read.
  $match += array_fill(0, 11, '');

  // calc epoch for current date assuming GMT
  $epoch = gmmktime((int) $match[4], (int) $match[5], (int) $match[6], (int) $match[2], (int) $match[3], (int) $match[1]);

  // Z is zulu time, aka GMT: nothing to adjust then.
  if ($match[10] != 'Z' && $match[7] != '') {
    $offset_secs = (((int) $match[8] * 60) + (int) $match[9]) * 60;
    // is timezone ahead of GMT? then subtract offset
    if ($match[7] == '+') {
      $offset_secs = -$offset_secs;
    }
    $epoch += $offset_secs;
  }
  return $epoch;
}
/**
 * Private function;
 * from: http://pl2.php.net/manual/en/function.html-entity-decode.php#51055
 * Callback for preg_replace_callback() which turns one numeric entity
 * (decimal, or hexadecimal with a leading "x") into UTF-8 bytes.
 *
 * Code points 128-160 are not encoded arithmetically; they are looked up in
 * a fixed table of byte sequences instead.
 *
 * @param $ord Match array from preg_replace_callback(); element 1 holds the
 *   text between "&#" and ";".
 * @return UTF-8 string, or nothing (NULL) for values of 1114112 and above.
 */
function aggregator2_replace_num_entity($ord) {
  // Fixed output for code points 128-160.
  static $fixed = array(
    128 => "\xE2\x82\xAC",
    129 => "\xEF\xBF\xBD",
    130 => "\xE2\x80\x9A",
    131 => "\xC6\x92",
    132 => "\xE2\x80\x9E",
    133 => "\xE2\x80\xA6",
    134 => "\xE2\x80\xA0",
    135 => "\xE2\x80\xA1",
    136 => "\xCB\x86",
    137 => "\xE2\x80\xB0",
    138 => "\xC5\xA0",
    139 => "\xE2\x80\xB9",
    140 => "\xC5\x92",
    141 => "\xEF\xBF\xBD",
    142 => "\xC5\xBD",
    143 => "\xEF\xBF\xBD",
    144 => "\xEF\xBF\xBD",
    145 => "\xE2\x80\x98",
    146 => "\xE2\x80\x99",
    147 => "\xE2\x80\x9C",
    148 => "\xE2\x80\x9D",
    149 => "\xE2\x80\xA2",
    150 => "\xE2\x80\x93",
    151 => "\xE2\x80\x94",
    152 => "\xCB\x9C",
    153 => "\xE2\x84\xA2",
    154 => "\xC5\xA1",
    155 => "\xE2\x80\xBA",
    156 => "\xC5\x93",
    157 => "\xEF\xBF\xBD",
    158 => "\xC5\xBE",
    159 => "\xC5\xB8",
    160 => "\xC2\xA0",
  );

  // Resolve the captured text to an integer code point.
  $code = $ord[1];
  if (preg_match('/^x([0-9a-f]+)$/i', $code, $hex)) {
    $code = hexdec($hex[1]);
  }
  else {
    $code = intval($code);
  }

  // Single-byte range.
  if ($code < 128) {
    return chr($code);
  }
  // Beyond the last encodable value: produce nothing.
  if ($code >= 1114112) {
    return;
  }
  if (isset($fixed[$code])) {
    return $fixed[$code];
  }

  // Pick the sequence length together with the payload mask and marker bits
  // of the lead byte.
  if ($code < 2048) {
    $length = 2;
    $lead_mask = 31;
    $lead_mark = 192;
  }
  else if ($code < 65536) {
    $length = 3;
    $lead_mask = 15;
    $lead_mark = 224;
  }
  else {
    $length = 4;
    $lead_mask = 7;
    $lead_mark = 240;
  }

  // Lead byte first, then continuation bytes carrying six bits each.
  $utf8 = chr((($code >> (6 * ($length - 1))) & $lead_mask) | $lead_mark);
  for ($pos = $length - 2; $pos >= 0; $pos--) {
    $utf8 .= chr((($code >> (6 * $pos)) & 63) | 128);
  }
  return $utf8;
}
/**
 * Private function; Convert named entities to UTF-8 characters
 * from: http://pl2.php.net/manual/en/function.html-entity-decode.php#51722
 *
 * The entity => character map is built once per request and cached in a
 * static variable.
 *
 * @param $text Text which may contain named entities. It is not modified.
 * @return The text with every known named entity replaced.
 */
function aggregator2_replace_name_entities(&$text) {
  static $ttr;
  if (!$ttr) {
    $ttr = array();
    if (version_compare(phpversion(), '5.3.4') >= 0) {
      // The table can be requested in UTF-8 directly. Running utf8_encode()
      // over a table that is already UTF-8 would double-encode every
      // non-ASCII character.
      $trans_tbl = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'UTF-8');
      foreach ($trans_tbl as $k => $v) {
        $ttr[$v] = $k;
      }
    }
    else {
      // Older PHP: the table is single-byte and has to be converted.
      $trans_tbl = get_html_translation_table(HTML_ENTITIES);
      foreach ($trans_tbl as $k => $v) {
        $ttr[$v] = utf8_encode($k);
      }
    }
    // The apostrophe entity is not part of the table built above. Its name is
    // assembled from two pieces so that tooling which decodes HTML entities
    // in source text cannot collapse the key into a bare quote character.
    $ttr['&'.'apos;'] = "'";
  }
  return strtr($text, $ttr);
}
/**
 * Private function; Convert all entities to UTF-8 characters
 *
 * Named entities are replaced first; numeric entities (decimal or
 * hexadecimal) are then replaced in the result of that first pass.
 *
 * @param $text Text which may contain entities.
 * @return The decoded text.
 */
function aggregator2_replace_entities(&$text) {
  return preg_replace_callback(
    '/&#([0-9a-fx]+);/mi',
    'aggregator2_replace_num_entity',
    aggregator2_replace_name_entities($text)
  );
}
/**
 * Private function; Copy an object in a way that works on both php4 and php5
 * from: Drupal 4.7CVS
 * TODO: remove after moving to Drupal 4.7
 *
 * @param $object Object to copy.
 * @return On PHP 5.0 and later the result of clone; on older versions the
 *   received value itself.
 */
function aggregator2_clone($object) {
  if (version_compare(phpversion(), '5.0') >= 0) {
    return clone($object);
  }
  return $object;
}
/**
 * Private function; Convert relative URLs
 *
 * Prefixes the value of every double-quoted href/src attribute with
 * $base_url, unless the value already is an absolute reference: one that
 * starts with a URI scheme ("http://", "mailto:", "javascript:", ...) or
 * with "//" (protocol-relative).
 *
 * @param $data Markup to process. It is not modified.
 * @param $base_url URL the relative references are resolved against;
 *   surrounding slashes are ignored.
 * @return The markup with relative references rewritten.
 */
function aggregator2_convert_relative_urls(&$data, $base_url) {
  // The look-ahead rejects anything carrying its own scheme or authority, so
  // that e.g. "mailto:" links are not glued onto the base URL.
  $src = '%( href| src)="(?!\w+://|[a-zA-Z][a-zA-Z0-9+.-]*:|//)/?([^"]*)"%';
  // "\" and "$" have a special meaning in a preg_replace() replacement
  // string; escape them so the base URL is always inserted literally.
  $base = str_replace(array('\\', '$'), array('\\\\', '\\$'), trim($base_url, '/'));
  $dst = '$1="'. $base .'/$2"';
  return preg_replace($src, $dst, $data);
}
/**
 * Private function; Creates nodes from data found in given xml_tree
 *
 * Walks the items of an RSS (0.91, 0.92, 1.1, 2.0), RDF or Atom tree from
 * last to first. For each item it resolves body/teaser, title, link, source
 * and date, looks for an already stored item (by GUID, then link, then
 * title) and validates and saves an 'aggregator2-item' node. Afterwards it
 * un-promotes the oldest items when the feed limits the number of promoted
 * items.
 *
 * The value 1000000000 is used by the feed settings as "no limit": for
 * $feed->clear_items it disables the age check and for $feed->promoted_items
 * it promotes every new item.
 *
 * @param $xml_tree Parsed feed; the root key (RSS, RDF:RDF, FEED or CHANNEL)
 *   selects the format.
 * @param $feed Feed node object. It is used as the template for new items.
 * @return FALSE when the tree has none of the supported root elements,
 *   otherwise the number of newly added items.
 */
function aggregator2_parse_items(&$xml_tree, &$feed) {
  if ($xml_tree['RSS']) { // RSS 0.91, 0.92, 2.0
    $items = &$xml_tree['RSS'][0]['CHANNEL'][0]['ITEM'];
    $link_field = 'VALUE';
  }
  else if ($xml_tree['RDF:RDF']) {
    $items = &$xml_tree['RDF:RDF'][0]['ITEM'];
    $link_field = 'VALUE';
  }
  else if ($xml_tree['FEED']) { // Atom 0.3, 1.0
    $items = &$xml_tree['FEED'][0]['ENTRY'];
    $link_field = 'HREF';
  }
  else if ($xml_tree['CHANNEL']) { // RSS 1.1
    $items = &$xml_tree['CHANNEL'][0]['ITEMS'][0]['ITEM'];
    $link_field = 'VALUE';
  }
  else {
    // unsupported format
    return false;
  }

  // Don't add items older than allowed age for items
  if ($feed->clear_items != 1000000000) {
    // Oldest timestamp an item may have to be accepted
    $time_horizont = time() - $feed->clear_items;
  }
  else {
    $time_horizont = 0;
  }

  // Prepare data needed for promoting items
  // TODO: maybe select just those which will not be deleted? so update at the end of this function would have less nodes to update :)
  $promote_items = $feed->promoted_items;
  $promoted = array();
  if ($promote_items != 1000000000 && $promote_items != 0) {
    $result = db_query('SELECT ai.nid AS nid FROM {node} n, {aggregator2_item} ai WHERE ai.fid = %d AND ai.nid = n.nid AND n.status = 1 ORDER BY n.created ASC', $feed->nid);
    while ($temp = db_fetch_array($result)) {
      $promoted[] = $temp['nid'];
    }
  }

  /*
  ** We reverse the array such that we store the first item last,
  ** and the last item first. In the database, the newest item
  ** should be at the top.
  */
  $items_added = 0;
  // A feed without any item leaves $items unset; treat that as "no items"
  // instead of counting a non-array.
  $item_count = is_array($items) ? count($items) : 0;
  for ($index = $item_count - 1; $index >= 0; $index--) {
    $item = &$items[$index];

    // These variables get bound by reference to parts of the current item
    // further down. Break the bindings left over from the previous iteration
    // first: otherwise resetting them would overwrite the previous item's
    // data, and an item without source information would inherit the source
    // of the item processed before it.
    unset($body, $source_title, $source_link, $source_xml);
    $teaser = NULL;
    $body = NULL;
    $source_title = '';
    $source_link = '';
    $source_xml = '';

    // Description field is needed early for case when no title is specified
    if ($item['DESCRIPTION']) { // RSS 0.91, 0.92, 1.0, 1.1, 2.0
      $body = &$item['DESCRIPTION'][0]['VALUE'];
    }
    else if ($item['SUMMARY']) { // Atom 0.3, 1.0
      $body = &$item['SUMMARY'][0]['VALUE'];
    }

    // When fuller content is available, it becomes the body and the shorter
    // description/summary is kept as teaser (unless teasers are ignored).
    if ($item['CONTENT']) { // Atom 0.3, 1.0
      if (strlen($body) < strlen($item['CONTENT'][0]['VALUE'])) {
        if ($body && !variable_get('aggregator2_ignore_teasers', 0)) {
          $teaser = $body;
        }
        $body = &$item['CONTENT'][0]['VALUE'];
      }
    }
    else if ($item['CONTENT:ENCODED']) { // Don't know where it came from but it can be found in RSS 2.0 feeds
      if (strlen($body) < strlen($item['CONTENT:ENCODED'][0]['VALUE'])) {
        if ($body && !variable_get('aggregator2_ignore_teasers', 0)) {
          $teaser = $body;
        }
        $body = &$item['CONTENT:ENCODED'][0]['VALUE'];
      }
    }

    /*
    ** Resolve the item's title. If no title is found, we use
    ** up to 40 characters of the description ending at a word
    ** boundary but not splitting potential entities.
    */
    if (!($title = $item['TITLE'][0]['VALUE'])) {
      $title = preg_replace('/^(.*)[^\w;&].*?$/', "\\1", truncate_utf8($body, 40));
    }
    // If the title was "escaped" it may still contain entities, because each
    // & of an entity was itself escaped to &amp; beforehand
    // TODO: the same for content?
    if ($item['TITLE'][0]['MODE'] == 'escaped') {
      $title = aggregator2_replace_entities($title);
    }
    $title = strip_tags($title);

    /*
    ** Resolve the items link.
    */
    if ($item['LINK']) {
      // TODO: remove this Atom hack when we have field mapping or at least specialized parsers in place
      if (count($item['LINK']) > 1) {
        $link = $feed->link;
        foreach ($item['LINK'] as $temp) {
          if ($temp['REL'] == 'alternate') {
            $link = $temp[$link_field];
          }
        }
      }
      else {
        $link = $item['LINK'][0][$link_field];
      }
    }
    elseif ($item['GUID'] && (strncmp($item['GUID'][0][$link_field], 'http://', 7) == 0) && $item['GUID'][0]['ISPERMALINK'] != 'false') {
      $link = $item['GUID'][0][$link_field];
    }
    else {
      $link = $feed->link;
    }

    // Try to "sniff" real link from feeds like news.google.com which "hide" real link behind own url
    if (strpos($link, 'http://news.google.com/news/url?') === 0) {
      if (preg_match('/\&url=(.*)\&/U', $link, $matches) && $matches[1]) {
        $link = rawurldecode($matches[1]);
      }
    }
    else if (preg_match('/^\w+:\/\/(?:\w+\.|\.)*yahoo.com\/dailynews\/rss\/.*\*(.*)/', $link, $matches)) {
      $link = rawurldecode($matches[1]);
    }

    /*
    ** Resolve the items source.
    */
    // RSS 2.0 description of SOURCE is a bit different from ATOM and DC.
    // It says link should point to XML data of source (so i guess to feed/channel??),
    // while ATOM and DC say it just points to original data (and from examples on web
    // it looks like it means link to original article on site, not in RSS/ATOM format).
    if ($item['SOURCE'][0]['VALUE'] && $item['SOURCE'][0]['URL']) { // RSS 2.0
      $source_title = &$item['SOURCE'][0]['VALUE'];
      $source_xml = &$item['SOURCE'][0]['URL'];
    }
    // NOTE(review): when only SOURCE (and not DC:SOURCE) has a value, this
    // still binds the link to DC:SOURCE - confirm that this is intended.
    if ($item['DC:SOURCE'][0]['VALUE'] || (!$source_xml && $item['SOURCE'][0]['VALUE'])) { // Dublin core
      $source_link = &$item['DC:SOURCE'][0]['VALUE'];
    }
    else if ($item['SOURCE'] || $item['ATOM:SOURCE']) { // ATOM 1.0
      if ($item['SOURCE'][0]['TITLE']) $source_title = &$item['SOURCE'][0]['TITLE'][0]['VALUE'];
      else if ($item['SOURCE'][0]['ATOM:TITLE']) $source_title = &$item['SOURCE'][0]['ATOM:TITLE'][0]['VALUE'];
      if ($item['SOURCE'][0]['LINK']) $source_link = &$item['SOURCE'][0]['LINK'][0]['VALUE'];
      else if ($item['SOURCE'][0]['ATOM:LINK']) $source_link = &$item['SOURCE'][0]['ATOM:LINK'][0]['VALUE'];
    }
    // Normalize whatever was (not) found to empty strings
    if (!$source_title) {
      $source_title = '';
    }
    if (!$source_link) {
      $source_link = '';
    }
    if (!$source_xml) {
      $source_xml = '';
    }

    /*
    ** Try to resolve and parse the item's publication date. If no
    ** date is found, we use the current date instead.
    */
    // TODO: find nicer way for handling namespaces ;)
    if ($item['PUBDATE']) $date = $item['PUBDATE'][0]['VALUE']; // RSS 2.0
    else if ($item['DC:DATE']) $date = $item['DC:DATE'][0]['VALUE']; // Dublin core
    else if ($item['DATE']) $date = $item['DATE'][0]['VALUE']; // Dublin core
    else if ($item['DCTERMS:ISSUED']) $date = $item['DCTERMS:ISSUED'][0]['VALUE']; // Dublin core
    else if ($item['ISSUED']) $date = $item['ISSUED'][0]['VALUE']; // Dublin core
    else if ($item['DCTERMS:CREATED']) $date = $item['DCTERMS:CREATED'][0]['VALUE']; // Dublin core
    else if ($item['CREATED']) $date = $item['CREATED'][0]['VALUE']; // Dublin core
    else if ($item['DCTERMS:MODIFIED']) $date = $item['DCTERMS:MODIFIED'][0]['VALUE']; // Dublin core
    else if ($item['MODIFIED']) $date = $item['MODIFIED'][0]['VALUE']; // Dublin core
    else if ($item['ATOM:UPDATED']) $date = $item['ATOM:UPDATED'][0]['VALUE']; // Atom
    else if ($item['UPDATED']) $date = $item['UPDATED'][0]['VALUE']; // Atom
    else $date = 'now';

    if ($feed->item_date_source == AGGREGATOR2_ITEM_DATE_SNIFFED && $date) {
      // strtotime() reports failure as -1 on old PHP versions and as FALSE
      // on newer ones; both have to trigger the fallback parser.
      $timestamp = strtotime($date);
      if ($timestamp === FALSE || $timestamp < 0) {
        $timestamp = aggregator2_parse_w3cdtf($date); // returns -1 on failure
        if ($timestamp < 0) {
          $timestamp = time(); // better than nothing
        }
      }
    }
    else {
      $timestamp = time();
    }

    // Ignore items older than allowed for feed
    if ($timestamp < $time_horizont) {
      continue;
    }

    /*
    ** Save this item. Try to avoid duplicate entries as much as
    ** possible. If we find a duplicate entry, we resolve it and
    ** pass along it's ID such that we can update it if needed.
    */
    // Try to use RSS:GUID/ATOM:ID as unique identifier
    $guid = '';
    if ($item['GUID'][0]['VALUE']) { // RSS 2.0
      $guid = $item['GUID'][0]['VALUE'];
    }
    else if ($item['ATOM:ID'][0]['VALUE']) { // ATOM 0.3, 1.0
      $guid = $item['ATOM:ID'][0]['VALUE'];
    }
    else if ($item['ID'][0]['VALUE']) { // ATOM 0.3, 1.0
      $guid = $item['ID'][0]['VALUE'];
    }
    else if ($feed->guid_items) {
      // feed may contain duplicated links for different items, so we try to generate unique ID for each item
      $guid = md5("$title - $body");
    }
    // TODO: is there anyway to check if DC:IDENTIFIER is unique?
    // http://dublincore.org/documents/usageguide/elements.shtml says it can be non-unique so useless for us :(

    $entry = NULL;
    if ($guid && strlen($guid) > 0) {
      $entry = db_fetch_object(db_query("SELECT nid FROM {aggregator2_item} WHERE guid = '%s' AND fid = %d", $guid, $feed->nid));
    }
    else if ($link && $link != $feed->link && $link != $feed->url) {
      $entry = db_fetch_object(db_query("SELECT nid FROM {aggregator2_item} WHERE link = '%s' AND fid = %d", $link, $feed->nid));
    }
    else {
      $entry = db_fetch_object(db_query("SELECT ai.nid AS nid FROM {node} n, {aggregator2_item} ai WHERE ai.fid = %d AND ai.nid = n.nid AND n.title = '%s'", $feed->nid, $title));
    }

    // Ignore items already existing in database and not allowed to be updated
    if ($feed->update_items == 0 && $entry && $entry->nid) {
      continue;
    }

    if (!$title) {
      drupal_set_message(t('No title found in entry from %site feed.', array('%site' => '<em>'. $feed->title .'</em>')), 'error');
      watchdog('aggregator2', t('No title found in entry from %site feed.', array('%site' => '<em>'. $feed->title .'</em>')), WATCHDOG_WARNING);
    }
    else {
      $edit = NULL;
      if ($entry && $entry->nid) {
        $edit = node_load(array('nid' => $entry->nid));
        /*
        ** Taxonomy module doesn't add taxonomy terms at load time... so we have to do it by hand :((
        */
        $terms = module_invoke('taxonomy', 'node_get_terms', $edit->nid, 'tid');
        foreach ($terms as $tid => $term) {
          if ($term->tid) {
            $edit->taxonomy[] = $term->tid;
          }
        }
      }
      else {
        // New item: start from a copy of the feed node
        $edit = aggregator2_clone($feed);
        unset($edit->nid);
      }

      $edit->fid = $feed->nid;
      $edit->type = 'aggregator2-item';

      // One single chain, so that a plain author value is no longer wiped
      // out by the final fallback. Precedence: author name, then DC creator,
      // then plain author value.
      if ($item['AUTHOR'][0]['NAME'][0]['VALUE']) {
        $edit->author = $item['AUTHOR'][0]['NAME'][0]['VALUE'];
      }
      else if ($item['DC:CREATOR']) {
        $edit->author = $item['DC:CREATOR'][0]['VALUE'];
      }
      else if ($item['AUTHOR'][0]['VALUE']) {
        $edit->author = $item['AUTHOR'][0]['VALUE'];
      }
      else {
        $edit->author = '';
      }

      // avoid overwriting user's changes
      if (!$edit->nid) {
        // From node.module (node_validate function): Force defaults in case people modify the form:
        $node_options = variable_get('node_options_aggregator2-item', array('status', 'promote'));
        $edit->status = $feed->item_status;// in_array('status', $node_options);
        $edit->moderate = in_array('moderate', $node_options);
        $edit->promote = 0; // in_array('promote', $node_options);
        $edit->sticky = in_array('sticky', $node_options);
        $edit->revision = in_array('revision', $node_options);
        $edit->taxonomy = $feed->feed_item_taxonomy;
        unset($edit->path); // avoid path alias conflicts!
        unset($edit->parent_node); // avoid messing up relativity module's data :)
        $edit->date = format_date($timestamp, 'custom', 'Y-m-d H:i O');
        $edit->created = strtotime($edit->date);
        $edit->guid = $guid;
      }

      $edit->title = $title;
      $edit->link = $link;
      $edit->source_link = $source_link;
      $edit->source_title = $source_title;
      $edit->source_xml = $source_xml;
      $edit->body = aggregator2_convert_relative_urls($body, $feed->link);
      $edit->rss_data = &$xml_tree;
      $edit->rss_item_data = &$item;
      if ($teaser) {
        $edit->teaser = aggregator2_convert_relative_urls($teaser, $feed->link);
      }
      else {
        unset($edit->teaser);
      }

      // avoid "content modified by another user" error in case when item is updated
      // (so it already exists in database). It's because we use feed as template for each item,
      // so it certainly will have older "changed" date than item in database (and will get error, if we not change it :)
      $edit->changed = time();

      $node = node_validate($edit);

      // Once again setup stuff which node_validate overwrote :(
      $node->date = format_date($timestamp, 'custom', 'Y-m-d H:i O');
      $node->created = strtotime($node->date);
      if (!$node->nid) {
        $node->status = $feed->item_status;
        // Handle promotion stuff
        if ($index + 1 <= $promote_items || $promote_items == 1000000000) {
          $node->promote = 1;
        }
        else {
          $node->promote = 0;
        }
      }

      if ($errors = form_get_errors()) {
        drupal_set_message(t('Errors in entry from %site feed:', array('%site' => '<em>'. $feed->title .'</em>'))."\n".implode("\n", $errors), 'error');
        watchdog('aggregator2', t('Errors in entry %title from %site feed:'."\n".implode("\n", $errors), array('%title' => '<em>'. $node->title .'</em>', '%site' => '<em>'. $feed->title .'</em>')), WATCHDOG_ERROR);
      }
      else {
        $nid = node_save($node);
        if ($nid) {
          // A node id different from the one we started with means a new node
          if ($nid != $edit->nid) {
            if ($node->promote && $promote_items != 1000000000) {
              $promoted[] = $nid;
            }
            $items_added++;
            watchdog('aggregator2', t('%type: added %title from %site feed.', array('%type' => '<em>'. t($node->type) .'</em>', '%title' => '<em>'. $node->title .'</em>', '%site' => '<em>'. $feed->title .'</em>')), WATCHDOG_NOTICE, l(t('view'), "node/$nid"));
          }
          else {
            watchdog('aggregator2', t('%type: updated %title from %site feed.', array('%type' => '<em>'. t($node->type) .'</em>', '%title' => '<em>'. $node->title .'</em>', '%site' => '<em>'. $feed->title .'</em>')), WATCHDOG_NOTICE, l(t('view'), "node/$nid"));
          }
        }
        else {
          watchdog('aggregator2', t('Could not save %type node %title from %site feed.', array('%type' => '<em>'. t($node->type) .'</em>', '%title' => '<em>'. $node->title .'</em>', '%site' => '<em>'. $feed->title .'</em>')), WATCHDOG_ERROR, l(t('view'), "node/$nid"));
        }
        // Sleep only if something was saved. No need to waste time for nothing :)
        flush();
        sleep(variable_get('aggregator2_sleep_interval', 3));
      }
    }
  }

  // Now un-promote older items
  if ($promote_items != 1000000000 && $promote_items != 0) {
    // $promoted is ordered oldest first, so shift off the surplus
    $temp = array();
    while (count($promoted) > $promote_items) {
      $temp[] = array_shift($promoted);
    }
    if (count($temp) > 0) {
      db_query('UPDATE {node} SET promote = 0 WHERE nid IN (%s)', implode(',', $temp));
    }
  }

  return $items_added;
}
/**
 * Private function; Parse feed url and create feeds
 *
 * For every top-level OPML outline which has an XMLURL attribute, a feed is
 * created through _aggregator2_easyfeed_add() and, when that yields a node
 * id, a row linking it to $feed is inserted into {aggregator2_item}.
 *
 * @param $xml_tree Parsed XML tree; must contain OPML > BODY > OUTLINE.
 * @param $feed Feed node object the OPML data was fetched for.
 * @return FALSE when the tree is not OPML or _aggregator2_easyfeed_add() is
 *   not available; nothing otherwise.
 */
function aggregator2_parse_opml($xml_tree, $feed) {
  if (!isset($xml_tree['OPML']) || !is_array($xml_tree['OPML'][0]['BODY'][0]['OUTLINE'])) {
    return false;
  }
  if (!function_exists('_aggregator2_easyfeed_add')) {
    return false;
  }

  global $user;
  // NOTE(review): this reads $user->nid - confirm that the user object really
  // carries that property and that the helper expects the 'nid' key.
  $u = array('nid' => $user->nid, 'name' => $user->name);

  foreach ($xml_tree['OPML'][0]['BODY'][0]['OUTLINE'] as $item) {
    // Outlines without a feed URL cannot be turned into a feed.
    if (empty($item['XMLURL'])) {
      continue;
    }
    // Call-time pass-by-reference ("&$u" at the call site) is a fatal error
    // on PHP 5.4 and later. Whether the argument is taken by reference is
    // decided by the declaration of _aggregator2_easyfeed_add() alone.
    // NOTE(review): confirm that the helper declares this parameter as a
    // reference if it is meant to modify $u.
    $nid = _aggregator2_easyfeed_add($item['XMLURL'], $u);
    if ($nid && $nid > 0) {
      db_query("INSERT INTO {aggregator2_item} (nid, fid, author, link, guid, source_link, source_xml, source_title) VALUES (%d, %d, '%s', '%s', '%s', '%s', '%s', '%s')", $nid, $feed->nid, '', '', $item['XMLURL'], $feed->link, $feed->url, $feed->title);
    }
  }
}
/**
 * Private function; Remove items older than allowed for given feed
 *
 * @param $fid Node id of the feed whose items are checked.
 * @param $age Maximum item age in seconds; 1000000000 turns removal off.
 * @param $item_delete_mode Bit mask; when the
 *   AGGREGATOR2_ITEM_DELETE_UNPUBLISHED bit is set only unpublished items
 *   are removed.
 */
function aggregator2_remove_old_items($fid, $age, $item_delete_mode = 0) {
  if ($age == 1000000000) {
    return;
  }

  // Everything created before this moment is too old.
  $cutoff = time() - $age;
  $status_filter = ($item_delete_mode & AGGREGATOR2_ITEM_DELETE_UNPUBLISHED) ? ' AND n.status = 0 ' : '';
  $sql = 'SELECT ai.nid AS nid FROM {node} n, {aggregator2_item} ai WHERE ai.fid = %d AND ai.nid = n.nid AND n.created < %d'. $status_filter;

  $expired = db_query($sql, $fid, $cutoff);
  while ($row = db_fetch_object($expired)) {
    node_delete(array('nid' => $row->nid, 'confirm' => '1'));
    // Pause between deletions, as configured.
    flush();
    sleep(variable_get('aggregator2_sleep_interval', 3));
  }
}
/**
 * Private function; parses given XML data and returns array
 *
 * The tree is built by the aggregator2_element_* callbacks through the
 * globals $xml_tree, $xml_paths (stack of references to the currently open
 * elements) and $xml_path_cur (index of the top of that stack).
 *
 * @param $data XML source. It is modified in place: CDATA markers are
 *   stripped, then re-added around the content of tags which may hold
 *   markup, and "24:mm:ss" times are rewritten to "00:mm:ss".
 * @return The parsed tree. 'parser_time' holds the parsing time in seconds;
 *   'parser_error' and 'parser_line' are set when parsing failed. When no
 *   parser could be created the tree is returned empty.
 */
function aggregator2_parse_xml(&$data) {
  global $xml_tree, $xml_paths, $xml_path_cur;

  $xml_tree = array();
  // Start from a clean stack. Without this every call would add one more
  // entry, and entries left behind by an aborted parse would stay forever.
  $xml_paths = array();
  $xml_paths[0] = &$xml_tree;
  $xml_path_cur = 0;

  $_start = microtime();

  // Some feeds already use CDATA but in "wrong way": http://www.rocketboom.com/vlog/quicktime_daily_enclosures.xml (ie. <description> something <CDATA something else></description>
  $data = trim(str_replace(array('<![CDATA[', ']]>'), '', $data));

  // Add CDATA around content which may contain (x)html data, and is not contained in CDATA yet
  $src = array(
    '%(<(link|content|content:encoded|description|title|summary|info|tagline|copyright|source|itunes:summary|media:text|text)(?>[^<]*(?<!/)>)(?!<!\[CDATA\[))(.*)(</\2>)%sUS',
    '%24:(\d\d:\d\d)%' // workaround buggy hour format in feeds
    /*'%(<(\w+)(?>[^<]*type=")(?:text/html|application/xhtml\+xml|html|xhtml")(?>[^<]*(?<!/)>)(?!<!\[CDATA\[))(.*)(</\2>)%sUS'*/
  );
  $dst = array(
    '$1<![CDATA[$3]]>$4',
    '00:$1'
  );
  $data = preg_replace($src, $dst, $data);

  // parse the data:
  $xml_parser = drupal_xml_parser_create($data);
  if ($xml_parser == NULL) {
    return $xml_tree;
  }

  xml_set_element_handler($xml_parser, 'aggregator2_element_start', 'aggregator2_element_end');
  xml_set_character_data_handler($xml_parser, 'aggregator2_element_data');
  xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, 1);
  xml_parser_set_option($xml_parser, XML_OPTION_SKIP_WHITE, 1);

  if (!xml_parse($xml_parser, $data, 1)) {
    $xml_tree['parser_error'] = xml_error_string(xml_get_error_code($xml_parser));
    $xml_tree['parser_line'] = xml_get_current_line_number($xml_parser);
  }
  else {
    unset($xml_tree['parser_error']);
    unset($xml_tree['parser_line']);
  }
  xml_parser_free($xml_parser);

  // microtime() returns "usec sec"; add both parts to get seconds as float
  $_end = microtime();
  list($sec, $usec) = explode(' ', $_start);
  $_start = $sec + $usec;
  list($sec, $usec) = explode(' ', $_end);
  $xml_tree['parser_time'] = ($sec + $usec) - $_start;

  return $xml_tree;
}
/**
 * Private call-back function used by the XML parser.
 *
 * Called for an opening tag: appends the attributes as a new entry under
 * the tag name in the currently open element and pushes a reference to
 * that entry onto the $xml_paths stack.
 */
function aggregator2_element_start($parser, $name, $attributes) {
  global $xml_tree, $xml_paths, $xml_path_cur;

  $parent = &$xml_paths[$xml_path_cur];
  $xml_path_cur++;

  $parent[$name][] = $attributes;
  $newest = count($parent[$name]) - 1;
  $xml_paths[$xml_path_cur] = &$parent[$name][$newest];
}
/**
 * Private call-back function used by the XML parser.
 *
 * Called for a closing tag: removes the last entry of the $xml_paths stack,
 * steps the stack index back and, when the closed element collected text,
 * decodes the entities in that text and trims it.
 */
function aggregator2_element_end($parser, $name) {
  global $xml_tree, $xml_paths, $xml_path_cur;

  // Take hold of the element before its stack slot is dropped.
  $closed = &$xml_paths[$xml_path_cur];
  array_pop($xml_paths);
  $xml_path_cur--;

  if (!isset($closed['VALUE'])) {
    return;
  }
  $closed['VALUE'] = trim(aggregator2_replace_entities($closed['VALUE']));
}
/**
 * Private call-back function used by the XML parser.
 *
 * Called for character data: appends the data, untrimmed, to the VALUE of
 * the currently open element. Chunks consisting only of whitespace are
 * dropped.
 */
function aggregator2_element_data($parser, $data) {
  global $xml_tree, $xml_paths, $xml_path_cur;

  if (strlen(trim($data)) == 0) {
    return;
  }
  $open = &$xml_paths[$xml_path_cur];
  $open['VALUE'] .= $data;
}
?>
