instagram_importer-8.x-1.1/src/instagramImport.php
src/instagramImport.php
<?php
namespace Drupal\instagram_importer;
use Drupal\Component\Serialization\Json;
use Drupal\Core\File\FileSystemInterface;
/**
 * Scrapes public Instagram tag and profile pages and stores the posts found
 * there as nodes of type 'instagram'.
 */
class instagramImport {
/**
 * Running count of posts imported by this instance.
 *
 * Reported in the notice that prepare() logs once the import has finished.
 *
 * @var int
 */
protected $NEW_POST_COUNT = 0;
/**
 * Runs a full import for every configured hashtag and user.
 *
 * Reads the comma separated 'settings.instagram_tags' and
 * 'settings.instagram_users' values from the 'instagram_importer.settings'
 * configuration, scrapes the matching tag and profile pages and hands each
 * scraped page to processPosts(). A notice with the number of imported posts
 * is logged at the end.
 */
public function prepare() {
  $config = \Drupal::config('instagram_importer.settings');

  foreach ($this->parseSettingList($config->get('settings.instagram_tags'), '#') as $hashtag) {
    $page = $this->scrape_insta_hash($hashtag);
    // Hand over only the page that was just scraped. Passing an ever growing
    // list would re-process (and re-scrape) every earlier page on each
    // iteration. A failed scrape is passed as an empty page so that
    // processPosts() reports it instead of receiving NULL/FALSE.
    $this->processPosts([is_array($page) ? $page : []], 'TagPage', $hashtag);
  }

  foreach ($this->parseSettingList($config->get('settings.instagram_users'), '@') as $user) {
    $page = $this->scrape_insta_hash_user($user);
    $this->processPosts([is_array($page) ? $page : []], 'ProfilePage');
  }

  \Drupal::logger('instagram_importer')->notice('Importer has run. ' . $this->NEW_POST_COUNT . ' post(s) imported.');
}

/**
 * Turns a comma separated setting value into a clean list of names.
 *
 * Spaces and the given prefix character are removed, surrounding whitespace
 * is trimmed and empty entries (unset setting, trailing or doubled commas)
 * are dropped, so callers can always iterate over the result.
 *
 * @param string|null $raw
 *   The raw setting value; NULL when the setting has never been saved.
 * @param string $prefix
 *   The marker character to strip from every entry, e.g. '#' or '@'.
 *
 * @return string[]
 *   The non-empty names in their configured order.
 */
private function parseSettingList($raw, $prefix) {
  $cleaned = str_replace([' ', $prefix], '', (string) $raw);
  $names = array_map('trim', explode(',', $cleaned));

  return array_values(array_filter($names, function ($name) {
    return $name !== '';
  }));
}
/**
 * Scrapes the public Instagram page of a hashtag.
 *
 * The page embeds its data as a JSON literal assigned to
 * 'window._sharedData'; that literal is cut out of the HTML and decoded.
 *
 * @param string $tag
 *   The hashtag without the leading '#'.
 *
 * @return array|null
 *   The decoded data, or NULL when the page could not be fetched, the marker
 *   is missing or the JSON is invalid.
 */
private function scrape_insta_hash($tag) {
  // Encode the tag so characters outside the URL-safe range cannot break the
  // request URL.
  $insta_source = file_get_contents('https://www.instagram.com/explore/tags/' . rawurlencode($tag) . '/');
  if ($insta_source === FALSE) {
    return NULL;
  }

  $shards = explode('window._sharedData = ', $insta_source);
  if (!isset($shards[1])) {
    // Marker not present, e.g. a login wall or a changed page layout.
    return NULL;
  }

  // The JSON literal ends right before the closing ';</script>'.
  $insta_json = explode(';</script>', $shards[1], 2);
  $insta_array = json_decode($insta_json[0], TRUE);

  return is_array($insta_array) ? $insta_array : NULL;
}
/**
 * Scrapes the public Instagram profile page of a user.
 *
 * The page embeds its data as a JSON literal assigned to
 * 'window._sharedData'; that literal is cut out of the HTML and decoded.
 * See https://www.picssel.com/build-a-simple-instagram-api-case-study/
 *
 * @param string $username
 *   The account name without the leading '@'.
 *
 * @return array|false
 *   The decoded data, or FALSE when the page could not be fetched, the
 *   marker is missing or the JSON is invalid.
 */
private function scrape_insta_hash_user($username) {
  $url = 'https://www.instagram.com/' . rawurlencode($username);

  try {
    $response = (string) \Drupal::httpClient()->get($url)->getBody();
  }
  catch (\Exception $e) {
    // One unreachable or unknown profile must not abort the whole import.
    \Drupal::logger('instagram_importer')->error('Instagram profile page could not be fetched: ' . $url);
    return FALSE;
  }

  if ($response === '') {
    return FALSE;
  }

  $marker = 'window._sharedData = ';
  $start_position = strpos($response, $marker);
  if ($start_position === FALSE) {
    // Without this check the code would slice from a bogus offset.
    return FALSE;
  }

  $after_marker = substr($response, $start_position + strlen($marker));
  $end_position = strpos($after_marker, '</script>');
  if ($end_position === FALSE) {
    return FALSE;
  }

  // Drop surrounding whitespace and the ';' that terminates the assignment.
  $jsondata = rtrim(trim(substr($after_marker, 0, $end_position)), ';');
  $object = Json::decode($jsondata);

  return is_array($object) ? $object : FALSE;
}
/**
 * Scrapes an arbitrary Instagram page, e.g. the page of a single post.
 *
 * The page embeds its data as a JSON literal assigned to
 * 'window._sharedData'; that literal is cut out of the HTML and decoded.
 *
 * @param string $name
 *   The full URL of the page to fetch (despite the parameter name).
 *
 * @return array|null
 *   The decoded data, or NULL when the page could not be fetched, the marker
 *   is missing or the JSON is invalid.
 */
private function scrape_insta_user($name) {
  $insta_source = file_get_contents($name);
  if ($insta_source === FALSE) {
    return NULL;
  }

  $shards = explode('window._sharedData = ', $insta_source);
  if (!isset($shards[1])) {
    // Marker not present, e.g. a login wall or a changed page layout.
    return NULL;
  }

  // The JSON literal ends right before the closing ';</script>'.
  $insta_json = explode(';</script>', $shards[1], 2);
  $insta_array = json_decode($insta_json[0], TRUE);

  return is_array($insta_array) ? $insta_array : NULL;
}
/**
 * Imports the posts contained in one or more scraped Instagram pages.
 *
 * For every page the first 50 media edges of the requested page type are
 * converted to post arrays and passed to
 * instagram_importer_save_socialpost().
 *
 * @param array $items
 *   List of decoded 'window._sharedData' structures. Entries that are not
 *   arrays or are empty are reported and skipped.
 * @param string $type
 *   Either 'TagPage' or 'ProfilePage'; selects where the edges are read from.
 * @param string|null $hashtag
 *   The hashtag the pages were scraped for; stored on 'TagPage' posts.
 */
private function processPosts($items, $type, $hashtag = NULL) {
  // A page only exposes a limited number of posts; never read more than this.
  $limit = 50;

  foreach ($items as $page) {
    if (!is_array($page) || count($page) === 0) {
      \Drupal::logger('instagram_importer')->notice('Importer has run. No post(s) could be found. Are you sure your settings are correct?');
      continue;
    }

    foreach (array_slice($this->extractEdges($page, $type), 0, $limit) as $edge) {
      // Posts without an id are rejected by the save method anyway; skipping
      // them here avoids scraping their post page for nothing.
      if (!isset($edge['node']) || !is_array($edge['node']) || empty($edge['node']['id'])) {
        continue;
      }
      // The save method keeps NEW_POST_COUNT up to date for every node it
      // creates, so the counter must not be incremented again here.
      $this->instagram_importer_save_socialpost($this->buildPostData($edge['node'], $type, $hashtag));
    }
  }
}

/**
 * Returns the media edges of a scraped page for the given page type.
 *
 * @param array $page
 *   A decoded 'window._sharedData' structure.
 * @param string $type
 *   Either 'TagPage' or 'ProfilePage'.
 *
 * @return array
 *   The list of edges, or an empty array when the page does not contain
 *   edges of the requested type.
 */
private function extractEdges(array $page, $type) {
  $edges = NULL;
  if ($type === 'TagPage') {
    $edges = $page['entry_data']['TagPage'][0]['graphql']['hashtag']['edge_hashtag_to_media']['edges'] ?? NULL;
  }
  elseif ($type === 'ProfilePage') {
    $edges = $page['entry_data']['ProfilePage'][0]['graphql']['user']['edge_owner_to_timeline_media']['edges'] ?? NULL;
  }

  return is_array($edges) ? $edges : [];
}

/**
 * Builds the post array expected by instagram_importer_save_socialpost().
 *
 * @param array $node
 *   The 'node' element of a single media edge.
 * @param string $type
 *   Either 'TagPage' or 'ProfilePage'.
 * @param string|null $hashtag
 *   The hashtag to store on 'TagPage' posts.
 *
 * @return array
 *   Array with the keys media, text, time, url, id, name, screenname and
 *   profile_img, plus hashtag for 'TagPage' posts scraped for a hashtag.
 */
private function buildPostData(array $node, $type, $hashtag = NULL) {
  $data = [];
  // Thumbnail URL of the post.
  $data['media'] = $node['thumbnail_src'] ?? NULL;
  // Posts without a caption have no caption edge; fall back to a blank text.
  $data['text'] = $node['edge_media_to_caption']['edges'][0]['node']['text'] ?? ' ';
  $data['time'] = $node['taken_at_timestamp'] ?? NULL;
  $data['url'] = 'https://www.instagram.com/p/' . ($node['shortcode'] ?? '');
  $data['id'] = $node['id'] ?? NULL;

  // The owner details are read from the page of the post itself.
  $post_page = $this->scrape_insta_user($data['url']);
  $owner = $post_page['entry_data']['PostPage'][0]['graphql']['shortcode_media']['owner'] ?? [];
  $data['name'] = $owner['username'] ?? '';
  $data['screenname'] = $owner['full_name'] ?? '';
  $data['profile_img'] = $owner['profile_pic_url'] ?? NULL;

  if ($type === 'TagPage' && $hashtag !== NULL) {
    $data['hashtag'] = $hashtag;
  }

  return $data;
}
/**
 * Creates an 'instagram' node for a scraped post unless it already exists.
 *
 * @param array $socialpost
 *   The post data. Keys read here: id, url, name, screenname, text, time,
 *   profile_img and, optionally, hashtag and media.
 *
 * @return bool|null
 *   TRUE when a new node was saved, FALSE when a node with the same
 *   Instagram id already exists or the post carries an empty media value,
 *   NULL when the post has no id.
 */
function instagram_importer_save_socialpost($socialpost) {
  // Without an id the post cannot be checked for duplicates.
  if (empty($socialpost['id'])) {
    return null;
  }

  // Look for an existing node regardless of the access of the current user,
  // otherwise inaccessible nodes would be imported a second time.
  $existing = \Drupal::entityQuery('node')
    ->accessCheck(FALSE)
    ->condition('type', 'instagram')
    ->condition('field_instagram_id', $socialpost['id'])
    ->execute();
  if (count($existing) > 0) {
    return FALSE;
  }

  // A post that carries a media key with an empty value is not imported.
  // Decide this before anything is downloaded so no orphaned avatar file is
  // left behind.
  $picture = isset($socialpost['media']) ? $socialpost['media'] : NULL;
  if ($picture !== NULL && !$picture) {
    return FALSE;
  }

  // Keep only printable ASCII; computed once and reused for title and body.
  $body = (string) preg_replace('/[^(\x20-\x7F)]*/', '', isset($socialpost['text']) ? (string) $socialpost['text'] : '');
  $title = trim(substr($body, 0, 100));
  if ($title === '') {
    $title = '[unknown]';
  }

  $node_socialpost = \Drupal::entityTypeManager()
    ->getStorage('node')
    ->create([
      'type' => 'instagram',
      'title' => $title,
      'uid' => 1,
    ]);
  $node_socialpost->set('field_instagram_id', $socialpost['id']);
  $node_socialpost->set('field_instagram_link', ['uri' => $socialpost['url']]);
  $node_socialpost->set('field_instagram_username', strip_tags((string) $socialpost['name']));
  $node_socialpost->set('field_instagram_screenname', "@" . strip_tags((string) $socialpost['screenname']));
  $node_socialpost->set('body', $body);
  if (isset($socialpost['time']) && is_numeric($socialpost['time'])) {
    // NOTE(review): date() formats in PHP's default timezone; if the field
    // stores its values in UTC, gmdate() may be the right call - confirm.
    $node_socialpost->set('field_instagram_created_date', date('Y-m-d\TH:i:s', $socialpost['time']));
  }

  // Fetch the avatar and turn it into a managed file.
  if (!empty($socialpost['profile_img'])) {
    $avatar_file = $this->InstagramImporterFetchExternalFile($socialpost['profile_img']);
    if ($avatar_file) {
      $node_socialpost->set('field_instagram_avatar', $avatar_file->id());
    }
  }

  if (isset($socialpost['hashtag'])) {
    $node_socialpost->set('field_instagram_hashtag', $socialpost['hashtag']);
  }

  // Attach the picture of the post, if there is one.
  if ($picture) {
    $image_file = $this->InstagramImporterFetchExternalFile($picture);
    // A failed download yields a non-object; calling id() on it would be
    // fatal, so the node is then saved without an image.
    if ($image_file) {
      $node_socialpost->set('field_instagram_image', $image_file->id());
    }
  }

  // Save the node and count it as newly imported.
  $node_socialpost->save();
  $this->NEW_POST_COUNT++;

  return TRUE;
}
/**
 * Downloads an external file and stores it as a managed Drupal file.
 *
 * The file is placed in the 'public://instagram' directory, which is created
 * when it does not exist yet.
 *
 * @param string $source
 *   The absolute URL of the file to download.
 *
 * @return object|false
 *   The value returned by system_retrieve_file() in managed mode (callers
 *   read the file id from it through id()), or FALSE when the source is
 *   empty, the file cannot be downloaded or the directory cannot be
 *   prepared.
 */
private function InstagramImporterFetchExternalFile($source) {
  $source = trim((string) $source);
  if ($source === '') {
    return false;
  }

  $directory = 'public://instagram';

  // Probe the URL first so a failing download is logged for this module.
  // NOTE(review): this requests the file a second time on top of the
  // download done by system_retrieve_file() below.
  try {
    \Drupal::httpClient()->get($source);
  }
  catch (\Exception $e) {
    \Drupal::logger('instagram_importer')->error( 'Instagram image could not be downloaded: '.$source);
    // The file is known to be unreachable; do not attempt the download.
    return false;
  }

  if (!\Drupal::service('file_system')->prepareDirectory($directory, FileSystemInterface::CREATE_DIRECTORY)) {
    return false;
  }

  return system_retrieve_file($source, $directory, true);
}
}
