Capstone project - Twitter scraping using Snscrape
# importing libraries and packages
import snscrape.modules.twitter as sntwitter
import pandas as pd

# Creating list to append tweet data
tweets_list1 = []

# Using TwitterSearchScraper to scrape data and append tweets to list
for i, tweet in enumerate(sntwitter.TwitterSearchScraper('from:imVkohli').get_items()):  # declare a username
    if i > 100:  # number of tweets you want to scrape
        break
    tweets_list1.append([tweet.date, tweet.id, tweet.content, tweet.user.username])  # declare the attributes to be returned

# Creating a dataframe from the tweets list above
tweets_df1 = pd.DataFrame(tweets_list1, columns=['Datetime', 'Tweet Id', 'Text', 'Username'])
For the above code I'm getting the error below. Can someone help me with this?
ScraperException Traceback (most recent call last)
Cell In[5], line 9
6 tweets_list1 = []
8 # Using TwitterSearchScraper to scrape data and append tweets to list
----> 9 for i,tweet in enumerate(sntwitter.TwitterSearchScraper('from:imVkohli').get_items()): #declare a username
10 if i>100: #number of tweets you want to scrape
11 break
File ~\anaconda3\lib\site-packages\snscrape\modules\twitter.py:1661, in TwitterSearchScraper.get_items(self)
1658 params = paginationParams.copy()
1659 del params['cursor']
-> 1661 for obj in self._iter_api_data('https://api.twitter.com/2/search/adaptive.json', _TwitterAPIType.V2, params, paginationParams, cursor = self._cursor):
1662 yield from self._v2_timeline_instructions_to_tweets_or_users(obj)
File ~\anaconda3\lib\site-packages\snscrape\modules\twitter.py:761, in _TwitterAPIScraper._iter_api_data(self, endpoint, apiType, params, paginationParams, cursor, direction)
759 while True:
760 _logger.info(f'Retrieving scroll page {cursor}')
--> 761 obj = self._get_api_data(endpoint, apiType, reqParams)
762 yield obj
764 # No data format test, just a hard and loud crash if anything's wrong :-)
File ~\anaconda3\lib\site-packages\snscrape\modules\twitter.py:727, in _TwitterAPIScraper._get_api_data(self, endpoint, apiType, params)
725 if apiType is _TwitterAPIType.GRAPHQL:
726 params = urllib.parse.urlencode({k: json.dumps(v, separators = (',', ':')) for k, v in params.items()}, quote_via = urllib.parse.quote)
--> 727 r = self._get(endpoint, params = params, headers = self._apiHeaders, responseOkCallback = self._check_api_response)
728 try:
729 obj = r.json()
File ~\anaconda3\lib\site-packages\snscrape\base.py:251, in Scraper._get(self, *args, **kwargs)
250 def _get(self, *args, **kwargs):
--> 251 return self._request('GET', *args, **kwargs)
File ~\anaconda3\lib\site-packages\snscrape\base.py:247, in Scraper._request(self, method, url, params, data, headers, timeout, responseOkCallback, allowRedirects, proxies)
245 _logger.fatal(msg)
246 _logger.fatal(f'Errors: {", ".join(errors)}')
--> 247 raise ScraperException(msg)
248 raise RuntimeError('Reached unreachable code')
ScraperException: 4 requests to https://api.twitter.com/2/search/adaptive.json?include_profile_interstitial_type=1&include_blocking=1&include_blocked_by=1&include_followed_by=1&include_want_retweets=1&include_mute_edge=1&include_can_dm=1&include_can_media_tag=1&include_ext_has_nft_avatar=1&include_ext_is_blue_verified=1&include_ext_verified_type=1&skip_status=1&cards_platform=Web-12&include_cards=1&include_ext_alt_text=true&include_ext_limited_action_results=false&include_quote_count=true&include_reply_count=1&tweet_mode=extended&include_ext_collab_control=true&include_ext_views=true&include_entities=true&include_user_entities=true&include_ext_media_color=true&include_ext_media_availability=true&include_ext_sensitive_media_warning=true&include_ext_trusted_friends_metadata=true&send_error_codes=true&simple_quoted_tweet=true&q=from%3AimVkohli&tweet_search_mode=live&count=20&query_source=spelling_expansion_revert_click&pc=1&spelling_corrections=1&include_ext_edit_control=true&ext=mediaStats%2ChighlightedLabel%2ChasNftAvatar%2CvoiceInfo%2Cenrichments%2CsuperFollowMetadata%2CunmentionInfo%2CeditControl%2Ccollab_control%2Cvibe failed, giving up.
I'm trying to scrape data from Twitter using snscrape.
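In case the underlying request errors are useful for diagnosing this, below is a minimal sketch of the same loop with snscrape's logging turned on and the ScraperException caught. It assumes the standard logging module is enough to surface snscrape's retry messages (the traceback shows the library logging its requests before raising), and it reuses the same example username and tweet limit as above.

# Minimal debugging sketch: enable logging and catch the exception
import logging
import snscrape.base
import snscrape.modules.twitter as sntwitter

# snscrape logs its HTTP attempts via the logging module, so DEBUG level
# should show the individual request failures before the final exception
logging.basicConfig(level=logging.DEBUG)

tweets = []
try:
    for i, tweet in enumerate(sntwitter.TwitterSearchScraper('from:imVkohli').get_items()):
        if i > 100:
            break
        tweets.append([tweet.date, tweet.id, tweet.content, tweet.user.username])
except snscrape.base.ScraperException as e:
    # Print the final error instead of crashing, so the log output above stays visible
    print(f'Scrape failed: {e}')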