# Tweets_Scrapping_snscrape-All_Users.py
# Source Link: https://betterprogramming.pub/how-to-scrape-tweets-with-snscrape-90124ed006af
# Note: The script is resumable: if it is interrupted or hangs, rerun it and it will only process users that do not have an output file yet.
import snscrape.modules.twitter as sntwitter
import csv
from multiprocessing import Pool
import os
from time import time, sleep
def _retry_forever(action, description, first_delay=1.0, max_delay=60.0):
    """Call ``action()`` until it succeeds and return its result.

    Failures are reported and retried indefinitely, with a delay that doubles
    after each failure up to ``max_delay`` seconds. Only ``Exception`` is
    caught, so ``KeyboardInterrupt`` and ``SystemExit`` still stop the worker.
    """
    delay = first_delay
    while True:
        try:
            return action()
        except Exception as exc:
            print(f"{description} failed ({exc!r}); retrying in {delay:.0f}s")
            sleep(delay)
            delay = min(delay * 2, max_delay)


def _collect_tweets(dname, limit):
    """Return up to ``limit`` ``[id, date, text]`` rows for the user ``dname``."""
    rows = []
    scraper = sntwitter.TwitterSearchScraper(f"from:{dname}")
    for i, tweet in enumerate(scraper.get_items()):
        if i >= limit:
            break
        # The text is stored as the escaped form of its UTF-8 bytes (the
        # repr of the bytes object without the surrounding b'...' / b"..."),
        # which keeps the CSV pure ASCII and matches previously written files.
        text = repr(tweet.content.encode("utf-8"))[2:-1]
        rows.append([tweet.id, tweet.date, text])
    return rows


def _write_rows(directory, filename, rows):
    """Write ``rows`` as CSV to ``directory/filename`` without leaving partial files."""
    os.makedirs(directory, exist_ok=True)
    final_path = f"{directory}/{filename}"
    # Write to a side file first and rename it into place, so an interrupted
    # write never leaves a truncated file under the final name.
    partial_path = f"{final_path}.part"
    with open(partial_path, "w", newline='') as fp:
        csv.writer(fp).writerows(rows)
    os.replace(partial_path, final_path)


def scrape_user(param):
    """Scrape one user's tweets and save them to ``User_Tweets/<number>.csv``.

    Args:
        param: a ``(number, dname)`` pair. ``number`` names the output file
            and ``dname`` is used in the ``from:<dname>`` search query.

    At most 3240 tweets are collected per user. Each CSV row holds the tweet
    id, the tweet date and the escaped tweet text. Scraping and writing are
    each retried until they succeed, so the function only returns once the
    output file is complete.

    Raises:
        TypeError, ValueError: if ``param`` cannot be unpacked into two items.
    """
    number, dname = param
    max_tweets = 3240

    rows = _retry_forever(
        lambda: _collect_tweets(dname, max_tweets),
        f"Scraping tweets of {dname}",
    )
    _retry_forever(
        lambda: _write_rows("User_Tweets", f"{number}.csv", rows),
        f"Writing User_Tweets/{number}.csv",
    )
def main(pool_size=60):
    """Scrape every user listed in ``All_Users.csv`` that has no output file yet.

    Column 0 of each CSV row names the output file and column 2 is the value
    passed to ``scrape_user`` as the account to query. Users whose
    ``User_Tweets/<column 0>.csv`` already exists are skipped, which is what
    makes a rerun resume where the previous one stopped.

    Args:
        pool_size: upper bound on the number of worker processes.
    """
    start_time = time()

    # Workers write into this directory; create it up front so they do not
    # fail on a fresh checkout.
    os.makedirs("User_Tweets", exist_ok=True)

    with open("All_Users.csv", "r", newline='') as f:
        users = list(csv.reader(f))

    # Rows with fewer than three columns (e.g. blank lines) cannot supply
    # both fields and are ignored instead of raising IndexError.
    usable_rows = [ui for ui in users if len(ui) > 2]
    skipped = len(users) - len(usable_rows)
    if skipped:
        print(f"Skipping {skipped} malformed row(s) in All_Users.csv")

    taskList = [
        (ui[0], ui[2])
        for ui in usable_rows
        if not os.path.isfile(f"User_Tweets/{ui[0]}.csv")
    ]
    print(f"Mining {len(taskList)} Users. . .")

    if taskList:
        # Never start more workers than there are tasks. map() blocks until
        # every task has finished, and the context manager releases the pool
        # even if map() raises.
        with Pool(min(pool_size, len(taskList))) as pool:
            pool.map(scrape_user, taskList)

    print("All Tweets Saved, Time Taken:", time() - start_time)


if __name__ == '__main__':
    main()