You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Hi Guys — first off, this is a wonderful module and the discussions around it are lively. That said, I've been experiencing this weird, sporadic output using scholarly & the proxy generator where sometimes it'll give me a good output, something I want, and sometimes it won't.
This is my code:
import csv
from scholarly import scholarly, ProxyGenerator
import json
import time
class MaxTriesExceededException(Exception):
    """Error type for an operation that has used up its allowed attempts.

    NOTE(review): the meaning is taken from the name; nothing in the visible
    code raises or catches this exception.
    """
def initialize_proxy():
    """Create a proxy generator and register it with ``scholarly``.

    Builds a fresh ``ProxyGenerator``, calls its ``FreeProxies`` method
    (``timeout=10``, ``wait_time=1200``) and, when that call reports
    success, installs the generator through ``scholarly.use_proxy``.

    Raises:
        RuntimeError: If ``FreeProxies`` returns a falsy value.
    """
    pg = ProxyGenerator()
    success = pg.FreeProxies(timeout=10, wait_time=1200)
    if not success:
        # RuntimeError is still an ``Exception`` subclass, so broad handlers
        # written against the old bare ``Exception`` keep working, while new
        # callers can distinguish a proxy set-up failure from other errors.
        raise RuntimeError("Failed to initialize proxy.")
    scholarly.use_proxy(pg)
def search_scholar(author, year, keyword, max_retries=1, delay=10,
                   max_results=6):
    """Search Google Scholar for publications and return the first hits.

    The query string is ``"{author} {year} {keyword}"``. Every publication
    that is fetched is printed and collected. A failed attempt is reported
    on stdout, the proxy is re-initialised and the function sleeps for
    ``delay`` seconds before the next attempt (if any).

    Args:
        author: Author name to put in the query.
        year: Publication year to put in the query.
        keyword: Free-text keyword to put in the query.
        max_retries: Maximum number of search attempts.
        delay: Seconds to sleep after a failed attempt.
        max_results: Stop once this many publications were collected. The
            default of 6 matches the previous hard-coded "more than 5"
            cut-off. Values below 1 behave like 1.

    Returns:
        A list of publication objects from a single attempt: the successful
        one, or the partial results of the last failed attempt. The list is
        empty when nothing could be fetched.
    """
    search_text = f'{author} {year} {keyword}'
    results = []
    for attempt in range(1, max_retries + 1):
        # Each attempt restarts the query from the first hit, so start from
        # an empty list; otherwise hits fetched before a failure would be
        # appended a second time and counted towards the cap.
        results = []
        try:
            for pub in scholarly.search_pubs(search_text):
                print(pub)
                results.append(pub)
                if len(results) >= max_results:
                    return results
            # Iterator exhausted without an error: the search is complete.
            return results
        except Exception as e:
            print(f"Attempt {attempt}/{max_retries} failed with error: {e}. Reinitializing proxy and retrying...")
            # Swap the proxy even after the final attempt so that the next
            # call does not start with the one that has just failed.
            initialize_proxy()
            time.sleep(delay)
    return results
def extract_information(papers, year, author):
    """Pick the papers that match ``year`` and ``author`` and flatten them.

    Args:
        papers: Iterable of dict-like publication records. Title, author,
            publication year and abstract are read from the nested ``"bib"``
            mapping, the URL from the top-level ``"eprint_url"`` key.
        year: Wanted publication year. Compared as text, so ``2023`` and
            ``"2023"`` are treated alike.
        author: Wanted author name, handed to ``check_name_match``.

    Returns:
        A list of dicts with the keys ``title``, ``author``, ``year``,
        ``abstract`` and ``url``, one per matching paper. The list is also
        printed before it is returned.
    """
    wanted_year = str(year)
    extracted_data = []
    for paper in papers:
        # ``or {}`` also covers a record whose "bib" entry is present but None.
        bib = paper.get("bib") or {}
        info = {
            "title": bib.get("title"),
            "author": bib.get("author"),
            "year": bib.get("pub_year"),
            "abstract": bib.get("abstract"),
            "url": paper.get("eprint_url"),
        }
        authors = info["author"]
        if not authors:
            # Without author data the name check cannot succeed; iterating
            # over None used to raise TypeError here.
            continue
        if str(info["year"]) == wanted_year and check_name_match(authors, author):
            extracted_data.append(info)
    print(extracted_data)
    return extracted_data
def check_name_match(array, name):
    """Tell whether any entry of ``array`` shares a word with ``name``.

    Both sides are lower-cased and split on whitespace; one common word is
    enough for a match (``"K Kumar"`` matches ``"Krishna Kumar"`` through
    ``"kumar"``).

    Args:
        array: Iterable of author-name strings. ``None`` or an empty
            iterable yields ``False``; a single string is treated as one
            author name instead of being iterated character by character.
        name: The name to look for.

    Returns:
        ``True`` if at least one entry has a word in common with ``name``,
        otherwise ``False``.
    """
    if not array:
        return False
    if isinstance(array, str):
        array = [array]
    name_words = set(name.lower().split())
    return any(
        not name_words.isdisjoint(element.lower().split())
        for element in array
    )
def read_csv_and_search(file_path):
    """Run one Scholar search per CSV row and gather the filtered results.

    The CSV file must have a header row with the columns ``author``,
    ``year`` and ``keyword``. For every data row the three values are
    printed, passed to ``search_scholar``, and the hits are filtered with
    ``extract_information``.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A single list holding the extracted records of all rows, in file
        order.
    """
    all_extracted_data = []
    # newline='' is what the csv module asks for so that quoted fields with
    # embedded line breaks are parsed correctly. utf-8-sig reads plain UTF-8
    # and also drops a leading byte-order mark, which would otherwise become
    # part of the first column name and break the row['author'] lookup.
    with open(file_path, mode='r', newline='', encoding='utf-8-sig') as file:
        for row in csv.DictReader(file):
            author = row['author']
            year = row['year']
            keyword = row['keyword']
            print(author, year, keyword)
            papers = search_scholar(author, year, keyword)
            all_extracted_data.extend(
                extract_information(papers, year, author)
            )
    return all_extracted_data
# Route scholarly's traffic through a proxy before the first query is sent.
initialize_proxy()

# Input: one search per row of this CSV file (author, year, keyword columns).
csv_file_path = 'keywords_authors_years.csv'
extracted_data = read_csv_and_search(csv_file_path)

# Echo every matching record to stdout as pretty-printed JSON.
for data in extracted_data:
    pretty = json.dumps(data, indent=2)
    print(pretty)

# Persist the complete result list next to the script.
with open('scholar_data.json', 'w') as f:
    json.dump(extracted_data, f, indent=2)
The great output it gives me, which I'll clip for sanity purposes, is:
(base) tanushkaushik@Tanushs-MacBook-Pro Texas Lead DS % python3 testedscholarlymodule2.py
krishna kumar 2023 curriculum
{'container_type': 'Publication', 'source': <PublicationSource.PUBLICATION_SEARCH_SNIPPET: 'PUBLICATION_SEARCH_SNIPPET'>, 'bib': {'title': 'Liberalisation, privatisation, modernisation, and schooling in India: An interview with Krishna Kumar', 'author': ['C LaDousa'], 'pub_year': '2007', 'venue': 'Globalisation, Societies and Education', 'abstract': 'In 2004, India’s Congress Party wrested control of parliament from the Hindu nationalist Bharatiya Janata Party. I present below an interview conducted with Professor Krishna Kumar,'}, 'filled': False, 'gsrank': 1, 'pub_url': 'https://www.tandfonline.com/doi/abs/10.1080/14767720701425628', 'author_id': ['cPY2hPIAAAAJ'], 'url_scholarbib': '/scholar?hl=en&q=info:p1A1yltMh2AJ:scholar.google.com/&output=cite&scirp=0&hl=en', 'url_add_sclib': '/citations?hl=en&xsrf=&continue=/scholar%3Fq%3Dkrishna%2Bkumar%2B2023%2Bcurriculum%26hl%3Den%26as_sdt%3D0,33&citilm=1&update_op=library_add&info=p1A1yltMh2AJ&ei=jIRoZtiSBNS-6rQP_NO1gAc&json=', 'num_citations': 31, 'citedby_url': '/scholar?cites=6955612106615247015&as_sdt=5,33&sciodt=0,33&hl=en', 'url_related_articles': '/scholar?q=related:p1A1yltMh2AJ:scholar.google.com/&scioq=krishna+kumar+2023+curriculum&hl=en&as_sdt=0,33'}
{'container_type': 'Publication', 'source': <PublicationSource.PUBLICATION_SEARCH_SNIPPET: 'PUBLICATION_SEARCH_SNIPPET'>, 'bib': {'title': 'Technology and Education Today', 'author': ['K Kumar'], 'pub_year': '2022', 'venue': 'Social Change', 'abstract': 'as a scripted curriculum, a modular lesson, a PowerPoint presentation or any number of ever-increasing conveniences. The new commonplace of curriculum includes the replacement'}, 'filled': False, 'gsrank': 2, 'pub_url': 'https://journals.sagepub.com/doi/abs/10.1177/00490857221134925', 'author_id': [''], 'url_scholarbib': '/scholar?hl=en&q=info:NvjFxqfYC-UJ:scholar.google.com/&output=cite&scirp=1&hl=en', 'url_add_sclib': '/citations?hl=en&xsrf=&continue=/scholar%3Fq%3Dkrishna%2Bkumar%2B2023%2Bcurriculum%26hl%3Den%26as_sdt%3D0,33&citilm=1&update_op=library_add&info=NvjFxqfYC-UJ&ei=jIRoZtiSBNS-6rQP_NO1gAc&json=', 'num_citations': 2, 'citedby_url': '/scholar?cites=16504523474535315510&as_sdt=5,33&sciodt=0,33&hl=en', 'url_related_articles': '/scholar?q=related:NvjFxqfYC-UJ:scholar.google.com/&scioq=krishna+kumar+2023+curriculum&hl=en&as_sdt=0,33'}
{'container_type': 'Publication', 'source': <PublicationSource.PUBLICATION_SEARCH_SNIPPET: 'PUBLICATION_SEARCH_SNIPPET'>, 'bib': {'title': 'Human and integral education: educational paradigms from the Indian context expanding meanings of peace and conflict', 'author': ['K Kumar'], 'pub_year': '2022', 'venue': 'Journal of Peace Education', 'abstract': 'Consequently, school curriculum has played a role in For instance, the Indian national peace curriculum draws our attention The national peace curriculum also recognizes exclusion in'}, 'filled': False, 'gsrank': 3, 'pub_url': 'https://www.tandfonline.com/doi/abs/10.1080/17400201.2022.2148639', 'author_id': [''], 'url_scholarbib': '/scholar?hl=en&q=info:m6pksJmNLowJ:scholar.google.com/&output=cite&scirp=2&hl=en', 'url_add_sclib': '/citations?hl=en&xsrf=&continue=/scholar%3Fq%3Dkrishna%2Bkumar%2B2023%2Bcurriculum%26hl%3Den%26as_sdt%3D0,33&citilm=1&update_op=library_add&info=m6pksJmNLowJ&ei=jIRoZtiSBNS-6rQP_NO1gAc&json=', 'num_citations': 3, 'citedby_url': '/scholar?cites=10101166705467501211&as_sdt=5,33&sciodt=0,33&hl=en', 'url_related_articles': '/scholar?q=related:m6pksJmNLowJ:scholar.google.com/&scioq=krishna+kumar+2023+curriculum&hl=en&as_sdt=0,33'}
{'container_type': 'Publication', 'source': <PublicationSource.PUBLICATION_SEARCH_SNIPPET: 'PUBLICATION_SEARCH_SNIPPET'>, 'bib': {'title': 'Empowering Minds: The Evolution of Higher Education in Tamil Nadu towards Innovation and Excellence', 'author': ['A Saravanakumar', 'K Krishnamoorthy'], 'pub_year': '2023', 'venue': 'NA', 'abstract': "are clearly outlined in the State's Vision 2023. The vision document declares the government's development goals in curriculum, research, innovation, cocurricular, and extracurricular"}, 'filled': False, 'gsrank': 4, 'pub_url': 'https://www.researchgate.net/profile/Dr-saravanakumar-2/publication/374673790_The_Evolution_of_Higher_Education_in_Tamil_Nadu_towards_Innovation_and_Excellence/links/6528d1062e1ba453041e6c26/The-Evolution-of-Higher-Education-in-Tamil-Nadu-towards-Innovation-and-Excellence.pdf', 'author_id': ['3gl8pgIAAAAJ', 'teFhC08AAAAJ'], 'url_scholarbib': '/scholar?hl=en&q=info:ux3yW6jCHCMJ:scholar.google.com/&output=cite&scirp=3&hl=en', 'url_add_sclib': '/citations?hl=en&xsrf=&continue=/scholar%3Fq%3Dkrishna%2Bkumar%2B2023%2Bcurriculum%26hl%3Den%26as_sdt%3D0,33&citilm=1&update_op=library_add&info=ux3yW6jCHCMJ&ei=jIRoZtiSBNS-6rQP_NO1gAc&json=', 'num_citations': 1, 'citedby_url': '/scholar?cites=2530111119028264379&as_sdt=5,33&sciodt=0,33&hl=en', 'url_related_articles': '/scholar?q=related:ux3yW6jCHCMJ:scholar.google.com/&scioq=krishna+kumar+2023+curriculum&hl=en&as_sdt=0,33', 'eprint_url': 'https://www.researchgate.net/profile/Dr-saravanakumar-2/publication/374673790_The_Evolution_of_Higher_Education_in_Tamil_Nadu_towards_Innovation_and_Excellence/links/6528d1062e1ba453041e6c26/The-Evolution-of-Higher-Education-in-Tamil-Nadu-towards-Innovation-and-Excellence.pdf'}
{'container_type': 'Publication', 'source': <PublicationSource.PUBLICATION_SEARCH_SNIPPET: 'PUBLICATION_SEARCH_SNIPPET'>, 'bib': {'title': 'Reflections on Teaching an Engineering Course through Murder Mysteries.', 'author': ['K Kumar'], 'pub_year': '2023', 'venue': 'Journal of Technology and Science Education', 'abstract': 'This paper presents a reflective analysis of a novel approach to Problem-Based Learning (PBL) to teach abstract concepts in a large-class setting, specifically tailored for a third-year'}, 'filled': False, 'gsrank': 5, 'pub_url': 'https://eric.ed.gov/?id=EJ1407522', 'author_id': ['VJQEa68AAAAJ'], 'url_scholarbib': '/scholar?hl=en&q=info:UE5Ej0rVoXcJ:scholar.google.com/&output=cite&scirp=4&hl=en', 'url_add_sclib': '/citations?hl=en&xsrf=&continue=/scholar%3Fq%3Dkrishna%2Bkumar%2B2023%2Bcurriculum%26hl%3Den%26as_sdt%3D0,33&citilm=1&update_op=library_add&info=UE5Ej0rVoXcJ&ei=jIRoZtiSBNS-6rQP_NO1gAc&json=', 'num_citations': 1, 'citedby_url': '/scholar?cites=8620405677971754576&as_sdt=5,33&sciodt=0,33&hl=en', 'url_related_articles': '/scholar?q=related:UE5Ej0rVoXcJ:scholar.google.com/&scioq=krishna+kumar+2023+curriculum&hl=en&as_sdt=0,33', 'eprint_url': 'https://files.eric.ed.gov/fulltext/EJ1407522.pdf'}
But on the other hand, when I try the same code the next day, it gives me this output:
(base) tanushkaushik@Tanushs-MacBook-Pro Texas Lead DS % python3 testedscholarlymodule2.py
krishna kumar 2023 curriculum
Attempt 1/1 failed with error: Cannot Fetch from Google Scholar.. Reinitializing proxy and retrying...
jules elkins 2023 education
Attempt 1/1 failed with error: Cannot Fetch from Google Scholar.. Reinitializing proxy and retrying...
jules elkins 2023 student
Attempt 1/1 failed with error: Cannot Fetch from Google Scholar.. Reinitializing proxy and retrying...
jules elkins 2023 classroom
Attempt 1/1 failed with error: Cannot Fetch from Google Scholar.. Reinitializing proxy and retrying...
jules elkins 2023 curriculum
reacted with thumbs up emoji reacted with thumbs down emoji reacted with laugh emoji reacted with hooray emoji reacted with confused emoji reacted with heart emoji reacted with rocket emoji reacted with eyes emoji
-
Hi Guys — first off, this is a wonderful module and the discussions around it are lively. That said, I've been experiencing this weird, sporadic output using scholarly & the proxy generator where sometimes it'll give me a good output, something I want, and sometimes it won't.
This is my code:
But on the other hand, when I try the same code the next day, it gives me this output:
Why is it so sporadic? What can I do to fix it?
Beta Was this translation helpful? Give feedback.
All reactions