Skip to content

Commit

Permalink
Updates
Browse files Browse the repository at this point in the history
  • Loading branch information
monoxgas committed Aug 23, 2024
1 parent 7945564 commit 7e1ae2c
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 30 deletions.
13 changes: 0 additions & 13 deletions .github/workflows/short.yml

This file was deleted.

1 change: 0 additions & 1 deletion arxiv_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ def fill_papers_with_arxiv(papers: list[Paper]) -> list[Paper]:

if paper.title and paper.title != result.title:
print(f'[!] Title mismatch: "{paper.title}" vs "{result.title}"')
continue

paper.title = result.title
paper.url = result.entry_id
Expand Down
24 changes: 15 additions & 9 deletions notion_utils.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
import asyncio
import typing as t
from datetime import datetime

from notion_client import Client
from notion_client.helpers import collect_paginated_api
from notion_client import AsyncClient
from notion_client.helpers import async_collect_paginated_api
from tqdm import tqdm # type: ignore

from _types import Paper, Focus

NotionClient = Client
NotionClient = AsyncClient


def get_notion_client(token: str) -> NotionClient:
    """Construct a Notion API client authenticated with *token*."""
    client = NotionClient(auth=token)
    return client


def get_papers_from_notion(client: NotionClient, database_id: str) -> list[Paper]:
results = collect_paginated_api(client.databases.query, database_id=database_id)
async def get_papers_from_notion(client: NotionClient, database_id: str) -> list[Paper]:
results = await async_collect_paginated_api(
client.databases.query, database_id=database_id
)

papers: list[Paper] = []
for result in results:
Expand Down Expand Up @@ -53,10 +57,10 @@ def get_papers_from_notion(client: NotionClient, database_id: str) -> list[Paper
return papers


def write_papers_to_notion(
async def write_papers_to_notion(
client: NotionClient, database_id: str, papers: list[Paper]
) -> None:
for paper in papers:
for paper in tqdm(papers):
properties: dict[str, t.Any] = {}
if paper.title:
properties["Title"] = {"title": [{"text": {"content": paper.title}}]}
Expand All @@ -78,8 +82,10 @@ def write_papers_to_notion(
properties["Explored"] = {"checkbox": paper.explored}

if paper.page_id:
client.pages.update(paper.page_id, properties=properties)
await client.pages.update(paper.page_id, properties=properties)
else:
client.pages.create(
await client.pages.create(
parent={"database_id": database_id}, properties=properties
)

return None
19 changes: 13 additions & 6 deletions paperstack.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import argparse
import asyncio
import os
from datetime import datetime

from arxiv_utils import fill_papers_with_arxiv, search_arxiv_as_paper
from notion_utils import (
Expand All @@ -20,7 +22,7 @@
"""


def main():
async def main():
parser = argparse.ArgumentParser()

parser.add_argument(
Expand Down Expand Up @@ -53,7 +55,12 @@ def main():
openai_client = get_openai_client(args.openai_token)

print(f" |- Getting papers from Notion [{args.database_id}]")
papers = get_papers_from_notion(notion_client, args.database_id)
papers = await get_papers_from_notion(notion_client, args.database_id)
print(f" |- {len(papers)} existing papers")

for p in papers:
if p.published < datetime.fromisoformat("2024-07-01 00:00:00+00:00"):
p.explored = True

if not all([p.has_arxiv_props() for p in papers]):
print(" |- Filling in missing data from arXiv")
Expand All @@ -63,7 +70,7 @@ def main():
print(" |- Searching arXiv for new papers")
existing_titles = [paper.title for paper in papers]
for searched_paper in search_arxiv_as_paper(
args.arxiv_search_query, max_results=10
args.arxiv_search_query, max_results=50
):
if searched_paper.title not in existing_titles:
print(f" |- {searched_paper.title[:50]}...")
Expand All @@ -73,7 +80,7 @@ def main():
to_explore = [p for p in papers if not p.explored]
if to_explore:
print(" |- Getting related papers from Semantic Scholar")
recommended_papers = get_recommended_arxiv_ids_from_semantic_scholar(papers)
recommended_papers = get_recommended_arxiv_ids_from_semantic_scholar(to_explore)
papers.extend(fill_papers_with_arxiv(recommended_papers))
print(f" |- {len(recommended_papers)} new papers")
else:
Expand All @@ -96,10 +103,10 @@ def main():
to_write = [p for p in papers if p.has_changed()]
if to_write:
print(f" |- Writing {len(to_write)} updates back to Notion")
write_papers_to_notion(notion_client, args.database_id, to_write)
await write_papers_to_notion(notion_client, args.database_id, to_write)

print("[+] Done!")


if __name__ == "__main__":
main()
asyncio.run(main())
3 changes: 2 additions & 1 deletion scholar_utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from semanticscholar import SemanticScholar # type: ignore
from tqdm import tqdm # type: ignore

from _types import Paper

Expand All @@ -9,7 +10,7 @@ def get_recommended_arxiv_ids_from_semantic_scholar(
papers: list[Paper], max_results: int = 10, min_year: int = 2018
) -> list[Paper]:
results: list[dict] = []
for paper in papers:
for paper in tqdm(papers):
if not paper.url:
continue

Expand Down

0 comments on commit 7e1ae2c

Please sign in to comment.