From d9158e9366708d7ea97649921c0016aa40eb0f15 Mon Sep 17 00:00:00 2001 From: noxibow Date: Wed, 18 Oct 2023 20:50:56 +0900 Subject: [PATCH 1/6] fix naver blogposts directory error for posts created less than 24 hours ago --- gallery_dl/extractor/naver.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/gallery_dl/extractor/naver.py b/gallery_dl/extractor/naver.py index 55faf9e79b..95bbbd19d6 100644 --- a/gallery_dl/extractor/naver.py +++ b/gallery_dl/extractor/naver.py @@ -10,7 +10,7 @@ from .common import GalleryExtractor, Extractor, Message from .. import text - +from datetime import date class NaverBase(): """Base class for naver extractors""" @@ -59,6 +59,11 @@ def metadata(self, page): data["post"]["date"] = text.parse_datetime( extr('se_publishDate pcol2">', '<') or extr('_postAddDate">', '<'), "%Y. %m. %d. %H:%M") + + # fixes directory error for posts created less than 24 hours ago + if "전" in str(data["post"]["date"]): + data["post"]["date"] = text.parse_datetime(date.today().isoformat(), format="%Y-%m-%d") + return data def images(self, page): From 5e11cf915098f3e766e57f0cf3fcad5f738e1a0a Mon Sep 17 00:00:00 2001 From: noxibow Date: Wed, 18 Oct 2023 21:05:16 +0900 Subject: [PATCH 2/6] add support for videos on naver blogs --- gallery_dl/extractor/naver.py | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/gallery_dl/extractor/naver.py b/gallery_dl/extractor/naver.py index 95bbbd19d6..55bf66ce20 100644 --- a/gallery_dl/extractor/naver.py +++ b/gallery_dl/extractor/naver.py @@ -11,6 +11,8 @@ from .common import GalleryExtractor, Extractor, Message from .. import text from datetime import date +import json +import urllib.request class NaverBase(): """Base class for naver extractors""" @@ -67,10 +69,37 @@ def metadata(self, page): return data def images(self, page): - return [ + # grab keys for json files + keys = [ + key for key in text.extract_iter(page, 'inkey" : "', '"') + ] + + videos = [] + + if keys: + # grab json ids + json_ids = text.extr(page, "likeItVideoIdListJson = '", "'") + + # convert to list + json_ids = json_ids.strip('[]').replace('"', '').replace(' ', '').split(',') + + # create list of json urls + jsons = [f'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{j}?key={k}' for j,k in zip(json_ids, keys)] + for j in jsons: + with urllib.request.urlopen(j) as url: + data = json.loads(url.read().decode()) + + # Parse source video urls and select highest quality source + sources = data['videos']['list'] + sizes = [s['size'] for s in sources] + i = sizes.index(max(sizes)) + videos.append((sources[i]['source'], None)) + + images = [ (url.replace("://post", "://blog", 1).partition("?")[0], None) for url in text.extract_iter(page, 'data-lazy-src="', '"') ] + return images + videos class NaverBlogExtractor(NaverBase, Extractor): From 78aa80764180dca3b7c727d499e1b613693462b6 Mon Sep 17 00:00:00 2001 From: noxibow2 Date: Fri, 20 Oct 2023 07:40:52 +0900 Subject: [PATCH 3/6] refactor to remove unnecessary imports --- gallery_dl/extractor/naver.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/gallery_dl/extractor/naver.py b/gallery_dl/extractor/naver.py index 55bf66ce20..b63aab8b1c 100644 --- a/gallery_dl/extractor/naver.py +++ b/gallery_dl/extractor/naver.py @@ -11,8 +11,6 @@ from .common import GalleryExtractor, Extractor, Message from .. import text from datetime import date -import json -import urllib.request class NaverBase(): """Base class for naver extractors""" @@ -86,14 +84,13 @@ def images(self, page): # create list of json urls jsons = [f'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{j}?key={k}' for j,k in zip(json_ids, keys)] for j in jsons: - with urllib.request.urlopen(j) as url: - data = json.loads(url.read().decode()) - - # Parse source video urls and select highest quality source - sources = data['videos']['list'] - sizes = [s['size'] for s in sources] - i = sizes.index(max(sizes)) - videos.append((sources[i]['source'], None)) + data = self.request(j).json() + + # Parse source video urls and select highest quality source + sources = data['videos']['list'] + sizes = [s['size'] for s in sources] + i = sizes.index(max(sizes)) + videos.append((sources[i]['source'], None)) images = [ (url.replace("://post", "://blog", 1).partition("?")[0], None) From 24d5ef6e15c174cce99ea9f85c308e7dc1033aa2 Mon Sep 17 00:00:00 2001 From: noxibow2 Date: Fri, 20 Oct 2023 07:54:42 +0900 Subject: [PATCH 4/6] fix line lengths --- gallery_dl/extractor/naver.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/gallery_dl/extractor/naver.py b/gallery_dl/extractor/naver.py index b63aab8b1c..45cd610fdd 100644 --- a/gallery_dl/extractor/naver.py +++ b/gallery_dl/extractor/naver.py @@ -62,7 +62,8 @@ def metadata(self, page): # fixes directory error for posts created less than 24 hours ago if "전" in str(data["post"]["date"]): - data["post"]["date"] = text.parse_datetime(date.today().isoformat(), format="%Y-%m-%d") + td = date.today().isoformat() + data["post"]["date"] = text.parse_datetime(td, format="%Y-%m-%d") return data @@ -79,10 +80,12 @@ def images(self, page): json_ids = text.extr(page, "likeItVideoIdListJson = '", "'") # convert to list - json_ids = json_ids.strip('[]').replace('"', '').replace(' ', '').split(',') + json_ids = json_ids.strip('[]').replace('"', '') \ + .replace(' ', '').split(',') # create list of json urls - jsons = [f'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{j}?key={k}' for j,k in zip(json_ids, keys)] + json_base = f'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/' + jsons = [f'{json_base}{j}?key={k}' for j,k in zip(json_ids, keys)] for j in jsons: data = self.request(j).json() From a47faef9d81337019eb0c3fef2189a0376935648 Mon Sep 17 00:00:00 2001 From: noxibow2 Date: Fri, 20 Oct 2023 23:54:16 +0900 Subject: [PATCH 5/6] fix video url scraping method --- gallery_dl/extractor/naver.py | 36 ++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/gallery_dl/extractor/naver.py b/gallery_dl/extractor/naver.py index 45cd610fdd..a2706f79c1 100644 --- a/gallery_dl/extractor/naver.py +++ b/gallery_dl/extractor/naver.py @@ -11,6 +11,7 @@ from .common import GalleryExtractor, Extractor, Message from .. import text from datetime import date +import json class NaverBase(): """Base class for naver extractors""" @@ -77,23 +78,24 @@ def images(self, page): if keys: # grab json ids - json_ids = text.extr(page, "likeItVideoIdListJson = '", "'") - - # convert to list - json_ids = json_ids.strip('[]').replace('"', '') \ - .replace(' ', '').split(',') - - # create list of json urls - json_base = f'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/' - jsons = [f'{json_base}{j}?key={k}' for j,k in zip(json_ids, keys)] - for j in jsons: - data = self.request(j).json() - - # Parse source video urls and select highest quality source - sources = data['videos']['list'] - sizes = [s['size'] for s in sources] - i = sizes.index(max(sizes)) - videos.append((sources[i]['source'], None)) + json_id_str = text.extr(page, "likeItVideoContentsIdMapJson = '", "'") + + if json_id_str: + json_dict = json.loads(json_id_str) + json_ids = json_dict.keys() + + # create list of json urls + json_base = f'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/' + jsons = [f'{json_base}{j}?key={k}' for j,k in zip(json_ids, keys)] + for j in jsons: + + data = self.request(j).json() + + # Parse source video urls and select highest quality source + sources = data['videos']['list'] + sizes = [s['size'] for s in sources] + i = sizes.index(max(sizes)) + videos.append((sources[i]['source'], None)) images = [ (url.replace("://post", "://blog", 1).partition("?")[0], None) From 076805dbcec32f26b6c64c03c721136fd1983697 Mon Sep 17 00:00:00 2001 From: noxibow2 Date: Fri, 20 Oct 2023 23:59:40 +0900 Subject: [PATCH 6/6] fix line lengths --- gallery_dl/extractor/naver.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/gallery_dl/extractor/naver.py b/gallery_dl/extractor/naver.py index a2706f79c1..b9fad12c35 100644 --- a/gallery_dl/extractor/naver.py +++ b/gallery_dl/extractor/naver.py @@ -78,15 +78,19 @@ def images(self, page): if keys: # grab json ids - json_id_str = text.extr(page, "likeItVideoContentsIdMapJson = '", "'") + json_base = f'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/' + json_id_str = text.extr(page, + "likeItVideoContentsIdMapJson = '", "'") + if json_id_str: json_dict = json.loads(json_id_str) json_ids = json_dict.keys() # create list of json urls - json_base = f'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/' - jsons = [f'{json_base}{j}?key={k}' for j,k in zip(json_ids, keys)] + jsons = [f'{json_base}{j}?key={k}' + for j,k in zip(json_ids, keys)] + for j in jsons: data = self.request(j).json()