Skip to content

Commit

Permalink
Fix issue with name with BF4
Browse files Browse the repository at this point in the history
  • Loading branch information
toiletclogger69 committed Jul 17, 2022
1 parent ae08b09 commit 638b1d3
Show file tree
Hide file tree
Showing 6 changed files with 252 additions and 0 deletions.
63 changes: 63 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# eh_download
dgeh, Download Gallery E-hentai.

GUI gallery downloader for e-hentai and possibly nhentai

Once you downloaded all the files you want, I suggest using this script : https://gist.github.com/toiletclogger69/d08164cd8c978f7a1fc2f12ee9e4f15a to easily read the gallery afterward.

## **DOWNLOAD THE EXE HERE [dgeh.zip](https://github.com/toiletclogger69/eh_download/files/8010577/dgeh.zip)**

## requirements (as an exe)
None, just click on the exe

## requirements (as a python script)
```
pip install gooey
pip install requests
```

---

## Screenshots :
The GUI :

![Screen1](doc/url1.png)

The result of downloading 4 galleries:

![Screen2](doc/url2.png)

In each folder every picture :

![Screen3](doc/url3.png)

---

## Fields
**Text area**: Put a list of urls inside the box, e.g.:
```
https://e-hentai.org/g/lots_of_number1/lots_of_number1/
https://e-hentai.org/g/lots_of_number2/lots_of_number2/
https://e-hentai.org/g/lots_of_number3/lots_of_number3/
```

It only accepts urls from e-hentai (eg : https://e-hentai.org/g/lots_of_number/lots_of_number/) and nhentai (eg : https://nhentai.net/g/lots_of_number/)

**Replace character** : Removes every | ? ! and other characters that are not accepted in Windows folder names.

## Use a standalone script
Go into [eh.py](eh.py), set `TRUE_IF_CLI` to True.

Add a text file named "urls.txt" in your current folder, put every url inside, then execute the script with "python eh.py".

## Issue with nhentai
I can't access easily that website so I can't test if the script works.

In theory it should, if you get an error saying forbidden access (Cloudflare protection against bot) I can't do anything about it.

## How to build the exe
`pip install pyinstaller`

go into the current folder >

`pyinstaller build.spec`
Binary file added doc/url1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added doc/url2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added doc/url3.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
188 changes: 188 additions & 0 deletions eh.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
#! python3
"""
###############################################################
made by toiletclogger69
https://github.com/toiletclogger69/eh_download
pip install gooey requests bs4
to run :
python eh.py (launch the gooey app)
if you don't want to use gooey and only this script, you can call download_gallery directly, eg :
download_gallery(url_to_the_gallery_you_want, true)
you can set TRUE_IF_CLI = True if you want to use the script directly with a text file "urls.txt" containing all urls
###############################################################
download a list of gallery from e-hentai
you can pass a list of url in the text area and it will download every page,
and put them in their respective gallery folder
###############################################################
3~4s between the download of each page, or you get flagged as a bot
the images downloaded are the sample resolution for eh (1280 * xxxx), if you want better resolution go with a torrent
"""

from sys import stdout, exit
from os import path, makedirs, getcwd
from time import sleep
from random import randint

import requests
from bs4 import BeautifulSoup
from gooey import Gooey, GooeyParser


# python -m venv env && env\Scripts\activate.bat


# change this var to change the folder name
GALLERY_FOLDER = "doujins"

''' add a urls.txt file in the current folder, add every urls inside, separated by a newline eg:
https://e-hentai.org/g/lots_of_number1/lots_of_number1/
https://e-hentai.org/g/lots_of_number2/lots_of_number2/
https://e-hentai.org/g/lots_of_number3/lots_of_number3/
then set this to True and execture the script with python eh.py'''
TRUE_IF_CLI = False
# TRUE_IF_CLI = True


headers = {"User-Agent" : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'}


###############################################################


def download_gallery(url='', true_if_replace_name=False) -> int:
    """Download every page of an e-hentai gallery into its own folder.

    Fetches the gallery page at *url*, walks each image page (sleeping
    3-4 s between requests to avoid bot detection) and saves the
    sample-resolution pictures as 0000.jpg, 0001.png, ... under
    GALLERY_FOLDER/<gallery title>.

    :param url: gallery url, e.g. https://e-hentai.org/g/<id>/<key>/
    :param true_if_replace_name: if True, strip characters from the
        gallery title that are not valid in Windows folder names
    :return: 1 on success, 0 when the gallery could not be found
    :raises requests.HTTPError: on any non-2xx HTTP response
    """
    html_request = requests.get(url, headers=headers)
    html_request.raise_for_status()
    html_page = BeautifulSoup(html_request.text, 'html.parser')

    # wrong url, expired key, or error 404 -> report and bail out
    if "Key missing, or incorrect key provided." in html_page.text or "error 404" in html_page.text:
        print(f"couldn't find {url}")
        # "-1%" is matched by the Gooey progress_regex -- presumably used
        # to signal failure in the progress bar; verify against main()
        print("-1%")
        stdout.flush()
        sleep(6)
        return 0

    # gallery title comes from the <title> tag, minus the site suffix
    title_bs4 = html_page.find('title')
    title_gallery = title_bs4.text.split(" - E-Hentai Galleries")[0]

    # the "NN pages" metadata cell (class gdt2) gives the page count
    meta = html_page.find_all(class_ ="gdt2")
    number_of_page = [x for x in meta if "pages" in x.text][0].text
    number_of_page = int(number_of_page.split(" pages")[0])

    # the first thumbnail (class gdtm) links to the first image page
    first_page = html_page.find_all(class_ ="gdtm")[0]
    url_next_page = first_page.find('a')['href']

    # create the destination folder (optionally sanitised for Windows)
    if true_if_replace_name:
        title_gallery = ''.join(char for char in title_gallery if (char.isalnum() or char in r"_-[]()\{\} "))
    gallery_path = path.join(getcwd(), GALLERY_FOLDER, title_gallery)
    makedirs(gallery_path, exist_ok=True)

    # information banner
    print("\n\n============================\n")
    print(f'\t{number_of_page} pages - {url}')
    print(f'\t{title_gallery}')
    print("\n----------------------------\n")

    # guard against ZeroDivisionError in the progress computation when
    # the gallery has a single page
    progress_divisor = max(number_of_page - 1, 1)

    for page_number in range(number_of_page):
        # don't touch the sleep amount or you will get flagged as a bot
        sleep(randint(3, 4))

        html_request = requests.get(url_next_page, headers=headers)
        html_request.raise_for_status()
        html_page = BeautifulSoup(html_request.text, 'html.parser')

        # <div id="i3"> wraps the displayed picture
        element_image = html_page.find("div", {"id": "i3"})

        # the anchor around the picture leads to the next image page
        element_link = element_image.find('a')
        url_next_page = element_link['href']

        # direct url of the current picture
        element_source = element_image.find('img')
        url_image = element_source['src']

        # 0001.jpg, 0049.png, etc.
        file_name = path.basename(f"{str(page_number):0>4}.{url_image.split('.')[-1]}")

        content_image = requests.get(url_image, headers=headers)
        # fail loudly instead of silently writing an HTML error page to disk
        content_image.raise_for_status()
        with open(path.join(gallery_path, file_name), 'wb+') as file:
            for chunk in content_image.iter_content(100000):
                file.write(chunk)
        print(f"{file_name}\t\t{int((page_number / progress_divisor) * 100):0>2}%")

    stdout.flush()
    return 1


@Gooey(progress_regex=r"(-?\d+)%$",
       disable_progress_bar_animation=True,
       program_name='dgeh',
       requires_shell=False,
       program_description="Download gallery on eh",
       menu=[{'name': 'About',
              'items': [{'type': 'AboutDialog',
                         'menuTitle': 'About',
                         'version': '1.1.0',
                         'copyright': '2021',
                         'website': 'https://github.com/toiletclogger69/eh_download',
                         'developer': 'toiletclogger69'}]}])
def main():
    """Gooey GUI entry point.

    Reads a newline-separated list of gallery urls from the text area
    and downloads each one in turn via download_gallery().
    """
    parser = GooeyParser(prog="gallery eh downloader")
    parser.add_argument('urls',
                        metavar='liste of url',
                        help='urls are separated by a newline, no blank line allowed',
                        widget='Textarea',
                        gooey_options={'height': 200, })
    parser.add_argument('-true_if_window_compatible',
                        metavar='Replace character',
                        action='store_true',
                        required=False,
                        help='Some character ( / | ? ! etc. ) in gallery title are not valid in windows file name, if set those character will be removed')
    args = parser.parse_args()

    # download each url, skipping blank lines and anything that is not http(s)
    for current_url in args.urls.split('\n'):
        if not current_url or not current_url.startswith("http"):
            continue
        download_gallery(current_url, args.true_if_window_compatible)
        print("")
        print("")
        print("-1%")
        stdout.flush()
        sleep(2)


if __name__ == "__main__":
# https://e-hentai.org/g/2228972/6126f6c5b4/
# フレイラお姉ちゃんが実験台にされてひどい目に合うお話
# https://e-hentai.org/g/2213861/40252db7b3/
# [ChCrumbles] Network I/O [Hi-Res][Still be Continuing ?...]

try:
if TRUE_IF_CLI:
""" Read every url from a text file """
with open("urls.txt", 'r') as file_text:
list_url = file_text.readlines()
for current_url in list_url:
if current_url and current_url.startswith("http"):
download_gallery(current_url, True)
else:
exit(main())
except KeyboardInterrupt:
print("\nEnd program")
1 change: 1 addition & 0 deletions urls.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
https://e-hentai.org/g/2228972/6126f6c5b4/

0 comments on commit 638b1d3

Please sign in to comment.