Skip to content

Commit

Permalink
Fix issue with name with BF4
Browse files Browse the repository at this point in the history
  • Loading branch information
toiletclogger69 committed Jul 17, 2022
1 parent ae08b09 commit 638b1d3
Show file tree
Hide file tree
Showing 6 changed files with 252 additions and 0 deletions.
63 changes: 63 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# eh_download
dgeh, Download Gallery E-hentai.

GUI gallery downloader for e-hentai and possibly nhentai

Once you downloaded all the files you want, I suggest using this script : https://gist.github.com/toiletclogger69/d08164cd8c978f7a1fc2f12ee9e4f15a to easily read the gallery afterward.

## **DOWNLOAD THE EXE HERE [dgeh.zip](https://github.com/toiletclogger69/eh_download/files/8010577/dgeh.zip)**

## requirements (as an exe)
None, just click on the exe

## requirements (as a python script)
```
pip install gooey
pip install requests
```

---

## Screenshots :
The GUI :

![Screen1](doc/url1.png)

The result of downloading 4 galleries:

![Screen2](doc/url2.png)

In each folder every picture :

![Screen3](doc/url3.png)

---

## Fields
**Text area**: Put a list of urls inside the box, e.g.:
```
https://e-hentai.org/g/lots_of_number1/lots_of_number1/
https://e-hentai.org/g/lots_of_number2/lots_of_number2/
https://e-hentai.org/g/lots_of_number3/lots_of_number3/
```

It only accepts urls from e-hentai (eg : https://e-hentai.org/g/lots_of_number/lots_of_number/) and nhentai (eg : https://nhentai.net/g/lots_of_number/)

**Replace character** : Removes every | ? ! and other characters that are not accepted in Windows folder names.

## Use a standalone script
Go into [eh.py](eh.py), set `TRUE_IF_CLI` to True.

Add a text file named "urls.txt" in your current folder, put every url inside, then execute the script with "python eh.py".

## Issue with nhentai
I can't access easily that website so I can't test if the script works.

In theory it should, if you get an error saying forbidden access (Cloudflare protection against bot) I can't do anything about it.

## How to build the exe
`pip install pyinstaller`

go into the current folder >

`pyinstaller build.spec`
Binary file added doc/url1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added doc/url2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added doc/url3.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
188 changes: 188 additions & 0 deletions eh.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
#! python3
"""
###############################################################
made by toiletclogger69
https://github.com/toiletclogger69/eh_download
pip install gooey requests bs4
to run :
python eh.py (launch the gooey app)
if you don't want to use gooey and only this script, you can call download_gallery directly, eg :
download_gallery(url_to_the_gallery_you_want, true)
you can set TRUE_IF_CLI = True if you want to use the script directly with a text file "urls.txt" containing all urls
###############################################################
download a list of gallery from e-hentai
you can pass a list of url in the text area and it will download every page,
and put them in their respective gallery folder
###############################################################
3~4s between the download of each page, or you get flagged as a bot
the images downloaded are the sample resolution for eh (1280 * xxxx), if you want better resolution go with a torrent
"""

from sys import stdout, exit
from os import path, makedirs, getcwd
from time import sleep
from random import randint

import requests
from bs4 import BeautifulSoup
from gooey import Gooey, GooeyParser


# python -m venv env && env\Scripts\activate.bat


# change this var to change the folder name
GALLERY_FOLDER = "doujins"

''' add a urls.txt file in the current folder, add every urls inside, separated by a newline eg:
https://e-hentai.org/g/lots_of_number1/lots_of_number1/
https://e-hentai.org/g/lots_of_number2/lots_of_number2/
https://e-hentai.org/g/lots_of_number3/lots_of_number3/
then set this to True and execture the script with python eh.py'''
TRUE_IF_CLI = False
# TRUE_IF_CLI = True


headers = {"User-Agent" : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'}


###############################################################


def download_gallery(url='', true_if_replace_name=False) -> int:
    """Download every page of an e-hentai gallery into its own folder.

    Fetches the gallery page at *url*, walks each image page (sleeping
    3-4 s between requests to avoid bot detection) and saves the
    sample-resolution pictures as 0000.jpg, 0001.png, ... under
    GALLERY_FOLDER/<gallery title>.

    :param url: gallery url, e.g. https://e-hentai.org/g/<id>/<key>/
    :param true_if_replace_name: if True, strip characters from the
        gallery title that are not valid in Windows folder names
    :return: 1 on success, 0 when the gallery could not be found
    :raises requests.HTTPError: on any non-2xx HTTP response
    """
    html_request = requests.get(url, headers=headers)
    html_request.raise_for_status()
    html_page = BeautifulSoup(html_request.text, 'html.parser')

    # wrong url, expired key, or error 404 -> report and bail out
    if "Key missing, or incorrect key provided." in html_page.text or "error 404" in html_page.text:
        print(f"couldn't find {url}")
        # "-1%" is matched by the Gooey progress_regex -- presumably used
        # to signal failure in the progress bar; verify against main()
        print("-1%")
        stdout.flush()
        sleep(6)
        return 0

    # gallery title comes from the <title> tag, minus the site suffix
    title_bs4 = html_page.find('title')
    title_gallery = title_bs4.text.split(" - E-Hentai Galleries")[0]

    # the "NN pages" metadata cell (class gdt2) gives the page count
    meta = html_page.find_all(class_ ="gdt2")
    number_of_page = [x for x in meta if "pages" in x.text][0].text
    number_of_page = int(number_of_page.split(" pages")[0])

    # the first thumbnail (class gdtm) links to the first image page
    first_page = html_page.find_all(class_ ="gdtm")[0]
    url_next_page = first_page.find('a')['href']

    # create the destination folder (optionally sanitised for Windows)
    if true_if_replace_name:
        title_gallery = ''.join(char for char in title_gallery if (char.isalnum() or char in r"_-[]()\{\} "))
    gallery_path = path.join(getcwd(), GALLERY_FOLDER, title_gallery)
    makedirs(gallery_path, exist_ok=True)

    # information banner
    print("\n\n============================\n")
    print(f'\t{number_of_page} pages - {url}')
    print(f'\t{title_gallery}')
    print("\n----------------------------\n")

    # guard against ZeroDivisionError in the progress computation when
    # the gallery has a single page
    progress_divisor = max(number_of_page - 1, 1)

    for page_number in range(number_of_page):
        # don't touch the sleep amount or you will get flagged as a bot
        sleep(randint(3, 4))

        html_request = requests.get(url_next_page, headers=headers)
        html_request.raise_for_status()
        html_page = BeautifulSoup(html_request.text, 'html.parser')

        # <div id="i3"> wraps the displayed picture
        element_image = html_page.find("div", {"id": "i3"})

        # the anchor around the picture leads to the next image page
        element_link = element_image.find('a')
        url_next_page = element_link['href']

        # direct url of the current picture
        element_source = element_image.find('img')
        url_image = element_source['src']

        # 0001.jpg, 0049.png, etc.
        file_name = path.basename(f"{str(page_number):0>4}.{url_image.split('.')[-1]}")

        content_image = requests.get(url_image, headers=headers)
        # fail loudly instead of silently writing an HTML error page to disk
        content_image.raise_for_status()
        with open(path.join(gallery_path, file_name), 'wb+') as file:
            for chunk in content_image.iter_content(100000):
                file.write(chunk)
        print(f"{file_name}\t\t{int((page_number / progress_divisor) * 100):0>2}%")

    stdout.flush()
    return 1


@Gooey(progress_regex=r"(-?\d+)%$",
       disable_progress_bar_animation=True,
       program_name='dgeh',
       requires_shell=False,
       program_description="Download gallery on eh",
       menu=[{'name': 'About',
              'items': [{'type': 'AboutDialog',
                         'menuTitle': 'About',
                         'version': '1.1.0',
                         'copyright': '2021',
                         'website': 'https://github.com/toiletclogger69/eh_download',
                         'developer': 'toiletclogger69'}]}])
def main():
    """Gooey GUI entry point.

    Reads a newline-separated list of gallery urls from the text area
    and downloads each one in turn via download_gallery().
    """
    parser = GooeyParser(prog="gallery eh downloader")
    parser.add_argument('urls',
                        metavar='liste of url',
                        help='urls are separated by a newline, no blank line allowed',
                        widget='Textarea',
                        gooey_options={'height': 200, })
    parser.add_argument('-true_if_window_compatible',
                        metavar='Replace character',
                        action='store_true',
                        required=False,
                        help='Some character ( / | ? ! etc. ) in gallery title are not valid in windows file name, if set those character will be removed')
    args = parser.parse_args()

    # download each url, skipping blank lines and anything that is not http(s)
    for current_url in args.urls.split('\n'):
        if not current_url or not current_url.startswith("http"):
            continue
        download_gallery(current_url, args.true_if_window_compatible)
        print("")
        print("")
        print("-1%")
        stdout.flush()
        sleep(2)


if __name__ == "__main__":
# https://e-hentai.org/g/2228972/6126f6c5b4/
# フレイラお姉ちゃんが実験台にされてひどい目に合うお話
# https://e-hentai.org/g/2213861/40252db7b3/
# [ChCrumbles] Network I/O [Hi-Res][Still be Continuing ?...]

try:
if TRUE_IF_CLI:
""" Read every url from a text file """
with open("urls.txt", 'r') as file_text:
list_url = file_text.readlines()
for current_url in list_url:
if current_url and current_url.startswith("http"):
download_gallery(current_url, True)
else:
exit(main())
except KeyboardInterrupt:
print("\nEnd program")
1 change: 1 addition & 0 deletions urls.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
https://e-hentai.org/g/2228972/6126f6c5b4/

0 comments on commit 638b1d3

Please sign in to comment.