From a755323cdcedd958a654607c611288f543dee993 Mon Sep 17 00:00:00 2001 From: Kenneth Lin Date: Thu, 20 Jun 2024 13:56:47 +0800 Subject: [PATCH 1/4] My updated. Rename pdf file. --- archive-org-downloader.py | 13 +++++++++++-- book.txt | 0 download.bat | 1 + 3 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 book.txt create mode 100644 download.bat diff --git a/archive-org-downloader.py b/archive-org-downloader.py index 467e2fc..f64cd11 100644 --- a/archive-org-downloader.py +++ b/archive-org-downloader.py @@ -9,6 +9,7 @@ import sys import shutil import json +from urllib.parse import unquote def display_error(response, message): print(message) @@ -214,6 +215,10 @@ def make_pdf(pdf, title, directory): session = loan(session, book_id) title, links, metadata = get_book_infos(session, url) + print(unquote(url)) + + #print("metadata is" + metadata["title-alt-script"]) + directory = os.path.join(d, title) # Handle the case where multiple books with the same name are downloaded i = 1 @@ -247,7 +252,7 @@ def make_pdf(pdf, title, directory): raise Exception("unsupported metadata type") # title if 'title' in metadata: - pdfmeta['title'] = metadata['title'] + pdfmeta['title'] = metadata['title'] # author if 'creator' in metadata and 'associated-names' in metadata: pdfmeta['author'] = metadata['creator'] + "; " + metadata['associated-names'] @@ -263,8 +268,12 @@ def make_pdf(pdf, title, directory): pass # keywords pdfmeta['keywords'] = [f"https://archive.org/details/{book_id}"] - + pdf = img2pdf.convert(images, **pdfmeta) + if 'title-alt-script' in metadata: + title = metadata['title-alt-script'] + title = title.replace("\\"," ").replace("/"," ").replace(":"," ").replace("*"," ").replace("?"," ") + title = title.replace("\""," ").replace("<"," ").replace(">"," ").replace("|"," ") make_pdf(pdf, title, args.dir if args.dir != None else "") try: shutil.rmtree(directory) diff --git a/book.txt b/book.txt new file mode 100644 index 0000000..e69de29 diff --git a/download.bat b/download.bat new file mode 100644 index 0000000..9244900 --- /dev/null +++ b/download.bat @@ -0,0 +1 @@ +python archive-org-downloader.py -e *** -p *** -r 3 -d books --file book.txt \ No newline at end of file From 58640cb718556c542efb5bd22990ea87aa73b25d Mon Sep 17 00:00:00 2001 From: Kenneth Lin Date: Thu, 20 Jun 2024 14:03:13 +0800 Subject: [PATCH 2/4] Update markdown. --- .gitignore | 1 + README.md | 9 +-------- 2 files changed, 2 insertions(+), 8 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7030d93 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/books diff --git a/README.md b/README.md index 7862a99..ea8a397 100644 --- a/README.md +++ b/README.md @@ -69,11 +69,4 @@ python3 archive-org-downloader.py -e myemail@tempmail.com -p Passw0rd -r 0 -u ht If you want to download a lot of books, you can paste the urls of the books in a .txt file (one per line) and use `--file` ```sh python3 archive-org-downloader.py -e myemail@tempmail.com -p Passw0rd --file books_to_download.txt -``` - -## Donation -If you want to support my work, you can send 2 or 3 Bitcoins 🙃 to this address: -``` -bc1q4nq8tjuezssy74d5amnrrq6ljvu7hd3l880m7l -``` -![bitcoin_address](https://user-images.githubusercontent.com/54740007/169100171-1061c7a0-207e-459b-84de-2d6bb93b0f38.png) +``` \ No newline at end of file From 9ce5253f1fde2c947e449d39bdbd9208f1337548 Mon Sep 17 00:00:00 2001 From: Kenneth Lin Date: Mon, 5 Jan 2026 17:19:40 +0800 Subject: [PATCH 3/4] Refactor login function for better error handling --- archive-org-downloader.py | 42 ++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/archive-org-downloader.py b/archive-org-downloader.py index a471079..00181de 100644 --- a/archive-org-downloader.py +++ b/archive-org-downloader.py @@ -9,7 +9,6 @@ import sys import shutil import json -from urllib.parse import unquote import re import base64 import hashlib @@ -45,19 +44,30 @@ def get_book_infos(session, url): def login(email, password): session = requests.Session() - session.get("https://archive.org/account/login") + response = session.get("https://archive.org/services/account/login/") + login_data = response.json() + if not login_data['success']: + display_error(response, "[-] Error while getting login token:") - data = {"username":email, "password":password} + login_token = login_data["value"]["token"] - response = session.post("https://archive.org/account/login", data=data) - if "bad_login" in response.text: - print("[-] Invalid credentials!") - exit() - elif "Successful login" in response.text: + headers = {"Content-Type": "application/x-www-form-urlencoded"} + data = {"username":email, "password":password, "t": login_token} + + response = session.post("https://archive.org/services/account/login/", headers=headers, data=json.dumps(data)) + try: + response_json = response.json() + except: + display_error(response, "[-] Error while login:") + + if response_json["success"] == False: + if response_json["value"] == "bad_login": + print("[-] Invalid credentials!") + exit() + display_error(response, "[-] Error while login:") + else: print("[+] Successful login") return session - else: - display_error(response, "[-] Error while login:") def loan(session, book_id, verbose=True): data = { @@ -266,10 +276,6 @@ def make_pdf(pdf, title, directory): session = loan(session, book_id) title, links, metadata = get_book_infos(session, url) - print(unquote(url)) - - #print("metadata is" + metadata["title-alt-script"]) - directory = os.path.join(d, title) # Handle the case where multiple books with the same name are downloaded i = 1 @@ -303,7 +309,7 @@ def make_pdf(pdf, title, directory): raise Exception("unsupported metadata type") # title if 'title' in metadata: - pdfmeta['title'] = metadata['title'] + pdfmeta['title'] = metadata['title'] # author if 'creator' in metadata and 'associated-names' in metadata: pdfmeta['author'] = metadata['creator'] + "; " + metadata['associated-names'] @@ -319,12 +325,8 @@ def make_pdf(pdf, title, directory): pass # keywords pdfmeta['keywords'] = [f"https://archive.org/details/{book_id}"] - + pdf = img2pdf.convert(images, **pdfmeta) - if 'title-alt-script' in metadata: - title = metadata['title-alt-script'] - title = title.replace("\\"," ").replace("/"," ").replace(":"," ").replace("*"," ").replace("?"," ") - title = title.replace("\""," ").replace("<"," ").replace(">"," ").replace("|"," ") make_pdf(pdf, title, args.dir if args.dir != None else "") try: shutil.rmtree(directory) From 22026b953037a915d8d651296f79f5f60af8af4b Mon Sep 17 00:00:00 2001 From: Kenneth Lin Date: Tue, 6 Jan 2026 16:21:21 +0800 Subject: [PATCH 4/4] Refactor archive-org-downloader.py structure --- archive-org-downloader.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/archive-org-downloader.py b/archive-org-downloader.py index 00181de..7d3cc6f 100644 --- a/archive-org-downloader.py +++ b/archive-org-downloader.py @@ -327,7 +327,11 @@ def make_pdf(pdf, title, directory): pdfmeta['keywords'] = [f"https://archive.org/details/{book_id}"] pdf = img2pdf.convert(images, **pdfmeta) - make_pdf(pdf, title, args.dir if args.dir != None else "") + if 'title-alt-script' in metadata: + title = metadata['title-alt-script'] + title = title.replace("\\"," ").replace("/"," ").replace(":"," ").replace("*"," ").replace("?"," ") + title = title.replace("\""," ").replace("<"," ").replace(">"," ").replace("|"," ") + make_pdf(pdf, title, args.dir if args.dir != None else "") try: shutil.rmtree(directory) except OSError as e: