"""Convert Simplified Chinese EPUB files to Traditional Chinese with OpenCC."""
import argparse
import zipfile
import opencc
import glob
import time
from io import BytesIO
from pathlib import Path

# Only initialize OpenCC once; building a converter per call would be very slow.
converter = opencc.OpenCC(config="s2twp.json")

# Archive member extensions (without the leading dot) whose text is converted.
TARGET_FILETYPES = ("htm", "html", "xhtml", "ncx", "opf")


def convert_epub(epub, output=None):
    """Copy an EPUB archive, converting its text members with :func:`s2t`.

    Every member of ``epub`` is written to ``output`` in the original order
    and with its original compression type.  Member names are passed through
    :func:`s2t` as well.  Members whose extension is listed in
    ``TARGET_FILETYPES`` have their content converted; in ``.opf`` members
    the string ``zh-CN`` is additionally replaced by ``zh-TW``.

    Args:
        epub: Path or file-like object readable by ``zipfile.ZipFile``.
        output: Path or writable file-like object for the converted archive.
            When omitted, a new ``BytesIO`` is created and returned.

    Returns:
        The ``output`` object that the converted archive was written to.
    """
    if output is None:
        # ZipFile(None, "w") cannot work, so give the advertised default a
        # usable meaning instead of crashing.
        output = BytesIO()

    # Context managers guarantee both archives are closed even when reading
    # or converting a member raises.
    with zipfile.ZipFile(epub, mode="r") as origin, \
            zipfile.ZipFile(output, mode="w") as copy:
        for info in origin.infolist():
            fn = info.filename
            # Drop the leading "." and compare case-insensitively so that
            # members such as "TOC.NCX" are converted too.
            extension = Path(fn).suffix[1:].lower()
            data = origin.read(info)
            if extension in TARGET_FILETYPES:
                data = convert_content(data)
                if extension == "opf":
                    data = data.replace("zh-CN", "zh-TW")
            # Every member, converted or not, keeps its compression type.
            copy.writestr(s2t(fn), data, compress_type=info.compress_type)

    return output


def convert_content(content):
    """Convert ``content`` line by line and return the result as one string.

    The input is split with ``splitlines()`` and the converted lines are
    joined with ``"\\n"``, so the original line endings and any trailing
    newline are not preserved.

    NOTE(review): ``convert_epub`` passes the raw bytes returned by
    ``ZipFile.read`` here, so this relies on the converter accepting bytes
    lines and returning ``str`` -- confirm against the installed OpenCC
    binding.
    """
    return "\n".join(s2t(line) for line in content.splitlines())


def s2t(text):
    """Return ``text`` converted by the shared module-level OpenCC converter."""
    return converter.convert(text)


def main():
    """Command-line entry point: convert every EPUB named on the command line."""
    parser = argparse.ArgumentParser(description="Convert simplified chinese to traditional chinese in epub.")
    parser.add_argument('file', nargs='+', help="epub files")
    args = parser.parse_args()

    # Expand wildcards ourselves for shells that do not; every argument that
    # contains "*" is treated as a glob pattern.
    fn_list = []
    for arg in args.file:
        if "*" in arg:
            fn_list.extend(glob.glob(arg))
        else:
            fn_list.append(arg)

    for fn in fn_list:
        if Path(fn).suffix != ".epub":
            print(f"Skipping file {fn}, which is not an epub document.")
            continue

        # The output path is the converted input path; if conversion leaves
        # the name unchanged, writing would overwrite the input, so skip it.
        target = s2t(fn)
        if fn == target:
            print(f"Skipping file {fn}, which has already been converted.")
            continue

        started = time.time()
        print(f"Converting {fn}")
        # Convert into memory first so the target file is only created once
        # the conversion has finished without raising.
        buffer = BytesIO()
        convert_epub(fn, buffer)
        with open(target, "wb") as f:
            f.write(buffer.getvalue())
        print(f"File {fn} is successfully converted. Time elapsed: {round(time.time() - started, 2)}s")


if __name__ == "__main__":
    main()
// Client-side logic for the upload page: file selection (dialog or drag and
// drop), upload with progress display, cancellation, and download of the
// converted file.
//
// Globals used but not defined in this file: `axios`, `ts` (snackbar UI) and
// `sizeLimit` (maximum upload size in bytes).

const CancelToken = axios.CancelToken;
// Cancel function of the upload currently in flight, if any.
let cancel;

// Shorthand for querySelector, optionally scoped to a context element.
const dqs = (selector, ctx = document) => ctx.querySelector(selector);

// Chainable shorthand for addEventListener on every HTML element.
HTMLElement.prototype.on = function (event, callback) {
    this.addEventListener(event, callback);
    return this;
};

/**
 * Validate the first file of `files` and, if acceptable, show it as selected.
 *
 * @param {FileList} files - files from the file input or from a drop event.
 * @returns {false|undefined} false when the file is rejected.
 */
function updateFile(files) {
    const filename = files[0].name;
    const size = files[0].size;

    // check file extension (case-insensitive, so "BOOK.EPUB" is accepted)
    if (filename.split(".").pop().toLowerCase() != "epub") {
        ts(".ts.snackbar").snackbar({
            content: "只接受 EPUB 格式的檔案!"
        });
        return false;
    }

    // check file size
    if (size >= sizeLimit) {
        ts(".ts.snackbar").snackbar({
            content: "檔案過大!"
        });
        return false;
    }

    // Keep the form input in sync with what is displayed. Only skipping the
    // assignment when the input already holds this very list ensures that a
    // dropped file replaces a previously chosen one instead of being ignored.
    const input = dqs("#upload");
    if (input.files !== files) {
        input.files = files;
    }

    dqs(".header", dqs("#dragzone")).textContent = filename;
    dqs(".description", dqs("#dragzone")).textContent = `檔案大小: ${humanFileSize(size, false)}`;
    dqs("#dragzone").dataset.mode = "selected";
}

/**
 * Return the drag zone to its initial "selecting" state and clear the input.
 *
 * @param {Event} [ev] - when given, its default action and propagation are
 *     suppressed.
 */
function reset(ev) {
    if (ev) {
        ev.preventDefault();
        ev.stopPropagation();
    }
    dqs(".header", dqs("#dragzone")).textContent = "上傳";
    // NOTE(review): the reviewed copy of this file had a raw line break inside
    // this string literal where an HTML tag appears to have been stripped;
    // "<br>" is assumed because the value is assigned to innerHTML -- confirm.
    dqs(".description", dqs("#dragzone")).innerHTML = "將檔案拖拉至此處進行上傳,或是點擊此處選取檔案。<br>Max upload size : " + humanFileSize(sizeLimit, false);
    dqs("#dragzone").dataset.mode = "selecting";
    dqs("#upload").value = "";
}

// https://stackoverflow.com/a/14919494
/**
 * Format a byte count for display.
 *
 * @param {number} bytes - size in bytes.
 * @param {boolean} si - true for powers of 1000 (kB, MB, ...), false for
 *     powers of 1024 (KiB, MiB, ...).
 * @returns {string} e.g. "512 B" or "1.5 MiB".
 */
function humanFileSize(bytes, si) {
    const thresh = si ? 1000 : 1024;
    if (Math.abs(bytes) < thresh) {
        return bytes + ' B';
    }
    const units = si
        ? ['kB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB']
        : ['KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'];
    let u = -1;
    do {
        bytes /= thresh;
        ++u;
    } while (Math.abs(bytes) >= thresh && u < units.length - 1);
    return bytes.toFixed(1) + ' ' + units[u];
}

/**
 * Extract the file name from a Content-Disposition header value.
 *
 * Takes everything after the last "=", decodes the `UTF-8''...` form, and
 * strips surrounding double quotes. Falls back to a generic name when the
 * header is missing, so that a successful conversion is never reported as a
 * failure merely because the header is not readable.
 *
 * @param {string} [disposition] - raw header value.
 * @returns {string} file name for the download attribute.
 */
function filenameFromDisposition(disposition) {
    if (!disposition) {
        return "converted.epub";
    }
    let filename = disposition.slice(disposition.lastIndexOf("=") + 1).trim();
    if (filename.startsWith("UTF-8''")) {
        filename = decodeURIComponent(filename.slice(7));
    }
    return filename.replace(/^"(.*)"$/, "$1");
}

// File chosen through the native dialog.
dqs("#upload").on("change", ev => {
    const el = ev.target;
    if (el.files.length) {
        if (el.files.length > 1) {
            ts('.snackbar').snackbar({
                content: "一次僅可上傳一個檔案。"
            });
        } else {
            updateFile(el.files);
        }
    } else {
        reset();
    }
});

// Close button: abort a running upload, then return to the initial state.
dqs(".ts.close.button").on("click", () => {
    if (dqs("#dragzone").dataset.mode == "uploading") {
        if (cancel) {
            cancel();
        }
    }
    reset();
});

// Submit button: upload the form and turn the response into a download link.
dqs("#submitbtn").on("click", ev => {
    ev.stopPropagation();
    ev.preventDefault();

    dqs("#dragzone").dataset.mode = "uploading";

    // clean up styles
    ["preparing", "positive", "negative"].forEach(c => {
        dqs("#progressbar").classList.toggle(c, false);
    });

    dqs("#progressbar .bar").style.width = "0";
    // Release the previous result. The `href` property resolves an empty
    // attribute to the page URL, so only revoke genuine object URLs.
    if (dqs("#downloadbtn").href.startsWith("blob:")) {
        window.URL.revokeObjectURL(dqs("#downloadbtn").href);
    }
    dqs("#downloadbtn").href = "";
    dqs("#downloadbtn").removeAttribute("download");

    axios.post("./api/convert", new FormData(document.form), {
        responseType: "blob",
        cancelToken: new CancelToken(function (executor) {
            cancel = executor;
        }),
        onUploadProgress: (progress) => {
            // Without a known total the ratio would be NaN; leave the bar alone.
            if (!progress.total) {
                return;
            }
            const percentage = (progress.loaded / progress.total) * 100;
            dqs("#progressbar .bar").style.width = percentage + "%";
            if (percentage == 100) {
                // Upload finished; the server is still working on the file.
                dqs("#progressbar").classList.add("preparing");
            }
        }
    }).then(function (res) {
        dqs("#dragzone").dataset.mode = "converted";
        dqs("#progressbar").classList.remove("preparing");

        const blob = new Blob([res.data], { type: "application/epub+zip" });
        const filename = filenameFromDisposition(res.headers['content-disposition']);
        dqs("#downloadbtn").href = window.URL.createObjectURL(blob);
        dqs("#downloadbtn").setAttribute("download", filename);
    }).catch(function (e) {
        dqs("#dragzone").dataset.mode = "uploadend";
        dqs("#progressbar").classList.remove("preparing");
        dqs("#progressbar").classList.add("negative");
        if (e.response) {
            // Because responseType is "blob", a JSON error body arrives as a
            // Blob and has to be read back as text before parsing.
            if (e.response.data instanceof Blob && e.response.data.type == "application/json") {
                const reader = new FileReader();
                reader.onload = function () {
                    const data = JSON.parse(this.result);
                    ts(".snackbar").snackbar({
                        content: `錯誤: ${data.error}`
                    });
                };
                reader.readAsText(e.response.data);
            } else {
                // Non-JSON error body: still tell the user something failed.
                ts(".snackbar").snackbar({
                    content: `錯誤: ${e.response.status}`
                });
            }
        } else if (axios.isCancel(e)) {
            console.log("Upload progress canceled");
            dqs("#progressbar").classList.remove("negative");
            ts(".snackbar").snackbar({
                content: "上傳已取消"
            });
        } else {
            console.error(e);
        }
    });
});

// Clicking the drag zone (but not one of its buttons or links) opens the
// file dialog, unless an upload is running.
dqs("#dragzone").on("click", ev => {
    ev.preventDefault();

    if (dqs("#dragzone").dataset.mode != "uploading") {
        const allowlist = ["button", "a"];
        if (allowlist.indexOf(ev.target.tagName.toLowerCase()) == -1) {
            dqs("#upload").click();
        }
    }
});

// Download button: trigger the download through a temporary detached link,
// then return to the initial state.
dqs("#downloadbtn").on("click", ev => {
    ev.preventDefault();
    // currentTarget is always the element the listener is attached to, even
    // when the click lands on a child element of the button.
    const el = ev.currentTarget;

    const link = document.createElement("a");
    link.setAttribute("download", el.getAttribute("download"));
    link.style.display = "none";
    link.href = el.href;
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);

    reset();
});

// File dropped onto the drag zone.
dqs("#dragzone").on("drop", ev => {
    ev.stopPropagation();
    ev.preventDefault();

    if (dqs("#dragzone").dataset.mode != "uploading") {
        const files = ev.dataTransfer.files;
        if (files) {
            if (files.length > 1) {
                ts('.snackbar').snackbar({
                    content: "一次僅可上傳一個檔案。"
                });
            } else if (files.length == 1) {
                updateFile(files);
            }
        }
    }
});

// Cancel the default dragenter/dragover actions on the drag zone.
["dragenter", "dragover"].forEach(event => {
    dqs("#dragzone").on(event, ev => {
        ev.stopPropagation();
        ev.preventDefault();
    });
});
+ +
+
+ EPUB Convert +
+
+
+ + + 上傳 + 將檔案拖拉至此處進行上傳,或是點擊此處選取檔案。
Max upload size : {{ limit_human_readable }}
+
+ + Download +
+
+
+
+
+
+ + +
+
+
"""Flask front end that converts uploaded EPUB files via ``convert.convert_epub``."""
import os
import tempfile
import hashlib
from io import BytesIO
from flask import (
    Flask, jsonify, redirect, request, render_template, send_file, url_for, safe_join
)
from werkzeug.utils import secure_filename
from convert import convert_epub, s2t
from pathlib import Path

app = Flask(__name__)
app.config['JSON_AS_ASCII'] = False
app.config['MAX_CONTENT_LENGTH'] = 20 * 1024 * 1024


def human_file_size(bytes_count):
    """Format ``bytes_count`` using binary (1024-based) units.

    Values whose magnitude is below 1024 are returned as ``"<n> B"``; larger
    values are divided by 1024 until they fit a unit and rounded to one
    decimal place, e.g. ``"20.0 MiB"``.  The magnitude is used for the
    threshold comparison so negative values scale the same way as in the
    page's JavaScript ``humanFileSize`` helper.
    """
    threshold = 1024
    units = ['KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if abs(bytes_count) < threshold:
        return f"{bytes_count} B"

    ui = -1
    while True:
        bytes_count /= threshold
        ui += 1
        # Stop once the value fits the current unit or no larger unit exists.
        if abs(bytes_count) < threshold or ui == (len(units) - 1):
            break

    return f"{round(bytes_count, 1)} {units[ui]}"


@app.route("/", methods=["GET"])
def render_index():
    """Render the upload page, passing the size limit raw and human-readable."""
    limit = app.config["MAX_CONTENT_LENGTH"]
    return render_template("index.html.j2", limit=limit, limit_human_readable=human_file_size(limit))


@app.route('/api/convert', methods=["POST"])
def upload_epub_sync():
    """Convert the EPUB uploaded in the ``upload`` form field and return it.

    Responses:
        200: the converted EPUB as an attachment named after the converted
            original file name.
        400: no ``upload`` field, or the upload has an empty file name.
        413: the file is larger than ``MAX_CONTENT_LENGTH``.
        415: the file name does not end in ``.epub`` (case-insensitive).
        500: conversion failed; the JSON ``error`` field carries the
            exception class name.
    """
    if 'upload' not in request.files:
        return jsonify({"status": False, "error": "No file is specified."}), 400

    epub_file = request.files['upload']

    if epub_file.filename == '':
        return jsonify({"status": False, "error": "No file name."}), 400

    # https://stackoverflow.com/questions/283707/size-of-an-open-file-object/283719#283719
    epub_file.seek(0, 2)
    end_position = epub_file.tell()
    # Rewind so later readers do not start at end-of-file.
    epub_file.seek(0)
    if end_position > app.config['MAX_CONTENT_LENGTH']:
        return jsonify({"status": False, "error": f"File is too large. Maxium file size is {human_file_size(app.config['MAX_CONTENT_LENGTH'])}"}), 413

    if Path(epub_file.filename).suffix.lower() != ".epub":
        return jsonify({"status": False, "error": "Not an epub document"}), 415  # Unsupported Media Type

    output_buffer = BytesIO()
    try:
        convert_epub(epub_file, output_buffer)
        converted_name = s2t(epub_file.filename)
        print(f"Converted Successfully. File: {converted_name}")
        output_buffer.seek(0)
        return send_file(output_buffer, as_attachment=True, attachment_filename=converted_name)
    except Exception as e:
        # Top-level boundary: the client only receives the exception class
        # name, so keep the full traceback in the server log.
        app.logger.exception("Failed to convert %r", epub_file.filename)
        error_class = e.__class__.__name__
        return jsonify({"status": False, "error": error_class}), 500


if __name__ == "__main__":
    # The interactive debugger lets anyone who can reach the server execute
    # arbitrary code, so it must not be on by default while listening on all
    # interfaces.  Opt in explicitly with FLASK_DEBUG=1 during development.
    app.run(host="0.0.0.0", debug=os.environ.get("FLASK_DEBUG") == "1")