Spaces:
Sleeping
Sleeping
project set-up update
Browse files
- .gitignore +2 -0
- Makefile +3 -0
- header.html +10 -21
- requirements.txt +4 -2
- utils.py +1 -0
.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
output/
|
Makefile
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
apt-get update && apt-get upgrade
|
| 2 |
+
|
| 3 |
+
/home/user/app/download_models_hf.py
|
header.html
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
<html>
|
| 2 |
<head>
|
| 3 |
-
<!-- <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/[email protected]/css/bulma.min.css"> -->
|
| 4 |
<link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.15.4/css/all.css">
|
| 5 |
<style>
|
| 6 |
.link-block {
|
|
@@ -21,10 +20,6 @@
|
|
| 21 |
padding: 0 16px;
|
| 22 |
cursor: pointer !important;
|
| 23 |
}
|
| 24 |
-
.external-link,
|
| 25 |
-
.external-link:hover {
|
| 26 |
-
cursor: pointer !important;
|
| 27 |
-
}
|
| 28 |
a {
|
| 29 |
text-decoration: none;
|
| 30 |
}
|
|
@@ -43,12 +38,7 @@
|
|
| 43 |
gap: 24px;
|
| 44 |
border-radius: 8px;
|
| 45 |
">
|
| 46 |
-
<div style="
|
| 47 |
-
display: flex;
|
| 48 |
-
flex-direction: column;
|
| 49 |
-
align-items: center;
|
| 50 |
-
gap: 16px;
|
| 51 |
-
">
|
| 52 |
<div style="display: flex; flex-direction: column; gap: 8px">
|
| 53 |
<h1 style="
|
| 54 |
font-size: 48px;
|
|
@@ -57,7 +47,7 @@
|
|
| 57 |
font-family: 'Trebuchet MS', 'Lucida Sans Unicode',
|
| 58 |
'Lucida Grande', 'Lucida Sans', Arial, sans-serif;
|
| 59 |
">
|
| 60 |
-
MinerU: PDF Extraction &
|
| 61 |
</h1>
|
| 62 |
</div>
|
| 63 |
</div>
|
|
@@ -69,8 +59,9 @@
|
|
| 69 |
color: #fafafa;
|
| 70 |
opacity: 0.8;
|
| 71 |
">
|
| 72 |
-
|
| 73 |
-
|
|
|
|
| 74 |
</p>
|
| 75 |
<style>
|
| 76 |
.link-block {
|
|
@@ -85,7 +76,7 @@
|
|
| 85 |
<div class="publication-links">
|
| 86 |
<!-- Code Link. -->
|
| 87 |
<span class="link-block">
|
| 88 |
-
<a href="https://github.com/opendatalab/MinerU" class="external-link button is-normal is-rounded is-dark"
|
| 89 |
<span class="icon" style="margin-right: 4px">
|
| 90 |
<i class="fab fa-github" style="color: white; margin-right: 4px"></i>
|
| 91 |
</span>
|
|
@@ -95,7 +86,7 @@
|
|
| 95 |
|
| 96 |
<!-- arXiv Link. -->
|
| 97 |
<span class="link-block">
|
| 98 |
-
<a href="https://arxiv.org/abs/2409.18839" class="external-link button is-normal is-rounded is-dark"
|
| 99 |
<span class="icon" style="margin-right: 8px">
|
| 100 |
<i class="fas fa-file" style="color: white"></i>
|
| 101 |
</span>
|
|
@@ -105,7 +96,7 @@
|
|
| 105 |
|
| 106 |
<!-- Homepage Link. -->
|
| 107 |
<span class="link-block">
|
| 108 |
-
<a href="https://mineru.org.cn/home?source=huggingface" class="external-link button is-normal is-rounded is-dark"
|
| 109 |
<span class="icon" style="margin-right: 8px">
|
| 110 |
<i class="fas fa-home" style="color: white"></i>
|
| 111 |
</span>
|
|
@@ -115,7 +106,7 @@
|
|
| 115 |
|
| 116 |
<!-- Client Link. -->
|
| 117 |
<span class="link-block">
|
| 118 |
-
<a href="https://mineru.org.cn/client?source=huggingface" class="external-link button is-normal is-rounded is-dark"
|
| 119 |
<span class="icon" style="margin-right: 8px">
|
| 120 |
<i class="fas fa-download" style="color: white"></i>
|
| 121 |
</span>
|
|
@@ -125,7 +116,7 @@
|
|
| 125 |
|
| 126 |
<!-- Voice Reading Demo Link. -->
|
| 127 |
<span class="link-block">
|
| 128 |
-
<a href="https://mineru.org.cn/voice?source=huggingface" class="external-link button is-normal is-rounded is-dark"
|
| 129 |
<span class="icon" style="margin-right: 8px">
|
| 130 |
<i class="fas fa-volume-up" style="color: white"></i>
|
| 131 |
</span>
|
|
@@ -134,8 +125,6 @@
|
|
| 134 |
</span>
|
| 135 |
</div>
|
| 136 |
</div>
|
| 137 |
-
|
| 138 |
-
<!-- New Demo Links -->
|
| 139 |
</div>
|
| 140 |
</body>
|
| 141 |
</html>
|
|
|
|
| 1 |
<html>
|
| 2 |
<head>
|
|
|
|
| 3 |
<link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.15.4/css/all.css">
|
| 4 |
<style>
|
| 5 |
.link-block {
|
|
|
|
| 20 |
padding: 0 16px;
|
| 21 |
cursor: pointer !important;
|
| 22 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
a {
|
| 24 |
text-decoration: none;
|
| 25 |
}
|
|
|
|
| 38 |
gap: 24px;
|
| 39 |
border-radius: 8px;
|
| 40 |
">
|
| 41 |
+
<div style="display: flex; flex-direction: column; align-items: center; gap: 16px">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
<div style="display: flex; flex-direction: column; gap: 8px">
|
| 43 |
<h1 style="
|
| 44 |
font-size: 48px;
|
|
|
|
| 47 |
font-family: 'Trebuchet MS', 'Lucida Sans Unicode',
|
| 48 |
'Lucida Grande', 'Lucida Sans', Arial, sans-serif;
|
| 49 |
">
|
| 50 |
+
MinerU: AI-Powered PDF Extraction & Voice Reading
|
| 51 |
</h1>
|
| 52 |
</div>
|
| 53 |
</div>
|
|
|
|
| 59 |
color: #fafafa;
|
| 60 |
opacity: 0.8;
|
| 61 |
">
|
| 62 |
+
Built for automation and efficiency, MinerU is an open-source AI solution<br>
|
| 63 |
+
for extracting insights from PDFs, webpages, and e-books—now with voice-powered<br>
|
| 64 |
+
reading capabilities for hands-free access to your documents.
|
| 65 |
</p>
|
| 66 |
<style>
|
| 67 |
.link-block {
|
|
|
|
| 76 |
<div class="publication-links">
|
| 77 |
<!-- Code Link. -->
|
| 78 |
<span class="link-block">
|
| 79 |
+
<a href="https://github.com/opendatalab/MinerU" class="external-link button is-normal is-rounded is-dark">
|
| 80 |
<span class="icon" style="margin-right: 4px">
|
| 81 |
<i class="fab fa-github" style="color: white; margin-right: 4px"></i>
|
| 82 |
</span>
|
|
|
|
| 86 |
|
| 87 |
<!-- arXiv Link. -->
|
| 88 |
<span class="link-block">
|
| 89 |
+
<a href="https://arxiv.org/abs/2409.18839" class="external-link button is-normal is-rounded is-dark">
|
| 90 |
<span class="icon" style="margin-right: 8px">
|
| 91 |
<i class="fas fa-file" style="color: white"></i>
|
| 92 |
</span>
|
|
|
|
| 96 |
|
| 97 |
<!-- Homepage Link. -->
|
| 98 |
<span class="link-block">
|
| 99 |
+
<a href="https://mineru.org.cn/home?source=huggingface" class="external-link button is-normal is-rounded is-dark">
|
| 100 |
<span class="icon" style="margin-right: 8px">
|
| 101 |
<i class="fas fa-home" style="color: white"></i>
|
| 102 |
</span>
|
|
|
|
| 106 |
|
| 107 |
<!-- Client Link. -->
|
| 108 |
<span class="link-block">
|
| 109 |
+
<a href="https://mineru.org.cn/client?source=huggingface" class="external-link button is-normal is-rounded is-dark">
|
| 110 |
<span class="icon" style="margin-right: 8px">
|
| 111 |
<i class="fas fa-download" style="color: white"></i>
|
| 112 |
</span>
|
|
|
|
| 116 |
|
| 117 |
<!-- Voice Reading Demo Link. -->
|
| 118 |
<span class="link-block">
|
| 119 |
+
<a href="https://mineru.org.cn/voice?source=huggingface" class="external-link button is-normal is-rounded is-dark">
|
| 120 |
<span class="icon" style="margin-right: 8px">
|
| 121 |
<i class="fas fa-volume-up" style="color: white"></i>
|
| 122 |
</span>
|
|
|
|
| 125 |
</span>
|
| 126 |
</div>
|
| 127 |
</div>
|
|
|
|
|
|
|
| 128 |
</div>
|
| 129 |
</body>
|
| 130 |
</html>
|
requirements.txt
CHANGED
|
@@ -14,7 +14,7 @@ matplotlib
|
|
| 14 |
ultralytics>=8.3.48
|
| 15 |
paddleocr==2.7.3
|
| 16 |
paddlepaddle-gpu @ https://paddle-whl.bj.bcebos.com/stable/cu118/paddlepaddle-gpu/paddlepaddle_gpu-3.0.0b1-cp310-cp310-linux_x86_64.whl
|
| 17 |
-
struct-eqtable
|
| 18 |
detectron2 @ https://wheels-1251341229.cos.ap-shanghai.myqcloud.com/assets/whl/detectron2/detectron2-0.6-cp310-cp310-linux_x86_64.whl
|
| 19 |
magic-pdf>=1.0.1
|
| 20 |
torch>=2.2.2,<=2.3.1
|
|
@@ -23,4 +23,6 @@ rapid-table>=1.0.3,<2.0.0
|
|
| 23 |
rapidocr-paddle
|
| 24 |
rapidocr-onnxruntime
|
| 25 |
gradio-pdf>=0.0.21
|
| 26 |
-
openai
|
|
|
|
|
|
|
|
|
| 14 |
ultralytics>=8.3.48
|
| 15 |
paddleocr==2.7.3
|
| 16 |
paddlepaddle-gpu @ https://paddle-whl.bj.bcebos.com/stable/cu118/paddlepaddle-gpu/paddlepaddle_gpu-3.0.0b1-cp310-cp310-linux_x86_64.whl
|
| 17 |
+
struct-eqtable>=0.3.2
|
| 18 |
detectron2 @ https://wheels-1251341229.cos.ap-shanghai.myqcloud.com/assets/whl/detectron2/detectron2-0.6-cp310-cp310-linux_x86_64.whl
|
| 19 |
magic-pdf>=1.0.1
|
| 20 |
torch>=2.2.2,<=2.3.1
|
|
|
|
| 23 |
rapidocr-paddle
|
| 24 |
rapidocr-onnxruntime
|
| 25 |
gradio-pdf>=0.0.21
|
| 26 |
+
openai>=1.64.0
|
| 27 |
+
playsound
|
| 28 |
+
gTTS
|
utils.py
CHANGED
|
@@ -5,6 +5,7 @@ import base64
|
|
| 5 |
import re
|
| 6 |
import logging
|
| 7 |
|
|
|
|
| 8 |
def compress_directory_to_zip(directory_path, output_zip_path):
|
| 9 |
"""
|
| 10 |
Compresses the specified directory into a ZIP file.
|
|
|
|
| 5 |
import re
|
| 6 |
import logging
|
| 7 |
|
| 8 |
+
|
| 9 |
def compress_directory_to_zip(directory_path, output_zip_path):
|
| 10 |
"""
|
| 11 |
Compresses the specified directory into a ZIP file.
|