如何解决使用pdfjs将PDF转换为HTML/XML结构(structure)的问题
我找到了下面这段代码,它以HTML(而非图像)的形式显示PDF。我可以获取除最后一页之外的所有页面,但我不清楚它的工作原理,所以无法把PDF的最后一页也转换为类似HTML的XML。我的目标只是得到一个可以从中检索数据的数据结构。
我无法在日志中得到最后一页的结果。因此,如果PDF只有1页,就完全不会被转换。我希望把PDF中的数据转换为类似HTML(XML)的结构。
// Shared viewer state. Kept as `var` globals because the functions and event
// handlers in this script read and write them directly.

// PDF document handle; assigned once a document has been loaded.
var __PDF_DOC;
// Number of the page most recently requested through showPage().
var __CURRENT_PAGE;
// Page count of the loaded document.
var __TOTAL_PAGES;
// 1 while a page is being rendered, 0 otherwise.
var __PAGE_RENDERING_IN_PROGRESS = 0;
// The <canvas> DOM element the pages are drawn on.
var __CANVAS = $('#pdf-canvas').get(0);
// Accumulated text-layer markup collected by sor().
var _x = "";
// Number of times sor() has run.
var __o = 0;
// 2D drawing context of the canvas above.
var __CANVAS_CTX = __CANVAS.getContext('2d');
/**
 * Load a PDF document and start displaying it from page 1.
 *
 * Resets the text-extraction state first, so that a document loaded after an
 * earlier one does not inherit the earlier call counter or markup.
 *
 * @param {string} pdf_url - URL of the PDF (the caller passes an object URL).
 */
function showPDF(pdf_url) {
  // sor() compares __o against the page numbers and appends to _x; both must
  // start from scratch for every document.
  __o = 0;
  _x = "";

  $("#pdf-loader").show();

  PDFJS.getDocument({ url: pdf_url }).then(function(pdf_doc) {
    __PDF_DOC = pdf_doc;
    __TOTAL_PAGES = __PDF_DOC.numPages;

    // Hide the pdf loader and show the pdf container in HTML
    $("#pdf-loader").hide();
    $("#pdf-contents").show();
    $("#pdf-total-pages").text(__TOTAL_PAGES);

    // Show the first page
    showPage(1);
  }).catch(function(error) {
    // On failure bring the upload button back so the user can retry
    $("#pdf-loader").hide();
    $("#upload-button").show();
    alert(error.message);
  });
}
/**
 * Render one page of the loaded document onto the canvas and rebuild the
 * text layer on top of it.
 *
 * While the page loads, the Prev/Next buttons are disabled and a loading
 * message replaces the canvas. If any step fails, the loading UI is undone and
 * the error is reported instead of leaving the viewer stuck.
 *
 * @param {number} page_no - 1-based number of the page to display.
 */
function showPage(page_no) {
  __PAGE_RENDERING_IN_PROGRESS = 1;
  __CURRENT_PAGE = page_no;

  // Disable Prev & Next buttons while the page is being loaded
  $("#pdf-next,#pdf-prev").attr('disabled', 'disabled');

  // While the page is being rendered hide the canvas and show a loading message
  $("#pdf-canvas").hide();
  $("#page-loader").show();

  // Update current page in HTML
  $("#pdf-current-page").text(page_no);

  // Fetch the page
  __PDF_DOC.getPage(page_no).then(function(page) {
    // The canvas has a fixed width, so scale the page to fit it
    var scale_required = __CANVAS.width / page.getViewport(1).width;
    var viewport = page.getViewport(scale_required);

    // Set canvas height
    __CANVAS.height = viewport.height;

    var renderContext = {
      canvasContext: __CANVAS_CTX,
      viewport: viewport
    };

    // Returned so that a failure in rendering or text extraction reaches the
    // catch handler at the end of the outer chain.
    return page.render(renderContext).then(function() {
      __PAGE_RENDERING_IN_PROGRESS = 0;

      // Re-enable Prev & Next buttons
      $("#pdf-next,#pdf-prev").removeAttr('disabled');

      // Show the canvas and hide the page loader
      $("#pdf-canvas").show();
      $("#page-loader").hide();

      // Fetch the text contents once the page is drawn on the canvas
      return page.getTextContent();
    }).then(function(textContent) {
      // Get canvas offset
      var canvas_offset = $("#pdf-canvas").offset();

      // sor() must run before the text layer is cleared: it reads the markup
      // that is still in #text-layer at this point.
      sor();

      // Clear HTML for text layer
      $("#text-layer").html('');

      // Position the text layer exactly over the canvas
      $("#text-layer").css({
        left: canvas_offset.left + 'px',
        top: canvas_offset.top + 'px',
        height: __CANVAS.height + 'px',
        width: __CANVAS.width + 'px'
      });

      // Render the text over the pdf canvas
      PDFJS.renderTextLayer({
        textContent: textContent,
        container: $("#text-layer").get(0),
        viewport: viewport,
        textDivs: []
      });
    });
  }).catch(function(error) {
    // Undo the loading state so the viewer stays usable, then report the
    // failure the same way showPDF() does.
    __PAGE_RENDERING_IN_PROGRESS = 0;
    $("#page-loader").hide();
    $("#pdf-next,#pdf-prev").removeAttr('disabled');
    alert(error && error.message ? error.message : String(error));
  });
}
// Clicking the visible button forwards the click to the hidden #file-to-upload
// input, which avoids showing the plain native file control.
$("#upload-button").on('click', function() {
  $("#pdf-main-container").show();
  $("#file-to-upload").trigger('click');
});

// When the user chooses a PDF file
$("#file-to-upload").on('change', function() {
  var file = $("#file-to-upload").get(0).files[0];

  // The selection can be empty (for example, some browsers fire `change` when
  // a previous selection is cancelled); there is nothing to load then.
  if (!file) {
    return;
  }

  // Validate whether PDF
  if (file.type !== 'application/pdf') {
    alert('Error : Not a PDF');
    return;
  }

  $("#upload-button").hide();

  // Send the object url of the pdf
  showPDF(URL.createObjectURL(file));
});

// Previous page of the PDF (no-op on the first page)
$("#pdf-prev").on('click', function() {
  if (__CURRENT_PAGE > 1) {
    showPage(--__CURRENT_PAGE);
  }
});

// Next page of the PDF (no-op on the last page)
$("#pdf-next").on('click', function() {
  if (__CURRENT_PAGE < __TOTAL_PAGES) {
    showPage(++__CURRENT_PAGE);
  }
});
/**
 * Collect the text-layer markup of every page and log it once complete.
 *
 * showPage() calls this right before it replaces the contents of #text-layer,
 * so the markup read on each call belongs to the page rendered by the
 * PREVIOUS showPage() call (and is empty on the very first call). To capture
 * the last page as well, the last page is requested one extra time; the call
 * triggered by that extra render picks up the last page's markup and then
 * logs the result.
 *
 * Reads and updates the globals __o (call counter), _x (accumulated markup)
 * and __CURRENT_PAGE.
 *
 * NOTE(review): this relies on the text layer of a page being fully built
 * before the next page finishes rendering - confirm for the PDF.js build in use.
 */
function sor() {
  ++__o;

  // Exactly one append per call: whatever is in the text layer right now.
  _x = _x + $("#text-layer").html();

  if (__o < __TOTAL_PAGES) {
    // More pages to go: render the next one.
    showPage(++__CURRENT_PAGE);
  } else if (__o === __TOTAL_PAGES) {
    // The last page has just been drawn but its text layer is not built yet;
    // render it once more so the next call can read its markup.
    showPage(__TOTAL_PAGES);
  } else {
    // Every page has been collected: drop the inline style attributes and
    // log the remaining markup.
    var _y = _x.replace(/(style="([^>]+)")/gi, "");
    $("#text-layer,#pdf-canvas").html(" ");
    $("#upload-button").show();
    $("#pdf-main-container").show();
    console.log(_y);
  }
}
/* Visible button that stands in for the hidden file input */
#upload-button {
  width: 150px;
  display: block;
  margin: 20px auto;
}

/* Native file input stays hidden; it is opened from script */
#file-to-upload {
  display: none;
}

/* Fixed-width, centred wrapper around the viewer */
#pdf-main-container {
  width: 400px;
  margin: 20px auto;
}

/* "Loading document" message, hidden until shown from script */
#pdf-loader {
  display: none;
  text-align: center;
  color: #999999;
  font-size: 13px;
  line-height: 100px;
  height: 100px;
}

/* Viewer body, hidden until shown from script */
#pdf-contents {
  display: none;
}

/* Bar holding the navigation buttons and the page counter */
#pdf-Meta {
  overflow: hidden;
  margin: 0 0 20px 0;
  z-index: 2;
  position: relative;
}

#pdf-buttons {
  float: left;
}

#page-count-container {
  float: right;
}

/* The counters are <div>s in the markup; keep them on one line */
#pdf-current-page {
  display: inline;
}

#pdf-total-pages {
  display: inline;
}

/* rgba() needs all four components; with fewer the border is dropped */
#pdf-canvas {
  border: 1px solid rgba(0,0,0,0.2);
  box-sizing: border-box;
}

/* "Loading page" message, hidden until shown from script */
#page-loader {
  height: 100px;
  line-height: 100px;
  text-align: center;
  display: none;
  color: #999999;
  font-size: 13px;
}

/* Absolutely positioned overlay that holds the page text */
#text-layer {
  position: absolute;
  left: 0;
  top: 0;
  right: 0;
  bottom: 0;
  overflow: hidden;
  opacity: 0.2;
  line-height: 1.0;
}

/* Individual text runs: transparent, absolutely positioned */
#text-layer > div {
  color: transparent;
  position: absolute;
  white-space: pre;
  cursor: text;
  transform-origin: 0% 0%;
}
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta name="viewport" content="width=device-width,initial-scale = 1.0,maximum-scale = 1.0,user-scalable=no">
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.2.4/jquery.min.js"></script>
<script src="https://intaxing.in/js/pdf.js"></script>
<script src="https://intaxing.in/js/pdf.worker.js"></script>
</head>
<body>
<!-- Visible trigger for the hidden file input below -->
<button id="upload-button">Select PDF</button>
<input type="file" id="file-to-upload" accept="application/pdf" />
<div id="pdf-main-container">
  <div id="pdf-loader">Loading document ...</div>
  <div id="pdf-contents">
    <!-- The id keeps its existing spelling because the stylesheet targets it -->
    <div id="pdf-Meta">
      <div id="pdf-buttons">
        <button id="pdf-prev">Previous</button>
        <button id="pdf-next">Next</button>
      </div>
      <div id="page-count-container">Page <div id="pdf-current-page"></div> of <div id="pdf-total-pages"></div></div>
    </div>
    <canvas id="pdf-canvas" width="400"></canvas>
    <div id="text-layer"></div>
    <div id="page-loader">Loading page ...</div>
  </div>
</div>
</body>
</html>
解决方法
我只是调用 showPage(),让最后一页被加载两次。
// Page-advance logic from the answer; presumably meant to replace the
// if/else at the end of sor() - confirm against the full function.
// __o counts sor() calls, so it is compared against the page count here.

// Fewer calls than pages so far: request the next page.
if(__o < __TOTAL_PAGES ){
showPage(++__CURRENT_PAGE);
}
// Call count equals the page count: request the last page one more time.
else if(__o == __TOTAL_PAGES){
showPage(__TOTAL_PAGES);
}
// Call count exceeds the page count: finish up and log the result.
else{
// Remove the style="..." attributes from the accumulated markup in _x.
var _y= _x.replace(/(style="([^>]+)")/gi,"");
// Replace the contents of the text layer and the canvas element with a blank.
$("#text-layer,#pdf-canvas").html(" ");
$("#upload-button").show();
$("#pdf-main-container").show();
console.log(_y);
}
这样最后一页就会被加载两次。我只是添加了上面这段代码,现在日志输出的 _y 中已经包含了每一页的内容。
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。