如何解决使用 Amazon Textract 提取 PDF 文件时的失败:请求包含不受支持的文档格式
import * as _ from 'lodash';
import * as aws from "aws-sdk";
import aws_config from './config';
// Apply the credentials and region exported by ./config to the shared AWS SDK configuration.
aws.config.update({
  accessKeyId: aws_config.awsAccesskeyID,
  secretAccessKey: aws_config.awsSecretAccessKey,
  region: aws_config.awsRegion,
});

// Textract client used by textractScan to issue analyzeDocument requests.
const textract = new aws.Textract();
/**
 * Collects the text of a block from the blocks referenced by its CHILD relationships.
 *
 * Every child id is looked up in `blocksMap`. A WORD child contributes its
 * `Text`; a SELECTION_ELEMENT child contributes "X" when its `SelectionStatus`
 * is "SELECTED". Contributions are separated by a single space.
 *
 * @param {object} result - Block whose text is wanted. A value without an own
 *   `Relationships` property (including undefined) yields an empty string.
 * @param {object} blocksMap - Lookup from block id to block.
 * @returns {string} The collected text with surrounding whitespace trimmed.
 */
const getText = (result, blocksMap) => {
  if (!_.has(result, "Relationships")) {
    return "";
  }

  const pieces = [];
  for (const relationship of result.Relationships) {
    if (relationship.Type !== "CHILD") {
      continue;
    }
    for (const childId of relationship.Ids) {
      const child = blocksMap[childId];
      if (child.BlockType === "WORD") {
        // Template literal keeps the original string coercion of Text.
        pieces.push(`${child.Text}`);
      } else if (
        child.BlockType === "SELECTION_ELEMENT" &&
        child.SelectionStatus === "SELECTED"
      ) {
        pieces.push("X");
      }
    }
  }
  return pieces.join(" ").trim();
};
/**
 * Finds the VALUE block that a KEY block points to.
 *
 * Walks the key block's VALUE relationships in order and returns the block for
 * the first referenced id that exists in `valueMap`. Ids that are not in
 * `valueMap` are skipped instead of ending the search (the earlier
 * `Array.prototype.every` version stopped after the first id because its
 * callback returned undefined for non-matching ids).
 *
 * @param {object} keyBlock - KEY block; a block without `Relationships` is tolerated.
 * @param {object} valueMap - Lookup from block id to block.
 * @returns {object|undefined} The matching block, or undefined when none is found.
 */
const findValueBlock = (keyBlock, valueMap) => {
  const relationships = (keyBlock && keyBlock.Relationships) || [];
  for (const relationship of relationships) {
    if (relationship.Type !== "VALUE") {
      continue;
    }
    for (const valueId of relationship.Ids || []) {
      // Own-property check so inherited names never count as a hit.
      if (valueMap != null && Object.prototype.hasOwnProperty.call(valueMap, valueId)) {
        return valueMap[valueId];
      }
    }
  }
  return undefined;
};
/**
 * Builds a plain object mapping each KEY block's text to its VALUE block's text.
 *
 * For every block in `keyMap`, the value block is resolved with
 * `findValueBlock` and both texts are read with `getText`. When two keys
 * produce the same text, the later one overwrites the earlier entry.
 *
 * @param {object} keyMap - Lookup from block id to KEY block.
 * @param {object} valueMap - Lookup from block id to VALUE block.
 * @param {object} blockMap - Lookup from block id to block, used to resolve child text.
 * @returns {object} Key text mapped to value text.
 */
const getkeyvalueRelationship = (keyMap, valueMap, blockMap) =>
  _.values(keyMap).reduce((pairs, keyBlock) => {
    const matchedValue = findValueBlock(keyBlock, valueMap);
    const label = getText(keyBlock, blockMap);
    pairs[label] = getText(matchedValue, blockMap);
    return pairs;
  }, {});
/**
 * Indexes Textract blocks by id and splits KEY_VALUE_SET blocks into keys and values.
 *
 * Every block is stored in `blockMap`. A KEY_VALUE_SET block whose
 * `EntityTypes` array contains "KEY" is also stored in `keyMap`; any other
 * KEY_VALUE_SET block is stored in `valueMap`.
 *
 * @param {Array<object>} blocks - Blocks to index; each is read for `Id`,
 *   `BlockType` and `EntityTypes`.
 * @returns {{keyMap: object, valueMap: object, blockMap: object}} The three
 *   id-keyed lookups. `valueMap` is included so callers can resolve value
 *   blocks (it was previously built but not returned).
 */
const getkeyvalueMap = blocks => {
  const keyMap = {};
  const valueMap = {};
  const blockMap = {};

  blocks.forEach(block => {
    const blockId = block.Id;
    blockMap[blockId] = block;
    if (block.BlockType !== "KEY_VALUE_SET") {
      return;
    }
    // A missing or non-array EntityTypes counts as "not a key".
    const isKey = Array.isArray(block.EntityTypes) && block.EntityTypes.includes("KEY");
    if (isKey) {
      keyMap[blockId] = block;
    } else {
      valueMap[blockId] = block;
    }
  });

  return { keyMap, valueMap, blockMap };
};
/**
 * Runs Textract form analysis on a document and returns its key/value pairs.
 *
 * Sends `buffer` as `Document.Bytes` to `analyzeDocument` with the FORMS
 * feature, then converts the returned blocks into an object of key text to
 * value text. Errors from the Textract request are not caught here and reject
 * the returned promise.
 *
 * NOTE(review): which document formats the synchronous AnalyzeDocument call
 * accepts depends on the Textract service/SDK version in use — confirm PDF
 * support there before sending PDFs through this function.
 *
 * @param {*} buffer - Document bytes, passed through unchanged as `Document.Bytes`.
 * @returns {Promise<object|undefined>} Key/value pairs, or undefined when the
 *   response contains no blocks.
 */
const textractScan = async buffer => {
  const params = {
    Document: {
      /* required */
      Bytes: buffer,
    },
    FeatureTypes: ["FORMS"],
  };

  const data = await textract.analyzeDocument(params).promise();
  if (!data || !data.Blocks) {
    // in case no blocks are found return undefined
    return undefined;
  }

  // getkeyvalueRelationship takes (keyMap, valueMap, blockMap); passing only two
  // arguments left blockMap undefined. blockMap indexes every block by id, so it
  // is a valid stand-in whenever getkeyvalueMap does not supply valueMap.
  const { keyMap, blockMap, valueMap = blockMap } = getkeyvalueMap(data.Blocks);
  return getkeyvalueRelationship(keyMap, valueMap, blockMap);
};
export default textractScan;
// Caller-side snippet: parse the incoming event, log the parsed body, and pass
// the content of the first entry in body.files to textractScan.
// `parser` and `event` are defined outside this snippet.
// NOTE(review): presumably body.files[0].content holds the raw bytes of the
// uploaded file — confirm it is an unmodified binary buffer (not a re-encoded
// string) before it is sent as Document.Bytes.
let body = await parser.parse(event);
console.log(body);
const results = await textractScan(body.files[0].content);
但是,上面的调用会引发以下错误:
失败:请求包含不受支持的文档格式
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。