微信公众号搜"智元新知"关注
微信扫一扫可直接关注哦!

PDF 文件提取失败:请求包含不受支持的文档格式

如何解决 PDF 文件提取时出现的错误"失败:请求包含不受支持的文档格式"

我正在按照这篇在线教程进行操作:https://medium.com/@hatemalimam/extract-text-and-data-from-any-document-using-amazon-textract-in-node-js-9a72136c6e64

import * as _ from 'lodash';
import * as aws from "aws-sdk";
import aws_config from './config';

// Collect the credentials and region from the local config module, then
// apply them to the SDK's global configuration.
const awsSettings = {
  accessKeyId: aws_config.awsAccesskeyID,
  secretAccessKey: aws_config.awsSecretAccessKey,
  region: aws_config.awsRegion
};
aws.config.update(awsSettings);

// Textract client used by textractScan below; constructed after the config update.
const textract = new aws.Textract();

/**
 * Builds the text of a block from its CHILD blocks.
 *
 * WORD children contribute their `Text`; SELECTION_ELEMENT children whose
 * `SelectionStatus` is "SELECTED" contribute an "X". Each piece is followed
 * by a single space and the final string is trimmed.
 *
 * @param {object} [result] - Block whose `Relationships` are walked. May be
 *   undefined (e.g. when no value block was found for a key).
 * @param {object} blocksMap - Lookup from block Id to block.
 * @returns {string} The collected text, or "" when there is nothing to collect.
 */
const getText = (result, blocksMap) => {
  // No block, or no usable relationship list: nothing to read.
  if (!result || !Array.isArray(result.Relationships)) {
    return "";
  }

  let text = "";

  result.Relationships.forEach(relationship => {
    if (relationship.Type !== "CHILD") {
      return;
    }

    relationship.Ids.forEach(childId => {
      const child = blocksMap[childId];
      // A child id without a matching entry in the map is skipped instead of
      // throwing on `undefined.BlockType`.
      if (!child) {
        return;
      }

      if (child.BlockType === "WORD") {
        text += `${child.Text} `;
      } else if (
        child.BlockType === "SELECTION_ELEMENT" &&
        child.SelectionStatus === "SELECTED"
      ) {
        text += "X ";
      }
    });
  });

  return text.trim();
};

/**
 * Finds the value block that a key block points to.
 *
 * Walks the key block's VALUE relationships and returns the first referenced
 * block that is present in `valueMap`.
 *
 * @param {object} keyBlock - Key block whose `Relationships` are inspected.
 * @param {object} valueMap - Lookup from block Id to value block.
 * @returns {object|undefined} The matching value block, or undefined if none
 *   of the referenced ids is in `valueMap`.
 */
const findValueBlock = (keyBlock, valueMap) => {
  // A key without relationships (or no map to look in) has no value block.
  if (!keyBlock || !Array.isArray(keyBlock.Relationships) || !valueMap) {
    return undefined;
  }

  for (const relationship of keyBlock.Relationships) {
    if (relationship.Type !== "VALUE") {
      continue;
    }

    // Check every referenced id. The previous `every` callback returned
    // undefined on a miss, which stopped the scan after the first id.
    const matchId = relationship.Ids.find(valueId =>
      Object.prototype.hasOwnProperty.call(valueMap, valueId)
    );
    if (matchId !== undefined) {
      return valueMap[matchId];
    }
  }

  return undefined;
};

/**
 * Pairs the text of every key block with the text of its value block.
 *
 * @param {object} keyMap - Lookup from block Id to key block.
 * @param {object} valueMap - Lookup from block Id to value block.
 * @param {object} blockMap - Lookup from block Id to block, used to resolve text.
 * @returns {object} Plain object mapping key text to value text. When two
 *   keys produce the same text, the later one overwrites the earlier one.
 */
const getkeyvalueRelationship = (keyMap, valueMap, blockMap) => {
  const pairs = {};

  for (const keyBlock of _.values(keyMap)) {
    const matchedValue = findValueBlock(keyBlock, valueMap);
    const label = getText(keyBlock, blockMap);
    pairs[label] = getText(matchedValue, blockMap);
  }

  return pairs;
};

/**
 * Indexes blocks by Id and splits the KEY_VALUE_SET blocks into keys and values.
 *
 * A KEY_VALUE_SET block whose `EntityTypes` contains "KEY" goes into `keyMap`;
 * every other KEY_VALUE_SET block goes into `valueMap`. All blocks, whatever
 * their type, go into `blockMap`.
 *
 * @param {object[]} blocks - Blocks as found in `data.Blocks` of the
 *   analyzeDocument response.
 * @returns {{keyMap: object, valueMap: object, blockMap: object}} Lookups
 *   keyed by block Id.
 */
const getkeyvalueMap = blocks => {
  const keyMap = {};
  const valueMap = {};
  const blockMap = {};

  blocks.forEach(block => {
    const blockId = block.Id;
    blockMap[blockId] = block;

    if (block.BlockType !== "KEY_VALUE_SET") {
      return;
    }

    const isKey =
      Array.isArray(block.EntityTypes) && block.EntityTypes.includes("KEY");
    if (isKey) {
      keyMap[blockId] = block;
    } else {
      valueMap[blockId] = block;
    }
  });

  // valueMap must be returned as well: it was built but dropped before, so the
  // caller had nothing to resolve values with.
  return { keyMap, valueMap, blockMap };
};

/**
 * Sends a document to Textract with the FORMS feature and returns the
 * key/value pairs found in it.
 *
 * NOTE(review): the service rejects document formats it does not support for
 * this call (the AWS API reference lists UnsupportedDocumentException) —
 * confirm which formats are accepted for your service/SDK version.
 *
 * @param {Buffer} buffer - Document bytes, passed as `Document.Bytes`.
 * @returns {Promise<object|undefined>} Object mapping key text to value text,
 *   or undefined when the response contains no blocks. Rejects with whatever
 *   error the SDK request rejects with.
 */
const textractScan = async buffer => {
  const params = {
    Document: {
      /* required */
      Bytes: buffer
    },
    FeatureTypes: ["FORMS"]
  };

  const data = await textract.analyzeDocument(params).promise();

  // In case no blocks are found return undefined.
  if (!data || !data.Blocks) {
    return undefined;
  }

  // Pass all three maps in the order getkeyvalueRelationship declares them
  // (keyMap, valueMap, blockMap); previously valueMap was omitted, shifting
  // blockMap into its place and leaving blockMap undefined.
  const { keyMap, valueMap, blockMap } = getkeyvalueMap(data.Blocks);
  return getkeyvalueRelationship(keyMap, valueMap, blockMap);
};
export default textractScan;

在另一个文件中:

// Parse the incoming event into `body` (`parser` and `event` come from outside this snippet).
let body = await parser.parse(event);
  console.log(body);
  // Hand the content of the first parsed file to textractScan.
  // NOTE(review): presumably `content` holds the raw file bytes — confirm against the parser.
  const results = await textractScan(body.files[0].content);

但是会引发以下错误

失败:请求的文档格式不受支持

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。