微信公众号搜"智元新知"关注
微信扫一扫可直接关注哦!

通过将行拆分为现有列来扩展行

如何解决通过将行拆分为现有列来扩展行

我使用 tabula-py,通过以下代码从 PDF 中读取表格:

table = tabula.read_pdf(files[0],pages = 'all',multiple_tables = True,stream = True)

有时两列中的值会合并为一列(由单个空格分隔)。例如:

col0 col1 col2 col3 col4 col5 col6 col7
a1 b1 c1 d1 e1 f1 g1 h1 NaN NaN
a2 b2 c2 d2 e2 f2 g2 h2

如何将值重新调整到正确的列中,以获得:

col0 col1 col2 col3 col4 col5 col6 col7
a1 b1 c1 d1 e1 f1 g1 h1
a2 b2 c2 d2 e2 f2 g2 h2

解决方法

  • 以空格分隔输出
  • 替换第 1 步中引用的字符串
  • 以空格分隔回读
col0 col1 col2 col3 col4 col5 col6 col7
  a1   b1   c1   d1   e1   f1   g1   h1
  a2   b2   c2   d2   e2   f2   g2   h2

输出

#include <iostream>
#include <glad/glad.h>
#include <GLFW/glfw3.h>

// GLSL 3.30 vertex shader for the scene pass: takes a vec3 position at
// attribute location 0 and writes it to gl_Position with w = 1.0.
// The trailing "\0" piece is kept so the literal's bytes match the previous
// piecewise form exactly (the compiler appends its own terminator as well).
const char *vertexShaderSource =
    R"glsl(#version 330 core
layout (location = 0) in vec3 aPos;
void main()
{
   gl_Position = vec4(aPos.x,aPos.y,aPos.z,1.0);
})glsl"
    "\0";
// GLSL 3.30 fragment shader for the scene pass: writes the constant colour
// (1.0, 0.5, 0.2, 1.0) to the FragColor output for every fragment.
// The trailing "\0" piece is kept so the literal's bytes match the previous
// piecewise form exactly (the compiler appends its own terminator as well).
const char *fragmentShaderSource =
    R"glsl(#version 330 core
out vec4 FragColor;
void main()
{
   FragColor = vec4(1.0f,0.5f,0.2f,1.0f);
}
)glsl"
    "\0";

// GLSL 3.30 vertex shader for the post-process pass: takes a vec2 position
// (location 0) and a vec2 texture coordinate (location 1), forwards the
// texture coordinate as `texCoord`, and emits the position with z = 0, w = 1.
// The trailing "\0" piece is kept so the literal's bytes match the previous
// piecewise form exactly (the compiler appends its own terminator as well).
const char *postProcessvertexShaderSource =
    R"glsl(#version 330 core
layout (location = 0) in vec2 position;
layout (location = 1) in vec2 inTexCoord;
out vec2 texCoord;
void main(){
    texCoord = inTexCoord;
    gl_Position = vec4(position.x,position.y,0.0f,1.0f);
}
)glsl"
    "\0";

// GLSL 3.30 fragment shader for the post-process (resolve) pass.
//
// Reads the multisampled colour texture bound to `screencapture` and outputs
// the unweighted mean of samples 0..3 of the texel under the current fragment.
// `viewport_width` / `viewport_height` scale the [0,1] texture coordinate to
// integer texel indices, because texelFetch addresses texels, not normalised
// coordinates.
//
// Fix: for a sampler2DMS, texelFetch takes (sampler, ivec2 texel, int sample).
// Samples 1..3 previously omitted the texel coordinate, which is not a valid
// overload and prevents the shader from compiling.
//
// NOTE(review): the shader hard-codes four samples; presumably the texture it
// reads must be allocated with at least four samples — confirm against the
// texture setup code.
const char *postProcessFragmentShaderSource = "#version 330 core\n"
"out vec4 fragmentColor;\n"
"in vec2 texCoord;\n"
"//notice the sampler\n"
"uniform sampler2DMS screencapture;\n"
"uniform int viewport_width;\n"
"uniform int viewport_height;\n"

"void main(){\n"
"   //texelFetch requires a vec of ints for indexing (since we're indexing pixel locations)\n"
"   //texture coords is range [0,1],we need range [0,viewport_dim].\n"
"   //texture coords are essentially a percentage,so we can multiply text coords by total size \n"
"   ivec2 vpCoords = ivec2(viewport_width,viewport_height);\n"
"   vpCoords.x = int(vpCoords.x * texCoord.x); \n"
"   vpCoords.y = int(vpCoords.y * texCoord.y);\n"
"   //do a simple average since this is just a demo\n"
"   vec4 sample1 = texelFetch(screencapture,vpCoords,0);\n"
"   vec4 sample2 = texelFetch(screencapture,vpCoords,1);\n"
"   vec4 sample3 = texelFetch(screencapture,vpCoords,2);\n"
"   vec4 sample4 = texelFetch(screencapture,vpCoords,3);\n"
"   fragmentColor = vec4(sample1 + sample2 + sample3 + sample4) / 4.0f;\n"
"}\n\0";

int main()
{
    int width = 800;
    int height = 600;
    
    glfwInit();
    glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR,3);
    glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR,3);
    glfwWindowHint(GLFW_OPENGL_PROFILE,GLFW_OPENGL_CORE_PROFILE);
    glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT,GL_TRUE);

    GLFWwindow* window = glfwCreateWindow(width,height,"OpenglContext",nullptr,nullptr);
    if (!window)
    {
        std::cerr << "failed to create window" << std::endl;
        exit(-1);
    }
    glfwMakeContextCurrent(window);

    if (!gladLoadGLLoader((GLADloadproc)glfwGetProcAddress))
    {
        std::cerr << "failed to initialize glad with processes " << std::endl;
        exit(-1);
    }

    glfwSetInputMode(window,GLFW_CURSOR,GLFW_CURSOR_DISABLED);

    int samples = 4;
    float quadVerts[] = {
        -1.0,-1.0,0.0,1.0,1.0
    };

    GLuint postVAO;
    glGenVertexArrays(1,&postVAO);
    glBindVertexArray(postVAO);

    GLuint postVBO;
    glGenBuffers(1,&postVBO);
    glBindBuffer(GL_ARRAY_BUFFER,postVBO);
    glBufferData(GL_ARRAY_BUFFER,sizeof(quadVerts),quadVerts,GL_STATIC_DRAW);

    glVertexAttribPointer(0,2,GL_FLOAT,GL_FALSE,4 * sizeof(float),reinterpret_cast<void*>(0));
    glEnableVertexAttribArray(0);

    glVertexAttribPointer(1,reinterpret_cast<void*>(2 * sizeof(float)));
    glEnableVertexAttribArray(1);

    glBindVertexArray(0);


    GLuint msaaFB;
    glGenFramebuffers(1,&msaaFB);
    glBindFramebuffer(GL_FRAMEBUFFER,msaaFB); //bind both read/write to the target framebuffer

    GLuint texMutiSampleColor;
    glGenTextures(1,&texMutiSampleColor);
    glBindTexture(GL_TEXTURE_2D_MULTISAMPLE,texMutiSampleColor);
    glTexImage2DMultisample(GL_TEXTURE_2D_MULTISAMPLE,samples,GL_RGB,width,GL_TRUE);
    glBindTexture(GL_TEXTURE_2D_MULTISAMPLE,0);
    glFramebufferTexture2D(GL_FRAMEBUFFER,GL_COLOR_ATTACHMENT0,GL_TEXTURE_2D_MULTISAMPLE,texMutiSampleColor,0);

    glBindFramebuffer(GL_FRAMEBUFFER,0);


    // vertex shader
    unsigned int vertexShader = glCreateShader(GL_VERTEX_SHADER);
    glShaderSource(vertexShader,1,&vertexShaderSource,NULL);
    glCompileShader(vertexShader);
    // check for shader compile errors

    // fragment shader
    unsigned int fragmentShader = glCreateShader(GL_FRAGMENT_SHADER);
    glShaderSource(fragmentShader,&fragmentShaderSource,NULL);
    glCompileShader(fragmentShader);
    // check for shader compile errors

    // link shaders
    unsigned int shaderProgram = glCreateProgram();
    glAttachShader(shaderProgram,vertexShader);
    glAttachShader(shaderProgram,fragmentShader);
    glLinkProgram(shaderProgram);
    // check for linking errors

    glDeleteShader(vertexShader);
    glDeleteShader(fragmentShader);


    //postprocess vertex shader
    unsigned int postProcessVertexShader = glCreateShader(GL_VERTEX_SHADER);
    glShaderSource(postProcessVertexShader,&postProcessvertexShaderSource,NULL);
    glCompileShader(postProcessVertexShader);

    // postprocess fragment shader
    unsigned int postProcessFragmentShader = glCreateShader(GL_FRAGMENT_SHADER);
    glShaderSource(postProcessFragmentShader,&postProcessFragmentShaderSource,NULL);
    glCompileShader(postProcessFragmentShader);
    // check for shader compile errors

    // link shaders
    unsigned int postProcessShaderProgram = glCreateProgram();
    glAttachShader(postProcessShaderProgram,postProcessVertexShader);
    glAttachShader(postProcessShaderProgram,postProcessFragmentShader);
    glLinkProgram(postProcessShaderProgram);
    // check for linking errors

    glDeleteShader(postProcessVertexShader);
    glDeleteShader(postProcessFragmentShader);

    glUseProgram(postProcessShaderProgram);
    glUniform1i(glGetUniformLocation(postProcessShaderProgram,"screencapture"),0); 
    glUniform1i(glGetUniformLocation(postProcessShaderProgram,"viewport_width"),width); 
    glUniform1i(glGetUniformLocation(postProcessShaderProgram,"viewport_height"),height); 

    float vertices[] = {
        -0.5f,-0.5f,0.0f 
    }; 

    unsigned int VBO,VAO;
    glGenVertexArrays(1,&VAO);
    glGenBuffers(1,&VBO);
    glBindVertexArray(VAO);
    glBindBuffer(GL_ARRAY_BUFFER,VBO);
    glBufferData(GL_ARRAY_BUFFER,sizeof(vertices),vertices,GL_STATIC_DRAW);
    glVertexAttribPointer(0,3,3 * sizeof(float),(void*)0);
    glEnableVertexAttribArray(0);
    glBindBuffer(GL_ARRAY_BUFFER,0); 
    glBindVertexArray(0); 

    bool use_msaa = true;

    while (!glfwWindowShouldClose(window))
    {

        if (glfwGetKey(window,GLFW_KEY_ESCAPE) == GLFW_PRESS)
        {
            glfwSetWindowShouldClose(window,true);
        }

        if (glfwGetKey(window,GLFW_KEY_R) == GLFW_PRESS)
            use_msaa = true;
        if (glfwGetKey(window,GLFW_KEY_T) == GLFW_PRESS)
            use_msaa = false;     

        glClearColor(0.0f,1.0f);
        glClear(GL_COLOR_BUFFER_BIT);

        if (use_msaa) {
            glBindFramebuffer(GL_FRAMEBUFFER,msaaFB);
        }

        glClearColor(0.0f,1.0f);
        glClear(GL_COLOR_BUFFER_BIT);

        // draw our first triangle
        glUseProgram(shaderProgram);
        glBindVertexArray(VAO);
        glDrawArrays(GL_TRIANGLES,3);

        if (use_msaa) {
            glBindFramebuffer(GL_FRAMEBUFFER,0);
            glUseProgram(postProcessShaderProgram);
            glActiveTexture(GL_TEXTURE0);
            glBindTexture(GL_TEXTURE_2D_MULTISAMPLE,texMutiSampleColor);
            glBindVertexArray(postVAO);
            glDrawArrays(GL_TRIANGLES,6);
        }

        glfwSwapBuffers(window);
        glfwPollEvents();

    }
    glfwTerminate();
    // cleanup
}
,

你可以试试下面的写法(在调用中加上 guess = False):

table = tabula.read_pdf(files[0],pages = 'all',multiple_tables = True,guess = False,stream = True)

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。