关于偏移量和逐行读取文件的问题

如何解决关于偏移量和逐行读取文件的问题

#include "offsetFinder.h"

/**  Reads a GIS record file (as described in the corresponding project
 *   specification),and determines,for each GIS record contained in that
 *   file,the offset at which that record begins.  The offsets are stored
 *   into an array supplied by the caller.
 * 
 *   Pre:  gisFile is open on a GIS record file
 *         offsets[] is an array large enough to hold the offsets
 *   Post: offsets[] contains the GIS record offsets,in the order
 *            the records occur in the file
 *   Returns:  the number of offsets that were stored in offsets[]
 */
uint32_t findOffsets(FILE* gisFile,uint32_t offsets[]) {
    /* Nothing can be read or stored without a stream and a destination. */
    if (gisFile == NULL || offsets == NULL) {
        return 0;
    }

    uint32_t count = 0;
    int headerSkipped = 0;  /* the first line holds column names, not a record */

    for (;;) {
        /* File position of the first byte of the line about to be read. */
        long lineStart = ftell(gisFile);
        int ch = fgetc(gisFile);
        if (lineStart < 0 || ch == EOF) {
            break;
        }

        /* A line made up of only a line terminator carries no record. */
        int isBlank = (ch == '\n' || ch == '\r');

        /* Consume the rest of the line one byte at a time, so no fixed-size
         * buffer limits how long a record may be. */
        while (ch != '\n' && ch != EOF) {
            ch = fgetc(gisFile);
        }

        if (!headerSkipped) {
            headerSkipped = 1;
        } else if (!isBlank) {
            offsets[count++] = (uint32_t)lineStart;
        }
    }
    return count;
}

大家好，我有一个关于这个作业的问题。这样设置好吗？对于 GISData.txt，我应该通读该文件，并且必须返回存储在 offsets[] 中的偏移量。

FEATURE_ID|FEATURE_NAME|FEATURE_CLASS|STATE_ALPHA|STATE_NUMERIC|COUNTY_NAME|COUNTY_NUMERIC|PRIMARY_LAT_DMS|PRIM_LONG_DMS|PRIM_LAT_DEC|PRIM_LONG_DEC|SOURCE_LAT_DMS|SOURCE_LONG_DMS|SOURCE_LAT_DEC|SOURCE_LONG_DEC|ELEV_IN_M|ELEV_IN_FT|MAP_NAME|DATE_CREATED|DATE_EDITED
885513|Siegrest Draw|Valley|NM|35|Eddy|015|323815N|1043256W|32.6376116|-104.5488549|323859N|1043732W|32.6498321|-104.6255227|1095|3592|Parish Ranch|11/13/1980|
885526|AAA Tank|Reservoir|NM|35|Eddy|015|321043N|1041456W|32.1786543|-104.2489615|||||1006|3300|Bond Draw|11/13/1980|06/23/2011
885566|Adobe Draw|Valley|NM|35|Eddy|015|322820N|1042141W|32.4723375|-104.361345|322704N|1042129W|32.4511111|-104.3580556|1007|3304|Carlsbad West|11/13/1980|
885567|Adobe Flat|Flat|NM|35|Eddy|015|322849N|1042119W|32.4803932|-104.3552339|||||1006|3300|Carlsbad West|11/13/1980|
885607|Alacran Hills|Range|NM|35|Eddy|015|322812N|1041055W|32.4701183|-104.1818931|||||1009|3310|Carlsbad East|11/13/1980|
885684|Alkali Lake|Lake|NM|35|Eddy|015|323039N|1041133W|32.5109371|-104.1924802|||||966|3169|Angel Draw|11/13/1980|06/23/2011
885697|Allen Well|Well|NM|35|Eddy|015|322309N|1042120W|32.3859489|-104.3555084|||||1038|3405|Carlsbad West|11/13/1980|

这是 GISData.txt 的片段，每个区域的数据（一行）都被视为一条 GIS 记录。作业说明中写道：“作业中提到的偏移量是指 GIS 数据文件中各条 GIS 记录开始的位置。

由于每条 GIS 记录占据一整行，因此一条 GIS 记录的偏移量只是 GIS 记录中第一个字节的偏移量。

当然，GIS 数据文件中的第一行不包含 GIS 记录，因此偏移 0 处没有 GIS 记录。”

如果我完全错了，有人可以查看我的代码并修改它吗？谢谢！！

解决方法

“对于 GISData.txt，我应该通读文件，我必须返回存储在 offsets[]....有人可以查看我的代码并修改它，如果我是完全错了吗？”

首先，在这个问题中，offset 的含义似乎有些混乱。而且，在谷歌搜索 "gis offset" 之后，我可以理解为什么。这是一个 GIS 领域特有的定义：

_" 偏移

[cartography] In cartography,the displacement or movement of features so that they do not overlap when displayed at a given scale.

例如，如果符号宽到足以使道路与河流重叠，则可以将道路相对于河流偏移。

[symbology] In symbology,the shift of the origin or insertion point of a symbol in an x and/or y direction.

[ESRI software] In ArcGIS,a change in or the act of changing the z-value for a surface or features in a scene by a constant amount or

使用表达式。可以应用偏移量来绘制特征就在表面之上。”_

还有这个“C 语言的 GIS 系统”中的定义：

"文件可以被看作一个字节序列，每个字节相对于文件开头都有一个唯一的偏移量，就像一个大数组。因此，每条 GIS 记录都从距文件开头的一个唯一偏移量处开始。"

这两个定义虽然都来自对 gis offset 的搜索，但差异如此之大，以至于无法据此判断这些术语在该问题中的含义。出于此答案的目的，我将以您在评论中的回复为依据，说明如何解析文件每条记录中的第一个字段。（不包括第 1 行的标题记录。）

以下是一些建议考虑的步骤，可用于实现此目的。

要考虑的步骤：

原型设计 如注释中所述，findOffsets() 函数的原型应提供以下内容：filespec、数组大小、数组。注释中未提及，但可能有用的是将读取的最长记录的长度。例如：

uint32_t findOffsets(const char *fileSpec,size_t longestElement,size_t numElements,uint32_t offsets[numElements]);

来自调用函数

读取文件一次以确定记录数。例如：numRecords。见
int count_names(const char *filename,size_t *count){...}
示例 here 例如如何读取文件中的记录数（以及何时需要获取最长记录。）。完成后关闭文件：
使用上一步中的记录数来确定数组的大小。

示例：

uint32_t offsets[numRecords-1];  //-1 skipping header line 
memset(records,sizeof records);

调用findOffsets()

示例：

size_t numOffsets = sizeof records/sizeof *records
uint32_t count = findOffsets("c:\\gis\\data.gis",longestRecord,numOffsets,offsets);
if(count > 0)
{
   //do something with records
}

在 findOffsets() 内部

打开文件以再次读取进程
读取文件的每一行（跳过标题行）
首先解析'|'每行的分隔标记
将解析的标记从字符串转换为整数
关闭文件
返回处理的行数。

下面的代码示例（安全性/错误检查非常有限）显示了如何做到这一点。它使用您的示例文件内容进行了测试，并借用了上面链接的代码，适用于此目的：

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

const char *fileSpec = "C:\\some_directory\\gisData.gis";

uint32_t findOffsets(const char *fileSpec,size_t longestElement,size_t numElements,uint32_t offsets[]);
int count_lines_in_file(const char *filename,size_t *count);
size_t filesize(const char *fn);

int main(void)
{
    size_t numRecords = 0;

    //longest line in the file, +1 so the line buffer can also hold the newline
    int longestRecord = count_lines_in_file(fileSpec,&numRecords)+1;

    //need the header line plus at least one record; this also keeps
    //numRecords-1 from wrapping around when the file is empty or missing
    if(numRecords < 2)
    {
        fprintf(stderr,"no records found in %s\n",fileSpec);
        return 1;
    }

    size_t numOffsets = numRecords - 1;//-1 - skipping header line

    //heap storage, zero-initialized; avoids a stack-sized VLA for large files
    uint32_t *offsets = calloc(numOffsets,sizeof *offsets);
    if(!offsets)
    {
        fprintf(stderr,"out of memory\n");
        return 1;
    }

    uint32_t recordsProcessed = findOffsets(fileSpec,(size_t)longestRecord,numOffsets,offsets);//do the work
    printf("%lu records processed\n",(unsigned long)recordsProcessed);

    free(offsets);
    return 0;
}

/*
 * Reads the file named by fileSpec line by line, skips the first (header)
 * line, and stores the leading numeric field of every following line into
 * offsets[].
 *
 *   fileSpec        path of the file to read
 *   longestElement  length of the longest line, counting its newline
 *   numElements     capacity of offsets[]; never more than this many
 *                   values are written
 *   offsets         destination array supplied by the caller
 *
 * Returns the number of values stored; 0 if the file cannot be opened,
 * memory cannot be allocated, or longestElement is 0.
 */
uint32_t findOffsets(const char *fileSpec,size_t longestElement,size_t numElements,uint32_t offsets[])
{
      uint32_t inx = 0;

      if(!fileSpec || !offsets || longestElement < 1)
      {
            return 0;
      }

      FILE *fp = fopen(fileSpec,"r");
      if(!fp)
      {
            return 0;
      }

      size_t lineSize = longestElement+1; //+1 - room for null terminator during read.
      char *line = malloc(lineSize);
      if(line)
      {
            int isHeader = 1;
            //loop to read all lines in file, stopping once offsets[] is full
            while(inx < numElements && fgets(line,(int)lineSize,fp))
            {
                if(isHeader)//skip header line,process all other lines
                {
                    isHeader = 0;
                    continue;
                }

                //convert the first field; strtoul stops at the '|' delimiter
                //and leaves end == line when the line has no leading number
                char *end = NULL;
                unsigned long value = strtoul(line,&end,10);
                if(end != line && value <= UINT32_MAX)
                {
                    offsets[inx] = (uint32_t)value;//store number
                    inx++;
                }
            }
            free(line);
      }
      fclose(fp);
      return inx;
}

/*
 * Scans the text file `filename` once and reports two things:
 *   - through *count: the number of non-empty lines in the file
 *   - as the return value: the length of the longest line, not counting
 *     its newline
 *
 * If the file cannot be opened, *count is set to 0 and 0 is returned.
 * The file is read one byte at a time, so no buffer has to be sized in
 * advance and a final line without a trailing newline is counted once.
 */
int count_lines_in_file(const char *filename,size_t *count)
{
    size_t lines = 0;
    int len = 0,lenKeep = 0;
    FILE *fp = fopen(filename,"r");
    if(fp)
    {
        int ch;
        while((ch = fgetc(fp)) != EOF)
        {
            if(ch != '\n')
            {
                len++;
                continue;
            }
            //end of a line: empty lines are not counted
            if(len > 0)
            {
                lines++;
                if(lenKeep < len) lenKeep = len;
            }
            len = 0;
        }
        //last line may end at EOF without a newline
        if(len > 0)
        {
            lines++;
            if(lenKeep < len) lenKeep = len;
        }
        fclose(fp);
    }
    if(count) *count = lines;

    return lenKeep;
}

/*
 * Returns the size in bytes of the file named `fn`, found by opening it in
 * binary mode and seeking to its end. Returns 0 if the file cannot be
 * opened or its end position cannot be determined.
 */
size_t filesize(const char *fn)
{
    size_t size = 0;
    FILE *fp = fopen(fn,"rb");
    if(fp)
    {
        //fseek takes (stream, offset, whence); offset 0 from SEEK_END is the end of file
        if(fseek(fp,0L,SEEK_END) == 0)
        {
            long end = ftell(fp);
            //ftell reports failure as -1L; do not let that wrap into a huge size_t
            if(end > 0) size = (size_t)end;
        }
        fclose(fp);
    }
    return size;
}