fread

DNA图谱 / 问答 / 标签

程序员的自我修养: fread-C语言是怎样读取文件的

为了效率的考虑,不至于频繁调用系统函数和访问IO设备,MSVC CRT的fread采用缓冲设计. C语言提供的关于缓冲的函数有:int fflush(FILE* stream); int setvbuf(FILE* stream, char* buf, int mode, size_t size); /* 缓冲模式mode有: 1. 无缓冲模式 _IONBF 2. 行缓冲模式 _IOLBF 每收到一个换行符(\n或\r\n), 就将缓冲flush掉 3. 全缓冲模式 _IOFBF 仅当缓冲满时才进行flush */ void setbuf(FILE* stream, char* buf); 等价于 (void)setvbuf(stream, buf, _IOFBF, BUFSIZ); (buf为NULL时则等价于使用_IONBF模式) fread的调用过程大概是:fread -> fread_s(增加缓冲溢出保护, 加锁) -> _fread_nolock_s(循环读取,缓冲) -> _read(换行符转换) -> ReadFile(读取文件) 加注释的FILE结构如下:struct _iobuf { char *_ptr; int _cnt; //剩余未读的字节数 char *_base; //文件的缓冲基址 int _flag; //打开文件的属性 int _file; //打开文件的编号 int _charbuf; int _bufsiz; //文件的缓冲的总的大小 char *_tmpfname; }; typedef struct _iobuf FILE; 核心函数_fread_nolock_s(循环读取,缓冲)如下:size_t __cdecl _fread_nolock_s( void *buffer, size_t bufferSize, size_t elementSize, size_t num, FILE *stream ) { char *data; /* point inside the destination buffer to where we need to copy the read chars //当前放进字节的尾部*/ size_t dataSize; /* space left in the destination buffer (in bytes) //buffer中剩余字节数*/ size_t total; /* total bytes to read //总共要读的字节数*/ size_t count; /* num bytes left to read //剩下要读的字节数*/ unsigned streambufsize; /* size of stream buffer */ unsigned nbytes; /* how much to read now */ unsigned nread; /* how much we did read */ int c; /* a temp char */ /* initialize local vars */ data = buffer; dataSize = bufferSize; if (elementSize == 0 || num == 0) { return 0; } /* validation */ _VALIDATE_RETURN((buffer != NULL), EINVAL, 0); if (stream == NULL || num > (SIZE_MAX / elementSize)) { if (bufferSize != SIZE_MAX) { memset(buffer, _BUFFER_FILL_PATTERN, bufferSize); } _VALIDATE_RETURN((stream != NULL), EINVAL, 0); _VALIDATE_RETURN(num <= (SIZE_MAX / elementSize), EINVAL, 0); } count = total = elementSize * num; if (anybuf(stream)) { /* already has buffer, use its size */ streambufsize = stream->_bufsiz; } else { /* assume will get _INTERNAL_BUFSIZ buffer */ streambufsize = _INTERNAL_BUFSIZ; } /* here is the main loop -- we go through here until we're done */ while (count != 0) { /* if the buffer exists and has characters, copy them to user buffer */ if (anybuf(stream) && stream->_cnt != 0) { if(stream->_cnt < 0) { 
_ASSERTE(("Inconsistent Stream Count. Flush between consecutive read and write", stream->_cnt >= 0)); stream->_flag |= _IOERR; return (total - count) / elementSize; } /* how much do we want? */ nbytes = (count < (size_t)stream->_cnt) ? (unsigned)count : stream->_cnt; if (nbytes > dataSize) { if (bufferSize != SIZE_MAX) { memset(buffer, _BUFFER_FILL_PATTERN, bufferSize); } _VALIDATE_RETURN(("buffer too small", 0), ERANGE, 0) } memcpy_s(data, dataSize, stream->_ptr, nbytes); /* update stream and amt of data read */ count -= nbytes; stream->_cnt -= nbytes; stream->_ptr += nbytes; data += nbytes; dataSize -= nbytes; } else if (count >= streambufsize) { /* If we have more than streambufsize chars to read, get data by calling read with an integral number of bufsiz blocks. Note that if the stream is text mode, read will return less chars than we ordered. */ if (streambufsize) { /* In 64bit apps size_t is bigger than unsigned * (which is 32bit even in 64 bit machines), so * we need to split the read into INT_MAX chunks * since _read() only support up to _signed_ int * (even though the in parameter is unsigned). 
*/ if (count > INT_MAX) { /* calc chars to read -- the largest multiple of streambufsize * smaller than INT_MAX */ nbytes = (unsigned)(INT_MAX - INT_MAX % streambufsize); } else { /* calc chars to read -- (count/streambufsize) * streambufsize */ nbytes = (unsigned)(count - count % streambufsize); } } else { nbytes = (count > INT_MAX)?(unsigned)INT_MAX: (unsigned)count; } if (nbytes > dataSize) { if (bufferSize != SIZE_MAX) { memset(buffer, _BUFFER_FILL_PATTERN, bufferSize); } _VALIDATE_RETURN(("buffer too small", 0), ERANGE, 0) } nread = _read(_fileno(stream), data, nbytes); if (nread == 0) { /* end of file -- out of here */ stream->_flag |= _IOEOF; return (total - count) / elementSize; } else if (nread == (unsigned)-1) { /* error -- out of here */ stream->_flag |= _IOERR; return (total - count) / elementSize; } /* update count and data to reflect read */ count -= nread; data += nread; dataSize -= nread; } else { /* less than streambufsize chars to read, so call _filbuf to fill buffer */ if ((c = _filbuf(stream)) == EOF) { /* error or eof, stream flags set by _filbuf */ return (total - count) / elementSize; } /* _filbuf returned a char -- store it */ if (dataSize == 0) { if (bufferSize != SIZE_MAX) { memset(buffer, _BUFFER_FILL_PATTERN, bufferSize); } _VALIDATE_RETURN(("buffer too small", 0), ERANGE, 0) } *data++ = (char) c; --count; --dataSize; /* update buffer size */ streambufsize = stream->_bufsiz; } } /* we finished successfully, so just return num */ return num; } 其中, _filbuf的实现如下: #ifndef _UNICODE int __cdecl _filbuf ( FILE *str ) #else /* _UNICODE */ int __cdecl _filwbuf ( FILE *str ) #endif /* _UNICODE */ { REG1 FILE *stream=NULL; /* In safecrt, we assume we always have a buffer */ _VALIDATE_RETURN(str != NULL, EINVAL, _TEOF); /* Init pointer to _iob2 entry. */ stream = str; if (!inuse(stream) || stream->_flag & _IOSTRG) return(_TEOF); if (stream->_flag & _IOWRT) { stream->_flag |= _IOERR; return(_TEOF); } stream->_flag |= _IOREAD; /* Get a buffer, if necessary. 
*/ if (!anybuf(stream)) { #ifndef _SAFECRT_IMPL _getbuf(stream); #else /* _SAFECRT_IMPL */ /* In safecrt, we assume we always have a buffer */ _VALIDATE_RETURN(FALSE, EINVAL, _TEOF); #endif /* _SAFECRT_IMPL */ } else { stream->_ptr = stream->_base; } stream->_cnt = _read(_fileno(stream), stream->_base, stream->_bufsiz); #ifndef _UNICODE if ((stream->_cnt == 0) || (stream->_cnt == -1)) { #else /* _UNICODE */ if ((stream->_cnt == 0) || (stream->_cnt == 1) || stream->_cnt == -1) { #endif /* _UNICODE */ stream->_flag |= stream->_cnt ? _IOERR : _IOEOF; stream->_cnt = 0; return(_TEOF); } if ( !(stream->_flag & (_IOWRT|_IORW)) && ((_osfile_safe(_fileno(stream)) & (FTEXT|FEOFLAG)) == (FTEXT|FEOFLAG)) ) stream->_flag |= _IOCTRLZ; /* Check for small _bufsiz (_SMALL_BUFSIZ). If it is small and if it is our buffer, then this must be the first _filbuf after an fseek on a read-access-only stream. Restore _bufsiz to its larger value (_INTERNAL_BUFSIZ) so that the next _filbuf call, if one is made, will fill the whole buffer. */ if ( (stream->_bufsiz == _SMALL_BUFSIZ) && (stream->_flag & _IOMYBUF) && !(stream->_flag & _IOSETVBUF) ) { stream->_bufsiz = _INTERNAL_BUFSIZ; } #ifndef _UNICODE stream->_cnt--; return(0xff & *stream->_ptr++); #else /* _UNICODE */ stream->_cnt -= sizeof(wchar_t); return (0xffff & *((wchar_t *)(stream->_ptr))++); #endif /* _UNICODE */ } 代码中分了三种情况:1) 缓冲区不为空: 此时, 把缓冲区中的数据复制到传入的字符数组中. 2) 缓冲区为空, 且需要读取的数据不小于缓冲的尺寸: 此时, 直接调用函数_read把文件中的内容读到传入的字符数组中(每次读取缓冲尺寸的整数倍). 3) 缓冲区为空, 且需要读取的数据小于缓冲的尺寸: 此时, 调用函数_filbuf(其内部调用_read)填充缓冲区, 并把缓冲区中的第一个字符写到传入的字符数组中. 若未读满传入的字符数组, 循环执行上述1~3过程, 直到读满, 读到文件末尾(EOF)或发生错误.