当前位置：首页 > news >正文

Linux基础I/O

news 来源：原创 2024/9/24 5:33:13

一，系统文件I/O

写文件:

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
/*
 * Create (or open) "myfile" and write the message "hello bit!\n" to it five
 * times using the raw open/write/close system calls.
 *
 * Returns 0 on success, 1 if the file cannot be opened or a write fails.
 */
int main()
{
    /* Clear the umask so the mode passed to open() is applied unmodified. */
    umask(0);

    int fd = open("myfile", O_WRONLY | O_CREAT, 0644);
    if (fd < 0) {
        perror("open");
        return 1;
    }

    const char *msg = "hello bit!\n";
    size_t len = strlen(msg);

    for (int count = 0; count < 5; count++) {
        /*
         * write(fd, buf, n): fd is the file descriptor, buf is the start of
         * the data, n is how many bytes we would like to write. The return
         * value is the number of bytes actually written, which may be
         * smaller than n, so keep going until the whole message is out.
         */
        size_t done = 0;
        while (done < len) {
            ssize_t n = write(fd, msg + done, len - done);
            if (n < 0) {
                perror("write");
                close(fd);
                return 1;
            }
            done += (size_t)n;
        }
    }

    close(fd);
    return 0;
}

读文件

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
/*
 * Open "myfile" read-only and print its contents to standard output,
 * reading it in chunks the size of the message "hello bit!\n".
 *
 * Returns 0 on success, 1 if the file cannot be opened.
 */
int main()
{
    int fd = open("myfile", O_RDONLY);
    if (fd < 0) {
        perror("open");
        return 1;
    }

    const char *msg = "hello bit!\n";
    size_t chunk = strlen(msg); /* well below sizeof(buf), so buf[s] is in range */
    char buf[1024];

    while (1) {
        /* read mirrors write: it returns how many bytes were actually read. */
        ssize_t s = read(fd, buf, chunk);
        if (s < 0) {
            perror("read");
            break;
        }
        if (s == 0) {
            break; /* end of file */
        }
        /* read() does not NUL-terminate; do it before treating buf as a string. */
        buf[s] = '\0';
        printf("%s", buf);
    }

    close(fd);
    return 0;
}

接口介绍

open man open

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
int open(const char *pathname, int flags);
int open(const char *pathname, int flags, mode_t mode);
pathname: 要打开或创建的目标文件
flags: 打开文件时，可以传入多个参数选项，用下面的一个或者多个常量进行“或”运算，构成flags。
参数:
O_RDONLY: 只读打开
O_WRONLY: 只写打开
O_RDWR : 读、写打开
（以上三个常量必须指定一个，且只能指定一个）
O_CREAT : 若文件不存在，则创建它。需要使用 mode 参数来指明新文件的访问权限
O_APPEND: 追加写
返回值：
成功：新打开的文件描述符
失败：-1

mode_t 理解：文件权限，具体直接 man 手册，比什么都清楚

open 函数具体使用哪个，和具体应用场景相关，如目标文件不存在，需要 open创建，则第三个参数表示创建文件的默认权限 , 否则，使用两个参数的 open 。

文件描述符fd

Linux六字真言：先描述，再组织
组织好的文件数据，会被统一的管理起来
而每个管理起来的文件数据，都会有一个对应的下标，或者是编号，对应的就是文件描述符fd
Linux进程默认情况下会有3个缺省打开的文件描述符，分别是标准输入0，标准输出1，标准错误2
0,1,2 对应的物理设备一般是：键盘，显示器，显示器

所以输入输出还可以采用如下方式：

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

/*
 * Read one chunk from standard input (fd 0) and echo it to both standard
 * output (fd 1) and standard error (fd 2) with the raw read/write calls.
 *
 * Always returns 0.
 */
int main()
{
    char buf[1024];

    ssize_t s = read(0, buf, sizeof(buf));
    if (s > 0) {
        /*
         * Write exactly the bytes that were read. Using the byte count
         * instead of strlen() means no terminator has to be stored, so a
         * full 1024-byte read cannot overrun buf.
         */
        write(1, buf, (size_t)s);
        write(2, buf, (size_t)s);
    }
    return 0;
}

而现在知道，文件描述符就是从 0 开始的小整数。当我们打开文件时，操作系统在内存中要创建相应的数据结构来描述目标文件，于是就有了 file 结构体，表示一个已经打开的文件对象。而进程执行 open 系统调用，所以必须让进程和文件关联起来。每个进程都有一个指针 *files，指向一张表 files_struct，该表最重要的部分就是包含一个指针数组，每个元素都是一个指向打开文件的指针！所以，本质上，文件描述符就是该数组的下标。所以，只要拿着文件描述符，就可以找到对应的文件

文件描述符的分配规则

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

/*
 * Open "myfile" read-only and print the file descriptor number that
 * open() handed back.
 *
 * Returns 0 on success, 1 if the file cannot be opened.
 */
int main()
{
    int fd = open("myfile", O_RDONLY);
    if (fd < 0) {
        perror("open");
        return 1;
    }

    printf("fd: %d\n", fd);

    close(fd);
    return 0;
}

输出发现是 fd: 3

关闭 0 或者 2，再看

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

/*
 * Close standard input first, then open "myfile" and print the descriptor
 * number that open() returns.
 *
 * Returns 0 on success, 1 if the file cannot be opened.
 */
int main()
{
    /* Free descriptor 0 before opening; use close(2) here instead to free
     * descriptor 2 and repeat the experiment. */
    close(0);

    int fd = open("myfile", O_RDONLY);
    if (fd < 0) {
        perror("open");
        return 1;
    }

    printf("fd: %d\n", fd);

    close(fd);
    return 0;
}

发现结果是：fd: 0 或者 fd: 2。可见，文件描述符的分配规则：在 files_struct 数组当中，找到当前没有被使用的最小的一个下标，作为新的文件描述符

重定向

那如果关闭1呢？看代码：

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>

/*
 * Close standard output, open "myfile" for writing, and then print the new
 * descriptor number with printf.
 *
 * Exits with status 0 on success; returns 1 if the file cannot be opened.
 */
int main()
{
    /* Free descriptor 1 before opening the file. */
    close(1);

    int fd = open("myfile", O_WRONLY | O_CREAT, 0644);
    if (fd < 0) {
        perror("open");
        return 1;
    }

    printf("fd: %d\n", fd);
    /* Flush while fd is still open; once it is closed the buffered text
     * would have nowhere to go. */
    fflush(stdout);

    close(fd);
    exit(0);
}

此时，我们发现，本来应该输出到显示器上的内容，输出到了文件 myfile 当中，其中， fd ＝ 1 。这种现象叫做输出重定向。常见的重定向有:>, >>, <

使用 dup2 系统调用

函数原型如下:

#include <unistd.h>
int dup2(int oldfd, int newfd);

示例代码

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/*
 * Redirect standard output to the file "./log" with dup2, then copy
 * everything read from standard input into it through printf.
 *
 * Returns 0 once input ends or a read fails, 1 if the log file cannot be
 * opened or standard output cannot be redirected.
 */
int main()
{
    /* O_CREAT requires a mode argument; without it the new file's
     * permissions are indeterminate. */
    int fd = open("./log", O_CREAT | O_RDWR, 0644);
    if (fd < 0) {
        perror("open");
        return 1;
    }

    if (fd != 1) {
        /* dup2 closes descriptor 1 itself before reusing it, so no
         * separate close(1) is needed. */
        if (dup2(fd, 1) < 0) {
            perror("dup2");
            close(fd);
            return 1;
        }
        /* Descriptor 1 now refers to the log; the original is redundant. */
        close(fd);
    }

    for (;;) {
        /* Zero-filled and read one byte short, so buf is always a
         * NUL-terminated string for printf. */
        char buf[1024] = { 0 };
        ssize_t read_size = read(0, buf, sizeof(buf) - 1);
        if (read_size < 0) {
            perror("read");
            break;
        }
        if (read_size == 0) {
            break; /* end of input: stop instead of spinning forever */
        }
        printf("%s", buf);
        fflush(stdout);
    }
    return 0;
}

printf 是 C 库当中的 IO 函数，一般往 stdout 中输出，但是 stdout 底层访问文件的时候，找的还是 fd:1。但此时，fd:1 下标所表示的内容，已经变成了被重定向的目标文件（上例中为 ./log）的地址，不再是显示器文件的地址，所以，输出的任何消息都会往文件中写入，进而完成输出重定向

理解文件系统

每行包含 7 列:

FILE

我们在读写文件时，数据不会直接传输，而是先存储到缓冲区之中，然后再按照特定的刷新策略，把数据从文件的缓冲区中刷新出去

缓冲区的刷新策略有3种

直接刷新（无缓冲：数据刚写入就刷新）
行刷新（行缓冲：遇到 \n 就会触发刷新机制，连同 \n 在内的所有数据全部刷新出去），对应实例：显示器
满缓冲区刷新（全缓冲：缓冲区写满、达到容量上限时才刷新），对应实例：磁盘文件

#include <stdio.h>
#include <string.h>
#include <unistd.h>

/*
 * Emit one line through each of printf, fwrite and write, then fork.
 * Used to compare how C-library buffered output and the raw write system
 * call behave across fork().
 *
 * Always returns 0 (in both parent and child).
 */
int main()
{
    const char *msg0 = "hello printf\n";
    const char *msg1 = "hello fwrite\n";
    const char *msg2 = "hello write\n";

    printf("%s", msg0);
    /* Length must come from msg1 itself, not from msg0. */
    fwrite(msg1, strlen(msg1), 1, stdout);
    write(1, msg2, strlen(msg2));

    /*
     * Deliberately no fflush(stdout) before fork(): the point of the demo
     * is to observe what happens to data still sitting in the stdio buffer
     * when the process is duplicated.
     */
    fork();
    return 0;
}

运行出结果：

hello printf
hello fwrite
hello write

但如果对进程实现输出重定向呢？ ./hello > file ，我们发现结果变成了：

hello write
hello printf
hello fwrite
hello printf
hello fwrite

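我们向磁盘文件中写入数据时，发现 printf 和 fwrite 的内容有两份。这是因为重定向到文件之后，stdout 的刷新策略由行刷新变成了写满缓冲区才刷新，fork 时这两条数据还留在 C 库的用户级缓冲区中；子进程会继承父进程的代码和数据（包括这个缓冲区），当其中一个进程要刷新缓冲区的时候会发生写时拷贝，所以当父子进程结束时各自刷新一次，就会有两份数据。而 write 是系统调用，没有用户级缓冲区，所以它的内容只有一份，并且最先写入文件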

如果有兴趣，可以看看 FILE 结构体

typedef struct _IO_FILE FILE ; 在 /usr/include/stdio.h

在 /usr/include/libio.h
struct _IO_FILE {int _flags; /* High-order word is _IO_MAGIC; rest is flags. */
#define _IO_file_flags _flags//缓冲区相关/* The following pointers correspond to the C++ streambuf protocol. *//* Note: Tk uses the _IO_read_ptr and _IO_read_end fields directly. */char* _IO_read_ptr; /* Current read pointer */char* _IO_read_end; /* End of get area. */char* _IO_read_base; /* Start of putback+get area. */char* _IO_write_base; /* Start of put area. */char* _IO_write_ptr; /* Current put pointer. */char* _IO_write_end; /* End of put area. */char* _IO_buf_base; /* Start of reserve area. */char* _IO_buf_end; /* End of reserve area. *//* The following fields are used to support backing up and undo. */char* _IO_save_base; /* Pointer to start of non-current get area. */char* _IO_backup_base; /* Pointer to first valid character of backup area */char* _IO_save_end; /* Pointer to end of non-current get area. */struct _IO_marker* _markers;struct _IO_FILE* _chain;int _fileno; //封装的文件描述符
#if 0int _blksize;
#elseint _flags2;
#endif_IO_off_t _old_offset; /* This used to be _offset but it's too small. */
#define __HAVE_COLUMN /* temporary *//* 1+column number of pbase(); 0 is unknown. */unsigned short _cur_column;signed char _vtable_offset;char _shortbuf[1];/* char* _save_gptr; char* _save_egptr; */_IO_lock_t* _lock;
#ifdef _IO_USE_OLD_IO_FILE
};

理解文件系统


[root@localhost linux]# ls -l
总用量 12
-rwxr-xr-x. 1 root root 7438 "9月 13 14:56" a.out
-rw-r--r--. 1 root root 654 "9月 13 14:56" test.c

每行包含 7 列：

模式或者权限
硬连接数
文件所有者
文件所属组
大小
最后修改时间
文件名

inode

为了能解释清楚 inode 我们先简单了解一下文件系统

Linux ext2 文件系统，上图为磁盘文件系统图（内核内存映像肯定有所不同），磁盘是典型的块设备，硬盘分区被划分为一个个的block 。一个 block 的大小是由格式化的时候确定的，并且不可以更改。例如 mke2fs 的 -b 选项可以设定block 大小为 1024 、 2048 或 4096 字节。而上图中启动块（ Boot Block ）的大小是确定的

Block Group ： ext2 文件系统会根据分区的大小划分为数个 Block Group 。而每个 Block Group 都有着相同的结构组成。政府管理各区的例子
数据区：存放文件内容
i 节点表 : 存放文件属性如文件大小，所有者，最近修改时间等
inode 位图（ inode Bitmap ）：每个 bit 表示一个 inode 是否空闲可用
块位图（ Block Bitmap ）： Block Bitmap 中记录着 Data Block 中哪个数据块已经被占用，哪个数据块没有被占用
GDT ， Group Descriptor Table ：块组描述符，描述块组属性信息
超级块（ Super Block ）：存放文件系统本身的结构信息。记录的信息主要有： block 和 inode 的总量，未使用的 block 和 inode 的数量，一个 block 和 inode 的大小，最近一次挂载的时间，最近一次写入数据的时间，最近一次检验磁盘的时间等其他文件系统的相关信息。Super Block 的信息被破坏，可以说整个文件系统结构就被破坏了

理解硬链接

硬链接：

我们对 main.c 建立硬链接 main1.c 后发现：main1.c 和 main.c 的第一列数字（操作系统在硬盘中的寻址编号，即 inode 编号）相等，第三列的数字（硬链接数）变为 2

所以我们可以知道，硬链接就是给同一个寻址编号再起一个别名（文件名），而第三列数据就是该寻址编号对应的文件名数目

软链接：

通过观察第一列的数据可以发现，test.c 和 test 的寻址编号是不一样的，所以二者是两个独立的文件

那么二者之间有什么关系？

我们对test 使用cat 命令可以发现，能看见test.c 里面的内容

软链接是通过名字引用另外一个文件，在shell中的做法，如果类比到Windows我们可以理解为是一种快捷方式。

动态库和静态库

静态库（ .a ）：程序在编译链接的时候把库的代码链接到可执行文件中。程序运行的时候将不再需要静态库
动态库可以在多个程序间共享，所以动态链接使得可执行文件更小，节省了磁盘空间。操作系统采用虚拟内存机制，允许物理内存中的一份动态库被要用到该库的所有进程共用，节省了内存和磁盘空间
在可执行文件开始运行以前，外部函数的机器码由操作系统从磁盘上的该动态库中复制到内存中，这个过程称为动态链接（dynamic linking ）
一个与动态库链接的可执行文件仅仅包含它用到的函数入口地址的一个表，而不是外部函数所在目标文件的整个机器码
动态库（ .so ）：程序在运行的时候才去链接动态库的代码，多个程序共享使用库的代码