Linux的文件系统与设备文件

  1. Linux文件系统实现中重要的数据结构
  2. 设备文件系统(devfs)
  3. sysfs

在Linux的整体设计中,体现了一种抽象,模块与分层的设计思想,其中对于文件系统的实现也不例外。

在文件系统的实现可分为四层,分别为VFS,FS,FS Driver,Device。VFS层向上提供统一的文件系统访问接口,向下对具体的各类不同文件系统进行抽象;FS层是不同文件系统的具体实现;FS Driver层则对应与文件系统底层不同设备的访问驱动;Device层则是具体的硬件设备。

文件系统分层

Linux文件系统实现中重要的数据结构

  • file结构体
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
struct file {
union {
struct llist_node fu_llist;
struct rcu_head fu_rcuhead;
} f_u;
struct path f_path;
struct inode *f_inode; /* cached value */
const struct file_operations *f_op;

/*
* Protects f_ep_links, f_flags.
* Must not be taken from IRQ context.
*/
spinlock_t f_lock;
enum rw_hint f_write_hint;
atomic_long_t f_count;
unsigned int f_flags;
fmode_t f_mode;
struct mutex f_pos_lock;
loff_t f_pos;
struct fown_struct f_owner;
const struct cred *f_cred;
struct file_ra_state f_ra;

u64 f_version;
#ifdef CONFIG_SECURITY
void *f_security;
#endif
/* needed for tty driver, and maybe others */
void *private_data;

#ifdef CONFIG_EPOLL
/* Used by fs/eventpoll.c to link all the hooks to this file */
struct list_head f_ep_links;
struct list_head f_tfile_llink;
#endif /* #ifdef CONFIG_EPOLL */
struct address_space *f_mapping;
errseq_t f_wb_err;
} __randomize_layout
__attribute__((aligned(4))); /* lest something weird decides that 2 is OK */

struct file_handle {
__u32 handle_bytes;
int handle_type;
/* file identifier */
unsigned char f_handle[0];
};

file结构体代表一个打开的文件,系统中每个打开的文件在内核空间都有一个file结构体与之对应。通常它在文件打开时创建,并传递给在文件上操作的任何函数。

  • inode结构体
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
struct inode {
umode_t i_mode;
unsigned short i_opflags;
kuid_t i_uid;
kgid_t i_gid;
unsigned int i_flags;

#ifdef CONFIG_FS_POSIX_ACL
struct posix_acl *i_acl;
struct posix_acl *i_default_acl;
#endif

const struct inode_operations *i_op;
struct super_block *i_sb;
struct address_space *i_mapping;

#ifdef CONFIG_SECURITY
void *i_security;
#endif

/* Stat data, not accessed from path walking */
unsigned long i_ino;
/*
* Filesystems may only read i_nlink directly. They shall use the
* following functions for modification:
*
* (set|clear|inc|drop)_nlink
* inode_(inc|dec)_link_count
*/
union {
const unsigned int i_nlink;
unsigned int __i_nlink;
};
dev_t i_rdev;
loff_t i_size;
struct timespec64 i_atime;
struct timespec64 i_mtime;
struct timespec64 i_ctime;
spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
unsigned short i_bytes;
u8 i_blkbits;
u8 i_write_hint;
blkcnt_t i_blocks;

#ifdef __NEED_I_SIZE_ORDERED
seqcount_t i_size_seqcount;
#endif

/* Misc */
unsigned long i_state;
struct rw_semaphore i_rwsem;

unsigned long dirtied_when; /* jiffies of first dirtying */
unsigned long dirtied_time_when;

struct hlist_node i_hash;
struct list_head i_io_list; /* backing dev IO list */
#ifdef CONFIG_CGROUP_WRITEBACK
struct bdi_writeback *i_wb; /* the associated cgroup wb */

/* foreign inode detection, see wbc_detach_inode() */
int i_wb_frn_winner;
u16 i_wb_frn_avg_time;
u16 i_wb_frn_history;
#endif
struct list_head i_lru; /* inode LRU list */
struct list_head i_sb_list;
struct list_head i_wb_list; /* backing dev writeback list */
union {
struct hlist_head i_dentry;
struct rcu_head i_rcu;
};
atomic64_t i_version;
atomic_t i_count;
atomic_t i_dio_count;
atomic_t i_writecount;
#if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING)
atomic_t i_readcount; /* struct files open RO */
#endif
union {
const struct file_operations *i_fop; /* former ->i_op->default_file_ops */
void (*free_inode)(struct inode *);
};
struct file_lock_context *i_flctx;
struct address_space i_data;
struct list_head i_devices;
union {
struct pipe_inode_info *i_pipe;
struct block_device *i_bdev;
struct cdev *i_cdev;
char *i_link;
unsigned i_dir_seq;
};

__u32 i_generation;

#ifdef CONFIG_FSNOTIFY
__u32 i_fsnotify_mask; /* all events this inode cares about */
struct fsnotify_mark_connector __rcu *i_fsnotify_marks;
#endif

#ifdef CONFIG_FS_ENCRYPTION
struct fscrypt_info *i_crypt_info;
#endif

#ifdef CONFIG_FS_VERITY
struct fsverity_info *i_verity_info;
#endif

void *i_private; /* fs or device private pointer */
} __randomize_layout;

inode是文件系统管理的最小单位,其中包含文件访问的权限、属主、组、大小、生成时间、访问时间、最后修改时间等信息,也是连接文件与文件目录的桥梁。

  • bus_type结构体
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
struct bus_type {
const char *name;
const char *dev_name;
struct device *dev_root;
const struct attribute_group **bus_groups;
const struct attribute_group **dev_groups;
const struct attribute_group **drv_groups;

int (*match)(struct device *dev, struct device_driver *drv);
int (*uevent)(struct device *dev, struct kobj_uevent_env *env);
int (*probe)(struct device *dev);
void (*sync_state)(struct device *dev);
int (*remove)(struct device *dev);
void (*shutdown)(struct device *dev);

int (*online)(struct device *dev);
int (*offline)(struct device *dev);

int (*suspend)(struct device *dev, pm_message_t state);
int (*resume)(struct device *dev);

int (*num_vf)(struct device *dev);

int (*dma_configure)(struct device *dev);

const struct dev_pm_ops *pm;

const struct iommu_ops *iommu_ops;

struct subsys_private *p;
struct lock_class_key lock_key;

bool need_parent_lock;
};

bus_type结构体是对总线的抽象,定义了总线通用的操作接口。

  • device_driver及device结构体
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
struct device_driver {
const char *name;
struct bus_type *bus;

struct module *owner;
const char *mod_name; /* used for built-in modules */

bool suppress_bind_attrs; /* disables bind/unbind via sysfs */
enum probe_type probe_type;

const struct of_device_id *of_match_table;
const struct acpi_device_id *acpi_match_table;

int (*probe) (struct device *dev);
void (*sync_state)(struct device *dev);
int (*remove) (struct device *dev);
void (*shutdown) (struct device *dev);
int (*suspend) (struct device *dev, pm_message_t state);
int (*resume) (struct device *dev);
const struct attribute_group **groups;
const struct attribute_group **dev_groups;

const struct dev_pm_ops *pm;
void (*coredump) (struct device *dev);

struct driver_private *p;
};

struct device {
struct kobject kobj;
struct device *parent;

struct device_private *p;

const char *init_name; /* initial name of the device */
const struct device_type *type;

struct bus_type *bus; /* type of bus device is on */
struct device_driver *driver; /* which driver has allocated this
device */
void *platform_data; /* Platform specific data, device
core doesn't touch it */
void *driver_data; /* Driver data, set and get with
dev_set_drvdata/dev_get_drvdata */
#ifdef CONFIG_PROVE_LOCKING
struct mutex lockdep_mutex;
#endif
struct mutex mutex; /* mutex to synchronize calls to
* its driver.
*/

struct dev_links_info links;
struct dev_pm_info power;
struct dev_pm_domain *pm_domain;

#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
struct irq_domain *msi_domain;
#endif
#ifdef CONFIG_PINCTRL
struct dev_pin_info *pins;
#endif
#ifdef CONFIG_GENERIC_MSI_IRQ
struct list_head msi_list;
#endif

const struct dma_map_ops *dma_ops;
u64 *dma_mask; /* dma mask (if dma'able device) */
u64 coherent_dma_mask;/* Like dma_mask, but for
alloc_coherent mappings as
not all hardware supports
64 bit addresses for consistent
allocations such descriptors. */
u64 bus_dma_limit; /* upstream dma constraint */
unsigned long dma_pfn_offset;

struct device_dma_parameters *dma_parms;

struct list_head dma_pools; /* dma pools (if dma'ble) */

#ifdef CONFIG_DMA_DECLARE_COHERENT
struct dma_coherent_mem *dma_mem; /* internal for coherent mem
override */
#endif
#ifdef CONFIG_DMA_CMA
struct cma *cma_area; /* contiguous memory area for dma
allocations */
#endif
/* arch specific additions */
struct dev_archdata archdata;

struct device_node *of_node; /* associated device tree node */
struct fwnode_handle *fwnode; /* firmware device node */

#ifdef CONFIG_NUMA
int numa_node; /* NUMA node this device is close to */
#endif
dev_t devt; /* dev_t, creates the sysfs "dev" */
u32 id; /* device instance */

spinlock_t devres_lock;
struct list_head devres_head;

struct class *class;
const struct attribute_group **groups; /* optional groups */

void (*release)(struct device *dev);
struct iommu_group *iommu_group;
struct iommu_fwspec *iommu_fwspec;
struct iommu_param *iommu_param;

bool offline_disabled:1;
bool offline:1;
bool of_node_reused:1;
bool state_synced:1;
#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL)
bool dma_coherent:1;
#endif
};

这两个结构体分别是设备驱动和设备的抽象,它们都必须依附于一种总线,因此都包含bus_type的指针。

设备和驱动是分开注册的,每当某个设备或驱动注册后,都会通过其依附的总线所提供的match接口去寻找可与它配对的另一半,当匹配成功时,总线的prope接口就会被执行。

设备文件系统(devfs)

设备文件系统在linux文件系统中属于特殊的文件系统,是在linux2.4内核开始引进的,它的出现使得设备抽象成了文件系统中的文件,令驱动程序能够更好地对设备进行管理。内核中提供了如下接口用于创建和撤销对应的设备文件。

1
2
3
4
5
6
/* 创建设备目录 */
devfs_handle_t devfs_mk_dir(devfs_handle_t dir, const char *name, void *info);
/* 创建设备文件 */
devfs_handle_t devfs_register(devfs_handle_t dir, const char *name, unsigned int flags, unsigned int major, unsigned int minor, umode_t mode, void *ops, void *info);
/* 撤销设备文件 */
void devfs_unregister(devfs_handle_t de);

devfs在初期引进时对于设备管理是一个很大的启发,随着linux的进化与发展,对于机制与策略分离的设计思想而言,devfs显然不符合要求,同时其自身存在一些很难解决的问题,最后被抛弃,最终被udev所取代。

sysfs

sysfs时在linux2.6内核引入的,与提供进程和状态信息的proc文件系统十分相似,sysfs提供了包括所有系统硬件的层级视图。

sysfs把连接在系统上的设备和总线组织成一个分级的文件,它们可以由用户空间存取,向用户空间导出内核数据结构以及它们的属性.sysfs的目的就是展示设备驱动模型中各组件的层级关系。

总线、驱动和设备最终都会落实到sysfs中的一个目录,通过追踪代码,可以认为以上总线、驱动和设备的数据结构都是kobject的派生类,即kobject可以看作是总线、驱动和设备的抽象基类。一个kobject对应sysfs中的一个目录。

总线、驱动和设备中的各attribute则落实到sysfs中的一个文件。attribute会伴随着show()和store()这两个函数,分别用于读写该attribute对应的sysfs文件。

如下定义了总线、驱动及设备相关的attribute结构体。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
struct attribute {
const char *name;
umode_t mode;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
bool ignore_lockdep:1;
struct lock_class_key *key;
struct lock_class_key skey;
#endif
};

struct bus_attribute {
struct attribute attr;
ssize_t (*show)(struct bus_type *bus, char *buf);
ssize_t (*store)(sturct bus_type *bus, const char *buf, size_t count);
};

struct driver_attribute {
struct attribute attr;
ssize_t (*show)(struct device_driver *driver, char *buf);
ssize_t (*store)(sturct device_driver *driver, const char *buf, size_t count);
};

struct device_attribute {
struct attribute attr;
ssize_t (*show)(struct device *dev, struct device_attribute *attr, char *buf);
ssize_t (*store)(sturct device *dev, struct device_attribute *attr, const char *buf, size_t count);
};

事实上,sysfs源于bus_type、device_driver、device,而目录中的文件则来源于attribute。


转载请注明来源,欢迎对文章中的引用来源进行考证,欢迎指出任何有错误或不够清晰的表达。可以在下面评论区评论,也可以邮件至 yxhlfx@163.com

文章标题:Linux的文件系统与设备文件

本文作者:红尘追风

发布时间:2015-09-10, 10:26:46

原始链接:http://www.micernel.com/2015/09/10/Linux%E6%96%87%E4%BB%B6%E7%B3%BB%E7%BB%9F%E5%8F%8A%E8%AE%BE%E5%A4%87%E6%96%87%E4%BB%B6/

版权声明: "署名-非商用-相同方式共享 4.0" 转载请保留原文链接及作者。

目录