Linux的文件系统与设备文件
在Linux的整体设计中,体现了一种抽象,模块与分层的设计思想,其中对于文件系统的实现也不例外。
在文件系统的实现可分为四层,分别为VFS,FS,FS Driver,Device。VFS层向上提供统一的文件系统访问接口,向下对具体的各类不同文件系统进行抽象;FS层是不同文件系统的具体实现;FS Driver层则对应与文件系统底层不同设备的访问驱动;Device层则是具体的硬件设备。
Linux文件系统实现中重要的数据结构
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 struct file { union { struct llist_node fu_llist; struct rcu_head fu_rcuhead; } f_u; struct path f_path; struct inode *f_inode; /* cached value */ const struct file_operations *f_op; /* * Protects f_ep_links, f_flags. * Must not be taken from IRQ context. */ spinlock_t f_lock; enum rw_hint f_write_hint; atomic_long_t f_count; unsigned int f_flags; fmode_t f_mode; struct mutex f_pos_lock; loff_t f_pos; struct fown_struct f_owner; const struct cred *f_cred; struct file_ra_state f_ra; u64 f_version; #ifdef CONFIG_SECURITY void *f_security; #endif /* needed for tty driver, and maybe others */ void *private_data; #ifdef CONFIG_EPOLL /* Used by fs/eventpoll.c to link all the hooks to this file */ struct list_head f_ep_links; struct list_head f_tfile_llink; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; errseq_t f_wb_err; } __randomize_layout __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ struct file_handle { __u32 handle_bytes; int handle_type; /* file identifier */ unsigned char f_handle[0]; };
file结构体代表一个打开的文件,系统中每个打开的文件在内核空间都有一个file结构体与之对应。通常它在文件打开时创建,并传递给在文件上操作的任何函数。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 struct inode { umode_t i_mode; unsigned short i_opflags; kuid_t i_uid; kgid_t i_gid; unsigned int i_flags; #ifdef CONFIG_FS_POSIX_ACL struct posix_acl *i_acl; struct posix_acl *i_default_acl; #endif const struct inode_operations *i_op; struct super_block *i_sb; struct address_space *i_mapping; #ifdef CONFIG_SECURITY void *i_security; #endif /* Stat data, not accessed from path walking */ unsigned long i_ino; /* * Filesystems may only read i_nlink directly. They shall use the * following functions for modification: * * (set|clear|inc|drop)_nlink * inode_(inc|dec)_link_count */ union { const unsigned int i_nlink; unsigned int __i_nlink; }; dev_t i_rdev; loff_t i_size; struct timespec64 i_atime; struct timespec64 i_mtime; struct timespec64 i_ctime; spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; u8 i_blkbits; u8 i_write_hint; blkcnt_t i_blocks; #ifdef __NEED_I_SIZE_ORDERED seqcount_t i_size_seqcount; #endif /* Misc */ unsigned long i_state; struct rw_semaphore i_rwsem; unsigned long dirtied_when; /* jiffies of first dirtying */ unsigned long dirtied_time_when; struct hlist_node i_hash; struct list_head i_io_list; /* backing dev IO list */ #ifdef CONFIG_CGROUP_WRITEBACK struct bdi_writeback *i_wb; /* the associated cgroup wb */ /* foreign inode detection, see wbc_detach_inode() */ int i_wb_frn_winner; u16 i_wb_frn_avg_time; u16 i_wb_frn_history; #endif struct list_head i_lru; /* inode LRU list */ struct list_head i_sb_list; struct list_head i_wb_list; /* backing dev writeback list */ union { struct hlist_head i_dentry; struct rcu_head i_rcu; }; atomic64_t i_version; atomic_t i_count; atomic_t i_dio_count; atomic_t i_writecount; #if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING) atomic_t i_readcount; /* struct files open RO */ #endif union { const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ void (*free_inode)(struct inode *); }; struct file_lock_context *i_flctx; struct address_space i_data; struct list_head i_devices; union { struct pipe_inode_info *i_pipe; struct block_device *i_bdev; struct cdev *i_cdev; char *i_link; unsigned i_dir_seq; }; __u32 i_generation; #ifdef CONFIG_FSNOTIFY __u32 i_fsnotify_mask; /* all events this inode cares about */ struct fsnotify_mark_connector __rcu *i_fsnotify_marks; #endif #ifdef CONFIG_FS_ENCRYPTION struct fscrypt_info *i_crypt_info; #endif #ifdef CONFIG_FS_VERITY struct fsverity_info *i_verity_info; #endif void *i_private; /* fs or device private pointer */ } __randomize_layout;
inode是文件系统管理的最小单位,其中包含文件访问的权限、属主、组、大小、生成时间、访问时间、最后修改时间等信息,也是连接文件与文件目录的桥梁。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 struct bus_type { const char *name; const char *dev_name; struct device *dev_root; const struct attribute_group **bus_groups; const struct attribute_group **dev_groups; const struct attribute_group **drv_groups; int (*match)(struct device *dev, struct device_driver *drv); int (*uevent)(struct device *dev, struct kobj_uevent_env *env); int (*probe)(struct device *dev); void (*sync_state)(struct device *dev); int (*remove)(struct device *dev); void (*shutdown)(struct device *dev); int (*online)(struct device *dev); int (*offline)(struct device *dev); int (*suspend)(struct device *dev, pm_message_t state); int (*resume)(struct device *dev); int (*num_vf)(struct device *dev); int (*dma_configure)(struct device *dev); const struct dev_pm_ops *pm; const struct iommu_ops *iommu_ops; struct subsys_private *p; struct lock_class_key lock_key; bool need_parent_lock; };
bus_type结构体是对总线的抽象,定义了总线通用的操作接口。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 struct device_driver { const char *name; struct bus_type *bus; struct module *owner; const char *mod_name; /* used for built-in modules */ bool suppress_bind_attrs; /* disables bind/unbind via sysfs */ enum probe_type probe_type; const struct of_device_id *of_match_table; const struct acpi_device_id *acpi_match_table; int (*probe) (struct device *dev); void (*sync_state)(struct device *dev); int (*remove) (struct device *dev); void (*shutdown) (struct device *dev); int (*suspend) (struct device *dev, pm_message_t state); int (*resume) (struct device *dev); const struct attribute_group **groups; const struct attribute_group **dev_groups; const struct dev_pm_ops *pm; void (*coredump) (struct device *dev); struct driver_private *p; }; struct device { struct kobject kobj; struct device *parent; struct device_private *p; const char *init_name; /* initial name of the device */ const struct device_type *type; struct bus_type *bus; /* type of bus device is on */ struct device_driver *driver; /* which driver has allocated this device */ void *platform_data; /* Platform specific data, device core doesn't touch it */ void *driver_data; /* Driver data, set and get with dev_set_drvdata/dev_get_drvdata */ #ifdef CONFIG_PROVE_LOCKING struct mutex lockdep_mutex; #endif struct mutex mutex; /* mutex to synchronize calls to * its driver. */ struct dev_links_info links; struct dev_pm_info power; struct dev_pm_domain *pm_domain; #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN struct irq_domain *msi_domain; #endif #ifdef CONFIG_PINCTRL struct dev_pin_info *pins; #endif #ifdef CONFIG_GENERIC_MSI_IRQ struct list_head msi_list; #endif const struct dma_map_ops *dma_ops; u64 *dma_mask; /* dma mask (if dma'able device) */ u64 coherent_dma_mask;/* Like dma_mask, but for alloc_coherent mappings as not all hardware supports 64 bit addresses for consistent allocations such descriptors. */ u64 bus_dma_limit; /* upstream dma constraint */ unsigned long dma_pfn_offset; struct device_dma_parameters *dma_parms; struct list_head dma_pools; /* dma pools (if dma'ble) */ #ifdef CONFIG_DMA_DECLARE_COHERENT struct dma_coherent_mem *dma_mem; /* internal for coherent mem override */ #endif #ifdef CONFIG_DMA_CMA struct cma *cma_area; /* contiguous memory area for dma allocations */ #endif /* arch specific additions */ struct dev_archdata archdata; struct device_node *of_node; /* associated device tree node */ struct fwnode_handle *fwnode; /* firmware device node */ #ifdef CONFIG_NUMA int numa_node; /* NUMA node this device is close to */ #endif dev_t devt; /* dev_t, creates the sysfs "dev" */ u32 id; /* device instance */ spinlock_t devres_lock; struct list_head devres_head; struct class *class; const struct attribute_group **groups; /* optional groups */ void (*release)(struct device *dev); struct iommu_group *iommu_group; struct iommu_fwspec *iommu_fwspec; struct iommu_param *iommu_param; bool offline_disabled:1; bool offline:1; bool of_node_reused:1; bool state_synced:1; #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) bool dma_coherent:1; #endif };
这两个结构体分别是设备驱动和设备的抽象,它们都必须依附于一种总线,因此都包含bus_type的指针。
设备和驱动是分开注册的,每当某个设备或驱动注册后,都会通过其依附的总线所提供的match接口去寻找可与它配对的另一半,当匹配成功时,总线的prope接口就会被执行。
设备文件系统(devfs) 设备文件系统在linux文件系统中属于特殊的文件系统,是在linux2.4内核开始引进的,它的出现使得设备抽象成了文件系统中的文件,令驱动程序能够更好地对设备进行管理。内核中提供了如下接口用于创建和撤销对应的设备文件。
1 2 3 4 5 6 /* 创建设备目录 */ devfs_handle_t devfs_mk_dir(devfs_handle_t dir, const char *name, void *info); /* 创建设备文件 */ devfs_handle_t devfs_register(devfs_handle_t dir, const char *name, unsigned int flags, unsigned int major, unsigned int minor, umode_t mode, void *ops, void *info); /* 撤销设备文件 */ void devfs_unregister(devfs_handle_t de);
devfs在初期引进时对于设备管理是一个很大的启发,随着linux的进化与发展,对于机制与策略分离的设计思想而言,devfs显然不符合要求,同时其自身存在一些很难解决的问题,最后被抛弃,最终被udev所取代。
sysfs sysfs时在linux2.6内核引入的,与提供进程和状态信息的proc文件系统十分相似,sysfs提供了包括所有系统硬件的层级视图。
sysfs把连接在系统上的设备和总线组织成一个分级的文件,它们可以由用户空间存取,向用户空间导出内核数据结构以及它们的属性.sysfs的目的就是展示设备驱动模型中各组件的层级关系。
总线、驱动和设备最终都会落实到sysfs中的一个目录,通过追踪代码,可以认为以上总线、驱动和设备的数据结构都是kobject的派生类,即kobject可以看作是总线、驱动和设备的抽象基类。一个kobject对应sysfs中的一个目录。
总线、驱动和设备中的各attribute则落实到sysfs中的一个文件。attribute会伴随着show()和store()这两个函数,分别用于读写该attribute对应的sysfs文件。
如下定义了总线、驱动及设备相关的attribute结构体。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 struct attribute { const char *name; umode_t mode; #ifdef CONFIG_DEBUG_LOCK_ALLOC bool ignore_lockdep:1; struct lock_class_key *key; struct lock_class_key skey; #endif }; struct bus_attribute { struct attribute attr; ssize_t (*show)(struct bus_type *bus, char *buf); ssize_t (*store)(sturct bus_type *bus, const char *buf, size_t count); }; struct driver_attribute { struct attribute attr; ssize_t (*show)(struct device_driver *driver, char *buf); ssize_t (*store)(sturct device_driver *driver, const char *buf, size_t count); }; struct device_attribute { struct attribute attr; ssize_t (*show)(struct device *dev, struct device_attribute *attr, char *buf); ssize_t (*store)(sturct device *dev, struct device_attribute *attr, const char *buf, size_t count); };
事实上,sysfs源于bus_type、device_driver、device,而目录中的文件则来源于attribute。
转载请注明来源,欢迎对文章中的引用来源进行考证,欢迎指出任何有错误或不够清晰的表达。可以在下面评论区评论,也可以邮件至 yxhlfx@163.com