檔案IO流程
用戶行程read、write在高速緩沖塊上讀寫資料,高速緩沖塊和塊設備交換資料,
- 什么時機將磁盤塊資料讀到緩沖塊?
- 什么時機將緩沖塊資料刷到磁盤塊?

函式呼叫關系
- read/write(C庫函式,通過int 0x80軟中斷呼叫sys_read/sys_write)
- sys_read/sys_write
- block_read/block_write
- breada
- getblk
- sync_dev
- ll_rw_block
- getblk
- breada
- block_read/block_write
- sys_read/sys_write
sys_read與sys_write
代碼檔案:linux-0.11/fs/read_write.c
系統呼叫sys_read與sys_write是內核提供給用戶程式呼叫的IO介面,若IO設備是塊設備,底層分別呼叫block_read與block_write進行塊設備的讀寫,
sys_read
/*
 * sys_read - entry point of the read system call.
 *
 * Resolves descriptor `fd` to the current task's open-file entry and
 * dispatches on the inode behind it: pipe, character device, block device,
 * or directory / regular file.
 *
 * Returns the result of the selected reader; 0 when `count` is 0 or nothing
 * is left to read; -EINVAL for an out-of-range or unopened descriptor, a
 * negative `count`, or an unsupported inode mode; -EIO for a pipe whose
 * f_mode does not have bit 0 set.
 */
int sys_read(unsigned int fd,char * buf,int count)
{
	struct file *filp;
	struct m_inode *node;

	/* Argument validation, in the same order as a short-circuit OR chain. */
	if (fd >= NR_OPEN)
		return -EINVAL;
	if (count < 0)
		return -EINVAL;
	filp = current->filp[fd];
	if (!filp)
		return -EINVAL;
	if (count == 0)
		return 0;

	verify_area(buf, count);
	node = filp->f_inode;

	/* Pipes: only allowed when bit 0 of f_mode is set. */
	if (node->i_pipe) {
		if (filp->f_mode & 1)
			return read_pipe(node, buf, count);
		return -EIO;
	}

	/* Device files: i_zone[0] is handed to the driver layer as the device. */
	if (S_ISCHR(node->i_mode))
		return rw_char(READ, node->i_zone[0], buf, count, &filp->f_pos);
	if (S_ISBLK(node->i_mode))
		return block_read(node->i_zone[0], &filp->f_pos, buf, count);

	/* Anything that is neither a directory nor a regular file is rejected. */
	if (!S_ISDIR(node->i_mode) && !S_ISREG(node->i_mode)) {
		printk("(Read)inode->i_mode=%06o\n\r", node->i_mode);
		return -EINVAL;
	}

	/* Clamp the request so it does not run past the end of the file. */
	if (count + filp->f_pos > node->i_size)
		count = node->i_size - filp->f_pos;
	if (count <= 0)
		return 0;
	return file_read(node, filp, buf, count);
}
sys_write
/*
 * sys_write - entry point of the write system call.
 *
 * Resolves descriptor `fd` to the current task's open-file entry and
 * dispatches on the inode behind it: pipe, character device, block device
 * (written through block_write), or regular file.
 *
 * Returns the result of the selected writer; 0 when `count` is 0; -EINVAL
 * for an out-of-range or unopened descriptor, a negative `count`, or an
 * unsupported inode mode; -EIO for a pipe whose f_mode does not have bit 1
 * set.
 */
int sys_write(unsigned int fd,char * buf,int count)
{
	struct file *filp;
	struct m_inode *node;

	/* Argument validation, in the same order as a short-circuit OR chain. */
	if (fd >= NR_OPEN)
		return -EINVAL;
	if (count < 0)
		return -EINVAL;
	filp = current->filp[fd];
	if (!filp)
		return -EINVAL;
	if (count == 0)
		return 0;

	node = filp->f_inode;

	/* Pipes: only allowed when bit 1 (value 2) of f_mode is set. */
	if (node->i_pipe) {
		if (filp->f_mode & 2)
			return write_pipe(node, buf, count);
		return -EIO;
	}

	/* Device files: i_zone[0] is handed to the driver layer as the device. */
	if (S_ISCHR(node->i_mode))
		return rw_char(WRITE, node->i_zone[0], buf, count, &filp->f_pos);
	if (S_ISBLK(node->i_mode))
		return block_write(node->i_zone[0], &filp->f_pos, buf, count);

	if (S_ISREG(node->i_mode))
		return file_write(node, filp, buf, count);

	printk("(Write)inode->i_mode=%06o\n\r", node->i_mode);
	return -EINVAL;
}
block_read與block_write
block_read與block_write負責塊設備的讀寫。它們底層呼叫breada(block_write在整塊覆寫時直接呼叫getblk)獲取緩沖塊,然后在緩沖塊上讀寫資料。
block_write
代碼檔案:linux-0.11/fs/block_dev.c
/*
 * block_write - copy `count` bytes from the user buffer `buf` to block
 * device `dev`, starting at byte offset *pos.
 *
 * The data is written into cache buffers one block at a time; each touched
 * buffer is marked dirty and released. *pos is advanced by the number of
 * bytes consumed.
 *
 * Returns the number of bytes written. If a buffer cannot be obtained, the
 * count written so far is returned, or -EIO when nothing was written.
 */
int block_write(int dev, long * pos, char * buf, int count)
{
	int blk = *pos >> BLOCK_SIZE_BITS;     /* block number containing *pos */
	int in_blk = *pos & (BLOCK_SIZE-1);    /* byte offset inside that block */
	int done = 0;

	while (count > 0) {
		struct buffer_head *bh;
		register char *dst;
		int span = BLOCK_SIZE - in_blk;    /* bytes up to the end of this block */
		int i;

		if (span > count)
			span = count;

		/*
		 * A partially written block is fetched with breada (which is also
		 * asked for the next two blocks); a block that is overwritten in
		 * full is only claimed with getblk.
		 * NOTE(review): presumably the partial case must read the block so
		 * that the untouched bytes hold the current disk contents - confirm.
		 */
		if (span != BLOCK_SIZE)
			bh = breada(dev, blk, blk+1, blk+2, -1);
		else
			bh = getblk(dev, blk);
		blk++;

		if (!bh) {
			if (done)
				return done;
			return -EIO;
		}

		dst = bh->b_data + in_blk;
		in_blk = 0;                        /* later blocks start at offset 0 */
		*pos += span;
		done += span;
		count -= span;

		/* Fetch the bytes one at a time from user space. */
		for (i = 0; i < span; i++)
			*dst++ = get_fs_byte(buf++);

		/* Mark the buffer modified, then drop our reference to it. */
		bh->b_dirt = 1;
		brelse(bh);
	}
	return done;
}
block_read
代碼檔案:linux-0.11/fs/block_dev.c
/*
 * block_read - copy `count` bytes from block device `dev`, starting at byte
 * offset *pos, into the user buffer `buf`.
 *
 * Each block is obtained through breada (which is also asked for the next
 * two blocks), copied out, and released. *pos is advanced by the number of
 * bytes delivered.
 *
 * Returns the number of bytes read. If a buffer cannot be obtained, the
 * count read so far is returned, or -EIO when nothing was read.
 */
int block_read(int dev, unsigned long * pos, char * buf, int count)
{
	int blk = *pos >> BLOCK_SIZE_BITS;     /* block number containing *pos */
	int in_blk = *pos & (BLOCK_SIZE-1);    /* byte offset inside that block */
	int done = 0;

	while (count > 0) {
		struct buffer_head *bh;
		register char *src;
		int span = BLOCK_SIZE - in_blk;    /* bytes up to the end of this block */
		int i;

		if (span > count)
			span = count;

		bh = breada(dev, blk, blk+1, blk+2, -1);
		if (!bh) {
			if (done)
				return done;
			return -EIO;
		}
		blk++;

		src = bh->b_data + in_blk;
		in_blk = 0;                        /* later blocks start at offset 0 */
		*pos += span;
		done += span;
		count -= span;

		/* Store the bytes one at a time into user space. */
		for (i = 0; i < span; i++)
			put_fs_byte(*src++, buf++);

		/* Done with this buffer: drop our reference to it. */
		brelse(bh);
	}
	return done;
}
bread
代碼檔案:linux-0.11/fs/buffer.c
- bread:塊讀取函式
- breada:塊提前預讀函式
- bread_page:頁塊讀取函式,一個記憶體頁通常為4k大小、磁盤塊通常為1k大小
bread、breada、bread_page三者功能相似,用法不同,三者均會呼叫getblk獲取緩沖塊,并呼叫ll_rw_block讀資料到緩沖塊,
/*
 * bread - return a buffer holding valid data for block `block` of `dev`.
 *
 * Obtains the buffer with getblk(); if it is not already marked up to date,
 * a READ is issued through ll_rw_block() and the caller waits on the buffer.
 *
 * Returns the buffer head on success. If the buffer is still not up to date
 * after the wait, it is released and NULL is returned. Panics when getblk()
 * yields NULL.
 */
struct buffer_head * bread(int dev,int block)
{
	struct buffer_head *buf_hd = getblk(dev, block);

	if (!buf_hd)
		panic("bread: getblk returned NULL\n");

	if (!buf_hd->b_uptodate) {
		/* Contents not valid yet: request a read and wait for it. */
		ll_rw_block(READ, buf_hd);
		wait_on_buffer(buf_hd);
		if (!buf_hd->b_uptodate) {
			brelse(buf_hd);
			return NULL;
		}
	}
	return buf_hd;
}
getblk
代碼檔案:linux-0.11/fs/buffer.c
bread系列函式通過getblk獲取緩沖塊,在必要的時候,會呼叫sync_dev函式將臟緩沖塊資料寫入磁盤,
getblk代碼邏輯復雜,需要對資源可用性進行復雜的檢查,資源不可用時,需要睡眠,被喚醒之后又要進行一些檢查判斷資源是否可用,復雜邏輯可以暫時不考慮,避免陷入代碼細節,
僅考慮getblk獲取空閑塊之后的代碼邏輯,getblk獲取可用緩沖塊后,若緩沖塊dirt位為1,表示緩沖塊有資料未同步到磁盤,getblk將呼叫sync_dev將資料同步到磁盤,然后占用該緩沖塊,
/*
 * getblk - obtain a buffer head for block `block` of device `dev`.
 *
 * If get_hash_table() already knows the block, that buffer is returned.
 * Otherwise an unreferenced buffer (b_count == 0) is chosen from the free
 * list, flushed via sync_dev() for as long as it is dirty, and re-bound to
 * (dev, block) with b_count = 1, b_dirt = 0 and b_uptodate = 0.
 * After every wait the buffer is re-checked; if it was taken in the
 * meantime the whole procedure restarts at `repeat`.
 */
struct buffer_head * getblk(int dev,int block)
{
struct buffer_head * tmp, * bh;
repeat:
// Look the block up with get_hash_table(); a non-NULL result is returned as-is.
if ((bh = get_hash_table(dev,block)))
return bh;
// bh is NULL from here on. Walk the free list, starting at free_list,
// until the walk comes back around to free_list.
tmp = free_list;
do {
// Skip buffers that are still referenced (non-zero reference count).
if (tmp->b_count)
continue;
// Keep the candidate with the lowest BADNESS; a BADNESS of 0 ends the search.
// NOTE(review): BADNESS is defined outside this view - presumably it ranks
// the buffer's dirty/locked state; confirm.
if (!bh || BADNESS(tmp)<BADNESS(bh)) {
bh = tmp;
if (!BADNESS(tmp))
break;
}
/* and repeat until we find something good */
} while ((tmp = tmp->b_next_free) != free_list);
// No unreferenced buffer was found: sleep on buffer_wait, then start over.
if (!bh) {
sleep_on(&buffer_wait); // sleep on the buffer_wait queue
goto repeat;
}
// Wait on the chosen buffer; if it got referenced meanwhile, start over.
wait_on_buffer(bh);
if (bh->b_count)
goto repeat;
// While the chosen buffer is dirty, call sync_dev() for its device and wait
// on the buffer again; restart if somebody took a reference in between.
while (bh->b_dirt) {
sync_dev(bh->b_dev);
wait_on_buffer(bh);
if (bh->b_count)
goto repeat;
}
/* NOTE!! While we slept waiting for this block, somebody else might */
/* already have added "this" block to the cache. check it */
if (find_buffer(dev,block))
goto repeat;
/* OK, FINALLY we know that this buffer is the only one of it's kind, */
/* and that it's unused (b_count=0), unlocked (b_lock=0), and clean */
// Claim the buffer: one reference, not dirty, contents not yet valid.
bh->b_count=1;
bh->b_dirt=0;
bh->b_uptodate=0;
// Take the buffer out of its current queues, bind it to the requested
// device and block number, then insert it again so that it is filed
// under the new (dev, block) pair.
remove_from_queues(bh);
bh->b_dev=dev;
bh->b_blocknr=block; // bind to the requested block number
insert_into_queues(bh);
return bh;
}
sync_dev
代碼檔案:linux-0.11/fs/buffer.c
sync_dev呼叫ll_rw_block,將指定設備上所有臟緩沖塊的資料寫入磁盤。getblk在分配緩沖塊時,若選中的緩沖塊含有臟資料(dirt位為1),會先呼叫sync_dev將資料寫入磁盤,然后才占用該緩沖塊。
/*
 * sync_dev - queue write requests for every dirty buffer of device `dev`.
 *
 * Scans all NR_BUFFERS buffer heads, starting at start_buffer, twice. In
 * each pass a buffer belonging to `dev` is waited on and then, if it still
 * belongs to `dev` and is dirty, handed to ll_rw_block(WRITE, ...).
 *
 * NOTE(review): the two passes are identical as written here; presumably
 * some other work sits between them in the full source - confirm.
 *
 * Always returns 0.
 */
int sync_dev(int dev)
{
	int pass;

	for (pass = 0; pass < 2; pass++) {
		struct buffer_head *cur = start_buffer;
		int idx;

		for (idx = 0; idx < NR_BUFFERS; idx++, cur++) {
			if (cur->b_dev == dev) {
				wait_on_buffer(cur);
				/* Re-check after the wait: the buffer may have changed. */
				if (cur->b_dev == dev && cur->b_dirt)
					ll_rw_block(WRITE, cur);
			}
		}
	}
	return 0;
}
ll_rw_block
代碼檔案:linux-0.11/kernel/blk_drv/ll_rw_blk.c
將緩沖塊的資料寫入磁盤塊,或將磁盤塊資料讀入緩沖塊,底層通過設備請求佇列完成讀寫。
/*
 * ll_rw_block - submit a read or write of buffer `bh` to its block device.
 *
 * `rw` is passed through to make_request() unchanged (callers in this file
 * use READ and WRITE). The major number is taken from bh->b_dev; when it is
 * out of range or the device has no request function registered, a message
 * is printed and nothing is queued.
 */
void ll_rw_block(int rw, struct buffer_head * bh)
{
	unsigned int major = MAJOR(bh->b_dev);

	if (major < NR_BLK_DEV && blk_dev[major].request_fn) {
		/* Known device with a handler: add the request to its queue. */
		make_request(major, rw, bh);
		return;
	}
	printk("Trying to read nonexistent block-device\n\r");
}
設備中斷處理程式
代碼檔案:linux-0.11/kernel/blk_drv/hd.c
- 讀完成中斷處理程式
設備完成讀扇區資料后,發出讀中斷,讀中斷處理程式read_intr執行。若當前讀請求還有資料要讀,則繼續完成當前請求的資料讀取,因為一次讀請求可能讀取若干連續扇區,而每次中斷只傳送一個扇區的資料。完成一次讀請求的所有資料讀取之后,將呼叫do_hd_request處理下一個請求。
/*
 * read_intr - handler run when the disk signals that read data is ready.
 *
 * On a controller error the failure is recorded with bad_rw_intr() and the
 * queue is restarted. Otherwise one 512-byte chunk is pulled from HD_DATA
 * into the current request's buffer. While sectors remain, the handler
 * re-arms itself and returns; once the request is complete it is finished
 * with end_request(1) and the next request is started.
 */
static void read_intr(void)
{
	if (!win_result()) {
		/* 256 transfer units per chunk - presumably 16-bit words, since the
		 * buffer pointer advances by 512 bytes; confirm. */
		port_read(HD_DATA, CURRENT->buffer, 256);
		CURRENT->errors = 0;
		CURRENT->buffer += 512;
		CURRENT->sector += 1;
		CURRENT->nr_sectors -= 1;
		if (CURRENT->nr_sectors != 0) {
			/* More sectors pending: wait for the next interrupt. */
			do_hd = &read_intr;
			return;
		}
		end_request(1);
	} else {
		bad_rw_intr();
	}
	/* Either way, move on to whatever is at the head of the queue now. */
	do_hd_request();
}
- 寫完成中斷處理程式
與讀完成中斷處理程式的流程類似:若當前寫請求還有扇區未寫,則繼續寫下一個扇區;全部寫完后呼叫do_hd_request處理下一個請求。
/*
 * write_intr - handler run when the disk signals that a sector was written.
 *
 * On a controller error the failure is recorded with bad_rw_intr() and the
 * queue is restarted. Otherwise the remaining-sector count is decremented;
 * while sectors remain, the request is advanced by one sector, the handler
 * re-arms itself and the next 512-byte chunk is pushed to HD_DATA. Once the
 * request is complete it is finished with end_request(1) and the next
 * request is started.
 */
static void write_intr(void)
{
	if (win_result()) {
		bad_rw_intr();
	} else {
		CURRENT->nr_sectors -= 1;
		if (CURRENT->nr_sectors != 0) {
			CURRENT->sector += 1;
			CURRENT->buffer += 512;
			/* Re-arm before feeding the data, as the original order does. */
			do_hd = &write_intr;
			port_write(HD_DATA, CURRENT->buffer, 256);
			return;
		}
		end_request(1);
	}
	/* Error or completed request: handle the head of the queue. */
	do_hd_request();
}
- 處理讀寫佇列請求
處理設備請求佇列的讀寫請求,設備中斷處理程式不斷呼叫do_hd_request處理請求佇列,直到請求佇列為空,
/*
 * do_hd_request - start the hard-disk operation for the current request.
 *
 * Converts the request's sector number into sector/head/cylinder values,
 * handles a pending controller reset or recalibration first, and otherwise
 * issues a WIN_WRITE or WIN_READ command with the matching handler
 * (write_intr / read_intr). Panics on any other command value.
 */
void do_hd_request(void)
{
int i,r = 0;
unsigned int block,dev;
unsigned int sec,head,cyl;
unsigned int nsect;
// NOTE(review): INIT_REQUEST is a macro defined outside this view; presumably
// it validates CURRENT and provides the `repeat` label targeted by the gotos
// below - confirm.
INIT_REQUEST;
dev = MINOR(CURRENT->dev);
block = CURRENT->sector;
// Fail the request (end_request(0)) when the minor number is out of range or
// block+2 exceeds hd[dev].nr_sects, then retry with the next request.
if (dev >= 5*NR_HD || block+2 > hd[dev].nr_sects) {
end_request(0);
goto repeat;
}
// Add the start sector recorded in hd[dev] to get the sector on the drive.
block += hd[dev].start_sect;
// Turn the minor number into an index for hd_info
// (presumably five minors per physical drive - confirm).
dev /= 5;
// divl with EDX:EAX = 0:block, divisor hd_info[dev].sect:
// quotient -> block, remainder -> sec.
__asm__("divl %4":"=a" (block),"=d" (sec):"0" (block),"1" (0),
"r" (hd_info[dev].sect));
// divl with EDX:EAX = 0:block, divisor hd_info[dev].head:
// quotient -> cyl, remainder -> head.
__asm__("divl %4":"=a" (cyl),"=d" (head):"0" (block),"1" (0),
"r" (hd_info[dev].head));
// Sector numbers passed to hd_out are one higher than the remainder
// (presumably because the controller counts sectors from 1 - confirm).
sec++;
nsect = CURRENT->nr_sectors;
// A pending reset takes priority: clear the flag, schedule a recalibration,
// reset the drive and return without issuing the request.
if (reset) {
reset = 0;
recalibrate = 1;
reset_hd(CURRENT_DEV);
return;
}
// A pending recalibration: issue WIN_RESTORE with recal_intr as the handler
// and return without issuing the request.
if (recalibrate) {
recalibrate = 0;
hd_out(dev,hd_info[CURRENT_DEV].sect,0,0,0,
WIN_RESTORE,&recal_intr);
return;
}
if (CURRENT->cmd == WRITE) {
// Issue the write command, then poll HD_STATUS up to 3000 times until the
// DRQ_STAT bit is set.
hd_out(dev,nsect,sec,head,cyl,WIN_WRITE,&write_intr);
for(i=0 ; i<3000 && !(r=inb_p(HD_STATUS)&DRQ_STAT) ; i++)
/* nothing */ ;
// DRQ_STAT never showed up: record the failure and retry.
if (!r) {
bad_rw_intr();
goto repeat;
}
// Push the first 512-byte chunk; write_intr handles the following ones.
port_write(HD_DATA,CURRENT->buffer,256);
} else if (CURRENT->cmd == READ) {
// Issue the read command; read_intr collects the data.
hd_out(dev,nsect,sec,head,cyl,WIN_READ,&read_intr);
} else
panic("unknown hd-command");
}
轉載請註明出處,本文鏈接:https://www.uj5u.com/caozuo/453731.html
標籤:Linux
下一篇:Linux之export命令
