时间:2023-05-19 15:17
人气:
作者:admin
背景
最近在排查一个网络问题,ifconfig eth0 up 后,网卡link up比较慢。因此,分析了下从ifconfig up 到网络驱动的调用流程。这里顺便作个记录。
ifconfig eth0 up 调用的是busybox 的命令,因此从busybox 源码入手,逐步分析下调用流程。相关代码位于:networking/ifenslave.c
ifconfig eth0 up
ifconfig eth0 up 和 ifconfig eth0 down 分别对应busybox 的set_if_up()和set_if_down().
staticintset_if_down(char*ifname,intflags)
{
intres=set_if_flags(ifname,flags&~IFF_UP);
if(res)
bb_perror_msg("%s:can'tdown",ifname);
returnres;
}
staticintset_if_up(char*ifname,intflags)
{
intres=set_if_flags(ifname,flags|IFF_UP);
if(res)
bb_perror_msg("%s:can'tup",ifname);
returnres;
}
比如,当我们敲ifconfig eth0 down时,实则就是调用:
set_if_down("eth0",master_flags.ifr_flags);
set_if_flags()会将网卡名,up / down 标志位flags通过ioctl命令SIOCSIFFLAGS 传递给内核网卡驱动。
staticintset_if_flags(char*ifname,intflags)
{
structifreqifr;
ifr.ifr_flags=flags;
returnset_ifrname_and_do_ioctl(SIOCSIFFLAGS,&ifr,ifname);
}
dev_ifsioc
接着深入到内核代码中,看下SIOCSIFFLAGS命令在哪里实现。代码位于kernel/net/core/dev_ioctl.c。
staticintdev_ifsioc(structnet*net,structifreq*ifr,unsignedintcmd)
{
interr;
structnet_device*dev=__dev_get_by_name(net,ifr->ifr_name);
conststructnet_device_ops*ops;
if(!dev)
return-ENODEV;
ops=dev->netdev_ops;
switch(cmd){
caseSIOCSIFFLAGS:/*Setinterfaceflags*/
returndev_change_flags(dev,ifr->ifr_flags);
caseSIOCSIFMETRIC:/*Setthemetricontheinterface
(currentlyunused)*/
return-EOPNOTSUPP;
...................
}
returnerr;
}
dev_ifsioc()会调用__dev_get_by_name()根据 网卡名遍历 net链表,如果匹配到则返回net_device结构体指针。接着,SIOCSIFFLAGS会调用到dev_change_flags(),最后调用到__dev_change_flags()。
dev_change_flags
intdev_change_flags(structnet_device*dev,unsignedintflags)
{
intret;
unsignedintchanges,old_flags=dev->flags,old_gflags=dev->gflags;
ret=__dev_change_flags(dev,flags);
if(ret< 0)
return ret;
changes = (old_flags ^ dev->flags)|(old_gflags^dev->gflags);
__dev_notify_flags(dev,old_flags,changes);
returnret;
}
int__dev_change_flags(structnet_device*dev,unsignedintflags)
{
unsignedintold_flags=dev->flags;
intret;
ASSERT_RTNL();
/*
*Settheflagsonourdevice.
*/
dev->flags=(flags&(IFF_DEBUG|IFF_NOTRAILERS|IFF_NOARP|
IFF_DYNAMIC|IFF_MULTICAST|IFF_PORTSEL|
IFF_AUTOMEDIA))|
(dev->flags&(IFF_UP|IFF_VOLATILE|IFF_PROMISC|
IFF_ALLMULTI));
/*
*Loadinthecorrectmulticastlistnowtheflagshavechanged.
*/
if((old_flags^flags)&IFF_MULTICAST)
dev_change_rx_flags(dev,IFF_MULTICAST);
dev_set_rx_mode(dev);
/*
*Havewedownedtheinterface.WehandleIFF_UPourselves
*accordingtouserattemptstosetit,ratherthanblindly
*settingit.
*/
ret=0;
/*两个标识有一个是IFF_UP*/
if((old_flags^flags)&IFF_UP)
ret=((old_flags&IFF_UP)?__dev_close:__dev_open)(dev);//通过flags判断调用__dev_close还是__dev_open
if((flags^dev->gflags)&IFF_PROMISC){
intinc=(flags&IFF_PROMISC)?1:-1;
unsignedintold_flags=dev->flags;
dev->gflags^=IFF_PROMISC;
if(__dev_set_promiscuity(dev,inc,false)>=0)
if(dev->flags!=old_flags)
dev_set_rx_mode(dev);
}
/*NOTE:orderofsynchronizationofIFF_PROMISCandIFF_ALLMULTI
isimportant.Some(broken)driverssetIFF_PROMISC,when
IFF_ALLMULTIisrequestednotaskingusandnotreporting.
*/
if((flags^dev->gflags)&IFF_ALLMULTI){
intinc=(flags&IFF_ALLMULTI)?1:-1;
dev->gflags^=IFF_ALLMULTI;
__dev_set_allmulti(dev,inc,false);
}
returnret;
}
在__dev_change_flags(dev, flags)函数中,先比较新旧flags的IFF_UP位是否发生了变化;如果发生了变化,再根据旧flags中IFF_UP是否置位来决定调用__dev_close()(原来是up)还是__dev_open()(原来是down),从而开关eth0。
__dev_close
__dev_close中会将当前的net_device加入到等待设备关闭列表中。
staticint__dev_close(structnet_device*dev)
{
intretval;
LIST_HEAD(single);
list_add(&dev->close_list,&single);
retval=__dev_close_many(&single);
list_del(&single);
returnretval;
}
__dev_close_many
__dev_close_many通知设备正在关闭,等待未发送完的数据发送完,最后清除开启标记。
staticint__dev_close_many(structlist_head*head)
{
structnet_device*dev;
ASSERT_RTNL();
might_sleep();
list_for_each_entry(dev,head,close_list){
/*Temporarilydisablenetpolluntiltheinterfaceisdown*/
/*禁用netpoll*/
netpoll_poll_disable(dev);
/*通知设备正在关闭*/
call_netdevice_notifiers(NETDEV_GOING_DOWN,dev);
/*清除start标志位*/
clear_bit(__LINK_STATE_START,&dev->state);
/*Synchronizetoscheduledpoll.Wecannottouchpolllist,it
*canbeevenondifferentcpu.Sojustclearnetif_running().
*
*dev->stop()willinvokenapi_disable()onallofit's
*napi_structinstancesonthisdevice.
*/
smp_mb__after_atomic();/*Commitnetif_running().*/
}
/*未发送完的数据发送完*/
dev_deactivate_many(head);
list_for_each_entry(dev,head,close_list){
conststructnet_device_ops*ops=dev->netdev_ops;
/*
*Callthedevicespecificclose.Thiscannotfail.
*OnlyifdeviceisUP
*
*WeallowittobecalledevenafteraDETACHhot-plug
*event.
*/
/*调用设备关闭操作*/
if(ops->ndo_stop)
ops->ndo_stop(dev);
/*标记设备关闭*/
dev->flags&=~IFF_UP;
/*启用netpoll*/
netpoll_poll_enable(dev);
}
return0;
}
ndo_stop
ndo_stop是关闭网卡时调用的回调,不同的网卡驱动会注册各自的关闭函数。我们以海思的网卡驱动为例,分析下ndo_stop函数的实现。代码位于kernel/drivers/net/ethernet/hisilicon/hns/hns_enet.c。
hns_nic_net_stop
/*
 * Stop callback of the hns driver: delegates to hns_nic_net_down() and
 * always returns 0.
 */
static int hns_nic_net_stop(struct net_device *ndev)
{
	hns_nic_net_down(ndev);

	return 0;
}
hns_nic_net_down
staticvoidhns_nic_net_down(structnet_device*ndev)
{
inti;
structhnae_ae_ops*ops;
structhns_nic_priv*priv=netdev_priv(ndev);
if(test_and_set_bit(NIC_STATE_DOWN,&priv->state))
return;
(void)del_timer_sync(&priv->service_timer);
netif_tx_stop_all_queues(ndev);
netif_carrier_off(ndev);
netif_tx_disable(ndev);
priv->link=0;
if(priv->phy)
phy_stop(priv->phy);
ops=priv->ae_handle->dev->ops;
if(ops->stop)
ops->stop(priv->ae_handle);
netif_tx_stop_all_queues(ndev);
for(i=priv->ae_handle->q_num-1;i>=0;i--){
hns_nic_ring_close(ndev,i);
hns_nic_ring_close(ndev,i+priv->ae_handle->q_num);
/*cleantxbuffers*/
hns_nic_tx_clr_all_bufs(priv->ring_data+i);
}
}
hns_nic_net_down()中会调用netif_carrier_off()通知内核子系统网络断开。下面我们详细分析下netif_carrier_off()的实现。
netif_carrier_off()
voidnetif_carrier_off(structnet_device*dev)
{
/*设置网卡为载波断开状态即nocarrier状态,上行时软中断下半部读到该状态不会进行网卡收包*/
if(!test_and_set_bit(__LINK_STATE_NOCARRIER,&dev->state)){
if(dev->reg_state==NETREG_UNINITIALIZED)
return;
/*增加设备改变状态*/
atomic_inc(&dev->carrier_changes);
/*加入事件处理队列进行处理*/
linkwatch_fire_event(dev);
}
}
linkwatch_fire_event()
linkwatch_fire_event()函数将设备加入到事件队列,并且进行事件调度,调度中会根据是否为紧急事件做不同处理。
voidlinkwatch_fire_event(structnet_device*dev)
{
/*判断是否是紧急处理的事件*/
boolurgent=linkwatch_urgent_event(dev);
/*判断是否是紧急处理的事件*/
if(!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING,&dev->state)){
/*添加事件到事件列表*/
linkwatch_add_event(dev);
}elseif(!urgent)
/*设备以前已经设置了pending标记,不是紧急事件,直接返回*/
return;
/*事件调度*/
linkwatch_schedule_work(urgent);
}
linkwatch_urgent_event()
linkwatch_urgent_event()判断事件是否需要紧急处理。
staticboollinkwatch_urgent_event(structnet_device*dev)
{
/*设备未运行,非紧急*/
if(!netif_running(dev))
returnfalse;
/*设备的索引号与连接索引号不等,紧急*/
if(dev->ifindex!=dev_get_iflink(dev))
returntrue;
/*设备作为teamport,紧急*/
if(dev->priv_flags&IFF_TEAM_PORT)
returntrue;
/*连接与否&&发送队列排队规则改变与否*/
returnnetif_carrier_ok(dev)&&qdisc_tx_changing(dev);
}
linkwatch_add_event()
linkwatch_add_event()将设备加入到事件处理链表。
staticvoidlinkwatch_add_event(structnet_device*dev)
{
unsignedlongflags;
spin_lock_irqsave(&lweventlist_lock,flags);
/*若未添加,则添加设备到事件列表*/
if(list_empty(&dev->link_watch_list)){
list_add_tail(&dev->link_watch_list,&lweventlist);
dev_hold(dev);
}
spin_unlock_irqrestore(&lweventlist_lock,flags);
}
linkwatch_schedule_work()
linkwatch_schedule_work()对事件处理进行调度,紧急事件立即执行,非紧急事件延后执行。
/*
 * Schedule linkwatch_work.
 *
 * @urgent: non-zero to request immediate execution.
 *
 * Returns early when LW_URGENT is already set. Non-urgent requests are
 * queued with the delay remaining until linkwatch_nextevent, reset to 0
 * if that delay exceeds HZ.
 */
static void linkwatch_schedule_work(int urgent)
{
	unsigned long delay = linkwatch_nextevent - jiffies;

	/* An urgent run is already flagged: nothing to do. */
	if (test_bit(LW_URGENT, &linkwatch_flags))
		return;

	/* Urgent scheduling requested. */
	if (urgent) {
		/* The flag got set in the meantime: nothing to do. */
		if (test_and_set_bit(LW_URGENT, &linkwatch_flags))
			return;
		/* We set the flag ourselves: run without delay. */
		delay = 0;
	}

	/* A delay of more than HZ jiffies is treated as "run now". */
	if (delay > HZ)
		delay = 0;

	/* Urgent flag set: (re)arm the work for immediate execution. */
	if (test_bit(LW_URGENT, &linkwatch_flags))
		mod_delayed_work(system_wq, &linkwatch_work, 0);
	else
		/* Not urgent: queue the work after the computed delay. */
		schedule_delayed_work(&linkwatch_work, delay);
}
__linkwatch_run_queue()
__linkwatch_run_queue()完成对事件调度队列中设备的处理。
staticvoid__linkwatch_run_queue(inturgent_only)
{
structnet_device*dev;
LIST_HEAD(wrk);
/*
*Limitthenumberoflinkwatcheventstoone
*persecondsothatarunawaydriverdoesnot
*causeastormofmessagesonthenetlink
*socket.Thislimitdoesnotapplytoupevents
*whilethedeviceqdiscisdown.
*/
/*已达到调度时间*/
if(!urgent_only)
linkwatch_nextevent=jiffies+HZ;
/*Limitwrap-aroundeffectondelay.*/
/*
未到达调度时间,并且下一次调度在当前时间的1s以后
那么设置调度时间是当前时间
*/
elseif(time_after(linkwatch_nextevent,jiffies+HZ))
linkwatch_nextevent=jiffies;
/*清除紧急标识*/
clear_bit(LW_URGENT,&linkwatch_flags);
spin_lock_irq(&lweventlist_lock);
list_splice_init(&lweventlist,&wrk);
/*遍历链表*/
while(!list_empty(&wrk)){
/*获取设备*/
dev=list_first_entry(&wrk,structnet_device,link_watch_list);
/*从链表移除设备*/
list_del_init(&dev->link_watch_list);
/*未到达调度时间&&不需要紧急处理*/
if(urgent_only&&!linkwatch_urgent_event(dev)){
/*添加到链表尾部*/
list_add_tail(&dev->link_watch_list,&lweventlist);
/*继续处理*/
continue;
}
spin_unlock_irq(&lweventlist_lock);
/*处理设备*/
linkwatch_do_dev(dev);
spin_lock_irq(&lweventlist_lock);
}
/*链表有未处理事件,则以非紧急状态调度队列*/
if(!list_empty(&lweventlist))
linkwatch_schedule_work(0);
spin_unlock_irq(&lweventlist_lock);
}
linkwatch_do_dev()
linkwatch_do_dev()完成对某个设备的状态改变处理。
staticvoidlinkwatch_do_dev(structnet_device*dev)
{
/*
*Makesuretheabovereadiscompletesinceitcanbe
*rewrittenassoonasweclearthebitbelow.
*/
smp_mb__before_atomic();
/*Weareabouttohandlethisdevice,
*soneweventscanbeaccepted
*/
/*清除pending标记*/
clear_bit(__LINK_STATE_LINKWATCH_PENDING,&dev->state);
rfc2863_policy(dev);
/*如果设备启动状态*/
if(dev->flags&IFF_UP){
/*链路连接*/
if(netif_carrier_ok(dev))
/*启用排队规则*/
dev_activate(dev);
else
/*关闭排队规则*/
dev_deactivate(dev);
/*设备状态改变处理,执行netdev_chain上设备状态变更回调*/
netdev_state_change(dev);
}
dev_put(dev);
}
phy_stop()
最后,hns_nic_net_down()中会调用phy_stop()将网卡link down。
voidphy_stop(structphy_device*phydev)
{
mutex_lock(&phydev->lock);
if(PHY_HALTED==phydev->state)
gotoout_unlock;
if(phy_interrupt_is_valid(phydev)){
/*DisablePHYInterrupts*/
phy_config_interrupt(phydev,PHY_INTERRUPT_DISABLED);
/*Clearanypendinginterrupts*/
phy_clear_interrupt(phydev);
}
phydev->state=PHY_HALTED;
out_unlock:
mutex_unlock(&phydev->lock);
/*Cannotcallflush_scheduled_work()hereasdesiredbecause
*ofrtnl_lock(),butPHY_HALTEDshallguaranteephy_change()
*willnotreenableinterrupts.
*/
}
phy_stop()将phydev->state设置为PHY_HALTED,将网卡关闭。
__dev_open
__dev_open为设备启用核心函数,该函数打开eth0,设置启用标记,并且设置接收模式,排队规则等。
staticint__dev_open(structnet_device*dev)
{
conststructnet_device_ops*ops=dev->netdev_ops;
intret;
ASSERT_RTNL();
/*设备不可用*/
if(!netif_device_present(dev))
return-ENODEV;
/*Blocknetpollfromtryingtodoanyrxpathservicing.
*Ifwedon'tdothisthereisachancendo_poll_controller
*orndo_pollmayberunningwhileweopenthedevice
*/
/*禁用netpoll*/
netpoll_poll_disable(dev);
/*设备打开前通知*/
ret=call_netdevice_notifiers(NETDEV_PRE_UP,dev);
ret=notifier_to_errno(ret);
if(ret)
returnret;
/*设置设备打开标记,设备将设置IFF_UP标志位*/
set_bit(__LINK_STATE_START,&dev->state);
/*校验地址*/
if(ops->ndo_validate_addr)
ret=ops->ndo_validate_addr(dev);
/*执行打开*/
if(!ret&&ops->ndo_open)
ret=ops->ndo_open(dev);
/*启用netpoll*/
netpoll_poll_enable(dev);
/*失败,清除打开标记*/
if(ret)
clear_bit(__LINK_STATE_START,&dev->state);
/*设备打开操作*/
else{
/*设置打开标记*/
dev->flags|=IFF_UP;
/*设置接收模式*/
dev_set_rx_mode(dev);
/*初始化排队规则*/
dev_activate(dev);
/*加入设备数据到熵池*/
add_device_randomness(dev->dev_addr,dev->addr_len);
}
returnret;
}
hns_nic_net_open()
我们以海思的网卡驱动为例,分析下ndo_open()函数的实现。代码位于kernel/drivers/net/ethernet/hisilicon/hns/hns_enet.c。
staticinthns_nic_net_open(structnet_device*ndev)
{
structhns_nic_priv*priv=netdev_priv(ndev);
structhnae_handle*h=priv->ae_handle;
intret;
if(test_bit(NIC_STATE_TESTING,&priv->state))
return-EBUSY;
priv->link=0;
netif_carrier_off(ndev);
/*设置txqueue的个数*/
ret=netif_set_real_num_tx_queues(ndev,h->q_num);
if(ret< 0) {
netdev_err(ndev, "netif_set_real_num_tx_queues fail, ret=%d!
",
ret);
return ret;
}
/*设置rx queue的个数*/
ret = netif_set_real_num_rx_queues(ndev, h->q_num);
if(ret< 0) {
netdev_err(ndev,
"netif_set_real_num_rx_queues fail, ret=%d!
", ret);
return ret;
}
/*启动网卡*/
ret = hns_nic_net_up(ndev);
if (ret) {
netdev_err(ndev,
"hns net up fail, ret=%d!
", ret);
return ret;
}
return 0;
}
hns_nic_net_up()
staticinthns_nic_net_up(structnet_device*ndev)
{
structhns_nic_priv*priv=netdev_priv(ndev);
structhnae_handle*h=priv->ae_handle;
inti,j,k;
intret;
/*初始化中断,并设置中断函数为hns_irq_handle,每个rx和txqueue都对应一个中断*/
ret=hns_nic_init_irq(priv);
if(ret!=0){
netdev_err(ndev,"hnsinitirqfailed!ret=%d
",ret);
returnret;
}
for(i=0;i< h->q_num*2;i++){
/*使能中断,使能napi*/
ret=hns_nic_ring_open(ndev,i);
if(ret)
gotoout_has_some_queues;
}
for(k=0;k< h->q_num;k++)
h->dev->ops->toggle_queue_status(h->qs[k],1);
/*设置mac地址*/
ret=h->dev->ops->set_mac_addr(h,ndev->dev_addr);
if(ret)
gotoout_set_mac_addr_err;
/*hns的start函数为null*/
ret=h->dev->ops->start?h->dev->ops->start(h):0;
if(ret)
gotoout_start_err;
if(priv->phy)
/*启动phy*/
phy_start(priv->phy);
clear_bit(NIC_STATE_DOWN,&priv->state);
/*修改time每一秒到期一次*/
(void)mod_timer(&priv->service_timer,jiffies+SERVICE_TIMER_HZ);
return0;
out_start_err:
netif_stop_queue(ndev);
out_set_mac_addr_err:
for(k=0;k< h->q_num;k++)
h->dev->ops->toggle_queue_status(h->qs[k],0);
out_has_some_queues:
for(j=i-1;j>=0;j--)
hns_nic_ring_close(ndev,j);
set_bit(NIC_STATE_DOWN,&priv->state);
returnret;
}
phy_start()
最后会调用到phy_start()启动网卡。
voidphy_start(structphy_device*phydev)
{
booldo_resume=false;
interr=0;
mutex_lock(&phydev->lock);
switch(phydev->state){
casePHY_STARTING:
phydev->state=PHY_PENDING;
break;
casePHY_READY:
phydev->state=PHY_UP;
break;
casePHY_HALTED:
/*makesureinterruptsarere-enabledforthePHY*/
err=phy_enable_interrupts(phydev);
if(err< 0)
break;
phydev->state=PHY_RESUMING;
do_resume=true;
break;
default:
break;
}
mutex_unlock(&phydev->lock);
/*ifphywassuspended,bringthephysicallinkupagain*/
if(do_resume)
phy_resume(phydev);
}
审核编辑:刘清
下一篇:求一种力传感器组合测试方案
从profibusDP转ModbusTCP,一网打尽转换技巧!