对于arp协议,内核默认的STALE状态超时时长为60秒钟。
struct neigh_table arp_tbl = {
.family = AF_INET,
.key_len = 4,
.protocol = cpu_to_be16(ETH_P_IP),
...
.id = "arp_cache",
.parms = {
...
.data = {
...
[NEIGH_VAR_GC_STALETIME] = 60 * HZ,
通过PROC文件gc_stale_time可查看和修改此时长(单位为秒),如下,将其修改为10分钟。
/ # cat /proc/sys/net/ipv4/neigh/eth0/gc_stale_time
60
/ # echo 600 > /proc/sys/net/ipv4/neigh/eth0/gc_stale_time
/ #
/ # cat /proc/sys/net/ipv4/neigh/eth0/gc_stale_time
600
内核中静态变量neigh_sysctl_template(其结构体类型为neigh_sysctl_table)定义了gc_stale_time的PROC文件信息。
static struct neigh_sysctl_table {
struct ctl_table_header *sysctl_header;
struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
.neigh_vars = {
...
NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
1. netlink配置接口
除了以上的PROC文件外,还可使用ip ntable命令查看和修改设备的邻居表参数。
# ip ntable show dev eth0
inet arp_cache
dev eth0
refcnt 12 reachable 28884 base_reachable 30000 retrans 1000
gc_stale 60000 delay_probe 5000 queue 31
app_probes 0 ucast_probes 3 mcast_probes 3
anycast_delay 1000 proxy_delay 800 proxy_queue 64 locktime 1000
与PROC文件不同,这里显示的gc_stale时间单位为毫秒。如下将设备eth0的邻居表参数gc_stale_time修改为10分钟。
# ip ntable change name arp_cache dev eth0 gc_stale 600000
内核函数neigh_init为RTM_SETNEIGHTBL消息注册了处理函数neightbl_set,由后者负责以上ip ntable change命令的处理。
static int __init neigh_init(void)
{
...
rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);
如下为neightbl_set的实现,函数nla_get_msecs读取IP命令行设置的gc_stale的毫秒值参数。
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack)
{
struct neigh_table *tbl;
struct nlattr *tb[NDTA_MAX+1];
if (tb[NDTA_PARMS]) {
struct neigh_parms *p;
p = lookup_neigh_parms(tbl, net, ifindex);
...
for (i = 1; i <= NDTPA_MAX; i++) {
if (tbp[i] == NULL) continue;
switch (i) {
...
case NDTPA_GC_STALETIME:
NEIGH_VAR_SET(p, GC_STALETIME, nla_get_msecs(tbp[i]));
break;
对于arp协议,宏NEIGH_VAR_SET将修改由lookup_neigh_parms查找到的neigh_parms结构的data数组(命令中指定了设备时,为该设备对应的邻居参数;未指定设备时,为全局变量arp_tbl的成员parms),具体为以NEIGH_VAR_GC_STALETIME为索引的成员的值。函数nla_get_msecs将命令行输入的毫秒值转换为内核使用的jiffies值。
/*
 * NEIGH_VAR_SET - set one neighbour parameter by its short attribute name.
 * Pastes the NEIGH_VAR_ prefix onto attr to form the data[] index, e.g.
 * NEIGH_VAR_SET(p, GC_STALETIME, v) passes NEIGH_VAR_GC_STALETIME.
 */
#define NEIGH_VAR_SET(p, attr, val) neigh_var_set(p, NEIGH_VAR_ ## attr, val)
/*
 * neigh_var_set - store a value into a neigh_parms data slot.
 * @p:     neighbour parameter block to update
 * @index: slot in p->data (a NEIGH_VAR_* constant)
 * @val:   new value; for time-based parameters such as GC_STALETIME the
 *         caller shown in neightbl_set passes a jiffies value
 */
static inline void neigh_var_set(struct neigh_parms *p, int index, int val)
{
/*
 * Flag the slot in the data_state bitmap before writing the value.
 * NOTE(review): presumably this marks the parameter as explicitly
 * configured - confirm against the users of data_state.
 */
set_bit(index, p->data_state);
p->data[index] = val;
}
命令ip ntable show所显示的参数值由内核函数neightbl_fill_parms填充,其中gc_stale的值由nla_put_msecs填充。
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
...
if ((parms->dev &&
...
nla_put_msecs(skb, NDTPA_GC_STALETIME,
NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
如下函数nla_put_msecs,其需要将内核中以jiffies表示的gc_stale值转换为ip ntable show显示时使用的毫秒值,通过jiffies_to_msecs实现。
/*
 * nla_put_msecs - add a jiffies-based time value to a netlink message,
 * expressed in milliseconds.
 * @skb:      socket buffer the attribute is appended to
 * @attrtype: attribute type
 * @njiffies: time value in jiffies
 * @padattr:  attribute type passed through to nla_put_64bit for padding
 *
 * Returns whatever nla_put_64bit() returns.
 */
static inline int nla_put_msecs(struct sk_buff *skb, int attrtype,
unsigned long njiffies, int padattr)
{
/* Convert to milliseconds and widen to the 64-bit payload that is sent. */
u64 tmp = jiffies_to_msecs(njiffies);
return nla_put_64bit(skb, attrtype, sizeof(u64), &tmp, padattr);
}
2. STALE状态表项处理
在邻居表初始化时,创建了一个延迟工作项,注册的处理函数为neigh_periodic_work。
void neigh_table_init(int index, struct neigh_table *tbl)
{
tbl->parms.reachable_time =
neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
...
INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
tbl->parms.reachable_time);
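对于arp,BASE_REACHABLE_TIME初始化为30秒钟;如上所示,工作项首次执行的延迟reachable_time由neigh_rand_reach_time以该值为基准随机计算得到(介于基准值的1/2至3/2之间),并非固定的30秒钟。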
struct neigh_table arp_tbl = {
...
.parms = {
.tbl = &arp_tbl,
.reachable_time = 30 * HZ,
.data = {
[NEIGH_VAR_MCAST_PROBES] = 3,
[NEIGH_VAR_UCAST_PROBES] = 3,
[NEIGH_VAR_RETRANS_TIME] = 1 * HZ,
[NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ,
如下处理函数neigh_periodic_work,如果邻居项的引用计数为1,并且其状态为NUD_FAILED,或者自上次使用(n->used)起空闲的时间超过了gc_stale定义的时长,内核将释放此邻居项。
static void neigh_periodic_work(struct work_struct *work)
{
struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
struct neighbour *n;
struct neighbour __rcu **np;
struct neigh_hash_table *nht;
...
nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock));
...
for (i = 0 ; i < (1 << nht->hash_shift); i++) {
np = &nht->hash_buckets[i];
while ((n = rcu_dereference_protected(*np,
lockdep_is_held(&tbl->lock))) != NULL) {
...
if (refcount_read(&n->refcnt) == 1 &&
(state == NUD_FAILED ||
time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
*np = n->next;
neigh_mark_dead(n);
write_unlock(&n->lock);
neigh_cleanup_and_release(n);
continue;
}
内核版本 5.0