如下命令创建LCP接口对,VPP物理接口eth0,对应linux接口host-eth0。LCP插件创建了virtio类型的tap1接口,用于和host-eth0连通。

vpp# lcp create eth0 host-if host-eth0
vpp#
vpp#
vpp# show lcp
itf-pair: [0] eth0 tap1 host-eth0 10 type tap
vpp#
vpp#
vpp# show interface
              Name               Idx    State  MTU (L3/IP4/IP6/MPLS)     Counter          Count
eth0                              1     down         9000/0/0/0
tap1                              2      up       1920099631/0/0/0

如下定义节点linux-cp-xc-ip4,类型VLIB_NODE_TYPE_INTERNAL。节点属于ip4-unicast/ip4-multicast特性arc,用于处理linux发往VPP的流量。

/*
 * Registration of the "linux-cp-xc-ip4" graph node: an internal node with
 * u32-sized vector elements, traced through format_lcp_xc_trace and
 * declared as a sibling of "ip4-rewrite".
 */
VLIB_REGISTER_NODE (lcp_xc_ip4) = {
  .name = "linux-cp-xc-ip4",
  .type = VLIB_NODE_TYPE_INTERNAL,
  .vector_size = sizeof (u32),
  .format_trace = format_lcp_xc_trace,
  .sibling_of = "ip4-rewrite",
};

/* Attach node "linux-cp-xc-ip4" to the "ip4-unicast" feature arc. */
VNET_FEATURE_INIT (lcp_xc_ip4_ucast_node, static) = {
  .node_name = "linux-cp-xc-ip4",
  .arc_name = "ip4-unicast",
};
/* Attach node "linux-cp-xc-ip4" to the "ip4-multicast" feature arc. */
VNET_FEATURE_INIT (lcp_xc_ip4_mcast_node, static) = {
  .node_name = "linux-cp-xc-ip4",
  .arc_name = "ip4-multicast",
};

节点处理函数lcp_xc_ip4如下,实际处理由函数lcp_xc_inline完成。

/*
 * Node function of lcp_xc_ip4: hands the frame to lcp_xc_inline with the
 * address family fixed to AF_IP4 and returns whatever that call returns.
 */
VLIB_NODE_FN (lcp_xc_ip4)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
  return lcp_xc_inline (vm, node, frame, AF_IP4);
}

函数lcp_xc_inline用于处理由linux接收到的IPv4或者IPv6报文。根据参数af,选择对应的ip_lookup_main_t结构。

static_always_inline u32
lcp_xc_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame,
           ip_address_family_t af)
{
  u32 n_left_from, *from, *to_next, n_left_to_next;
  lcp_xc_next_t next_index;
  ip_lookup_main_t *lm;

  next_index = 0;
  n_left_from = frame->n_vectors;
  from = vlib_frame_vector_args (frame);

  if (AF_IP4 == af)
    lm = &ip4_main.lookup_main;
  else
    lm = &ip6_main.lookup_main;

开始时,并不能确定处理报文的下一个节点,假定下一个节点索引为0,vlib_get_next_frame函数获取此节点当前可接收的向量起始位置(to_next)和数量(n_left_to_next)。随后,如果报文的下一节点索引不是0,将进行确认和修复。

  while (n_left_from > 0)
  {
    vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
    while (n_left_from > 0 && n_left_to_next > 0)
    {
      const ethernet_header_t *eth;
      const lcp_itf_pair_t *lip;
      u32 next0, bi0, lipi, ai;
      vlib_buffer_t *b0;
      const ip_adjacency_t *adj;

      bi0 = to_next[0] = from[0];

      from += 1;
      to_next += 1;
      n_left_from -= 1;
      n_left_to_next -= 1;

根据接收到报文的接口索引(tap1索引2),找到LCP接口对,进而找到对应的VPP物理接口(lip_phy_sw_if_index为eth0),此接口为报文的出接口(VLIB_TX)。

      b0 = vlib_get_buffer (vm, bi0);

      lipi = lcp_itf_pair_find_by_host (vnet_buffer (b0)->sw_if_index[VLIB_RX]);
      lip = lcp_itf_pair_get (lipi);

      vnet_buffer (b0)->sw_if_index[VLIB_TX] = lip->lip_phy_sw_if_index;

根据报文的MAC层数据信息,和出接口,找到邻居索引,进而找到ip_adjacency_t结构。其中保存的下一个节点信息next0,为报文要发往的下一个节点(如果与next_index不相等,说明之前假设的有误)。

      vlib_buffer_advance (b0, -lip->lip_rewrite_len);
      eth = vlib_buffer_get_current (b0);

      ai = ADJ_INDEX_INVALID;
      if (!ethernet_address_cast (eth->dst_address))
        ai = lcp_adj_lkup ((u8 *) eth, lip->lip_rewrite_len,
                   vnet_buffer (b0)->sw_if_index[VLIB_TX]);
      if (ai == ADJ_INDEX_INVALID)
        ai = lip->lip_phy_adjs.adj_index[af];

      adj = adj_get (ai);
      vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ai;
      next0 = adj->rewrite_header.next_index;
      vnet_buffer (b0)->ip.save_rewrite_length = lip->lip_rewrite_len;

      if (PREDICT_FALSE (adj->rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
        vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
          vnet_buffer (b0)->sw_if_index[VLIB_TX], &next0, b0, adj->ia_cfg_index);

如果开启了trace命令,填充trace信息,包括VPP出接口索引和邻居索引。

      if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED))) {
          lcp_xc_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
          t->phy_sw_if_index = lip->lip_phy_sw_if_index;
          t->adj_index = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
      }

函数vlib_validate_buffer_enqueue_x1根据正确的下一跳节点next0,进行修正。

      vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
                       n_left_to_next, bi0, next0);
    }
    vlib_put_next_frame (vm, node, next_index, n_left_to_next);
  }

  return frame->n_vectors;

如果next0与next_index相等,表明while循环开始时使用的节点索引是正确的,不需要修正。否则(之前假设的下一节点有误,或者下一节点发生了改变),先将之前推测写入的报文位置归还(vlib_put_next_frame的剩余数量加1),随后根据正确的下一节点索引,获取其可用的向量起始位置和可用的数量。将报文索引bi0赋值到下一节点第一个可用位置,to_next指针前移1,可用的位置数量减少1。

/*
 * Fix up a speculative single-buffer enqueue.
 *
 * The caller has already written buffer index bi0 into the frame of
 * next_index (advancing to_next and decrementing n_left_to_next). If the
 * buffer's real next node next0 equals next_index nothing is done.
 * Otherwise:
 *   - the current frame is put back with n_left_to_next + 1, i.e. the
 *     speculatively used slot is returned;
 *   - next_index is switched to next0 and that node's frame is fetched,
 *     which reloads to_next and n_left_to_next;
 *   - bi0 is stored in the first free slot of the new frame, to_next is
 *     advanced and n_left_to_next is decremented.
 *
 * next_index, to_next and n_left_to_next must be modifiable lvalues; they
 * are updated in place. bi0 and next0 are evaluated more than once, so
 * they must be free of side effects.
 */
#define vlib_validate_buffer_enqueue_x1(vm,node,next_index,to_next,n_left_to_next,bi0,next0) \
do {									\
  ASSERT ((bi0) != 0);							\
  if (PREDICT_FALSE ((next0) != (next_index)))				\
    {									\
      vlib_put_next_frame (vm, node, next_index, (n_left_to_next) + 1);	\
      (next_index) = (next0);						\
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); \
									\
      (to_next)[0] = (bi0);						\
      (to_next) += 1;							\
      (n_left_to_next) -= 1;						\
    }									\
} while (0)

TRACE信息

节点virtio-input由tap1接收到报文,经由节点ethernet-input和ip4-input,到达节点linux-cp-xc-ip4,随后经eth0-output节点,由VPP物理接口eth0的发送节点eth0-tx发送出去。

vpp# show trace 
------------------- Start of thread 1 vpp_wk_0 -------------------
Packet 1

00:36:17:622826: virtio-input
  virtio: hw_if_index 2 next-index 4 vring 0 len 98
    hdr: flags 0x00 gso_type 0x00 hdr_len 0 gso_size 0 csum_start 0 csum_offset 0 num_buffers 1
00:36:17:622828: ethernet-input
  IP4: 00:60:e0:6f:9c:e5 -> 6c:24:08:6b:e3:3d
00:36:17:622829: ip4-input
  ICMP: 17.1.2.1 -> 17.1.2.2
    tos 0x00, ttl 64, length 84, checksum 0x9be8 dscp CS0 ecn NON_ECN
    fragment id 0x78bc, flags DONT_FRAGMENT
  ICMP echo_request checksum 0x6897 id 13712
00:36:17:622831: linux-cp-xc-ip4
  lcp-xc: itf:1 adj:4
00:36:17:622832: eth0-output
  eth0 
  IP4: 00:60:e0:6f:9c:e5 -> 6c:24:08:6b:e3:3d
  ICMP: 17.1.2.1 -> 17.1.2.2
    tos 0x00, ttl 64, length 84, checksum 0x9be8 dscp CS0 ecn NON_ECN
    fragment id 0x78bc, flags DONT_FRAGMENT
  ICMP echo_request checksum 0x6897 id 13712
00:36:17:622833: eth0-tx
  eth0 tx queue 1
  buffer 0x9cb1e: current data 0, length 98, buffer-pool 0, ref-count 1, totlen-nifb 0, trace handle 0x1000000
                  l2-hdr-offset 0 l3-hdr-offset 14 
  PKT MBUF: port 65535, nb_segs 1, pkt_len 98
    buf_len 2176, data_len 98, ol_flags 0x0, data_off 128, phys_addr 0x8852c800
    packet_type 0x0 l2_len 0 l3_len 0 outer_l2_len 0 outer_l3_len 0 
    rss 0x0 fdir.hi 0x0 fdir.lo 0x0
  IP4: 00:60:e0:6f:9c:e5 -> 6c:24:08:6b:e3:3d
  ICMP: 17.1.2.1 -> 17.1.2.2
    tos 0x00, ttl 64, length 84, checksum 0x9be8 dscp CS0 ecn NON_ECN
    fragment id 0x78bc, flags DONT_FRAGMENT
  ICMP echo_request checksum 0x6897 id 13712

邻居信息

LCP中显示两条邻居信息(IPv4和IPv6各一条)。

vpp# show lcp adj
linux-cp adjacencies:
Hash table 'linux-cp adjacencies'
    2 active elements 2 active buckets
    1 free lists
    0 linear search buckets
    heap: 1 chunk(s) allocated
          bytes: used 160k, scrap 151.63k

详细的邻居信息如下:

vpp# show adj
[@0] ipv4-glean: [src:0.0.0.0/0] eth0: mtu:9000 next:1 flags:[] ffffffffffff0060e06f9ce50806
[@1] ipv4-glean: [src:17.1.2.0/24] eth0: mtu:9000 next:1 flags:[] ffffffffffff0060e06f9ce50806
[@2] ipv4-mcast: eth0: mtu:9000 next:5 flags:[] 01005e0000000060e06f9ce50800
[@3] ipv6-mcast: eth0: mtu:9000 next:5 flags:[] 3333000000000060e06f9ce586dd
[@4] ipv4 via 17.1.2.2 eth0: mtu:9000 next:5 flags:[] 6c24086be33d0060e06f9ce50800
[@5] ipv6-glean: [src:fe80::/10] eth0: mtu:9000 next:2 flags:[] ffffffffffff0060e06f9ce586dd
[@6] ipv6-glean: [src:0.0.0.0/0] eth0: mtu:9000 next:2 flags:[] ffffffffffff0060e06f9ce586dd
[@7] ipv6-glean: [src:3ff0::/64] eth0: mtu:9000 next:2 flags:[] ffffffffffff0060e06f9ce586dd
[@8] ipv6 via fe80::2054:c655:7c14:d518 eth0: mtu:9000 next:5 flags:[] 6c24086be33d0060e06f9ce586dd
[@9] ipv6 via 3ff0::2 eth0: mtu:9000 next:5 flags:[] 6c24086be33d0060e06f9ce586dd

邻居信息中的next字段表示下一节点索引,其值为5,即如下的eth0-output节点为下一节点。

vpp# show node linux-cp-xc-ip4
node linux-cp-xc-ip4, type internal, state active, index 151, sibling-of ip4-rewrite
  node function variants:
    Name             Priority  Active  Description
    default                 0    yes   default

  next nodes:
    next-index  node-index               Node               Vectors
         0          614                ip4-drop                0
         1          630             ip4-icmp-error             0
         2          562                ip4-frag                0
         3          613             ip4-not-enabled            0
         4          356            ip4-dvr-reinject            0
         5          702               eth0-output              0

  known previous nodes:
    ip4-mpls-label-disposition-uniform ip4-mpls-label-disposition-pipe (37ip4-input-no-checksum (610)
    ip4-input (611)
Logo

魔乐社区(Modelers.cn) 是一个中立、公益的人工智能社区,提供人工智能工具、模型、数据的托管、展示与应用协同服务,为人工智能开发及爱好者搭建开放的学习交流平台。社区通过理事会方式运作,由全产业链共同建设、共同运营、共同享有,推动国产AI生态繁荣发展。

更多推荐