return 0;
}
+-------------------------------------------------------------------------------
++ PACKET_QDISC_BYPASS
+-------------------------------------------------------------------------------
+
+If there is a requirement to load the network with many packets in a similar
+fashion as pktgen does, you might set the following option after socket
+creation:
+
+ int one = 1;
+ setsockopt(fd, SOL_PACKET, PACKET_QDISC_BYPASS, &one, sizeof(one));
+
+This has the side effect that packets sent through PF_PACKET will bypass the
+kernel's qdisc layer and are forcibly pushed to the driver directly. As a
+result, packets are not buffered, tc disciplines are ignored, increased loss
+can occur, and such packets are no longer visible to other PF_PACKET sockets.
+So, you have been warned; generally, this can be useful for stress testing
+various components of a system.
+
+By default, PACKET_QDISC_BYPASS is disabled and needs to be explicitly enabled
+on PF_PACKET sockets.
+
-------------------------------------------------------------------------------
+ PACKET_TIMESTAMP
-------------------------------------------------------------------------------
static void __fanout_unlink(struct sock *sk, struct packet_sock *po);
static void __fanout_link(struct sock *sk, struct packet_sock *po);
+/*
+ * Hand an skb straight to the driver of skb->dev, without going through
+ * dev_queue_xmit().
+ *
+ * Returns NET_XMIT_DROP if the device is not running, has no carrier, or the
+ * skb needed linearizing and that failed; NETDEV_TX_BUSY if the selected TX
+ * queue is frozen or stopped; otherwise whatever ndo_start_xmit() returned.
+ *
+ * The skb is freed here on every path except the one where
+ * dev_xmit_complete() reports the driver's return code as complete.
+ */
+static int packet_direct_xmit(struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+	const struct net_device_ops *ops = dev->netdev_ops;
+	struct netdev_queue *txq;
+	int ret;
+
+	if (unlikely(!netif_running(dev) || !netif_carrier_ok(dev)))
+		goto drop;
+
+	if (skb_needs_linearize(skb, netif_skb_features(skb)) &&
+	    __skb_linearize(skb))
+		goto drop;
+
+	/* The queue index was stored in the skb by the caller. */
+	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+
+	__netif_tx_lock_bh(txq);
+	if (unlikely(netif_xmit_frozen_or_stopped(txq))) {
+		ret = NETDEV_TX_BUSY;
+		goto free_locked;
+	}
+
+	ret = ops->ndo_start_xmit(skb, dev);
+	if (likely(dev_xmit_complete(ret))) {
+		txq_trans_update(txq);
+		goto unlock;
+	}
+free_locked:
+	/* Not transmitted: drop the skb while still holding the queue lock. */
+	kfree_skb(skb);
+unlock:
+	__netif_tx_unlock_bh(txq);
+	return ret;
+
+drop:
+	kfree_skb(skb);
+	return NET_XMIT_DROP;
+}
+
static struct net_device *packet_cached_dev_get(struct packet_sock *po)
{
struct net_device *dev;
RCU_INIT_POINTER(po->cached_dev, NULL);
}
+/*
+ * Tell whether this socket's transmit hook is packet_direct_xmit() rather
+ * than another transmit function.
+ */
+static bool packet_use_direct_xmit(const struct packet_sock *po)
+{
+	if (po->xmit == packet_direct_xmit)
+		return true;
+
+	return false;
+}
+
+/*
+ * Pick a TX queue index for an outgoing packet: the current CPU number
+ * modulo the device's real_num_tx_queues.
+ *
+ * The CPU number is only used as a distribution hint, so it does not matter
+ * if the task migrates right after reading it. raw_smp_processor_id() is
+ * used instead of smp_processor_id() because the send path may call this
+ * with preemption enabled, where the checked variant would emit a
+ * CONFIG_DEBUG_PREEMPT warning.
+ */
+static u16 packet_pick_tx_queue(struct net_device *dev)
+{
+	return (u16) raw_smp_processor_id() % dev->real_num_tx_queues;
+}
+
/* register_prot_hook must be invoked with the po->bind_lock held,
* or from a context in which asynchronous accesses to the packet
* socket is not possible (packet_create()).
skb_reserve(skb, hlen);
skb_reset_network_header(skb);
- skb_probe_transport_header(skb, 0);
- if (po->tp_tx_has_off) {
+ if (!packet_use_direct_xmit(po))
+ skb_probe_transport_header(skb, 0);
+ if (unlikely(po->tp_tx_has_off)) {
int off_min, off_max, off;
off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
off_max = po->tx_ring.frame_size - tp_len;
}
}
+ skb_set_queue_mapping(skb, packet_pick_tx_queue(dev));
skb->destructor = tpacket_destruct_skb;
__packet_set_status(po, ph, TP_STATUS_SENDING);
atomic_inc(&po->tx_ring.pending);
status = TP_STATUS_SEND_REQUEST;
- err = dev_queue_xmit(skb);
+ err = po->xmit(skb);
if (unlikely(err > 0)) {
err = net_xmit_errno(err);
if (err && __packet_get_status(po, ph) ==
return skb;
}
-static int packet_snd(struct socket *sock,
- struct msghdr *msg, size_t len)
+static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
{
struct sock *sk = sock->sk;
struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
skb->dev = dev;
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
+ skb_set_queue_mapping(skb, packet_pick_tx_queue(dev));
if (po->has_vnet_hdr) {
if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
len += vnet_hdr_len;
}
- skb_probe_transport_header(skb, reserve);
-
+ if (!packet_use_direct_xmit(po))
+ skb_probe_transport_header(skb, reserve);
if (unlikely(extra_len == 4))
skb->no_fcs = 1;
- /*
- * Now send it
- */
-
- err = dev_queue_xmit(skb);
+ err = po->xmit(skb);
if (err > 0 && (err = net_xmit_errno(err)) != 0)
goto out_unlock;
{
struct sock *sk = sock->sk;
struct packet_sock *po = pkt_sk(sk);
+
if (po->tx_ring.pg_vec)
return tpacket_snd(po, msg);
else
po = pkt_sk(sk);
sk->sk_family = PF_PACKET;
po->num = proto;
+ po->xmit = dev_queue_xmit;
packet_cached_dev_reset(po);
po->tp_tx_has_off = !!val;
return 0;
}
+ case PACKET_QDISC_BYPASS:
+ {
+ int val;
+
+ if (optlen != sizeof(val))
+ return -EINVAL;
+ if (copy_from_user(&val, optval, sizeof(val)))
+ return -EFAULT;
+
+ po->xmit = val ? packet_direct_xmit : dev_queue_xmit;
+ return 0;
+ }
default:
return -ENOPROTOOPT;
}
case PACKET_TX_HAS_OFF:
val = po->tp_tx_has_off;
break;
+ case PACKET_QDISC_BYPASS:
+ val = packet_use_direct_xmit(po);
+ break;
default:
return -ENOPROTOOPT;
}