return;
ip = llist_entry(node, struct xfs_inode, i_gclist);
- trace_xfs_inodegc_worker(ip->i_mount, __return_address);
+ trace_xfs_inodegc_worker(ip->i_mount, READ_ONCE(gc->shrinker_hits));
+ WRITE_ONCE(gc->shrinker_hits, 0);
llist_for_each_entry_safe(ip, n, node, i_gclist) {
xfs_iflags_set(ip, XFS_INACTIVATING);
xfs_inodegc_inactivate(ip);
/*
* Make the frontend wait for inactivations when:
*
+ * - Memory shrinkers queued the inactivation worker and it hasn't finished.
* - The queue depth exceeds the maximum allowable percpu backlog.
*
* Note: If the current thread is running a transaction, we don't ever want to
static inline bool
xfs_inodegc_want_flush_work(
struct xfs_inode *ip,
- unsigned int items)
+ unsigned int items,
+ unsigned int shrinker_hits)
{
if (current->journal_info)
return false;
+ if (shrinker_hits > 0)
+ return true;
+
if (items > XFS_INODEGC_MAX_BACKLOG)
return true;
struct xfs_mount *mp = ip->i_mount;
struct xfs_inodegc *gc;
int items;
+ unsigned int shrinker_hits;
trace_xfs_inode_set_need_inactive(ip);
spin_lock(&ip->i_flags_lock);
llist_add(&ip->i_gclist, &gc->list);
items = READ_ONCE(gc->items);
WRITE_ONCE(gc->items, items + 1);
+ shrinker_hits = READ_ONCE(gc->shrinker_hits);
put_cpu_ptr(gc);
if (!xfs_is_inodegc_enabled(mp))
queue_work(mp->m_inodegc_wq, &gc->work);
}
- if (xfs_inodegc_want_flush_work(ip, items)) {
+ if (xfs_inodegc_want_flush_work(ip, items, shrinker_hits)) {
trace_xfs_inodegc_throttle(mp, __return_address);
flush_work(&gc->work);
}
xfs_qm_dqdetach(ip);
xfs_inodegc_set_reclaimable(ip);
}
+
+/*
+ * Register a phony shrinker so that we can run background inodegc sooner when
+ * there's memory pressure. Inactivation does not itself free any memory but
+ * it does make inodes reclaimable, which eventually frees memory.
+ *
+ * The count function, seek value, and batch value are crafted to trigger the
+ * scan function during the second round of scanning. Hopefully this means
+ * that we reclaimed enough memory that initiating metadata transactions won't
+ * make things worse.
+ */
+#define XFS_INODEGC_SHRINKER_COUNT (1UL << DEF_PRIORITY)
+#define XFS_INODEGC_SHRINKER_BATCH ((XFS_INODEGC_SHRINKER_COUNT / 2) + 1)
+
+static unsigned long
+xfs_inodegc_shrinker_count(
+ struct shrinker *shrink,
+ struct shrink_control *sc)
+{
+ struct xfs_mount *mp = container_of(shrink, struct xfs_mount,
+ m_inodegc_shrinker);
+ struct xfs_inodegc *gc;
+ int cpu;
+
+ if (!xfs_is_inodegc_enabled(mp))
+ return 0;
+
+ for_each_online_cpu(cpu) {
+ gc = per_cpu_ptr(mp->m_inodegc, cpu);
+ if (!llist_empty(&gc->list))
+ return XFS_INODEGC_SHRINKER_COUNT;
+ }
+
+ return 0;
+}
+
+static unsigned long
+xfs_inodegc_shrinker_scan(
+ struct shrinker *shrink,
+ struct shrink_control *sc)
+{
+ struct xfs_mount *mp = container_of(shrink, struct xfs_mount,
+ m_inodegc_shrinker);
+ struct xfs_inodegc *gc;
+ int cpu;
+ bool no_items = true;
+
+ if (!xfs_is_inodegc_enabled(mp))
+ return SHRINK_STOP;
+
+ trace_xfs_inodegc_shrinker_scan(mp, sc, __return_address);
+
+ for_each_online_cpu(cpu) {
+ gc = per_cpu_ptr(mp->m_inodegc, cpu);
+ if (!llist_empty(&gc->list)) {
+ unsigned int h = READ_ONCE(gc->shrinker_hits);
+
+ WRITE_ONCE(gc->shrinker_hits, h + 1);
+ queue_work_on(cpu, mp->m_inodegc_wq, &gc->work);
+ no_items = false;
+ }
+ }
+
+ /*
+ * If there are no inodes to inactivate, we don't want the shrinker
+ * to think there's deferred work to call us back about.
+ */
+ if (no_items)
+ return LONG_MAX;
+
+ return SHRINK_STOP;
+}
+
+/* Register a shrinker so we can accelerate inodegc and throttle queuing. */
+int
+xfs_inodegc_register_shrinker(
+ struct xfs_mount *mp)
+{
+ struct shrinker *shrink = &mp->m_inodegc_shrinker;
+
+ shrink->count_objects = xfs_inodegc_shrinker_count;
+ shrink->scan_objects = xfs_inodegc_shrinker_scan;
+ shrink->seeks = 0;
+ shrink->flags = SHRINKER_NONSLAB;
+ shrink->batch = XFS_INODEGC_SHRINKER_BATCH;
+
+ return register_shrinker(shrink);
+}
DEFINE_PERAG_REF_EVENT(xfs_perag_set_inode_tag);
DEFINE_PERAG_REF_EVENT(xfs_perag_clear_inode_tag);
+TRACE_EVENT(xfs_inodegc_worker,
+ TP_PROTO(struct xfs_mount *mp, unsigned int shrinker_hits),
+ TP_ARGS(mp, shrinker_hits),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned int, shrinker_hits)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->shrinker_hits = shrinker_hits;
+ ),
+ TP_printk("dev %d:%d shrinker_hits %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->shrinker_hits)
+);
+
DECLARE_EVENT_CLASS(xfs_fs_class,
TP_PROTO(struct xfs_mount *mp, void *caller_ip),
TP_ARGS(mp, caller_ip),
DEFINE_FS_EVENT(xfs_inodegc_flush);
DEFINE_FS_EVENT(xfs_inodegc_start);
DEFINE_FS_EVENT(xfs_inodegc_stop);
-DEFINE_FS_EVENT(xfs_inodegc_worker);
DEFINE_FS_EVENT(xfs_inodegc_queue);
DEFINE_FS_EVENT(xfs_inodegc_throttle);
DEFINE_FS_EVENT(xfs_fs_sync_fs);
DEFINE_FS_EVENT(xfs_blockgc_worker);
DEFINE_FS_EVENT(xfs_blockgc_flush_all);
+TRACE_EVENT(xfs_inodegc_shrinker_scan,
+ TP_PROTO(struct xfs_mount *mp, struct shrink_control *sc,
+ void *caller_ip),
+ TP_ARGS(mp, sc, caller_ip),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned long, nr_to_scan)
+ __field(void *, caller_ip)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->nr_to_scan = sc->nr_to_scan;
+ __entry->caller_ip = caller_ip;
+ ),
+ TP_printk("dev %d:%d nr_to_scan %lu caller %pS",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->nr_to_scan,
+ __entry->caller_ip)
+);
+
DECLARE_EVENT_CLASS(xfs_ag_class,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno),
TP_ARGS(mp, agno),