Diffstat (limited to 'kernel/rcu/tree.c')
-rw-r--r-- | kernel/rcu/tree.c | 169
1 file changed, 124 insertions(+), 45 deletions(-)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index da6f5213fb74..8e78b2430c16 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -156,6 +156,7 @@ static void invoke_rcu_core(void);
 static void rcu_report_exp_rdp(struct rcu_data *rdp);
 static void sync_sched_exp_online_cleanup(int cpu);
 static void check_cb_ovld_locked(struct rcu_data *rdp, struct rcu_node *rnp);
+static bool rcu_rdp_is_offloaded(struct rcu_data *rdp);
 
 /* rcuc/rcub kthread realtime priority */
 static int kthread_prio = IS_ENABLED(CONFIG_RCU_BOOST) ? 1 : 0;
@@ -648,7 +649,6 @@ static noinstr void rcu_eqs_enter(bool user)
 	instrumentation_begin();
 	trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, atomic_read(&rdp->dynticks));
 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
-	rdp = this_cpu_ptr(&rcu_data);
 	rcu_prepare_for_idle();
 	rcu_preempt_deferred_qs(current);
@@ -1077,7 +1077,6 @@ noinstr void rcu_nmi_enter(void)
 	} else if (!in_nmi()) {
 		instrumentation_begin();
 		rcu_irq_enter_check_tick();
-		instrumentation_end();
 	} else {
 		instrumentation_begin();
 	}
@@ -1672,7 +1671,7 @@ static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)
 {
 	bool ret = false;
 	bool need_qs;
-	const bool offloaded = rcu_segcblist_is_offloaded(&rdp->cblist);
+	const bool offloaded = rcu_rdp_is_offloaded(rdp);
 
 	raw_lockdep_assert_held_rcu_node(rnp);
@@ -2128,7 +2127,7 @@ static void rcu_gp_cleanup(void)
 		needgp = true;
 	}
 	/* Advance CBs to reduce false positives below. */
-	offloaded = rcu_segcblist_is_offloaded(&rdp->cblist);
+	offloaded = rcu_rdp_is_offloaded(rdp);
 	if ((offloaded || !rcu_accelerate_cbs(rnp, rdp)) && needgp) {
 		WRITE_ONCE(rcu_state.gp_flags, RCU_GP_FLAG_INIT);
 		WRITE_ONCE(rcu_state.gp_req_activity, jiffies);
@@ -2327,7 +2326,7 @@ rcu_report_qs_rdp(struct rcu_data *rdp)
 	unsigned long flags;
 	unsigned long mask;
 	bool needwake = false;
-	const bool offloaded = rcu_segcblist_is_offloaded(&rdp->cblist);
+	const bool offloaded = rcu_rdp_is_offloaded(rdp);
 	struct rcu_node *rnp;
 
 	WARN_ON_ONCE(rdp->cpu != smp_processor_id());
@@ -2414,7 +2413,7 @@ int rcutree_dying_cpu(unsigned int cpu)
 	blkd = !!(rnp->qsmask & rdp->grpmask);
 	trace_rcu_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq),
-			       blkd ? TPS("cpuofl") : TPS("cpuofl-bgp"));
+			       blkd ? TPS("cpuofl-bgp") : TPS("cpuofl"));
 	return 0;
 }
@@ -2497,7 +2496,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
 	int div;
 	bool __maybe_unused empty;
 	unsigned long flags;
-	const bool offloaded = rcu_segcblist_is_offloaded(&rdp->cblist);
+	const bool offloaded = rcu_rdp_is_offloaded(rdp);
 	struct rcu_head *rhp;
 	struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
 	long bl, count = 0;
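The rcu_rdp_is_offloaded() helper declared and used throughout the hunks above is defined in kernel/rcu/tree_plugin.h, which is outside this diff. As a minimal sketch of what the conversion amounts to, assuming the helper simply centralizes the old per-callsite test (the in-tree version also carries lockdep assertions that the offloaded state is stable for the caller):

static bool rcu_rdp_is_offloaded(struct rcu_data *rdp)
{
	/* Sketch only, not the in-tree implementation: centralize the
	 * rdp->cblist offload test so callers stop open-coding it. */
	return rcu_segcblist_is_offloaded(&rdp->cblist);
}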
@@ -3066,7 +3065,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
 		trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCBQueued"));
 
 	/* Go handle any RCU core processing required. */
-	if (unlikely(rcu_segcblist_is_offloaded(&rdp->cblist))) {
+	if (unlikely(rcu_rdp_is_offloaded(rdp))) {
 		__call_rcu_nocb_wake(rdp, was_alldone, flags); /* unlocks */
 	} else {
 		__call_rcu_core(rdp, head, flags);
 	}
@@ -3229,8 +3228,7 @@ krc_this_cpu_lock(unsigned long *flags)
 static inline void
 krc_this_cpu_unlock(struct kfree_rcu_cpu *krcp, unsigned long flags)
 {
-	raw_spin_unlock(&krcp->lock);
-	local_irq_restore(flags);
+	raw_spin_unlock_irqrestore(&krcp->lock, flags);
 }
 
 static inline struct kvfree_rcu_bulk_data *
@@ -3464,7 +3462,7 @@ static void fill_page_cache_func(struct work_struct *work)
 	for (i = 0; i < rcu_min_cached_objs; i++) {
 		bnode = (struct kvfree_rcu_bulk_data *)
-			__get_free_page(GFP_KERNEL | __GFP_NOWARN);
+			__get_free_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
 
 		if (bnode) {
 			raw_spin_lock_irqsave(&krcp->lock, flags);
@@ -3493,37 +3491,62 @@ run_page_cache_worker(struct kfree_rcu_cpu *krcp)
 	}
 }
 
+// Record ptr in a page managed by krcp, with the pre-krc_this_cpu_lock()
+// state specified by flags.  If can_alloc is true, the caller must
+// be schedulable and must not be holding any locks or mutexes that might
+// be acquired by the memory allocator or by anything that it might invoke.
+// Returns true if ptr was successfully recorded, else the caller must
+// use a fallback.
 static inline bool
-kvfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp, void *ptr)
+add_ptr_to_bulk_krc_lock(struct kfree_rcu_cpu **krcp,
+	unsigned long *flags, void *ptr, bool can_alloc)
 {
 	struct kvfree_rcu_bulk_data *bnode;
 	int idx;
 
-	if (unlikely(!krcp->initialized))
+	*krcp = krc_this_cpu_lock(flags);
+	if (unlikely(!(*krcp)->initialized))
 		return false;
 
-	lockdep_assert_held(&krcp->lock);
 	idx = !!is_vmalloc_addr(ptr);
 
 	/* Check if a new block is required. */
-	if (!krcp->bkvhead[idx] ||
-			krcp->bkvhead[idx]->nr_records == KVFREE_BULK_MAX_ENTR) {
-		bnode = get_cached_bnode(krcp);
-		/* Switch to emergency path. */
+	if (!(*krcp)->bkvhead[idx] ||
+			(*krcp)->bkvhead[idx]->nr_records == KVFREE_BULK_MAX_ENTR) {
+		bnode = get_cached_bnode(*krcp);
+		if (!bnode && can_alloc) {
+			krc_this_cpu_unlock(*krcp, *flags);
+
+			// __GFP_NORETRY - allows a light-weight direct reclaim,
+			// which minimizes the chance of falling back to the
+			// emergency path.  It also forbids invoking the OOM
+			// killer, which is appropriate given that memory is
+			// about to be released anyway.
+			//
+			// __GFP_NOMEMALLOC - prevents consuming all of the
+			// memory reserves.  Please note we have a fallback path.
+			//
+			// __GFP_NOWARN - the allocation is expected to fail
+			// under low-memory or high-memory-pressure scenarios.
+			bnode = (struct kvfree_rcu_bulk_data *)
+				__get_free_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
+			*krcp = krc_this_cpu_lock(flags);
+		}
+
 		if (!bnode)
 			return false;
 
 		/* Initialize the new block. */
 		bnode->nr_records = 0;
-		bnode->next = krcp->bkvhead[idx];
+		bnode->next = (*krcp)->bkvhead[idx];
 
 		/* Attach it to the head. */
-		krcp->bkvhead[idx] = bnode;
+		(*krcp)->bkvhead[idx] = bnode;
 	}
 
 	/* Finally insert. */
-	krcp->bkvhead[idx]->records
-		[krcp->bkvhead[idx]->nr_records++] = ptr;
+	(*krcp)->bkvhead[idx]->records
+		[(*krcp)->bkvhead[idx]->nr_records++] = ptr;
 
 	return true;
 }
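The new can_alloc argument maps onto kvfree_call_rcu()'s two calling conventions, as the hunk below shows with its `!head` argument: the double-argument kvfree_rcu() may run in non-sleeping context, while the single-argument (headless) form may sleep, which is what makes the GFP_KERNEL page allocation above safe. An illustrative sketch, where struct foo and both caller functions are hypothetical names, not part of this patch:

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	struct rcu_head rh;
	int data;
};

static void free_foo_atomic(struct foo *p)
{
	/* Double-argument form: legal in non-sleeping contexts, so the
	 * bulk-array path runs with can_alloc == !head == false. */
	kvfree_rcu(p, rh);
}

static void free_foo_sleepable(struct foo *p)
{
	/* Single-argument (headless) form: the caller must be able to
	 * sleep, so can_alloc == true and a page may be allocated. */
	kvfree_rcu(p);
}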
@@ -3561,8 +3584,6 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 		ptr = (unsigned long *) func;
 	}
 
-	krcp = krc_this_cpu_lock(&flags);
-
 	// Queue the object but don't yet schedule the batch.
 	if (debug_rcu_head_queue(ptr)) {
 		// Probable double kfree_rcu(), just leak.
@@ -3570,12 +3591,11 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 			  __func__, head);
 
 		// Mark as success and leave.
-		success = true;
-		goto unlock_return;
+		return;
 	}
 
 	kasan_record_aux_stack(ptr);
-	success = kvfree_call_rcu_add_ptr_to_bulk(krcp, ptr);
+	success = add_ptr_to_bulk_krc_lock(&krcp, &flags, ptr, !head);
 	if (!success) {
 		run_page_cache_worker(krcp);
@@ -3774,8 +3794,8 @@ EXPORT_SYMBOL_GPL(synchronize_rcu);
  * get_state_synchronize_rcu - Snapshot current RCU state
  *
  * Returns a cookie that is used by a later call to cond_synchronize_rcu()
- * to determine whether or not a full grace period has elapsed in the
- * meantime.
+ * or poll_state_synchronize_rcu() to determine whether or not a full
+ * grace period has elapsed in the meantime.
  */
 unsigned long get_state_synchronize_rcu(void)
 {
@@ -3789,13 +3809,76 @@ unsigned long get_state_synchronize_rcu(void)
 EXPORT_SYMBOL_GPL(get_state_synchronize_rcu);
 
 /**
+ * start_poll_synchronize_rcu - Snapshot and start RCU grace period
+ *
+ * Returns a cookie that is used by a later call to cond_synchronize_rcu()
+ * or poll_state_synchronize_rcu() to determine whether or not a full
+ * grace period has elapsed in the meantime.  If the needed grace period
+ * is not already slated to start, notifies RCU core of the need for that
+ * grace period.
+ *
+ * Interrupts must be enabled for the case where it is necessary to awaken
+ * the grace-period kthread.
+ */
+unsigned long start_poll_synchronize_rcu(void)
+{
+	unsigned long flags;
+	unsigned long gp_seq = get_state_synchronize_rcu();
+	bool needwake;
+	struct rcu_data *rdp;
+	struct rcu_node *rnp;
+
+	lockdep_assert_irqs_enabled();
+	local_irq_save(flags);
+	rdp = this_cpu_ptr(&rcu_data);
+	rnp = rdp->mynode;
+	raw_spin_lock_rcu_node(rnp); // irqs already disabled.
+	needwake = rcu_start_this_gp(rnp, rdp, gp_seq);
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+	if (needwake)
+		rcu_gp_kthread_wake();
+	return gp_seq;
+}
+EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu);
+
+/**
+ * poll_state_synchronize_rcu - Has the specified RCU grace period completed?
+ *
+ * @oldstate: return value from a call to get_state_synchronize_rcu() or start_poll_synchronize_rcu()
+ *
+ * If a full RCU grace period has elapsed since the earlier call from
+ * which @oldstate was obtained, return @true, otherwise return @false.
+ * If @false is returned, it is the caller's responsibility to invoke this
+ * function later on until it does return @true.  Alternatively, the caller
+ * can explicitly wait for a grace period, for example, by passing @oldstate
+ * to cond_synchronize_rcu() or by directly invoking synchronize_rcu().
+ *
+ * Yes, this function does not take counter wrap into account.
+ * But counter wrap is harmless.  If the counter wraps, we have waited for
+ * more than 2 billion grace periods (and way more on a 64-bit system!).
+ * Those needing to keep oldstate values for very long time periods
+ * (many hours even on 32-bit systems) should check them occasionally
+ * and either refresh them or set a flag indicating that the grace period
+ * has completed.
+ */
+bool poll_state_synchronize_rcu(unsigned long oldstate)
+{
+	if (rcu_seq_done(&rcu_state.gp_seq, oldstate)) {
+		smp_mb(); /* Ensure GP ends before subsequent accesses. */
+		return true;
+	}
+	return false;
+}
+EXPORT_SYMBOL_GPL(poll_state_synchronize_rcu);
+
+/**
  * cond_synchronize_rcu - Conditionally wait for an RCU grace period
  *
  * @oldstate: return value from earlier call to get_state_synchronize_rcu()
  *
  * If a full RCU grace period has elapsed since the earlier call to
- * get_state_synchronize_rcu(), just return.  Otherwise, invoke
- * synchronize_rcu() to wait for a full grace period.
+ * get_state_synchronize_rcu() or start_poll_synchronize_rcu(), just return.
+ * Otherwise, invoke synchronize_rcu() to wait for a full grace period.
  *
  * Yes, this function does not take counter wrap into account.  But
  * counter wrap is harmless.  If the counter wraps, we have waited for
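Together, start_poll_synchronize_rcu() and poll_state_synchronize_rcu() allow grace periods to be managed without ever blocking: request a grace period once, then poll for its completion from any later context. A hypothetical caller (the names retire_cookie, retire_object_begin(), retire_object_try_finish(), and obj are illustrative, not part of this patch):

#include <linux/rcupdate.h>
#include <linux/slab.h>

static unsigned long retire_cookie;

static void retire_object_begin(void)
{
	/* Must be called with interrupts enabled, since the
	 * grace-period kthread may need to be awakened. */
	retire_cookie = start_poll_synchronize_rcu();
}

static bool retire_object_try_finish(void *obj)
{
	/* Non-blocking: true only once a full grace period has elapsed
	 * since retire_object_begin() snapshotted the cookie. */
	if (!poll_state_synchronize_rcu(retire_cookie))
		return false;	/* Caller should retry later. */
	kfree(obj);
	return true;
}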
@@ -3804,10 +3887,8 @@ EXPORT_SYMBOL_GPL(get_state_synchronize_rcu);
  */
 void cond_synchronize_rcu(unsigned long oldstate)
 {
-	if (!rcu_seq_done(&rcu_state.gp_seq, oldstate))
+	if (!poll_state_synchronize_rcu(oldstate))
 		synchronize_rcu();
-	else
-		smp_mb(); /* Ensure GP ends before subsequent accesses. */
 }
 EXPORT_SYMBOL_GPL(cond_synchronize_rcu);
@@ -3843,13 +3924,13 @@ static int rcu_pending(int user)
 		return 1;
 
 	/* Does this CPU have callbacks ready to invoke? */
-	if (!rcu_segcblist_is_offloaded(&rdp->cblist) &&
+	if (!rcu_rdp_is_offloaded(rdp) &&
 	    rcu_segcblist_ready_cbs(&rdp->cblist))
 		return 1;
 
 	/* Has RCU gone idle with this CPU needing another grace period? */
 	if (!gp_in_progress && rcu_segcblist_is_enabled(&rdp->cblist) &&
-	    !rcu_segcblist_is_offloaded(&rdp->cblist) &&
+	    !rcu_rdp_is_offloaded(rdp) &&
 	    !rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
 		return 1;
@@ -3968,7 +4049,7 @@ void rcu_barrier(void)
 	for_each_possible_cpu(cpu) {
 		rdp = per_cpu_ptr(&rcu_data, cpu);
 		if (cpu_is_offline(cpu) &&
-		    !rcu_segcblist_is_offloaded(&rdp->cblist))
+		    !rcu_rdp_is_offloaded(rdp))
 			continue;
 		if (rcu_segcblist_n_cbs(&rdp->cblist) && cpu_online(cpu)) {
 			rcu_barrier_trace(TPS("OnlineQ"), cpu,
@@ -4083,15 +4164,13 @@ int rcutree_prepare_cpu(unsigned int cpu)
 	rdp->dynticks_nesting = 1;	/* CPU not up, no tearing. */
 	rcu_dynticks_eqs_online();
 	raw_spin_unlock_rcu_node(rnp);		/* irqs remain disabled. */
+
 	/*
-	 * Lock in case the CB/GP kthreads are still around handling
-	 * old callbacks (longer term we should flush all callbacks
-	 * before completing CPU offline)
+	 * Only non-NOCB CPUs that didn't have early-boot callbacks need to be
+	 * (re-)initialized.
 	 */
-	rcu_nocb_lock(rdp);
-	if (rcu_segcblist_empty(&rdp->cblist)) /* No early-boot CBs? */
+	if (!rcu_segcblist_is_enabled(&rdp->cblist))
 		rcu_segcblist_init(&rdp->cblist);  /* Re-enable callbacks. */
-	rcu_nocb_unlock(rdp);
 
 	/*
 	 * Add CPU to leaf rcu_node pending-online bitmask.  Any needed
@@ -4291,7 +4370,7 @@ void rcutree_migrate_callbacks(int cpu)
 	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
 	bool needwake;
 
-	if (rcu_segcblist_is_offloaded(&rdp->cblist) ||
+	if (rcu_rdp_is_offloaded(rdp) ||
 	    rcu_segcblist_empty(&rdp->cblist))
 		return;  /* No callbacks to migrate. */
@@ -4309,7 +4388,7 @@ void rcutree_migrate_callbacks(int cpu)
 	rcu_segcblist_disable(&rdp->cblist);
 	WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) !=
 		     !rcu_segcblist_n_cbs(&my_rdp->cblist));
-	if (rcu_segcblist_is_offloaded(&my_rdp->cblist)) {
+	if (rcu_rdp_is_offloaded(my_rdp)) {
 		raw_spin_unlock_rcu_node(my_rnp); /* irqs remain disabled. */
 		__call_rcu_nocb_wake(my_rdp, true, flags);
 	} else {
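Rebasing cond_synchronize_rcu() on poll_state_synchronize_rcu() also moves the smp_mb() into the polling primitive, so the grace-period-already-done fast path keeps its ordering guarantee without duplicating the barrier. Usage is unchanged for existing callers; an illustrative sketch, where do_other_work(), update_then_reclaim(), and old_ptr are hypothetical names:

#include <linux/rcupdate.h>
#include <linux/slab.h>

static void do_other_work(void);	/* hypothetical overlapping work */

static void update_then_reclaim(void *old_ptr)
{
	unsigned long oldstate = get_state_synchronize_rcu();

	do_other_work();	/* runs concurrently with the grace period */

	/* Returns immediately if a full grace period has already elapsed
	 * since the snapshot above, otherwise blocks in synchronize_rcu(). */
	cond_synchronize_rcu(oldstate);
	kfree(old_ptr);
}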