printk: refactor and rework printing logic
authorJohn Ogness <john.ogness@linutronix.de>
Thu, 21 Apr 2022 21:22:44 +0000 (23:28 +0206)
committerPetr Mladek <pmladek@suse.com>
Fri, 22 Apr 2022 19:30:57 +0000 (21:30 +0200)
Refactor/rework printing logic in order to prepare for moving to
threaded console printing.

- Move @console_seq into struct console so that the current
  "position" of each console can be tracked individually.

- Move @console_dropped into struct console so that the current drop
  count of each console can be tracked individually.

- Modify printing logic so that each console independently loads,
  prepares, and prints its next record.

- Remove exclusive_console logic. Since console positions are
  handled independently, replaying past records occurs naturally.

- Update the comments explaining why preemption is disabled while
  printing from printk() context.

With these changes, there is a change in behavior: the console
replaying the log (formerly exclusive console) will no longer block
other consoles. New messages appear on the other consoles while the
newly added console is still replaying.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20220421212250.565456-10-john.ogness@linutronix.de
include/linux/console.h
kernel/printk/printk.c

index 7cd758a..8c1686e 100644 (file)
@@ -151,6 +151,8 @@ struct console {
        int     cflag;
        uint    ispeed;
        uint    ospeed;
+       u64     seq;
+       unsigned long dropped;
        void    *data;
        struct   console *next;
 };
index e36d3ed..3dea8bb 100644 (file)
@@ -281,11 +281,6 @@ static bool panic_in_progress(void)
 static int console_locked, console_suspended;
 
 /*
- * If exclusive_console is non-NULL then only this console is to be printed to.
- */
-static struct console *exclusive_console;
-
-/*
  *     Array of consoles built from command line options (console=)
  */
 
@@ -374,12 +369,6 @@ static u64 syslog_seq;
 static size_t syslog_partial;
 static bool syslog_time;
 
-/* All 3 protected by @console_sem. */
-/* the next printk record to write to the console */
-static u64 console_seq;
-static u64 exclusive_console_stop_seq;
-static unsigned long console_dropped;
-
 struct latched_seq {
        seqcount_latch_t        latch;
        u64                     val[2];
@@ -1933,47 +1922,26 @@ static int console_trylock_spinning(void)
 }
 
 /*
- * Call the console drivers, asking them to write out
- * log_buf[start] to log_buf[end - 1].
- * The console_lock must be held.
+ * Call the specified console driver, asking it to write out the specified
+ * text and length. For non-extended consoles, if any records have been
+ * dropped, a dropped message will be written out first.
  */
-static void call_console_drivers(const char *ext_text, size_t ext_len,
-                                const char *text, size_t len)
+static void call_console_driver(struct console *con, const char *text, size_t len)
 {
        static char dropped_text[64];
-       size_t dropped_len = 0;
-       struct console *con;
+       size_t dropped_len;
 
        trace_console_rcuidle(text, len);
 
-       if (!console_drivers)
-               return;
-
-       if (console_dropped) {
+       if (con->dropped && !(con->flags & CON_EXTENDED)) {
                dropped_len = snprintf(dropped_text, sizeof(dropped_text),
                                       "** %lu printk messages dropped **\n",
-                                      console_dropped);
-               console_dropped = 0;
+                                      con->dropped);
+               con->dropped = 0;
+               con->write(con, dropped_text, dropped_len);
        }
 
-       for_each_console(con) {
-               if (exclusive_console && con != exclusive_console)
-                       continue;
-               if (!(con->flags & CON_ENABLED))
-                       continue;
-               if (!con->write)
-                       continue;
-               if (!cpu_online(smp_processor_id()) &&
-                   !(con->flags & CON_ANYTIME))
-                       continue;
-               if (con->flags & CON_EXTENDED)
-                       con->write(con, ext_text, ext_len);
-               else {
-                       if (dropped_len)
-                               con->write(con, dropped_text, dropped_len);
-                       con->write(con, text, len);
-               }
-       }
+       con->write(con, text, len);
 }
 
 /*
@@ -2283,15 +2251,18 @@ asmlinkage int vprintk_emit(int facility, int level,
        /* If called from the scheduler, we can not call up(). */
        if (!in_sched) {
                /*
-                * Disable preemption to avoid being preempted while holding
-                * console_sem which would prevent anyone from printing to
-                * console
+                * The caller may be holding system-critical or
+                * timing-sensitive locks. Disable preemption during
+                * printing of all remaining records to all consoles so that
+                * this context can return as soon as possible. Hopefully
+                * another printk() caller will take over the printing.
                 */
                preempt_disable();
                /*
                 * Try to acquire and then immediately release the console
-                * semaphore.  The release will print out buffers and wake up
-                * /dev/kmsg and syslog() users.
+                * semaphore. The release will print out buffers. With the
+                * spinning variant, this context tries to take over the
+                * printing from another printing context.
                 */
                if (console_trylock_spinning())
                        console_unlock();
@@ -2329,11 +2300,9 @@ EXPORT_SYMBOL(_printk);
 
 #define prb_read_valid(rb, seq, r)     false
 #define prb_first_valid_seq(rb)                0
+#define prb_next_seq(rb)               0
 
 static u64 syslog_seq;
-static u64 console_seq;
-static u64 exclusive_console_stop_seq;
-static unsigned long console_dropped;
 
 static size_t record_print_text(const struct printk_record *r,
                                bool syslog, bool time)
@@ -2350,8 +2319,7 @@ static ssize_t msg_print_ext_body(char *buf, size_t size,
                                  struct dev_printk_info *dev_info) { return 0; }
 static void console_lock_spinning_enable(void) { }
 static int console_lock_spinning_disable_and_check(void) { return 0; }
-static void call_console_drivers(const char *ext_text, size_t ext_len,
-                                const char *text, size_t len) {}
+static void call_console_driver(struct console *con, const char *text, size_t len) { }
 static bool suppress_message_printing(int level) { return false; }
 
 #endif /* CONFIG_PRINTK */
@@ -2622,22 +2590,6 @@ int is_console_locked(void)
 EXPORT_SYMBOL(is_console_locked);
 
 /*
- * Check if we have any console that is capable of printing while cpu is
- * booting or shutting down. Requires console_sem.
- */
-static int have_callable_console(void)
-{
-       struct console *con;
-
-       for_each_console(con)
-               if ((con->flags & CON_ENABLED) &&
-                               (con->flags & CON_ANYTIME))
-                       return 1;
-
-       return 0;
-}
-
-/*
  * Return true when this CPU should unlock console_sem without pushing all
  * messages to the console. This reduces the chance that the console is
  * locked when the panic CPU tries to use it.
@@ -2657,15 +2609,182 @@ static bool abandon_console_lock_in_panic(void)
 }
 
 /*
- * Can we actually use the console at this time on this cpu?
+ * Check if the given console is currently capable and allowed to print
+ * records.
+ *
+ * Requires the console_lock.
+ */
+static inline bool console_is_usable(struct console *con)
+{
+       if (!(con->flags & CON_ENABLED))
+               return false;
+
+       if (!con->write)
+               return false;
+
+       /*
+        * Console drivers may assume that per-cpu resources have been
+        * allocated. So unless they're explicitly marked as being able to
+        * cope (CON_ANYTIME) don't call them until this CPU is officially up.
+        */
+       if (!cpu_online(raw_smp_processor_id()) &&
+           !(con->flags & CON_ANYTIME))
+               return false;
+
+       return true;
+}
+
+static void __console_unlock(void)
+{
+       console_locked = 0;
+       up_console_sem();
+}
+
+/*
+ * Print one record for the given console. The record printed is whatever
+ * record is the next available record for the given console.
+ *
+ * @handover will be set to true if a printk waiter has taken over the
+ * console_lock, in which case the caller is no longer holding the
+ * console_lock. Otherwise it is set to false.
+ *
+ * Returns false if the given console has no next record to print, otherwise
+ * true.
  *
- * Console drivers may assume that per-cpu resources have been allocated. So
- * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't
- * call them until this CPU is officially up.
+ * Requires the console_lock.
  */
-static inline int can_use_console(void)
+static bool console_emit_next_record(struct console *con, bool *handover)
 {
-       return cpu_online(raw_smp_processor_id()) || have_callable_console();
+       static char ext_text[CONSOLE_EXT_LOG_MAX];
+       static char text[CONSOLE_LOG_MAX];
+       static int panic_console_dropped;
+       struct printk_info info;
+       struct printk_record r;
+       unsigned long flags;
+       char *write_text;
+       size_t len;
+
+       prb_rec_init_rd(&r, &info, text, sizeof(text));
+
+       *handover = false;
+
+       if (!prb_read_valid(prb, con->seq, &r))
+               return false;
+
+       if (con->seq != r.info->seq) {
+               con->dropped += r.info->seq - con->seq;
+               con->seq = r.info->seq;
+               if (panic_in_progress() && panic_console_dropped++ > 10) {
+                       suppress_panic_printk = 1;
+                       pr_warn_once("Too many dropped messages. Suppress messages on non-panic CPUs to prevent livelock.\n");
+               }
+       }
+
+       /* Skip record that has level above the console loglevel. */
+       if (suppress_message_printing(r.info->level)) {
+               con->seq++;
+               goto skip;
+       }
+
+       if (con->flags & CON_EXTENDED) {
+               write_text = &ext_text[0];
+               len = info_print_ext_header(ext_text, sizeof(ext_text), r.info);
+               len += msg_print_ext_body(ext_text + len, sizeof(ext_text) - len,
+                                         &r.text_buf[0], r.info->text_len, &r.info->dev_info);
+       } else {
+               write_text = &text[0];
+               len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time);
+       }
+
+       /*
+        * While actively printing out messages, if another printk()
+        * were to occur on another CPU, it may wait for this one to
+        * finish. This task can not be preempted if there is a
+        * waiter waiting to take over.
+        *
+        * Interrupts are disabled because the hand over to a waiter
+        * must not be interrupted until the hand over is completed
+        * (@console_waiter is cleared).
+        */
+       printk_safe_enter_irqsave(flags);
+       console_lock_spinning_enable();
+
+       stop_critical_timings();        /* don't trace print latency */
+       call_console_driver(con, write_text, len);
+       start_critical_timings();
+
+       con->seq++;
+
+       *handover = console_lock_spinning_disable_and_check();
+       printk_safe_exit_irqrestore(flags);
+skip:
+       return true;
+}
+
+/*
+ * Print out all remaining records to all consoles.
+ *
+ * @do_cond_resched is set by the caller. It can be true only in schedulable
+ * context.
+ *
+ * @next_seq is set to the sequence number after the last available record.
+ * The value is valid only when this function returns true. It means that all
+ * usable consoles are completely flushed.
+ *
+ * @handover will be set to true if a printk waiter has taken over the
+ * console_lock, in which case the caller is no longer holding the
+ * console_lock. Otherwise it is set to false.
+ *
+ * Returns true when there was at least one usable console and all messages
+ * were flushed to all usable consoles. A returned false informs the caller
+ * that everything was not flushed (either there were no usable consoles or
+ * another context has taken over printing or it is a panic situation and this
+ * is not the panic CPU). Regardless the reason, the caller should assume it
+ * is not useful to immediately try again.
+ *
+ * Requires the console_lock.
+ */
+static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover)
+{
+       bool any_usable = false;
+       struct console *con;
+       bool any_progress;
+
+       *next_seq = 0;
+       *handover = false;
+
+       do {
+               any_progress = false;
+
+               for_each_console(con) {
+                       bool progress;
+
+                       if (!console_is_usable(con))
+                               continue;
+                       any_usable = true;
+
+                       progress = console_emit_next_record(con, handover);
+                       if (*handover)
+                               return false;
+
+                       /* Track the next of the highest seq flushed. */
+                       if (con->seq > *next_seq)
+                               *next_seq = con->seq;
+
+                       if (!progress)
+                               continue;
+                       any_progress = true;
+
+                       /* Allow panic_cpu to take over the consoles safely. */
+                       if (abandon_console_lock_in_panic())
+                               return false;
+
+                       if (do_cond_resched)
+                               cond_resched();
+               }
+       } while (any_progress);
+
+       return any_usable;
 }
 
 /**
@@ -2678,28 +2797,20 @@ static inline int can_use_console(void)
  * by printk().  If this is the case, console_unlock(); emits
  * the output prior to releasing the lock.
  *
- * If there is output waiting, we wake /dev/kmsg and syslog() users.
- *
  * console_unlock(); may be called from any context.
  */
 void console_unlock(void)
 {
-       static char ext_text[CONSOLE_EXT_LOG_MAX];
-       static char text[CONSOLE_LOG_MAX];
-       static int panic_console_dropped;
-       unsigned long flags;
-       bool do_cond_resched, retry;
-       struct printk_info info;
-       struct printk_record r;
-       u64 __maybe_unused next_seq;
+       bool do_cond_resched;
+       bool handover;
+       bool flushed;
+       u64 next_seq;
 
        if (console_suspended) {
                up_console_sem();
                return;
        }
 
-       prb_rec_init_rd(&r, &info, text, sizeof(text));
-
        /*
         * Console drivers are called with interrupts disabled, so
         * @console_may_schedule should be cleared before; however, we may
@@ -2708,125 +2819,34 @@ void console_unlock(void)
         * between lines if allowable.  Not doing so can cause a very long
         * scheduling stall on a slow console leading to RCU stall and
         * softlockup warnings which exacerbate the issue with more
-        * messages practically incapacitating the system.
-        *
-        * console_trylock() is not able to detect the preemptive
-        * context reliably. Therefore the value must be stored before
-        * and cleared after the "again" goto label.
+        * messages practically incapacitating the system. Therefore, create
+        * a local to use for the printing loop.
         */
        do_cond_resched = console_may_schedule;
-again:
-       console_may_schedule = 0;
-
-       /*
-        * We released the console_sem lock, so we need to recheck if
-        * cpu is online and (if not) is there at least one CON_ANYTIME
-        * console.
-        */
-       if (!can_use_console()) {
-               console_locked = 0;
-               up_console_sem();
-               return;
-       }
 
-       for (;;) {
-               size_t ext_len = 0;
-               int handover;
-               size_t len;
-
-skip:
-               if (!prb_read_valid(prb, console_seq, &r))
-                       break;
-
-               if (console_seq != r.info->seq) {
-                       console_dropped += r.info->seq - console_seq;
-                       console_seq = r.info->seq;
-                       if (panic_in_progress() && panic_console_dropped++ > 10) {
-                               suppress_panic_printk = 1;
-                               pr_warn_once("Too many dropped messages. Suppress messages on non-panic CPUs to prevent livelock.\n");
-                       }
-               }
-
-               if (suppress_message_printing(r.info->level)) {
-                       /*
-                        * Skip record we have buffered and already printed
-                        * directly to the console when we received it, and
-                        * record that has level above the console loglevel.
-                        */
-                       console_seq++;
-                       goto skip;
-               }
+       do {
+               console_may_schedule = 0;
 
-               /* Output to all consoles once old messages replayed. */
-               if (unlikely(exclusive_console &&
-                            console_seq >= exclusive_console_stop_seq)) {
-                       exclusive_console = NULL;
-               }
+               flushed = console_flush_all(do_cond_resched, &next_seq, &handover);
+               if (!handover)
+                       __console_unlock();
 
                /*
-                * Handle extended console text first because later
-                * record_print_text() will modify the record buffer in-place.
+                * Abort if there was a failure to flush all messages to all
+                * usable consoles. Either it is not possible to flush (in
+                * which case it would be an infinite loop of retrying) or
+                * another context has taken over printing.
                 */
-               if (nr_ext_console_drivers) {
-                       ext_len = info_print_ext_header(ext_text,
-                                               sizeof(ext_text),
-                                               r.info);
-                       ext_len += msg_print_ext_body(ext_text + ext_len,
-                                               sizeof(ext_text) - ext_len,
-                                               &r.text_buf[0],
-                                               r.info->text_len,
-                                               &r.info->dev_info);
-               }
-               len = record_print_text(&r,
-                               console_msg_format & MSG_FORMAT_SYSLOG,
-                               printk_time);
-               console_seq++;
+               if (!flushed)
+                       break;
 
                /*
-                * While actively printing out messages, if another printk()
-                * were to occur on another CPU, it may wait for this one to
-                * finish. This task can not be preempted if there is a
-                * waiter waiting to take over.
-                *
-                * Interrupts are disabled because the hand over to a waiter
-                * must not be interrupted until the hand over is completed
-                * (@console_waiter is cleared).
+                * Some context may have added new records after
+                * console_flush_all() but before unlocking the console.
+                * Re-check if there is a new record to flush. If the trylock
+                * fails, another context is already handling the printing.
                 */
-               printk_safe_enter_irqsave(flags);
-               console_lock_spinning_enable();
-
-               stop_critical_timings();        /* don't trace print latency */
-               call_console_drivers(ext_text, ext_len, text, len);
-               start_critical_timings();
-
-               handover = console_lock_spinning_disable_and_check();
-               printk_safe_exit_irqrestore(flags);
-               if (handover)
-                       return;
-
-               /* Allow panic_cpu to take over the consoles safely */
-               if (abandon_console_lock_in_panic())
-                       break;
-
-               if (do_cond_resched)
-                       cond_resched();
-       }
-
-       /* Get consistent value of the next-to-be-used sequence number. */
-       next_seq = console_seq;
-
-       console_locked = 0;
-       up_console_sem();
-
-       /*
-        * Someone could have filled up the buffer again, so re-check if there's
-        * something to flush. In case we cannot trylock the console_sem again,
-        * there's a new owner and the console_unlock() from them will do the
-        * flush, no worries.
-        */
-       retry = prb_read_valid(prb, next_seq, NULL);
-       if (retry && !abandon_console_lock_in_panic() && console_trylock())
-               goto again;
+       } while (prb_read_valid(prb, next_seq, NULL) && console_trylock());
 }
 EXPORT_SYMBOL(console_unlock);
 
@@ -2886,8 +2906,14 @@ void console_flush_on_panic(enum con_flush_mode mode)
        console_trylock();
        console_may_schedule = 0;
 
-       if (mode == CONSOLE_REPLAY_ALL)
-               console_seq = prb_first_valid_seq(prb);
+       if (mode == CONSOLE_REPLAY_ALL) {
+               struct console *c;
+               u64 seq;
+
+               seq = prb_first_valid_seq(prb);
+               for_each_console(c)
+                       c->seq = seq;
+       }
        console_unlock();
 }
 
@@ -3127,26 +3153,15 @@ void register_console(struct console *newcon)
        if (newcon->flags & CON_EXTENDED)
                nr_ext_console_drivers++;
 
+       newcon->dropped = 0;
        if (newcon->flags & CON_PRINTBUFFER) {
-               /*
-                * console_unlock(); will print out the buffered messages
-                * for us.
-                *
-                * We're about to replay the log buffer.  Only do this to the
-                * just-registered console to avoid excessive message spam to
-                * the already-registered consoles.
-                *
-                * Set exclusive_console with disabled interrupts to reduce
-                * race window with eventual console_flush_on_panic() that
-                * ignores console_lock.
-                */
-               exclusive_console = newcon;
-               exclusive_console_stop_seq = console_seq;
-
                /* Get a consistent copy of @syslog_seq. */
                mutex_lock(&syslog_lock);
-               console_seq = syslog_seq;
+               newcon->seq = syslog_seq;
                mutex_unlock(&syslog_lock);
+       } else {
+               /* Begin with next message. */
+               newcon->seq = prb_next_seq(prb);
        }
        console_unlock();
        console_sysfs_notify();