drivers/net/dsa/sja1105/sja1105_tas.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
   3  */
   4 #include "sja1105.h"
   5
/* Clock source selectors. SJA1105_TAS_CLKSRC_PTP is what gets written to the
 * clksrc field of the Schedule Entry Points Parameters table when a schedule
 * is built; the other values are not referenced in the code visible here
 * (presumably they are the remaining hardware encodings - confirm against the
 * datasheet).
 */
#define SJA1105_TAS_CLKSRC_DISABLED     0
#define SJA1105_TAS_CLKSRC_STANDALONE   1
#define SJA1105_TAS_CLKSRC_AS6802       2
#define SJA1105_TAS_CLKSRC_PTP          3
/* Limit expressed in TAS deltas (200 ns each): a gate interval must be
 * strictly smaller than this, and the base times of different ports may not
 * be further apart than this.
 */
#define SJA1105_TAS_MAX_DELTA           BIT(19)
/* One bit for each of the SJA1105_NUM_TC traffic classes */
#define SJA1105_GATE_MASK               GENMASK_ULL(SJA1105_NUM_TC - 1, 0)

/* Get the struct sja1105_tas_data that embeds the given tas_work member */
#define work_to_sja1105_tas(d) \
        container_of((d), struct sja1105_tas_data, tas_work)
/* Get the struct sja1105_private that embeds the given tas_data member */
#define tas_to_sja1105(d) \
        container_of((d), struct sja1105_private, tas_data)
  17
  18 /* This is not a preprocessor macro because the "ns" argument may or may not be
  19  * s64 at caller side. This ensures it is properly type-cast before div_s64.
  20  */
  21 static s64 ns_to_sja1105_delta(s64 ns)
  22 {
  23         return div_s64(ns, 200);
  24 }
  25
  26 static s64 sja1105_delta_to_ns(s64 delta)
  27 {
  28         return delta * 200;
  29 }
  30
  31 /* Calculate the first base_time in the future that satisfies this
  32  * relationship:
  33  *
  34  * future_base_time = base_time + N x cycle_time >= now, or
  35  *
  36  *      now - base_time
  37  * N >= ---------------
  38  *         cycle_time
  39  *
  40  * Because N is an integer, the ceiling value of the above "a / b" ratio
  41  * is in fact precisely the floor value of "(a + b - 1) / b", which is
  42  * easier to calculate only having integer division tools.
  43  */
  44 static s64 future_base_time(s64 base_time, s64 cycle_time, s64 now)
  45 {
  46         s64 a, b, n;
  47
  48         if (base_time >= now)
  49                 return base_time;
  50
  51         a = now - base_time;
  52         b = cycle_time;
  53         n = div_s64(a + b - 1, b);
  54
  55         return base_time + n * cycle_time;
  56 }
  57
  58 static int sja1105_tas_set_runtime_params(struct sja1105_private *priv)
  59 {
  60         struct sja1105_tas_data *tas_data = &priv->tas_data;
  61         struct dsa_switch *ds = priv->ds;
  62         s64 earliest_base_time = S64_MAX;
  63         s64 latest_base_time = 0;
  64         s64 its_cycle_time = 0;
  65         s64 max_cycle_time = 0;
  66         int port;
  67
  68         tas_data->enabled = false;
  69
  70         for (port = 0; port < SJA1105_NUM_PORTS; port++) {
  71                 const struct tc_taprio_qopt_offload *offload;
  72
  73                 offload = tas_data->offload[port];
  74                 if (!offload)
  75                         continue;
  76
  77                 tas_data->enabled = true;
  78
  79                 if (max_cycle_time < offload->cycle_time)
  80                         max_cycle_time = offload->cycle_time;
  81                 if (latest_base_time < offload->base_time)
  82                         latest_base_time = offload->base_time;
  83                 if (earliest_base_time > offload->base_time) {
  84                         earliest_base_time = offload->base_time;
  85                         its_cycle_time = offload->cycle_time;
  86                 }
  87         }
  88
  89         if (!tas_data->enabled)
  90                 return 0;
  91
  92         /* Roll the earliest base time over until it is in a comparable
  93          * time base with the latest, then compare their deltas.
  94          * We want to enforce that all ports' base times are within
  95          * SJA1105_TAS_MAX_DELTA 200ns cycles of one another.
  96          */
  97         earliest_base_time = future_base_time(earliest_base_time,
  98                                               its_cycle_time,
  99                                               latest_base_time);
 100         while (earliest_base_time > latest_base_time)
 101                 earliest_base_time -= its_cycle_time;
 102         if (latest_base_time - earliest_base_time >
 103             sja1105_delta_to_ns(SJA1105_TAS_MAX_DELTA)) {
 104                 dev_err(ds->dev,
 105                         "Base times too far apart: min %llu max %llu\n",
 106                         earliest_base_time, latest_base_time);
 107                 return -ERANGE;
 108         }
 109
 110         tas_data->earliest_base_time = earliest_base_time;
 111         tas_data->max_cycle_time = max_cycle_time;
 112
 113         dev_dbg(ds->dev, "earliest base time %lld ns\n", earliest_base_time);
 114         dev_dbg(ds->dev, "latest base time %lld ns\n", latest_base_time);
 115         dev_dbg(ds->dev, "longest cycle time %lld ns\n", max_cycle_time);
 116
 117         return 0;
 118 }
 119
 120 /* Lo and behold: the egress scheduler from hell.
 121  *
 122  * At the hardware level, the Time-Aware Shaper holds a global linear arrray of
 123  * all schedule entries for all ports. These are the Gate Control List (GCL)
 124  * entries, let's call them "timeslots" for short. This linear array of
 125  * timeslots is held in BLK_IDX_SCHEDULE.
 126  *
 127  * Then there are a maximum of 8 "execution threads" inside the switch, which
 128  * iterate cyclically through the "schedule". Each "cycle" has an entry point
 129  * and an exit point, both being timeslot indices in the schedule table. The
 130  * hardware calls each cycle a "subschedule".
 131  *
 132  * Subschedule (cycle) i starts when
 133  *   ptpclkval >= ptpschtm + BLK_IDX_SCHEDULE_ENTRY_POINTS[i].delta.
 134  *
 135  * The hardware scheduler iterates BLK_IDX_SCHEDULE with a k ranging from
 136  *   k = BLK_IDX_SCHEDULE_ENTRY_POINTS[i].address to
 137  *   k = BLK_IDX_SCHEDULE_PARAMS.subscheind[i]
 138  *
 139  * For each schedule entry (timeslot) k, the engine executes the gate control
 140  * list entry for the duration of BLK_IDX_SCHEDULE[k].delta.
 141  *
 142  *         +---------+
 143  *         |         | BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS
 144  *         +---------+
 145  *              |
 146  *              +-----------------+
 147  *                                | .actsubsch
 148  *  BLK_IDX_SCHEDULE_ENTRY_POINTS v
 149  *                 +-------+-------+
 150  *                 |cycle 0|cycle 1|
 151  *                 +-------+-------+
 152  *                   |  |      |  |
 153  *  +----------------+  |      |  +-------------------------------------+
 154  *  |   .subschindx     |      |             .subschindx                |
 155  *  |                   |      +---------------+                        |
 156  *  |          .address |        .address      |                        |
 157  *  |                   |                      |                        |
 158  *  |                   |                      |                        |
 159  *  |  BLK_IDX_SCHEDULE v                      v                        |
 160  *  |              +-------+-------+-------+-------+-------+------+     |
 161  *  |              |entry 0|entry 1|entry 2|entry 3|entry 4|entry5|     |
 162  *  |              +-------+-------+-------+-------+-------+------+     |
 163  *  |                                  ^                    ^  ^  ^     |
 164  *  |                                  |                    |  |  |     |
 165  *  |        +-------------------------+                    |  |  |     |
 166  *  |        |              +-------------------------------+  |  |     |
 167  *  |        |              |              +-------------------+  |     |
 168  *  |        |              |              |                      |     |
 169  *  | +---------------------------------------------------------------+ |
 170  *  | |subscheind[0]<=subscheind[1]<=subscheind[2]<=...<=subscheind[7]| |
 171  *  | +---------------------------------------------------------------+ |
 172  *  |        ^              ^                BLK_IDX_SCHEDULE_PARAMS    |
 173  *  |        |              |                                           |
 174  *  +--------+              +-------------------------------------------+
 175  *
 176  *  In the above picture there are two subschedules (cycles):
 177  *
 178  *  - cycle 0: iterates the schedule table from 0 to 2 (and back)
 179  *  - cycle 1: iterates the schedule table from 3 to 5 (and back)
 180  *
 181  *  All other possible execution threads must be marked as unused by making
 182  *  their "subschedule end index" (subscheind) equal to the last valid
 183  *  subschedule's end index (in this case 5).
 184  */
 185 static int sja1105_init_scheduling(struct sja1105_private *priv)
 186 {
 187         struct sja1105_schedule_entry_points_entry *schedule_entry_points;
 188         struct sja1105_schedule_entry_points_params_entry
 189                                         *schedule_entry_points_params;
 190         struct sja1105_schedule_params_entry *schedule_params;
 191         struct sja1105_tas_data *tas_data = &priv->tas_data;
 192         struct sja1105_schedule_entry *schedule;
 193         struct sja1105_table *table;
 194         int schedule_start_idx;
 195         s64 entry_point_delta;
 196         int schedule_end_idx;
 197         int num_entries = 0;
 198         int num_cycles = 0;
 199         int cycle = 0;
 200         int i, k = 0;
 201         int port, rc;
 202
 203         rc = sja1105_tas_set_runtime_params(priv);
 204         if (rc < 0)
 205                 return rc;
 206
 207         /* Discard previous Schedule Table */
 208         table = &priv->static_config.tables[BLK_IDX_SCHEDULE];
 209         if (table->entry_count) {
 210                 kfree(table->entries);
 211                 table->entry_count = 0;
 212         }
 213
 214         /* Discard previous Schedule Entry Points Parameters Table */
 215         table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS];
 216         if (table->entry_count) {
 217                 kfree(table->entries);
 218                 table->entry_count = 0;
 219         }
 220
 221         /* Discard previous Schedule Parameters Table */
 222         table = &priv->static_config.tables[BLK_IDX_SCHEDULE_PARAMS];
 223         if (table->entry_count) {
 224                 kfree(table->entries);
 225                 table->entry_count = 0;
 226         }
 227
 228         /* Discard previous Schedule Entry Points Table */
 229         table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS];
 230         if (table->entry_count) {
 231                 kfree(table->entries);
 232                 table->entry_count = 0;
 233         }
 234
 235         /* Figure out the dimensioning of the problem */
 236         for (port = 0; port < SJA1105_NUM_PORTS; port++) {
 237                 if (tas_data->offload[port]) {
 238                         num_entries += tas_data->offload[port]->num_entries;
 239                         num_cycles++;
 240                 }
 241         }
 242
 243         /* Nothing to do */
 244         if (!num_cycles)
 245                 return 0;
 246
 247         /* Pre-allocate space in the static config tables */
 248
 249         /* Schedule Table */
 250         table = &priv->static_config.tables[BLK_IDX_SCHEDULE];
 251         table->entries = kcalloc(num_entries, table->ops->unpacked_entry_size,
 252                                  GFP_KERNEL);
 253         if (!table->entries)
 254                 return -ENOMEM;
 255         table->entry_count = num_entries;
 256         schedule = table->entries;
 257
 258         /* Schedule Points Parameters Table */
 259         table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS];
 260         table->entries = kcalloc(SJA1105_MAX_SCHEDULE_ENTRY_POINTS_PARAMS_COUNT,
 261                                  table->ops->unpacked_entry_size, GFP_KERNEL);
 262         if (!table->entries)
 263                 /* Previously allocated memory will be freed automatically in
 264                  * sja1105_static_config_free. This is true for all early
 265                  * returns below.
 266                  */
 267                 return -ENOMEM;
 268         table->entry_count = SJA1105_MAX_SCHEDULE_ENTRY_POINTS_PARAMS_COUNT;
 269         schedule_entry_points_params = table->entries;
 270
 271         /* Schedule Parameters Table */
 272         table = &priv->static_config.tables[BLK_IDX_SCHEDULE_PARAMS];
 273         table->entries = kcalloc(SJA1105_MAX_SCHEDULE_PARAMS_COUNT,
 274                                  table->ops->unpacked_entry_size, GFP_KERNEL);
 275         if (!table->entries)
 276                 return -ENOMEM;
 277         table->entry_count = SJA1105_MAX_SCHEDULE_PARAMS_COUNT;
 278         schedule_params = table->entries;
 279
 280         /* Schedule Entry Points Table */
 281         table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS];
 282         table->entries = kcalloc(num_cycles, table->ops->unpacked_entry_size,
 283                                  GFP_KERNEL);
 284         if (!table->entries)
 285                 return -ENOMEM;
 286         table->entry_count = num_cycles;
 287         schedule_entry_points = table->entries;
 288
 289         /* Finally start populating the static config tables */
 290         schedule_entry_points_params->clksrc = SJA1105_TAS_CLKSRC_PTP;
 291         schedule_entry_points_params->actsubsch = num_cycles - 1;
 292
 293         for (port = 0; port < SJA1105_NUM_PORTS; port++) {
 294                 const struct tc_taprio_qopt_offload *offload;
 295                 /* Relative base time */
 296                 s64 rbt;
 297
 298                 offload = tas_data->offload[port];
 299                 if (!offload)
 300                         continue;
 301
 302                 schedule_start_idx = k;
 303                 schedule_end_idx = k + offload->num_entries - 1;
 304                 /* This is the base time expressed as a number of TAS ticks
 305                  * relative to PTPSCHTM, which we'll (perhaps improperly) call
 306                  * the operational base time.
 307                  */
 308                 rbt = future_base_time(offload->base_time,
 309                                        offload->cycle_time,
 310                                        tas_data->earliest_base_time);
 311                 rbt -= tas_data->earliest_base_time;
 312                 /* UM10944.pdf 4.2.2. Schedule Entry Points table says that
 313                  * delta cannot be zero, which is shitty. Advance all relative
 314                  * base times by 1 TAS delta, so that even the earliest base
 315                  * time becomes 1 in relative terms. Then start the operational
 316                  * base time (PTPSCHTM) one TAS delta earlier than planned.
 317                  */
 318                 entry_point_delta = ns_to_sja1105_delta(rbt) + 1;
 319
 320                 schedule_entry_points[cycle].subschindx = cycle;
 321                 schedule_entry_points[cycle].delta = entry_point_delta;
 322                 schedule_entry_points[cycle].address = schedule_start_idx;
 323
 324                 /* The subschedule end indices need to be
 325                  * monotonically increasing.
 326                  */
 327                 for (i = cycle; i < 8; i++)
 328                         schedule_params->subscheind[i] = schedule_end_idx;
 329
 330                 for (i = 0; i < offload->num_entries; i++, k++) {
 331                         s64 delta_ns = offload->entries[i].interval;
 332
 333                         schedule[k].delta = ns_to_sja1105_delta(delta_ns);
 334                         schedule[k].destports = BIT(port);
 335                         schedule[k].resmedia_en = true;
 336                         schedule[k].resmedia = SJA1105_GATE_MASK &
 337                                         ~offload->entries[i].gate_mask;
 338                 }
 339                 cycle++;
 340         }
 341
 342         return 0;
 343 }
 344
 345 /* Be there 2 port subschedules, each executing an arbitrary number of gate
 346  * open/close events cyclically.
 347  * None of those gate events must ever occur at the exact same time, otherwise
 348  * the switch is known to act in exotically strange ways.
 349  * However the hardware doesn't bother performing these integrity checks.
 350  * So here we are with the task of validating whether the new @admin offload
 351  * has any conflict with the already established TAS configuration in
 352  * tas_data->offload.  We already know the other ports are in harmony with one
 353  * another, otherwise we wouldn't have saved them.
 354  * Each gate event executes periodically, with a period of @cycle_time and a
 355  * phase given by its cycle's @base_time plus its offset within the cycle
 356  * (which in turn is given by the length of the events prior to it).
 357  * There are two aspects to possible collisions:
 358  * - Collisions within one cycle's (actually the longest cycle's) time frame.
 359  *   For that, we need to compare the cartesian product of each possible
 360  *   occurrence of each event within one cycle time.
 361  * - Collisions in the future. Events may not collide within one cycle time,
 362  *   but if two port schedules don't have the same periodicity (aka the cycle
 363  *   times aren't multiples of one another), they surely will some time in the
 364  *   future (actually they will collide an infinite amount of times).
 365  */
 366 static bool
 367 sja1105_tas_check_conflicts(struct sja1105_private *priv, int port,
 368                             const struct tc_taprio_qopt_offload *admin)
 369 {
 370         struct sja1105_tas_data *tas_data = &priv->tas_data;
 371         const struct tc_taprio_qopt_offload *offload;
 372         s64 max_cycle_time, min_cycle_time;
 373         s64 delta1, delta2;
 374         s64 rbt1, rbt2;
 375         s64 stop_time;
 376         s64 t1, t2;
 377         int i, j;
 378         s32 rem;
 379
 380         offload = tas_data->offload[port];
 381         if (!offload)
 382                 return false;
 383
 384         /* Check if the two cycle times are multiples of one another.
 385          * If they aren't, then they will surely collide.
 386          */
 387         max_cycle_time = max(offload->cycle_time, admin->cycle_time);
 388         min_cycle_time = min(offload->cycle_time, admin->cycle_time);
 389         div_s64_rem(max_cycle_time, min_cycle_time, &rem);
 390         if (rem)
 391                 return true;
 392
 393         /* Calculate the "reduced" base time of each of the two cycles
 394          * (transposed back as close to 0 as possible) by dividing to
 395          * the cycle time.
 396          */
 397         div_s64_rem(offload->base_time, offload->cycle_time, &rem);
 398         rbt1 = rem;
 399
 400         div_s64_rem(admin->base_time, admin->cycle_time, &rem);
 401         rbt2 = rem;
 402
 403         stop_time = max_cycle_time + max(rbt1, rbt2);
 404
 405         /* delta1 is the relative base time of each GCL entry within
 406          * the established ports' TAS config.
 407          */
 408         for (i = 0, delta1 = 0;
 409              i < offload->num_entries;
 410              delta1 += offload->entries[i].interval, i++) {
 411                 /* delta2 is the relative base time of each GCL entry
 412                  * within the newly added TAS config.
 413                  */
 414                 for (j = 0, delta2 = 0;
 415                      j < admin->num_entries;
 416                      delta2 += admin->entries[j].interval, j++) {
 417                         /* t1 follows all possible occurrences of the
 418                          * established ports' GCL entry i within the
 419                          * first cycle time.
 420                          */
 421                         for (t1 = rbt1 + delta1;
 422                              t1 <= stop_time;
 423                              t1 += offload->cycle_time) {
 424                                 /* t2 follows all possible occurrences
 425                                  * of the newly added GCL entry j
 426                                  * within the first cycle time.
 427                                  */
 428                                 for (t2 = rbt2 + delta2;
 429                                      t2 <= stop_time;
 430                                      t2 += admin->cycle_time) {
 431                                         if (t1 == t2) {
 432                                                 dev_warn(priv->ds->dev,
 433                                                          "GCL entry %d collides with entry %d of port %d\n",
 434                                                          j, i, port);
 435                                                 return true;
 436                                         }
 437                                 }
 438                         }
 439                 }
 440         }
 441
 442         return false;
 443 }
 444
 445 int sja1105_setup_tc_taprio(struct dsa_switch *ds, int port,
 446                             struct tc_taprio_qopt_offload *admin)
 447 {
 448         struct sja1105_private *priv = ds->priv;
 449         struct sja1105_tas_data *tas_data = &priv->tas_data;
 450         int other_port, rc, i;
 451
 452         /* Can't change an already configured port (must delete qdisc first).
 453          * Can't delete the qdisc from an unconfigured port.
 454          */
 455         if (!!tas_data->offload[port] == admin->enable)
 456                 return -EINVAL;
 457
 458         if (!admin->enable) {
 459                 taprio_offload_free(tas_data->offload[port]);
 460                 tas_data->offload[port] = NULL;
 461
 462                 rc = sja1105_init_scheduling(priv);
 463                 if (rc < 0)
 464                         return rc;
 465
 466                 return sja1105_static_config_reload(priv, SJA1105_SCHEDULING);
 467         }
 468
 469         /* The cycle time extension is the amount of time the last cycle from
 470          * the old OPER needs to be extended in order to phase-align with the
 471          * base time of the ADMIN when that becomes the new OPER.
 472          * But of course our switch needs to be reset to switch-over between
 473          * the ADMIN and the OPER configs - so much for a seamless transition.
 474          * So don't add insult over injury and just say we don't support cycle
 475          * time extension.
 476          */
 477         if (admin->cycle_time_extension)
 478                 return -ENOTSUPP;
 479
 480         for (i = 0; i < admin->num_entries; i++) {
 481                 s64 delta_ns = admin->entries[i].interval;
 482                 s64 delta_cycles = ns_to_sja1105_delta(delta_ns);
 483                 bool too_long, too_short;
 484
 485                 too_long = (delta_cycles >= SJA1105_TAS_MAX_DELTA);
 486                 too_short = (delta_cycles == 0);
 487                 if (too_long || too_short) {
 488                         dev_err(priv->ds->dev,
 489                                 "Interval %llu too %s for GCL entry %d\n",
 490                                 delta_ns, too_long ? "long" : "short", i);
 491                         return -ERANGE;
 492                 }
 493         }
 494
 495         for (other_port = 0; other_port < SJA1105_NUM_PORTS; other_port++) {
 496                 if (other_port == port)
 497                         continue;
 498
 499                 if (sja1105_tas_check_conflicts(priv, other_port, admin))
 500                         return -ERANGE;
 501         }
 502
 503         tas_data->offload[port] = taprio_offload_get(admin);
 504
 505         rc = sja1105_init_scheduling(priv);
 506         if (rc < 0)
 507                 return rc;
 508
 509         return sja1105_static_config_reload(priv, SJA1105_SCHEDULING);
 510 }
 511
 512 static int sja1105_tas_check_running(struct sja1105_private *priv)
 513 {
 514         struct sja1105_tas_data *tas_data = &priv->tas_data;
 515         struct dsa_switch *ds = priv->ds;
 516         struct sja1105_ptp_cmd cmd = {0};
 517         int rc;
 518
 519         rc = sja1105_ptp_commit(ds, &cmd, SPI_READ);
 520         if (rc < 0)
 521                 return rc;
 522
 523         if (cmd.ptpstrtsch == 1)
 524                 /* Schedule successfully started */
 525                 tas_data->state = SJA1105_TAS_STATE_RUNNING;
 526         else if (cmd.ptpstopsch == 1)
 527                 /* Schedule is stopped */
 528                 tas_data->state = SJA1105_TAS_STATE_DISABLED;
 529         else
 530                 /* Schedule is probably not configured with PTP clock source */
 531                 rc = -EINVAL;
 532
 533         return rc;
 534 }
 535
 536 /* Write to PTPCLKCORP */
 537 static int sja1105_tas_adjust_drift(struct sja1105_private *priv,
 538                                     u64 correction)
 539 {
 540         const struct sja1105_regs *regs = priv->info->regs;
 541         u32 ptpclkcorp = ns_to_sja1105_ticks(correction);
 542
 543         return sja1105_xfer_u32(priv, SPI_WRITE, regs->ptpclkcorp,
 544                                 &ptpclkcorp, NULL);
 545 }
 546
 547 /* Write to PTPSCHTM */
 548 static int sja1105_tas_set_base_time(struct sja1105_private *priv,
 549                                      u64 base_time)
 550 {
 551         const struct sja1105_regs *regs = priv->info->regs;
 552         u64 ptpschtm = ns_to_sja1105_ticks(base_time);
 553
 554         return sja1105_xfer_u64(priv, SPI_WRITE, regs->ptpschtm,
 555                                 &ptpschtm, NULL);
 556 }
 557
 558 static int sja1105_tas_start(struct sja1105_private *priv)
 559 {
 560         struct sja1105_tas_data *tas_data = &priv->tas_data;
 561         struct sja1105_ptp_cmd *cmd = &priv->ptp_data.cmd;
 562         struct dsa_switch *ds = priv->ds;
 563         int rc;
 564
 565         dev_dbg(ds->dev, "Starting the TAS\n");
 566
 567         if (tas_data->state == SJA1105_TAS_STATE_ENABLED_NOT_RUNNING ||
 568             tas_data->state == SJA1105_TAS_STATE_RUNNING) {
 569                 dev_err(ds->dev, "TAS already started\n");
 570                 return -EINVAL;
 571         }
 572
 573         cmd->ptpstrtsch = 1;
 574         cmd->ptpstopsch = 0;
 575
 576         rc = sja1105_ptp_commit(ds, cmd, SPI_WRITE);
 577         if (rc < 0)
 578                 return rc;
 579
 580         tas_data->state = SJA1105_TAS_STATE_ENABLED_NOT_RUNNING;
 581
 582         return 0;
 583 }
 584
 585 static int sja1105_tas_stop(struct sja1105_private *priv)
 586 {
 587         struct sja1105_tas_data *tas_data = &priv->tas_data;
 588         struct sja1105_ptp_cmd *cmd = &priv->ptp_data.cmd;
 589         struct dsa_switch *ds = priv->ds;
 590         int rc;
 591
 592         dev_dbg(ds->dev, "Stopping the TAS\n");
 593
 594         if (tas_data->state == SJA1105_TAS_STATE_DISABLED) {
 595                 dev_err(ds->dev, "TAS already disabled\n");
 596                 return -EINVAL;
 597         }
 598
 599         cmd->ptpstopsch = 1;
 600         cmd->ptpstrtsch = 0;
 601
 602         rc = sja1105_ptp_commit(ds, cmd, SPI_WRITE);
 603         if (rc < 0)
 604                 return rc;
 605
 606         tas_data->state = SJA1105_TAS_STATE_DISABLED;
 607
 608         return 0;
 609 }
 610
 611 /* The schedule engine and the PTP clock are driven by the same oscillator, and
 612  * they run in parallel. But whilst the PTP clock can keep an absolute
 613  * time-of-day, the schedule engine is only running in 'ticks' (25 ticks make
 614  * up a delta, which is 200ns), and wrapping around at the end of each cycle.
 615  * The schedule engine is started when the PTP clock reaches the PTPSCHTM time
 616  * (in PTP domain).
 617  * Because the PTP clock can be rate-corrected (accelerated or slowed down) by
 618  * a software servo, and the schedule engine clock runs in parallel to the PTP
 619  * clock, there is logic internal to the switch that periodically keeps the
 620  * schedule engine from drifting away. The frequency with which this internal
 621  * syntonization happens is the PTP clock correction period (PTPCLKCORP). It is
 622  * a value also in the PTP clock domain, and is also rate-corrected.
 * To be precise, during a correction period, there is logic to determine by
 * how many scheduler clock ticks the PTP clock has drifted. At the end of each
 625  * correction period/beginning of new one, the length of a delta is shrunk or
 626  * expanded with an integer number of ticks, compared with the typical 25.
 627  * So a delta lasts for 200ns (or 25 ticks) only on average.
 628  * Sometimes it is longer, sometimes it is shorter. The internal syntonization
 629  * logic can adjust for at most 5 ticks each 20 ticks.
 630  *
 631  * The first implication is that you should choose your schedule correction
 632  * period to be an integer multiple of the schedule length. Preferably one.
 633  * In case there are schedules of multiple ports active, then the correction
 634  * period needs to be a multiple of them all. Given the restriction that the
 635  * cycle times have to be multiples of one another anyway, this means the
 636  * correction period can simply be the largest cycle time, hence the current
 637  * choice. This way, the updates are always synchronous to the transmission
 638  * cycle, and therefore predictable.
 639  *
 640  * The second implication is that at the beginning of a correction period, the
 641  * first few deltas will be modulated in time, until the schedule engine is
 642  * properly phase-aligned with the PTP clock. For this reason, you should place
 643  * your best-effort traffic at the beginning of a cycle, and your
 644  * time-triggered traffic afterwards.
 645  *
 646  * The third implication is that once the schedule engine is started, it can
 647  * only adjust for so much drift within a correction period. In the servo you
 648  * can only change the PTPCLKRATE, but not step the clock (PTPCLKADD). If you
 649  * want to do the latter, you need to stop and restart the schedule engine,
 650  * which is what the state machine handles.
 651  */
 652 static void sja1105_tas_state_machine(struct work_struct *work)
 653 {
 654         struct sja1105_tas_data *tas_data = work_to_sja1105_tas(work);
 655         struct sja1105_private *priv = tas_to_sja1105(tas_data);
 656         struct sja1105_ptp_data *ptp_data = &priv->ptp_data;
 657         struct timespec64 base_time_ts, now_ts;
 658         struct dsa_switch *ds = priv->ds;
 659         struct timespec64 diff;
 660         s64 base_time, now;
 661         int rc = 0;
 662
 663         mutex_lock(&ptp_data->lock);
 664
 665         switch (tas_data->state) {
 666         case SJA1105_TAS_STATE_DISABLED:
 667                 /* Can't do anything at all if clock is still being stepped */
 668                 if (tas_data->last_op != SJA1105_PTP_ADJUSTFREQ)
 669                         break;
 670
 671                 rc = sja1105_tas_adjust_drift(priv, tas_data->max_cycle_time);
 672                 if (rc < 0)
 673                         break;
 674
 675                 rc = __sja1105_ptp_gettimex(ds, &now, NULL);
 676                 if (rc < 0)
 677                         break;
 678
 679                 /* Plan to start the earliest schedule first. The others
 680                  * will be started in hardware, by way of their respective
 681                  * entry points delta.
 682                  * Try our best to avoid fringe cases (race condition between
 683                  * ptpschtm and ptpstrtsch) by pushing the oper_base_time at
 684                  * least one second in the future from now. This is not ideal,
 685                  * but this only needs to buy us time until the
 686                  * sja1105_tas_start command below gets executed.
 687                  */
 688                 base_time = future_base_time(tas_data->earliest_base_time,
 689                                              tas_data->max_cycle_time,
 690                                              now + 1ull * NSEC_PER_SEC);
 691                 base_time -= sja1105_delta_to_ns(1);
 692
 693                 rc = sja1105_tas_set_base_time(priv, base_time);
 694                 if (rc < 0)
 695                         break;
 696
 697                 tas_data->oper_base_time = base_time;
 698
 699                 rc = sja1105_tas_start(priv);
 700                 if (rc < 0)
 701                         break;
 702
 703                 base_time_ts = ns_to_timespec64(base_time);
 704                 now_ts = ns_to_timespec64(now);
 705
 706                 dev_dbg(ds->dev, "OPER base time %lld.%09ld (now %lld.%09ld)\n",
 707                         base_time_ts.tv_sec, base_time_ts.tv_nsec,
 708                         now_ts.tv_sec, now_ts.tv_nsec);
 709
 710                 break;
 711
 712         case SJA1105_TAS_STATE_ENABLED_NOT_RUNNING:
 713                 if (tas_data->last_op != SJA1105_PTP_ADJUSTFREQ) {
 714                         /* Clock was stepped.. bad news for TAS */
 715                         sja1105_tas_stop(priv);
 716                         break;
 717                 }
 718
 719                 /* Check if TAS has actually started, by comparing the
 720                  * scheduled start time with the SJA1105 PTP clock
 721                  */
 722                 rc = __sja1105_ptp_gettimex(ds, &now, NULL);
 723                 if (rc < 0)
 724                         break;
 725
 726                 if (now < tas_data->oper_base_time) {
 727                         /* TAS has not started yet */
 728                         diff = ns_to_timespec64(tas_data->oper_base_time - now);
 729                         dev_dbg(ds->dev, "time to start: [%lld.%09ld]",
 730                                 diff.tv_sec, diff.tv_nsec);
 731                         break;
 732                 }
 733
 734                 /* Time elapsed, what happened? */
 735                 rc = sja1105_tas_check_running(priv);
 736                 if (rc < 0)
 737                         break;
 738
 739                 if (tas_data->state != SJA1105_TAS_STATE_RUNNING)
 740                         /* TAS has started */
 741                         dev_err(ds->dev,
 742                                 "TAS not started despite time elapsed\n");
 743
 744                 break;
 745
 746         case SJA1105_TAS_STATE_RUNNING:
 747                 /* Clock was stepped.. bad news for TAS */
 748                 if (tas_data->last_op != SJA1105_PTP_ADJUSTFREQ) {
 749                         sja1105_tas_stop(priv);
 750                         break;
 751                 }
 752
 753                 rc = sja1105_tas_check_running(priv);
 754                 if (rc < 0)
 755                         break;
 756
 757                 if (tas_data->state != SJA1105_TAS_STATE_RUNNING)
 758                         dev_err(ds->dev, "TAS surprisingly stopped\n");
 759
 760                 break;
 761
 762         default:
 763                 if (net_ratelimit())
 764                         dev_err(ds->dev, "TAS in an invalid state (incorrect use of API)!\n");
 765         }
 766
 767         if (rc && net_ratelimit())
 768                 dev_err(ds->dev, "An operation returned %d\n", rc);
 769
 770         mutex_unlock(&ptp_data->lock);
 771 }
 772
 773 void sja1105_tas_clockstep(struct dsa_switch *ds)
 774 {
 775         struct sja1105_private *priv = ds->priv;
 776         struct sja1105_tas_data *tas_data = &priv->tas_data;
 777
 778         if (!tas_data->enabled)
 779                 return;
 780
 781         tas_data->last_op = SJA1105_PTP_CLOCKSTEP;
 782         schedule_work(&tas_data->tas_work);
 783 }
 784
 785 void sja1105_tas_adjfreq(struct dsa_switch *ds)
 786 {
 787         struct sja1105_private *priv = ds->priv;
 788         struct sja1105_tas_data *tas_data = &priv->tas_data;
 789
 790         if (!tas_data->enabled)
 791                 return;
 792
 793         /* No reason to schedule the workqueue, nothing changed */
 794         if (tas_data->state == SJA1105_TAS_STATE_RUNNING)
 795                 return;
 796
 797         tas_data->last_op = SJA1105_PTP_ADJUSTFREQ;
 798         schedule_work(&tas_data->tas_work);
 799 }
 800
 801 void sja1105_tas_setup(struct dsa_switch *ds)
 802 {
 803         struct sja1105_private *priv = ds->priv;
 804         struct sja1105_tas_data *tas_data = &priv->tas_data;
 805
 806         INIT_WORK(&tas_data->tas_work, sja1105_tas_state_machine);
 807         tas_data->state = SJA1105_TAS_STATE_DISABLED;
 808         tas_data->last_op = SJA1105_PTP_NONE;
 809 }
 810
 811 void sja1105_tas_teardown(struct dsa_switch *ds)
 812 {
 813         struct sja1105_private *priv = ds->priv;
 814         struct tc_taprio_qopt_offload *offload;
 815         int port;
 816
 817         cancel_work_sync(&priv->tas_data.tas_work);
 818
 819         for (port = 0; port < SJA1105_NUM_PORTS; port++) {
 820                 offload = priv->tas_data.offload[port];
 821                 if (!offload)
 822                         continue;
 823
 824                 taprio_offload_free(offload);
 825         }
 826 }