spec/main.tex

   1 \documentclass{article}
   2 \usepackage{palatino}
   3
   4 \author{Kristian Høgsberg\\
   5 \texttt{krh@bitplanet.net}
   6 }
   7
   8 \title{The Wayland Display Server}
   9
  10 \begin{document}
  11
  12 \maketitle
  13
  14 \section{Wayland Overview}
  15
  16 \begin{itemize}
  17 \item wayland is a protocol for a new display server.
  18 \item wayland is an implementation
  19 \end{itemize}
  20
  21 \subsection{Replacing X11}
  22
  23 Over the last 10 years, a lot of functionality has slowly moved out
  24 of the X server and into libraries or kernel drivers. It started with
  25 freetype and fontconfig providing an alternative to the core X fonts
  26 and direct rendering OpenGL as a graphics driver in a client side
  27 library. Then cairo came along and provided a modern 2D rendering
  28 library independent of X and compositing managers took over control of
  29 the rendering of the desktop. Recently with GEM and KMS in the Linux
  30 kernel, we can do modesetting outside X and schedule several direct
  31 rendering clients. The end result is a highly modular graphics stack.
  32
  33 Wayland is a new display server building on top of all those
  34 components. We're trying to distill out the functionality in the X
  35 server that is still used by the modern Linux desktop. This turns out
  36 to be not a whole lot. Applications can allocate their own off-screen
  37 buffers and render their window contents by themselves. In the end,
  38 what's needed is a way to present the resulting window surface to a
  39 compositor and a way to receive input. This is what Wayland provides,
  40 by piecing together the components already in the ecosystem in a
  41 slightly different way.
  42
  43 X will always be relevant, in the same way Fortran compilers and VRML
  44 browsers are, but it's time that we think about moving it out of the
  45 critical path and provide it as an optional component for legacy
  46 applications.
  47
  48
  49 \section{Wayland protocol}
  50
  51 \subsection{Basic Principles}
  52
  53 The wayland protocol is an asynchronous object-oriented protocol.  All
  54 requests are method invocations on some object.  Each request includes
  55 an object id that uniquely identifies an object on the server.  Each
  56 object implements an interface and the requests include an opcode that
  57 identifies which method in the interface to invoke.
  58
  59 The wire protocol is determined from the C prototypes of the requests
  60 and events.  There is a straightforward mapping from the C types to
  61 packing the bytes in the request written to the socket.  It is
  62 possible to map the events and requests to function calls in other
  63 languages, but that hasn't been done at this point.
  64
  65 The server sends back events to the client; each event is emitted from
  66 an object.  Events can be error conditions.  The event includes the
  67 object id and the event opcode, from which the client can determine
  68 the type of event.  Events are generated either in response to a request
  69 (in which case the request and the event constitute a round trip) or
  70 spontaneously when the server state changes.
  71
  72 \begin{itemize}
  73 \item state is broadcast on connect, events sent out when state
  74   changes.  client must listen for these changes and cache the state.
  75   no need (or mechanism) to query server state.
  76
  77 \item server will broadcast presence of a number of global objects,
  78   which in turn will broadcast their current state
  79 \end{itemize}
  80
  81 \subsection{Connect Time}
  82
  83 \begin{itemize}
  84 \item no fixed format connect block, the server emits a bunch of
  85   events at connect time
  86 \item presence events for global objects: output, compositor, input
  87   devices
  88 \end{itemize}
  89 \subsection{Security and Authentication}
  90
  91 \begin{itemize}
  92 \item mostly about access to underlying buffers, need new drm auth
  93   mechanism (the grant-to ioctl idea), need to check the cmd stream?
  94
  95 \item getting the server socket depends on the compositor type, could
  96   be a system wide name, through fd passing on the session dbus, or
  97   the client is forked by the compositor and the fd is already opened.
  98 \end{itemize}
  99
 100 \subsection{Creating Objects}
 101
 102 \begin{itemize}
 103 \item client allocates object ID, uses range protocol
 104 \item server tracks how many IDs are left in current range, sends new
 105   range when client is about to run out.
 106 \end{itemize}
 107
 108 \subsection{Compositor}
 109
 110 The compositor is a global object, advertised at connect time.
 111
 112 \begin{tabular}{l}
 113   \hline
 114   Interface \texttt{compositor} \\ \hline
 115   Requests \\ \hline
 116   \texttt{create\_surface(id)} \\
 117   \texttt{commit()} \\ \hline
 118   Events \\ \hline
 119   \texttt{device(device)} \\
 120   \texttt{acknowledge(key, frame)} \\
 121   \texttt{frame(frame, time)} \\ \hline
 122 \end{tabular}
 123
 124
 125 \begin{itemize}
 126 \item a global object
 127 \item broadcasts drm file name, or at least a string like drm:/dev/card0
 128 \item commit/ack/frame protocol
 129 \end{itemize}
 130
 131 \subsection{Surface}
 132
 133 Created by the client.
 134
 135 \begin{tabular}{l}
 136   \hline
 137   Interface \texttt{surface} \\ \hline
 138   Requests \\ \hline
 139   \texttt{destroy()} \\
 140   \texttt{attach()} \\
 141   \texttt{map()} \\
 142   \texttt{damage()} \\ \hline
 143   Events \\ \hline
 144   no events \\ \hline
 145 \end{tabular}
 146
 147 Needs a way to set input region, opaque region.
 148
 149 \subsection{Input}
 150
 151 Represents a group of input devices, including mice, keyboards.  Has a
 152 keyboard and pointer focus.  Global object.  Pointer events are
 153 delivered in both screen coordinates and surface local coordinates.
 154
 155 \begin{tabular}{l}
 156   \hline
 157   Interface \texttt{input} \\ \hline
 158   Requests \\ \hline
 159   no requests \\ \hline
 160   Events \\ \hline
 161   \texttt{motion(x, y, sx, sy)} \\
 162   \texttt{button(button, state, x, y, sx, sy)} \\
 163   \texttt{key(key, state)} \\
 164   \texttt{pointer\_focus(surface)} \\
 165   \texttt{keyboard\_focus(surface, keys)} \\ \hline
 166 \end{tabular}
 167
 168 \begin{itemize}
 169 \item input group, keyboard, mouse
 170 \item keyboard map, change events
 171 \item pointer motion
 172 \item enter, leave, focus \end{itemize}
 173 Talk about:
 174
 175 \begin{itemize}
 176 \item xkb on wayland
 177 \item multi pointer wayland
 178 \end{itemize}
 179
 180 A surface can change the pointer image when the surface is the pointer
 181 focus of the input device.  Wayland doesn't automatically change the
 182 pointer image when a pointer enters a surface, but expects the
 183 application to set the cursor it wants in response to the motion
 184 event.  The rationale is that a client has to manage changing pointer
 185 images for UI elements within the surface in response to motion events
 186 anyway, so we'll make that the only mechanism for setting or changing the
 187 pointer image.  If the server receives a request to set the pointer
 188 image after the surface loses pointer focus, the request is ignored.
 189 To the client this will look like it successfully set the pointer
 190 image.
 191
 192 The compositor will revert the pointer image back to a default image
 193 when no surface has the pointer focus for that device.  Clients can
 194 revert the pointer image back to the default image by setting a NULL
 195 image.
 196
 197 What if the pointer moves from one window which has set a special
 198 pointer image to a surface that doesn't set an image in response to
 199 the motion event?  The new surface will be stuck with the special
 200 pointer image.  We can't just revert the pointer image on leaving a
 201 surface, since if we immediately enter a surface that sets a different
 202 image, the image will flicker.  Broken app, I suppose.
 203
 204 \subsection{Output}
 205
 206 An output is a global object, advertised at connect time or as outputs
 207 come and go.
 208
 209 \begin{tabular}{l}
 210   \hline
 211   Interface \texttt{output} \\ \hline
 212   Requests \\ \hline
 213   no requests \\ \hline
 214   Events \\ \hline
 215   \texttt{geometry(width, height)} \\ \hline
 216 \end{tabular}
 217
 218 \begin{itemize}
 219 \item laid out in a big (compositor) coordinate system
 220 \item basically xrandr over wayland
 221 \item geometry needs position in compositor coordinate system
 222 \item events to advertise available modes, requests to move and change
 223   modes
 224 \end{itemize}
 225
 226 \subsection{Shared object cache}
 227
 228 Cache for sharing glyphs, icons, cursors across clients.  Lets clients
 229 share identical objects.  The cache is a global object, advertised at
 230 connect time.
 231
 232 \begin{tabular}{l}
 233   \hline
 234   Interface \texttt{cache} \\ \hline
 235   Requests \\ \hline
 236   \texttt{upload(key, visual, bo, stride, width, height)} \\ \hline
 237   Events \\ \hline
 238   \texttt{item(key, bo, x, y, stride)} \\
 239   \texttt{retire(bo)} \\ \hline
 240 \end{tabular}
 241
 242 \begin{itemize}
 243
 244 \item Upload by passing a visual, bo, stride, width, height to the
 245   cache.
 246
 247 \item Upload returns a bo name, stride, and x, y location of object in
 248   the buffer.  Clients take a reference on the atlas bo.
 249
 250 \item Shared objects are refcounted, freed by client (when purging
 251   glyphs from the local cache) or when a client exits.
 252
 253 \item Server can't delete individual items from an atlas, but it can
 254   throw out an entire atlas bo if it becomes too sparse.  The server
 255   sends out a \texttt{retire} event when this happens, and clients
 256   must throw away any objects from that bo and reupload.  Between the
 257   server dropping the atlas and the client receiving the retire event,
 258   clients can still legally use the old atlas since they have a ref on
 259   the bo.
 260
 261 \item cairo needs to hook into the glyph cache, and maybe also a way
 262   to create a read-only surface based on an object from the cache
 263   (icons).
 264
 265   \texttt{cairo\_wayland\_create\_cached\_surface(surface-data)}.
 266
 267 \end{itemize}
 268
 269
 270 \subsection{Drag and Drop}
 271
 272 Multi-device aware. Orthogonal to rest of wayland, as it is its own
 273 toplevel object.  Since the compositor determines the drag target, it
 274 works with transformed surfaces (dragging to a scaled down window in
 275 expose mode, for example).
 276
 277 Issues:
 278
 279 \begin{itemize}
 280 \item we can set the cursor image to the current cursor + dragged
 281   object, which will last as long as the drag, but maybe a request to
 282   attach an image to the cursor will be more convenient?
 283
 284 \item Should drag.send() destroy the object?  There's nothing to do
 285   after the data has been transferred.
 286
 287 \item How do we marshall several mime-types?  We could make the drag
 288   setup a multi-step operation: dnd.create, drag.offer(mime-type1),
 289   drag.offer(mime-type2), drag.activate().  The drag object could send
 290   multiple offer events on each motion event.  Or we could just
 291   implement an array type, but that's a pain to work with.
 292
 293 \item Middle-click drag to pop up menu?  Ctrl/Shift/Alt drag?
 294
 295 \item Send a file descriptor over the protocol to let initiator and
 296   source exchange data out of band?
 297
 298 \item Action?  Specify action when creating the drag object? Ask
 299   action?
 300 \end{itemize}
 301
 302 New objects, requests and events:
 303
 304 \begin{itemize}
 305 \item New toplevel dnd global.  One method, creates a drag object:
 306   \texttt{dnd.start(new object id, surface, input device, mime
 307     types)}. Starts drag for the device, if it's grabbed by the
 308   surface. The drag ends when the button is released.  Caller is responsible
 309   for destroying the drag object.
 310
 311 \item Drag object methods:
 312
 313   \texttt{drag.destroy(id)}, destroy drag object.
 314
 315   \texttt{drag.send(id, data)}, send drag data.
 316
 317   \texttt{drag.accept(id, mime type)}, accept drag offer, called by
 318   target surface.
 319
 320 \item Drag object events:
 321
 322   \texttt{drag.offer(id, mime-types)}, sent to potential destination
 323   surfaces to offer drag data.  If the device leaves the window or the
 324   originator cancels the drag, this event is sent with mime-types =
 325   NULL.
 326
 327   \texttt{drag.target(id, mime-type)}, sent to drag originator when a
 328   target surface has accepted the offer. If a previous target goes
 329   away, this event is sent with mime-type = NULL.
 330
 331   \texttt{drag.data(id, data)}, sent to target, contains dragged data.
 332   ends transaction on the target side.
 333 \end{itemize}
 334
 335 Sequence of events:
 336
 337 \begin{itemize}
 338 \item The initiator surface receives a click (which grabs the input
 339   device to that surface) and then enough motion to decide that a drag
 340   is starting.  Wayland has no subwindows, so it's entirely up to the
 341   application to decide whether or not a draggable object within the
 342   surface was clicked.
 343
 344 \item The initiator creates a drag object by calling the
 345   \texttt{create\_drag} method on the dnd global object.  As for any
 346   client created object, the client allocates the id.  The
 347   \texttt{create\_drag} method also takes the originating surface, the
 348   device that's dragging and the mime-types supported.  If the surface
 349   has indeed grabbed the device passed in, the server will create an
 350   active drag object for the device.  If the grab was released in the
 351   meantime, the drag object will be inactive, that is, the same state
 352   as when the grab is released.  In that case, the client will receive
 353   a button up event, which will let it know that the drag finished.
 354   To the client it will look like the drag was immediately cancelled
 355   by the grab ending.
 356
 357   The special mime-type application/x-root-target indicates that the
 358   initiator is looking for drag events to the root window as well.
 359
 360 \item To indicate the object being dragged, the initiator can replace
 361   the pointer image with a larger image representing the data being
 362   dragged with the cursor image overlaid.  The pointer image will
 363   remain in place as long as the grab is in effect, since the
 364   initiating surface keeps pointer focus, and no other surface
 365   receives enter events.
 366
 367 \item As long as the grab is active (or until the initiator cancels
 368   the drag by destroying the drag object), the drag object will send
 369   \texttt{offer} events to surfaces it moves across. As for motion
 370   events, these events contain the surface local coordinates of the
 371   device as well as the list of mime-types offered.  When a device
 372   leaves a surface, it will send an \texttt{offer} event with an empty
 373   list of mime-types to indicate that the device left the surface.
 374
 375 \item If a surface receives an offer event and decides that it's in an
 376   area that can accept a drag event, it should call the
 377   \texttt{accept} method on the drag object in the event.  The surface
 378   passes a mime-type in the request, picked from the list in the offer
 379   event, to indicate which of the types it wants.  At this point, the
 380   surface can update the appearance of the drop target to give
 381   feedback to the user that the drag has a valid target.  If the
 382   \texttt{offer} event moves to a different drop target (the surface
 383   decides the offer coordinates are outside the drop target) or leaves
 384   the surface (the offer event has an empty list of mime-types) it
 385   should revert the appearance of the drop target to the inactive
 386   state.  A surface can also decide to retract its drop target (if the
 387   drop target disappears or moves, for example), by calling the accept
 388   method with a NULL mime-type.
 389
 390 \item When a target surface sends an \texttt{accept} request, the drag
 391   object will send a \texttt{target} event to the initiator surface.
 392   This tells the initiator that the drag currently has a potential
 393   target and which of the offered mime-types the target wants.  The
 394   initiator can change the pointer image or drag source appearance to
 395   reflect this new state.  If the target surface retracts its drop
 396   target or if the surface disappears, a \texttt{target} event with a
 397   NULL mime-type will be sent.
 398
 399   If the initiator listed application/x-root-target as a valid
 400   mime-type, dragging into the root window will make the drag object
 401   send a \texttt{target} event with the application/x-root-target
 402   mime-type.
 403
 404 \item When the grab is released (indicated by the button release
 405   event), if the drag has an active target, the initiator calls the
 406   \texttt{send} method on the drag object to send the data to be
 407   transferred by the drag operation, in the format requested by the
 408   target.  The initiator can then destroy the drag object by calling
 409   the \texttt{destroy} method.
 410
 411 \item The drop target receives a \texttt{data} event from the drag
 412   object with the requested data.
 413 \end{itemize}
 414
 415 MIME is defined in RFCs 2045--2049. A registry of MIME types is
 416 maintained by the Internet Assigned Numbers Authority (IANA).
 417
 418 ftp://ftp.isi.edu/in-notes/iana/assignments/media-types/
 419
 420
 421 \section{Types of compositors}
 422
 423 \subsection{System Compositor}
 424
 425 \begin{itemize}
 426 \item ties in with graphical boot
 427 \item hosts different types of session compositors
 428 \item lets us switch between multiple sessions (fast user switching,
 429    secure/personal desktop switching)
 430 \item multiseat
 431 \item linux implementation using libudev, egl, kms, evdev, cairo
 432 \item for fullscreen clients, the system compositor can reprogram the
 433    video scanout address to source from the client-provided buffer.
 434 \end{itemize}
 435
 436 \subsection{Session Compositor}
 437
 438 \begin{itemize}
 439 \item nested under the system compositor.  nesting is feasible because
 440    protocol is async, roundtrip would break nesting
 441 \item gnome-shell
 442 \item moblin
 443 \item compiz?
 444 \item kde compositor?
 445 \item text mode using vte
 446 \item rdp session
 447 \item fullscreen X session under wayland
 448 \item can run without system compositor, on the hw where it makes
 449    sense
 450 \item root window less X server, bridging X windows into a wayland
 451    session compositor
 452 \end{itemize}
 453
 454 \subsection{Embedding Compositor}
 455
 456 X11 lets clients embed windows from other clients, or lets clients copy
 457 pixmap contents rendered by another client into their window.  This is
 458 often used for applets in a panel, browser plugins and similar.
 459 Wayland doesn't directly allow this, but clients can communicate GEM
 460 buffer names out-of-band, for example, using d-bus or as command line
 461 arguments when the panel launches the applet.  Another option is to
 462 use a nested wayland instance.  For this, the wayland server will have
 463 to be a library that the host application links to.  The host
 464 application will then pass the wayland server socket name to the
 465 embedded application, and will need to implement the wayland
 466 compositor interface.  The host application composites the client
 467 surfaces as part of its window, that is, in the web page or in the
 468 panel.  The benefit of nesting the wayland server is that it provides
 469 the requests the embedded client needs to inform the host about buffer
 470 updates and a mechanism for forwarding input events from the host
 471 application.
 472
 473 \begin{itemize}
 474 \item firefox embedding flash by being a special purpose compositor to
 475    the plugin
 476 \end{itemize}
 477
 478 \section{Implementation}
 479
 480 what's currently implemented
 481
 482 \subsection{Wayland Server Library}
 483
 484 \texttt{libwayland-server.so}
 485
 486 \begin{itemize}
 487 \item implements protocol side of a compositor
 488 \item minimal, doesn't include any rendering or input device handling
 489 \item helpers for running on egl and evdev, and for nested wayland
 490 \end{itemize}
 491
 492 \subsection{Wayland Client Library}
 493
 494 \texttt{libwayland.so}
 495
 496 \begin{itemize}
 497 \item minimal, designed to support integration with real toolkits such as
 498    Qt, GTK+ or Clutter.
 499
 500 \item doesn't cache state, but lets the toolkits cache server state in
 501    native objects (GObject or QObject or whatever).
 502 \end{itemize}
 503
 504 \subsection{Wayland System Compositor}
 505
 506 \begin{itemize}
 507 \item implementation of the system compositor
 508
 509 \item uses libudev, eagle (egl), evdev and drm
 510
 511 \item integrates with ConsoleKit, can create new sessions
 512
 513 \item allows multi seat setups
 514
 515 \item configurable through udev rules and maybe /etc/wayland.d type thing
 516 \end{itemize}
 517
 518 \subsection{X Server Session}
 519
 520 \begin{itemize}
 521 \item xserver module and driver support
 522
 523 \item uses wayland client library
 524
 525 \item same X.org server as we normally run, the front buffer is a wayland
 526    surface but all accel code, 3d and extensions are there
 527
 528 \item when full screen the session compositor will scan out from the X
 529    server wayland surface, at which point X is running pretty much as it
 530    does natively.
 531 \end{itemize}
 532
 533 \end{document}