spec/main.tex

   1 \documentclass{article}
   2 \usepackage{palatino}
   3
   4 \author{Kristian Høgsberg\\
   5 \texttt{krh@bitplanet.net}
   6 }
   7
   8 \title{The Wayland Display Server}
   9
  10 \begin{document}
  11
  12 \maketitle
  13
  14 \section{Wayland Overview}
  15
  16 \begin{itemize}
  17 \item wayland is a protocol for a new display server.
  18 \item wayland is an implementation
  19 \end{itemize}
  20
  21 \subsection{Replacing X11}
  22
  23 Over time, a lot of functionality has slowly moved out of the X
  24 server and into client-side libraries or kernel drivers. One of the
  25 first components to move out was font rendering, with freetype and
  26 fontconfig providing an alternative to the core X fonts.  Direct
  27 rendering OpenGL followed, as a graphics driver in a client-side library. Then
  28 cairo came along and provided a modern 2D rendering library
  29 independent of X and compositing managers took over control of the
  30 rendering of the desktop. Recently with GEM and KMS in the Linux
  31 kernel, we can do modesetting outside X and schedule several direct
  32 rendering clients. The end result is a highly modular graphics stack.
  33
  34 \subsection{Make the compositing manager the display server}
  35
  36 Wayland is a new display server building on top of all those
  37 components. We are trying to distill out the functionality in the X
  38 server that is still used by the modern Linux desktop. This turns out
  39 to be not a whole lot. Applications can allocate their own off-screen
  40 buffers and render their window contents by themselves. In the end,
  41 what’s needed is a way to present the resulting window surface to a
  42 compositor and a way to receive input. This is what Wayland provides,
  43 by piecing together the components already in the eco-system in a
  44 slightly different way.
  45
  46 X will always be relevant, in the same way Fortran compilers and VRML
  47 browsers are, but it’s time that we think about moving it out of the
  48 critical path and provide it as an optional component for legacy
  49 applications.
  50
  51
  52 \section{Wayland protocol}
  53
  54 \subsection{Basic Principles}
  55
  56 The wayland protocol is an asynchronous object oriented protocol.  All
  57 requests are method invocations on some object.  The request includes
  58 an object id that uniquely identifies an object on the server.  Each
  59 object implements an interface and the requests include an opcode that
  60 identifies which method in the interface to invoke.
  61
  62 The wire protocol is determined from the C prototypes of the requests
  63 and events.  There is a straightforward mapping from the C types to
  64 packing the bytes in the request written to the socket.  It is
  65 possible to map the events and requests to function calls in other
  66 languages, but that hasn't been done at this point.
  67
  68 The server sends back events to the client; each event is emitted from
  69 an object.  Events can be error conditions.  The event includes the
  70 object id and the event opcode, from which the client can determine
  71 the type of event.  Events are generated either in response to a request
  72 (in which case the request and the event constitute a round trip) or
  73 spontaneously when the server state changes.
  74
  75 \begin{itemize}
  76 \item state is broadcast on connect, events sent out when state
  77   changes.  client must listen for these changes and cache the state.
  78   no need (or mechanism) to query server state.
  79
  80 \item server will broadcast presence of a number of global objects,
  81   which in turn will broadcast their current state
  82 \end{itemize}
  83
  84 \subsection{Connect Time}
  85
  86 \begin{itemize}
  87 \item no fixed format connect block, the server emits a bunch of
  88   events at connect time
  89 \item presence events for global objects: output, compositor, input
  90   devices
  91 \end{itemize}
  92 \subsection{Security and Authentication}
  93
  94 \begin{itemize}
  95 \item mostly about access to underlying buffers, need new drm auth
  96   mechanism (the grant-to ioctl idea), need to check the cmd stream?
  97
  98 \item getting the server socket depends on the compositor type, could
  99   be a system wide name, through fd passing on the session dbus, or
 100   the client is forked by the compositor and the fd is already opened.
 101 \end{itemize}
 102
 103 \subsection{Creating Objects}
 104
 105 \begin{itemize}
 106 \item client allocates object ID, uses range protocol
 107 \item server tracks how many IDs are left in current range, sends new
 108   range when client is about to run out.
 109 \end{itemize}
 110
 111 \subsection{Compositor}
 112
 113 The compositor is a global object, advertised at connect time.
 114
 115 \begin{tabular}{l}
 116   \hline
 117   Interface \texttt{compositor} \\ \hline
 118   Requests \\ \hline
 119   \texttt{create\_surface(id)} \\
 120   \texttt{commit()} \\ \hline
 121   Events \\ \hline
 122   \texttt{device(device)} \\
 123   \texttt{acknowledge(key, frame)} \\
 124   \texttt{frame(frame, time)} \\ \hline
 125 \end{tabular}
 126
 127
 128 \begin{itemize}
 129 \item a global object
 130 \item broadcasts drm file name, or at least a string like drm:/dev/card0
 131 \item commit/ack/frame protocol
 132 \end{itemize}
 133
 134 \subsection{Surface}
 135
 136 Created by the client.
 137
 138 \begin{tabular}{l}
 139   \hline
 140   Interface \texttt{surface} \\ \hline
 141   Requests \\ \hline
 142   \texttt{destroy()} \\
 143   \texttt{attach()} \\
 144   \texttt{map()} \\
 145   \texttt{damage()} \\ \hline
 146   Events \\ \hline
 147   no events \\ \hline
 148 \end{tabular}
 149
 150 Needs a way to set input region, opaque region.
 151
 152 \subsection{Input}
 153
 154 Represents a group of input devices, including mice, keyboards.  Has a
 155 keyboard and pointer focus.  Global object.  Pointer events are
 156 delivered in both screen coordinates and surface local coordinates.
 157
 158 \begin{tabular}{l}
 159   \hline
 160   Interface \texttt{input} \\ \hline
 161   Requests \\ \hline
 162   \texttt{attach(buffer, x, y)} \\ \hline
 163   Events \\ \hline
 164   \texttt{motion(x, y, sx, sy)} \\
 165   \texttt{button(button, state, x, y, sx, sy)} \\
 166   \texttt{key(key, state)} \\
 167   \texttt{pointer\_focus(surface)} \\
 168   \texttt{keyboard\_focus(surface, keys)} \\ \hline
 169 \end{tabular}
 170
 171 Talk about:
 172
 173 \begin{itemize}
 174 \item keyboard map, change events
 175 \item xkb on wayland
 176 \item multi pointer wayland
 177 \end{itemize}
 178
 179 A surface can change the pointer image when the surface is the pointer
 180 focus of the input device.  Wayland doesn't automatically change the
 181 pointer image when a pointer enters a surface, but expects the
 182 application to set the cursor it wants in response to the pointer
 183 focus and motion events.  The rationale is that a client has to manage
 184 changing pointer images for UI elements within the surface in response
 185 to motion events anyway, so we'll make that the only mechanism for
 186 changing the pointer image.  If the server receives a request
 187 to set the pointer image after the surface loses pointer focus, the
 188 request is ignored.  To the client this will look like it successfully
 189 set the pointer image.
 190
 191 The compositor will revert the pointer image back to a default image
 192 when no surface has the pointer focus for that device.  Clients can
 193 revert the pointer image back to the default image by setting a NULL
 194 image.
 195
 196 What if the pointer moves from one window which has set a special
 197 pointer image to a surface that doesn't set an image in response to
 198 the motion event?  The new surface will be stuck with the special
 199 pointer image.  We can't just revert the pointer image on leaving a
 200 surface, since if we immediately enter a surface that sets a different
 201 image, the image will flicker.  Broken app, I suppose.
 202
 203 \subsection{Output}
 204
 205 An output is a global object, advertised at connect time or as it
 206 comes and goes.
 207
 208 \begin{tabular}{l}
 209   \hline
 210   Interface \texttt{output} \\ \hline
 211   Requests \\ \hline
 212   no requests \\ \hline
 213   Events \\ \hline
 214   \texttt{geometry(width, height)} \\ \hline
 215 \end{tabular}
 216
 217 \begin{itemize}
 218 \item laid out in a big (compositor) coordinate system
 219 \item basically xrandr over wayland
 220 \item geometry needs position in compositor coordinate system
 221 \item events to advertise available modes, requests to move and change
 222   modes
 223 \end{itemize}
 224
 225 \subsection{Shared object cache}
 226
 227 Cache for sharing glyphs, icons, cursors across clients.  Lets clients
 228 share identical objects.  The cache is a global object, advertised at
 229 connect time.
 230
 231 \begin{tabular}{l}
 232   \hline
 233   Interface \texttt{cache} \\ \hline
 234   Requests \\ \hline
 235   \texttt{upload(key, visual, bo, stride, width, height)} \\ \hline
 236   Events \\ \hline
 237   \texttt{item(key, bo, x, y, stride)} \\
 238   \texttt{retire(bo)} \\ \hline
 239 \end{tabular}
 240
 241 \begin{itemize}
 242
 243 \item Upload by passing a visual, bo, stride, width, height to the
 244   cache.
 245
 246 \item Upload returns a bo name, stride, and x, y location of object in
 247   the buffer.  Clients take a reference on the atlas bo.
 248
 249 \item Shared objects are refcounted, freed by client (when purging
 250   glyphs from the local cache) or when a client exits.
 251
 252 \item Server can't delete individual items from an atlas, but it can
 253   throw out an entire atlas bo if it becomes too sparse.  The server
 254   sends out a \texttt{retire} event when this happens, and clients
 255   must throw away any objects from that bo and reupload.  Between the
 256   server dropping the atlas and the client receiving the retire event,
 257   clients can still legally use the old atlas since they have a ref on
 258   the bo.
 259
 260 \item cairo needs to hook into the glyph cache, and maybe also a way
 261   to create a read-only surface based on an object from the cache
 262   (icons).
 263
 264   \texttt{cairo\_wayland\_create\_cached\_surface(surface-data)}.
 265
 266 \end{itemize}
 267
 268
 269 \subsection{Drag and Drop}
 270
 271 Multi-device aware. Orthogonal to rest of wayland, as it is its own
 272 toplevel object.  Since the compositor determines the drag target, it
 273 works with transformed surfaces (dragging to a scaled down window in
 274 expose mode, for example).
 275
 276 Issues:
 277
 278 \begin{itemize}
 279 \item we can set the cursor image to the current cursor + dragged
 280   object, which will last as long as the drag, but maybe a request to
 281   attach an image to the cursor will be more convenient?
 282
 283 \item Should drag.send() destroy the object?  There's nothing to do
 284   after the data has been transferred.
 285
 286 \item How do we marshal several mime-types?  We could make the drag
 287   setup a multi-step operation: dnd.create, drag.offer(mime-type1),
 288   drag.offer(mime-type2), drag.activate().  The drag object could send
 289   multiple offer events on each motion event.  Or we could just
 290   implement an array type, but that's a pain to work with.
 291
 292 \item Middle-click drag to pop up menu?  Ctrl/Shift/Alt drag?
 293
 294 \item Send a file descriptor over the protocol to let initiator and
 295   source exchange data out of band?
 296
 297 \item Action?  Specify action when creating the drag object? Ask
 298   action?
 299 \end{itemize}
 300
 301 New objects, requests and events:
 302
 303 \begin{itemize}
 304 \item New toplevel dnd global.  One method, creates a drag object:
 305   \texttt{dnd.start(new object id, surface, input device, mime
 306     types)}. Starts drag for the device, if it's grabbed by the
 307   surface.  Drag ends when button is released.  Caller is responsible
 308   for destroying the drag object.
 309
 310 \item Drag object methods:
 311
 312   \texttt{drag.destroy(id)}, destroy drag object.
 313
 314   \texttt{drag.send(id, data)}, send drag data.
 315
 316   \texttt{drag.accept(id, mime type)}, accept drag offer, called by
 317   target surface.
 318
 319 \item Drag object events:
 320
 321   \texttt{drag.offer(id, mime-types)}, sent to potential destination
 322   surfaces to offer drag data.  If the device leaves the window or the
 323   originator cancels the drag, this event is sent with mime-types =
 324   NULL.
 325
 326   \texttt{drag.target(id, mime-type)}, sent to drag originator when a
 327   target surface has accepted the offer.  If a previous target goes
 328   away, this event is sent with mime-type = NULL.
 329
 330   \texttt{drag.data(id, data)}, sent to target, contains dragged data.
 331   Ends transaction on the target side.
 332 \end{itemize}
 333
 334 Sequence of events:
 335
 336 \begin{itemize}
 337 \item The initiator surface receives a click (which grabs the input
 338   device to that surface) and then enough motion to decide that a drag
 339   is starting.  Wayland has no subwindows, so it's entirely up to the
 340   application to decide whether or not a draggable object within the
 341   surface was clicked.
 342
 343 \item The initiator creates a drag object by calling the
 344   \texttt{create\_drag} method on the dnd global object.  As for any
 345   client created object, the client allocates the id.  The
 346   \texttt{create\_drag} method also takes the originating surface, the
 347   device that's dragging and the mime-types supported.  If the surface
 348   has indeed grabbed the device passed in, the server will create an
 349   active drag object for the device.  If the grab was released in the
 350   meantime, the drag object will be in-active, that is, the same state
 351   as when the grab is released.  In that case, the client will receive
 352   a button up event, which will let it know that the drag finished.
 353   To the client it will look like the drag was immediately cancelled
 354   by the grab ending.
 355
 356   The special mime-type application/x-root-target indicates that the
 357   initiator is looking for drag events to the root window as well.
 358
 359 \item To indicate the object being dragged, the initiator can replace
 360   the pointer image with a larger image representing the data being
 361   dragged with the cursor image overlaid.  The pointer image will
 362   remain in place as long as the grab is in effect, since the
 363   initiating surface keeps pointer focus, and no other surface
 364   receives enter events.
 365
 366 \item As long as the grab is active (or until the initiator cancels
 367   the drag by destroying the drag object), the drag object will send
 368   \texttt{offer} events to surfaces it moves across. As for motion
 369   events, these events contain the surface local coordinates of the
 370   device as well as the list of mime-types offered.  When a device
 371   leaves a surface, it will send an \texttt{offer} event with an empty
 372   list of mime-types to indicate that the device left the surface.
 373
 374 \item If a surface receives an offer event and decides that it's in an
 375   area that can accept a drag event, it should call the
 376   \texttt{accept} method on the drag object in the event.  The surface
 377   passes a mime-type in the request, picked from the list in the offer
 378   event, to indicate which of the types it wants.  At this point, the
 379   surface can update the appearance of the drop target to give
 380   feedback to the user that the drag has a valid target.  If the
 381   \texttt{offer} event moves to a different drop target (the surface
 382   decides the offer coordinates are outside the drop target) or leaves
 383   the surface (the offer event has an empty list of mime-types) it
 384   should revert the appearance of the drop target to the inactive
 385   state.  A surface can also decide to retract its drop target (if the
 386   drop target disappears or moves, for example), by calling the accept
 387   method with a NULL mime-type.
 388
 389 \item When a target surface sends an \texttt{accept} request, the drag
 390   object will send a \texttt{target} event to the initiator surface.
 391   This tells the initiator that the drag currently has a potential
 392   target and which of the offered mime-types the target wants.  The
 393   initiator can change the pointer image or drag source appearance to
 394   reflect this new state.  If the target surface retracts its drop
 395   target or if the surface disappears, a \texttt{target} event with a
 396   NULL mime-type will be sent.
 397
 398   If the initiator listed application/x-root-target as a valid
 399   mime-type, dragging into the root window will make the drag object
 400   send a \texttt{target} event with the application/x-root-target
 401   mime-type.
 402
 403 \item When the grab is released (indicated by the button release
 404   event), if the drag has an active target, the initiator calls the
 405   \texttt{send} method on the drag object to send the data to be
 406   transferred by the drag operation, in the format requested by the
 407   target.  The initiator can then destroy the drag object by calling
 408   the \texttt{destroy} method.
 409
 410 \item The drop target receives a \texttt{data} event from the drag
 411   object with the requested data.
 412 \end{itemize}
 413
 414 MIME is defined in RFCs 2045--2049. A registry of MIME types is
 415 maintained by the Internet Assigned Numbers Authority (IANA).
 416
 417 ftp://ftp.isi.edu/in-notes/iana/assignments/media-types/
 418
 419
 420 \section{Types of compositors}
 421
 422 \subsection{System Compositor}
 423
 424 \begin{itemize}
 425 \item ties in with graphical boot
 426 \item hosts different types of session compositors
 427 \item lets us switch between multiple sessions (fast user switching,
 428    secure/personal desktop switching)
 429 \item multiseat
 430 \item linux implementation using libudev, egl, kms, evdev, cairo
 431 \item for fullscreen clients, the system compositor can reprogram the
 432    video scanout address to source from the client-provided buffer.
 433 \end{itemize}
 434
 435 \subsection{Session Compositor}
 436
 437 \begin{itemize}
 438 \item nested under the system compositor.  nesting is feasible because
 439    protocol is async, roundtrip would break nesting
 440 \item gnome-shell
 441 \item moblin
 442 \item compiz?
 443 \item kde compositor?
 444 \item text mode using vte
 445 \item rdp session
 446 \item fullscreen X session under wayland
 447 \item can run without system compositor, on the hw where it makes
 448    sense
 449 \item root window less X server, bridging X windows into a wayland
 450    session compositor
 451 \end{itemize}
 452
 453 \subsection{Embedding Compositor}
 454
 455 X11 lets clients embed windows from other clients, or lets clients copy
 456 pixmap contents rendered by another client into their window.  This is
 457 often used for applets in a panel, browser plugins and similar.
 458 Wayland doesn't directly allow this, but clients can communicate GEM
 459 buffer names out-of-band, for example, using d-bus or as command line
 460 arguments when the panel launches the applet.  Another option is to
 461 use a nested wayland instance.  For this, the wayland server will have
 462 to be a library that the host application links to.  The host
 463 application will then pass the wayland server socket name to the
 464 embedded application, and will need to implement the wayland
 465 compositor interface.  The host application composites the client
 466 surfaces as part of its window, that is, in the web page or in the
 467 panel.  The benefit of nesting the wayland server is that it provides
 468 the requests the embedded client needs to inform the host about buffer
 469 updates and a mechanism for forwarding input events from the host
 470 application.
 471
 472 \begin{itemize}
 473 \item firefox embedding flash by being a special purpose compositor to
 474    the plugin
 475 \end{itemize}
 476
 477 \section{Implementation}
 478
 479 what's currently implemented
 480
 481 \subsection{Wayland Server Library}
 482
 483 \texttt{libwayland-server.so}
 484
 485 \begin{itemize}
 486 \item implements protocol side of a compositor
 487 \item minimal, doesn't include any rendering or input device handling
 488 \item helpers for running on egl and evdev, and for nested wayland
 489 \end{itemize}
 490
 491 \subsection{Wayland Client Library}
 492
 493 \texttt{libwayland.so}
 494
 495 \begin{itemize}
 496 \item minimal, designed to support integration with real toolkits such as
 497    Qt, GTK+ or Clutter.
 498
 499 \item doesn't cache state, but lets the toolkits cache server state in
 500    native objects (GObject or QObject or whatever).
 501 \end{itemize}
 502
 503 \subsection{Wayland System Compositor}
 504
 505 \begin{itemize}
 506 \item implementation of the system compositor
 507
 508 \item uses libudev, eagle (egl), evdev and drm
 509
 510 \item integrates with ConsoleKit, can create new sessions
 511
 512 \item allows multi seat setups
 513
 514 \item configurable through udev rules and maybe /etc/wayland.d type thing
 515 \end{itemize}
 516
 517 \subsection{X Server Session}
 518
 519 \begin{itemize}
 520 \item xserver module and driver support
 521
 522 \item uses wayland client library
 523
 524 \item same X.org server as we normally run, the front buffer is a wayland
 525    surface but all accel code, 3d and extensions are there
 526
 527 \item when full screen the session compositor will scan out from the X
 528    server wayland surface, at which point X is running pretty much as it
 529    does natively.
 530 \end{itemize}
 531
 532 \end{document}