...

Source file src/golang.org/x/sys/unix/syscall_linux.go

Documentation: golang.org/x/sys/unix

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Linux system calls.
     6  // This file is compiled as ordinary Go code,
     7  // but it is also input to mksyscall,
     8  // which parses the //sys lines and generates system call stubs.
     9  // Note that sometimes we use a lowercase //sys name and
    10  // wrap it in our own nicer implementation.
    11  
    12  package unix
    13  
    14  import (
    15  	"encoding/binary"
    16  	"strconv"
    17  	"syscall"
    18  	"time"
    19  	"unsafe"
    20  )
    21  
    22  /*
    23   * Wrapped
    24   */
    25  
    26  func Access(path string, mode uint32) (err error) {
    27  	return Faccessat(AT_FDCWD, path, mode, 0)
    28  }
    29  
    30  func Chmod(path string, mode uint32) (err error) {
    31  	return Fchmodat(AT_FDCWD, path, mode, 0)
    32  }
    33  
    34  func Chown(path string, uid int, gid int) (err error) {
    35  	return Fchownat(AT_FDCWD, path, uid, gid, 0)
    36  }
    37  
    38  func Creat(path string, mode uint32) (fd int, err error) {
    39  	return Open(path, O_CREAT|O_WRONLY|O_TRUNC, mode)
    40  }
    41  
    42  func EpollCreate(size int) (fd int, err error) {
    43  	if size <= 0 {
    44  		return -1, EINVAL
    45  	}
    46  	return EpollCreate1(0)
    47  }
    48  
    49  //sys	FanotifyInit(flags uint, event_f_flags uint) (fd int, err error)
    50  //sys	fanotifyMark(fd int, flags uint, mask uint64, dirFd int, pathname *byte) (err error)
    51  
    52  func FanotifyMark(fd int, flags uint, mask uint64, dirFd int, pathname string) (err error) {
    53  	if pathname == "" {
    54  		return fanotifyMark(fd, flags, mask, dirFd, nil)
    55  	}
    56  	p, err := BytePtrFromString(pathname)
    57  	if err != nil {
    58  		return err
    59  	}
    60  	return fanotifyMark(fd, flags, mask, dirFd, p)
    61  }
    62  
    63  //sys	fchmodat(dirfd int, path string, mode uint32) (err error)
    64  //sys	fchmodat2(dirfd int, path string, mode uint32, flags int) (err error)
    65  
    66  func Fchmodat(dirfd int, path string, mode uint32, flags int) error {
    67  	// Linux fchmodat doesn't support the flags parameter, but fchmodat2 does.
    68  	// Try fchmodat2 if flags are specified.
    69  	if flags != 0 {
    70  		err := fchmodat2(dirfd, path, mode, flags)
    71  		if err == ENOSYS {
    72  			// fchmodat2 isn't available. If the flags are known to be valid,
    73  			// return EOPNOTSUPP to indicate that fchmodat doesn't support them.
    74  			if flags&^(AT_SYMLINK_NOFOLLOW|AT_EMPTY_PATH) != 0 {
    75  				return EINVAL
    76  			} else if flags&(AT_SYMLINK_NOFOLLOW|AT_EMPTY_PATH) != 0 {
    77  				return EOPNOTSUPP
    78  			}
    79  		}
    80  		return err
    81  	}
    82  	return fchmodat(dirfd, path, mode)
    83  }
    84  
    85  func InotifyInit() (fd int, err error) {
    86  	return InotifyInit1(0)
    87  }
    88  
    89  //sys	ioctl(fd int, req uint, arg uintptr) (err error) = SYS_IOCTL
    90  //sys	ioctlPtr(fd int, req uint, arg unsafe.Pointer) (err error) = SYS_IOCTL
    91  
    92  // ioctl itself should not be exposed directly, but additional get/set functions
    93  // for specific types are permissible. These are defined in ioctl.go and
    94  // ioctl_linux.go.
    95  //
    96  // The third argument to ioctl is often a pointer but sometimes an integer.
    97  // Callers should use ioctlPtr when the third argument is a pointer and ioctl
    98  // when the third argument is an integer.
    99  //
   100  // TODO: some existing code incorrectly uses ioctl when it should use ioctlPtr.
   101  
   102  //sys	Linkat(olddirfd int, oldpath string, newdirfd int, newpath string, flags int) (err error)
   103  
   104  func Link(oldpath string, newpath string) (err error) {
   105  	return Linkat(AT_FDCWD, oldpath, AT_FDCWD, newpath, 0)
   106  }
   107  
   108  func Mkdir(path string, mode uint32) (err error) {
   109  	return Mkdirat(AT_FDCWD, path, mode)
   110  }
   111  
   112  func Mknod(path string, mode uint32, dev int) (err error) {
   113  	return Mknodat(AT_FDCWD, path, mode, dev)
   114  }
   115  
   116  func Open(path string, mode int, perm uint32) (fd int, err error) {
   117  	return openat(AT_FDCWD, path, mode|O_LARGEFILE, perm)
   118  }
   119  
   120  //sys	openat(dirfd int, path string, flags int, mode uint32) (fd int, err error)
   121  
   122  func Openat(dirfd int, path string, flags int, mode uint32) (fd int, err error) {
   123  	return openat(dirfd, path, flags|O_LARGEFILE, mode)
   124  }
   125  
   126  //sys	openat2(dirfd int, path string, open_how *OpenHow, size int) (fd int, err error)
   127  
   128  func Openat2(dirfd int, path string, how *OpenHow) (fd int, err error) {
   129  	return openat2(dirfd, path, how, SizeofOpenHow)
   130  }
   131  
   132  func Pipe(p []int) error {
   133  	return Pipe2(p, 0)
   134  }
   135  
   136  //sysnb	pipe2(p *[2]_C_int, flags int) (err error)
   137  
   138  func Pipe2(p []int, flags int) error {
   139  	if len(p) != 2 {
   140  		return EINVAL
   141  	}
   142  	var pp [2]_C_int
   143  	err := pipe2(&pp, flags)
   144  	if err == nil {
   145  		p[0] = int(pp[0])
   146  		p[1] = int(pp[1])
   147  	}
   148  	return err
   149  }
   150  
   151  //sys	ppoll(fds *PollFd, nfds int, timeout *Timespec, sigmask *Sigset_t) (n int, err error)
   152  
   153  func Ppoll(fds []PollFd, timeout *Timespec, sigmask *Sigset_t) (n int, err error) {
   154  	if len(fds) == 0 {
   155  		return ppoll(nil, 0, timeout, sigmask)
   156  	}
   157  	return ppoll(&fds[0], len(fds), timeout, sigmask)
   158  }
   159  
   160  func Poll(fds []PollFd, timeout int) (n int, err error) {
   161  	var ts *Timespec
   162  	if timeout >= 0 {
   163  		ts = new(Timespec)
   164  		*ts = NsecToTimespec(int64(timeout) * 1e6)
   165  	}
   166  	return Ppoll(fds, ts, nil)
   167  }
   168  
   169  //sys	Readlinkat(dirfd int, path string, buf []byte) (n int, err error)
   170  
   171  func Readlink(path string, buf []byte) (n int, err error) {
   172  	return Readlinkat(AT_FDCWD, path, buf)
   173  }
   174  
   175  func Rename(oldpath string, newpath string) (err error) {
   176  	return Renameat(AT_FDCWD, oldpath, AT_FDCWD, newpath)
   177  }
   178  
   179  func Rmdir(path string) error {
   180  	return Unlinkat(AT_FDCWD, path, AT_REMOVEDIR)
   181  }
   182  
   183  //sys	Symlinkat(oldpath string, newdirfd int, newpath string) (err error)
   184  
   185  func Symlink(oldpath string, newpath string) (err error) {
   186  	return Symlinkat(oldpath, AT_FDCWD, newpath)
   187  }
   188  
   189  func Unlink(path string) error {
   190  	return Unlinkat(AT_FDCWD, path, 0)
   191  }
   192  
   193  //sys	Unlinkat(dirfd int, path string, flags int) (err error)
   194  
   195  func Utimes(path string, tv []Timeval) error {
   196  	if tv == nil {
   197  		err := utimensat(AT_FDCWD, path, nil, 0)
   198  		if err != ENOSYS {
   199  			return err
   200  		}
   201  		return utimes(path, nil)
   202  	}
   203  	if len(tv) != 2 {
   204  		return EINVAL
   205  	}
   206  	var ts [2]Timespec
   207  	ts[0] = NsecToTimespec(TimevalToNsec(tv[0]))
   208  	ts[1] = NsecToTimespec(TimevalToNsec(tv[1]))
   209  	err := utimensat(AT_FDCWD, path, (*[2]Timespec)(unsafe.Pointer(&ts[0])), 0)
   210  	if err != ENOSYS {
   211  		return err
   212  	}
   213  	return utimes(path, (*[2]Timeval)(unsafe.Pointer(&tv[0])))
   214  }
   215  
   216  //sys	utimensat(dirfd int, path string, times *[2]Timespec, flags int) (err error)
   217  
   218  func UtimesNano(path string, ts []Timespec) error {
   219  	return UtimesNanoAt(AT_FDCWD, path, ts, 0)
   220  }
   221  
   222  func UtimesNanoAt(dirfd int, path string, ts []Timespec, flags int) error {
   223  	if ts == nil {
   224  		return utimensat(dirfd, path, nil, flags)
   225  	}
   226  	if len(ts) != 2 {
   227  		return EINVAL
   228  	}
   229  	return utimensat(dirfd, path, (*[2]Timespec)(unsafe.Pointer(&ts[0])), flags)
   230  }
   231  
   232  func Futimesat(dirfd int, path string, tv []Timeval) error {
   233  	if tv == nil {
   234  		return futimesat(dirfd, path, nil)
   235  	}
   236  	if len(tv) != 2 {
   237  		return EINVAL
   238  	}
   239  	return futimesat(dirfd, path, (*[2]Timeval)(unsafe.Pointer(&tv[0])))
   240  }
   241  
   242  func Futimes(fd int, tv []Timeval) (err error) {
   243  	// Believe it or not, this is the best we can do on Linux
   244  	// (and is what glibc does).
   245  	return Utimes("/proc/self/fd/"+strconv.Itoa(fd), tv)
   246  }
   247  
   248  const ImplementsGetwd = true
   249  
   250  //sys	Getcwd(buf []byte) (n int, err error)
   251  
   252  func Getwd() (wd string, err error) {
   253  	var buf [PathMax]byte
   254  	n, err := Getcwd(buf[0:])
   255  	if err != nil {
   256  		return "", err
   257  	}
   258  	// Getcwd returns the number of bytes written to buf, including the NUL.
   259  	if n < 1 || n > len(buf) || buf[n-1] != 0 {
   260  		return "", EINVAL
   261  	}
   262  	// In some cases, Linux can return a path that starts with the
   263  	// "(unreachable)" prefix, which can potentially be a valid relative
   264  	// path. To work around that, return ENOENT if path is not absolute.
   265  	if buf[0] != '/' {
   266  		return "", ENOENT
   267  	}
   268  
   269  	return string(buf[0 : n-1]), nil
   270  }
   271  
   272  func Getgroups() (gids []int, err error) {
   273  	n, err := getgroups(0, nil)
   274  	if err != nil {
   275  		return nil, err
   276  	}
   277  	if n == 0 {
   278  		return nil, nil
   279  	}
   280  
   281  	// Sanity check group count. Max is 1<<16 on Linux.
   282  	if n < 0 || n > 1<<20 {
   283  		return nil, EINVAL
   284  	}
   285  
   286  	a := make([]_Gid_t, n)
   287  	n, err = getgroups(n, &a[0])
   288  	if err != nil {
   289  		return nil, err
   290  	}
   291  	gids = make([]int, n)
   292  	for i, v := range a[0:n] {
   293  		gids[i] = int(v)
   294  	}
   295  	return
   296  }
   297  
   298  func Setgroups(gids []int) (err error) {
   299  	if len(gids) == 0 {
   300  		return setgroups(0, nil)
   301  	}
   302  
   303  	a := make([]_Gid_t, len(gids))
   304  	for i, v := range gids {
   305  		a[i] = _Gid_t(v)
   306  	}
   307  	return setgroups(len(a), &a[0])
   308  }
   309  
   310  type WaitStatus uint32
   311  
   312  // Wait status is 7 bits at bottom, either 0 (exited),
   313  // 0x7F (stopped), or a signal number that caused an exit.
   314  // The 0x80 bit is whether there was a core dump.
   315  // An extra number (exit code, signal causing a stop)
   316  // is in the high bits. At least that's the idea.
   317  // There are various irregularities. For example, the
   318  // "continued" status is 0xFFFF, distinguishing itself
   319  // from stopped via the core dump bit.
   320  
   321  const (
   322  	mask    = 0x7F
   323  	core    = 0x80
   324  	exited  = 0x00
   325  	stopped = 0x7F
   326  	shift   = 8
   327  )
   328  
   329  func (w WaitStatus) Exited() bool { return w&mask == exited }
   330  
   331  func (w WaitStatus) Signaled() bool { return w&mask != stopped && w&mask != exited }
   332  
   333  func (w WaitStatus) Stopped() bool { return w&0xFF == stopped }
   334  
   335  func (w WaitStatus) Continued() bool { return w == 0xFFFF }
   336  
   337  func (w WaitStatus) CoreDump() bool { return w.Signaled() && w&core != 0 }
   338  
   339  func (w WaitStatus) ExitStatus() int {
   340  	if !w.Exited() {
   341  		return -1
   342  	}
   343  	return int(w>>shift) & 0xFF
   344  }
   345  
   346  func (w WaitStatus) Signal() syscall.Signal {
   347  	if !w.Signaled() {
   348  		return -1
   349  	}
   350  	return syscall.Signal(w & mask)
   351  }
   352  
   353  func (w WaitStatus) StopSignal() syscall.Signal {
   354  	if !w.Stopped() {
   355  		return -1
   356  	}
   357  	return syscall.Signal(w>>shift) & 0xFF
   358  }
   359  
   360  func (w WaitStatus) TrapCause() int {
   361  	if w.StopSignal() != SIGTRAP {
   362  		return -1
   363  	}
   364  	return int(w>>shift) >> 8
   365  }
   366  
   367  //sys	wait4(pid int, wstatus *_C_int, options int, rusage *Rusage) (wpid int, err error)
   368  
   369  func Wait4(pid int, wstatus *WaitStatus, options int, rusage *Rusage) (wpid int, err error) {
   370  	var status _C_int
   371  	wpid, err = wait4(pid, &status, options, rusage)
   372  	if wstatus != nil {
   373  		*wstatus = WaitStatus(status)
   374  	}
   375  	return
   376  }
   377  
   378  //sys	Waitid(idType int, id int, info *Siginfo, options int, rusage *Rusage) (err error)
   379  
   380  func Mkfifo(path string, mode uint32) error {
   381  	return Mknod(path, mode|S_IFIFO, 0)
   382  }
   383  
   384  func Mkfifoat(dirfd int, path string, mode uint32) error {
   385  	return Mknodat(dirfd, path, mode|S_IFIFO, 0)
   386  }
   387  
   388  func (sa *SockaddrInet4) sockaddr() (unsafe.Pointer, _Socklen, error) {
   389  	if sa.Port < 0 || sa.Port > 0xFFFF {
   390  		return nil, 0, EINVAL
   391  	}
   392  	sa.raw.Family = AF_INET
   393  	p := (*[2]byte)(unsafe.Pointer(&sa.raw.Port))
   394  	p[0] = byte(sa.Port >> 8)
   395  	p[1] = byte(sa.Port)
   396  	sa.raw.Addr = sa.Addr
   397  	return unsafe.Pointer(&sa.raw), SizeofSockaddrInet4, nil
   398  }
   399  
   400  func (sa *SockaddrInet6) sockaddr() (unsafe.Pointer, _Socklen, error) {
   401  	if sa.Port < 0 || sa.Port > 0xFFFF {
   402  		return nil, 0, EINVAL
   403  	}
   404  	sa.raw.Family = AF_INET6
   405  	p := (*[2]byte)(unsafe.Pointer(&sa.raw.Port))
   406  	p[0] = byte(sa.Port >> 8)
   407  	p[1] = byte(sa.Port)
   408  	sa.raw.Scope_id = sa.ZoneId
   409  	sa.raw.Addr = sa.Addr
   410  	return unsafe.Pointer(&sa.raw), SizeofSockaddrInet6, nil
   411  }
   412  
   413  func (sa *SockaddrUnix) sockaddr() (unsafe.Pointer, _Socklen, error) {
   414  	name := sa.Name
   415  	n := len(name)
   416  	if n >= len(sa.raw.Path) {
   417  		return nil, 0, EINVAL
   418  	}
   419  	sa.raw.Family = AF_UNIX
   420  	for i := 0; i < n; i++ {
   421  		sa.raw.Path[i] = int8(name[i])
   422  	}
   423  	// length is family (uint16), name, NUL.
   424  	sl := _Socklen(2)
   425  	if n > 0 {
   426  		sl += _Socklen(n) + 1
   427  	}
   428  	if sa.raw.Path[0] == '@' || (sa.raw.Path[0] == 0 && sl > 3) {
   429  		// Check sl > 3 so we don't change unnamed socket behavior.
   430  		sa.raw.Path[0] = 0
   431  		// Don't count trailing NUL for abstract address.
   432  		sl--
   433  	}
   434  
   435  	return unsafe.Pointer(&sa.raw), sl, nil
   436  }
   437  
   438  // SockaddrLinklayer implements the Sockaddr interface for AF_PACKET type sockets.
   439  type SockaddrLinklayer struct {
   440  	Protocol uint16
   441  	Ifindex  int
   442  	Hatype   uint16
   443  	Pkttype  uint8
   444  	Halen    uint8
   445  	Addr     [8]byte
   446  	raw      RawSockaddrLinklayer
   447  }
   448  
   449  func (sa *SockaddrLinklayer) sockaddr() (unsafe.Pointer, _Socklen, error) {
   450  	if sa.Ifindex < 0 || sa.Ifindex > 0x7fffffff {
   451  		return nil, 0, EINVAL
   452  	}
   453  	sa.raw.Family = AF_PACKET
   454  	sa.raw.Protocol = sa.Protocol
   455  	sa.raw.Ifindex = int32(sa.Ifindex)
   456  	sa.raw.Hatype = sa.Hatype
   457  	sa.raw.Pkttype = sa.Pkttype
   458  	sa.raw.Halen = sa.Halen
   459  	sa.raw.Addr = sa.Addr
   460  	return unsafe.Pointer(&sa.raw), SizeofSockaddrLinklayer, nil
   461  }
   462  
   463  // SockaddrNetlink implements the Sockaddr interface for AF_NETLINK type sockets.
   464  type SockaddrNetlink struct {
   465  	Family uint16
   466  	Pad    uint16
   467  	Pid    uint32
   468  	Groups uint32
   469  	raw    RawSockaddrNetlink
   470  }
   471  
   472  func (sa *SockaddrNetlink) sockaddr() (unsafe.Pointer, _Socklen, error) {
   473  	sa.raw.Family = AF_NETLINK
   474  	sa.raw.Pad = sa.Pad
   475  	sa.raw.Pid = sa.Pid
   476  	sa.raw.Groups = sa.Groups
   477  	return unsafe.Pointer(&sa.raw), SizeofSockaddrNetlink, nil
   478  }
   479  
   480  // SockaddrHCI implements the Sockaddr interface for AF_BLUETOOTH type sockets
   481  // using the HCI protocol.
   482  type SockaddrHCI struct {
   483  	Dev     uint16
   484  	Channel uint16
   485  	raw     RawSockaddrHCI
   486  }
   487  
   488  func (sa *SockaddrHCI) sockaddr() (unsafe.Pointer, _Socklen, error) {
   489  	sa.raw.Family = AF_BLUETOOTH
   490  	sa.raw.Dev = sa.Dev
   491  	sa.raw.Channel = sa.Channel
   492  	return unsafe.Pointer(&sa.raw), SizeofSockaddrHCI, nil
   493  }
   494  
   495  // SockaddrL2 implements the Sockaddr interface for AF_BLUETOOTH type sockets
   496  // using the L2CAP protocol.
   497  type SockaddrL2 struct {
   498  	PSM      uint16
   499  	CID      uint16
   500  	Addr     [6]uint8
   501  	AddrType uint8
   502  	raw      RawSockaddrL2
   503  }
   504  
   505  func (sa *SockaddrL2) sockaddr() (unsafe.Pointer, _Socklen, error) {
   506  	sa.raw.Family = AF_BLUETOOTH
   507  	psm := (*[2]byte)(unsafe.Pointer(&sa.raw.Psm))
   508  	psm[0] = byte(sa.PSM)
   509  	psm[1] = byte(sa.PSM >> 8)
   510  	for i := 0; i < len(sa.Addr); i++ {
   511  		sa.raw.Bdaddr[i] = sa.Addr[len(sa.Addr)-1-i]
   512  	}
   513  	cid := (*[2]byte)(unsafe.Pointer(&sa.raw.Cid))
   514  	cid[0] = byte(sa.CID)
   515  	cid[1] = byte(sa.CID >> 8)
   516  	sa.raw.Bdaddr_type = sa.AddrType
   517  	return unsafe.Pointer(&sa.raw), SizeofSockaddrL2, nil
   518  }
   519  
   520  // SockaddrRFCOMM implements the Sockaddr interface for AF_BLUETOOTH type sockets
   521  // using the RFCOMM protocol.
   522  //
   523  // Server example:
   524  //
   525  //	fd, _ := Socket(AF_BLUETOOTH, SOCK_STREAM, BTPROTO_RFCOMM)
   526  //	_ = unix.Bind(fd, &unix.SockaddrRFCOMM{
   527  //		Channel: 1,
   528  //		Addr:    [6]uint8{0, 0, 0, 0, 0, 0}, // BDADDR_ANY or 00:00:00:00:00:00
   529  //	})
   530  //	_ = Listen(fd, 1)
   531  //	nfd, sa, _ := Accept(fd)
   532  //	fmt.Printf("conn addr=%v fd=%d", sa.(*unix.SockaddrRFCOMM).Addr, nfd)
   533  //	Read(nfd, buf)
   534  //
   535  // Client example:
   536  //
   537  //	fd, _ := Socket(AF_BLUETOOTH, SOCK_STREAM, BTPROTO_RFCOMM)
   538  //	_ = Connect(fd, &SockaddrRFCOMM{
   539  //		Channel: 1,
   540  //		Addr:    [6]byte{0x11, 0x22, 0x33, 0xaa, 0xbb, 0xcc}, // CC:BB:AA:33:22:11
   541  //	})
   542  //	Write(fd, []byte(`hello`))
   543  type SockaddrRFCOMM struct {
   544  	// Addr represents a bluetooth address, byte ordering is little-endian.
   545  	Addr [6]uint8
   546  
   547  	// Channel is a designated bluetooth channel, only 1-30 are available for use.
   548  	// Since Linux 2.6.7 and further zero value is the first available channel.
   549  	Channel uint8
   550  
   551  	raw RawSockaddrRFCOMM
   552  }
   553  
   554  func (sa *SockaddrRFCOMM) sockaddr() (unsafe.Pointer, _Socklen, error) {
   555  	sa.raw.Family = AF_BLUETOOTH
   556  	sa.raw.Channel = sa.Channel
   557  	sa.raw.Bdaddr = sa.Addr
   558  	return unsafe.Pointer(&sa.raw), SizeofSockaddrRFCOMM, nil
   559  }
   560  
   561  // SockaddrCAN implements the Sockaddr interface for AF_CAN type sockets.
   562  // The RxID and TxID fields are used for transport protocol addressing in
   563  // (CAN_TP16, CAN_TP20, CAN_MCNET, and CAN_ISOTP), they can be left with
   564  // zero values for CAN_RAW and CAN_BCM sockets as they have no meaning.
   565  //
   566  // The SockaddrCAN struct must be bound to the socket file descriptor
   567  // using Bind before the CAN socket can be used.
   568  //
   569  //	// Read one raw CAN frame
   570  //	fd, _ := Socket(AF_CAN, SOCK_RAW, CAN_RAW)
   571  //	addr := &SockaddrCAN{Ifindex: index}
   572  //	Bind(fd, addr)
   573  //	frame := make([]byte, 16)
   574  //	Read(fd, frame)
   575  //
   576  // The full SocketCAN documentation can be found in the linux kernel
   577  // archives at: https://www.kernel.org/doc/Documentation/networking/can.txt
   578  type SockaddrCAN struct {
   579  	Ifindex int
   580  	RxID    uint32
   581  	TxID    uint32
   582  	raw     RawSockaddrCAN
   583  }
   584  
   585  func (sa *SockaddrCAN) sockaddr() (unsafe.Pointer, _Socklen, error) {
   586  	if sa.Ifindex < 0 || sa.Ifindex > 0x7fffffff {
   587  		return nil, 0, EINVAL
   588  	}
   589  	sa.raw.Family = AF_CAN
   590  	sa.raw.Ifindex = int32(sa.Ifindex)
   591  	rx := (*[4]byte)(unsafe.Pointer(&sa.RxID))
   592  	for i := 0; i < 4; i++ {
   593  		sa.raw.Addr[i] = rx[i]
   594  	}
   595  	tx := (*[4]byte)(unsafe.Pointer(&sa.TxID))
   596  	for i := 0; i < 4; i++ {
   597  		sa.raw.Addr[i+4] = tx[i]
   598  	}
   599  	return unsafe.Pointer(&sa.raw), SizeofSockaddrCAN, nil
   600  }
   601  
   602  // SockaddrCANJ1939 implements the Sockaddr interface for AF_CAN using J1939
   603  // protocol (https://en.wikipedia.org/wiki/SAE_J1939). For more information
   604  // on the purposes of the fields, check the official linux kernel documentation
   605  // available here: https://www.kernel.org/doc/Documentation/networking/j1939.rst
   606  type SockaddrCANJ1939 struct {
   607  	Ifindex int
   608  	Name    uint64
   609  	PGN     uint32
   610  	Addr    uint8
   611  	raw     RawSockaddrCAN
   612  }
   613  
   614  func (sa *SockaddrCANJ1939) sockaddr() (unsafe.Pointer, _Socklen, error) {
   615  	if sa.Ifindex < 0 || sa.Ifindex > 0x7fffffff {
   616  		return nil, 0, EINVAL
   617  	}
   618  	sa.raw.Family = AF_CAN
   619  	sa.raw.Ifindex = int32(sa.Ifindex)
   620  	n := (*[8]byte)(unsafe.Pointer(&sa.Name))
   621  	for i := 0; i < 8; i++ {
   622  		sa.raw.Addr[i] = n[i]
   623  	}
   624  	p := (*[4]byte)(unsafe.Pointer(&sa.PGN))
   625  	for i := 0; i < 4; i++ {
   626  		sa.raw.Addr[i+8] = p[i]
   627  	}
   628  	sa.raw.Addr[12] = sa.Addr
   629  	return unsafe.Pointer(&sa.raw), SizeofSockaddrCAN, nil
   630  }
   631  
   632  // SockaddrALG implements the Sockaddr interface for AF_ALG type sockets.
   633  // SockaddrALG enables userspace access to the Linux kernel's cryptography
   634  // subsystem. The Type and Name fields specify which type of hash or cipher
   635  // should be used with a given socket.
   636  //
   637  // To create a file descriptor that provides access to a hash or cipher, both
   638  // Bind and Accept must be used. Once the setup process is complete, input
   639  // data can be written to the socket, processed by the kernel, and then read
   640  // back as hash output or ciphertext.
   641  //
   642  // Here is an example of using an AF_ALG socket with SHA1 hashing.
   643  // The initial socket setup process is as follows:
   644  //
   645  //	// Open a socket to perform SHA1 hashing.
   646  //	fd, _ := unix.Socket(unix.AF_ALG, unix.SOCK_SEQPACKET, 0)
   647  //	addr := &unix.SockaddrALG{Type: "hash", Name: "sha1"}
   648  //	unix.Bind(fd, addr)
   649  //	// Note: unix.Accept does not work at this time; must invoke accept()
   650  //	// manually using unix.Syscall.
   651  //	hashfd, _, _ := unix.Syscall(unix.SYS_ACCEPT, uintptr(fd), 0, 0)
   652  //
   653  // Once a file descriptor has been returned from Accept, it may be used to
   654  // perform SHA1 hashing. The descriptor is not safe for concurrent use, but
   655  // may be re-used repeatedly with subsequent Write and Read operations.
   656  //
   657  // When hashing a small byte slice or string, a single Write and Read may
   658  // be used:
   659  //
   660  //	// Assume hashfd is already configured using the setup process.
   661  //	hash := os.NewFile(hashfd, "sha1")
   662  //	// Hash an input string and read the results. Each Write discards
   663  //	// previous hash state. Read always reads the current state.
   664  //	b := make([]byte, 20)
   665  //	for i := 0; i < 2; i++ {
   666  //	    io.WriteString(hash, "Hello, world.")
   667  //	    hash.Read(b)
   668  //	    fmt.Println(hex.EncodeToString(b))
   669  //	}
   670  //	// Output:
   671  //	// 2ae01472317d1935a84797ec1983ae243fc6aa28
   672  //	// 2ae01472317d1935a84797ec1983ae243fc6aa28
   673  //
   674  // For hashing larger byte slices, or byte streams such as those read from
   675  // a file or socket, use Sendto with MSG_MORE to instruct the kernel to update
   676  // the hash digest instead of creating a new one for a given chunk and finalizing it.
   677  //
   678  //	// Assume hashfd and addr are already configured using the setup process.
   679  //	hash := os.NewFile(hashfd, "sha1")
   680  //	// Hash the contents of a file.
   681  //	f, _ := os.Open("/tmp/linux-4.10-rc7.tar.xz")
   682  //	b := make([]byte, 4096)
   683  //	for {
   684  //	    n, err := f.Read(b)
   685  //	    if err == io.EOF {
   686  //	        break
   687  //	    }
   688  //	    unix.Sendto(hashfd, b[:n], unix.MSG_MORE, addr)
   689  //	}
   690  //	hash.Read(b)
   691  //	fmt.Println(hex.EncodeToString(b))
   692  //	// Output: 85cdcad0c06eef66f805ecce353bec9accbeecc5
   693  //
   694  // For more information, see: http://www.chronox.de/crypto-API/crypto/userspace-if.html.
   695  type SockaddrALG struct {
   696  	Type    string
   697  	Name    string
   698  	Feature uint32
   699  	Mask    uint32
   700  	raw     RawSockaddrALG
   701  }
   702  
   703  func (sa *SockaddrALG) sockaddr() (unsafe.Pointer, _Socklen, error) {
   704  	// Leave room for NUL byte terminator.
   705  	if len(sa.Type) > len(sa.raw.Type)-1 {
   706  		return nil, 0, EINVAL
   707  	}
   708  	if len(sa.Name) > len(sa.raw.Name)-1 {
   709  		return nil, 0, EINVAL
   710  	}
   711  
   712  	sa.raw.Family = AF_ALG
   713  	sa.raw.Feat = sa.Feature
   714  	sa.raw.Mask = sa.Mask
   715  
   716  	copy(sa.raw.Type[:], sa.Type)
   717  	copy(sa.raw.Name[:], sa.Name)
   718  
   719  	return unsafe.Pointer(&sa.raw), SizeofSockaddrALG, nil
   720  }
   721  
   722  // SockaddrVM implements the Sockaddr interface for AF_VSOCK type sockets.
   723  // SockaddrVM provides access to Linux VM sockets: a mechanism that enables
   724  // bidirectional communication between a hypervisor and its guest virtual
   725  // machines.
   726  type SockaddrVM struct {
   727  	// CID and Port specify a context ID and port address for a VM socket.
   728  	// Guests have a unique CID, and hosts may have a well-known CID of:
   729  	//  - VMADDR_CID_HYPERVISOR: refers to the hypervisor process.
   730  	//  - VMADDR_CID_LOCAL: refers to local communication (loopback).
   731  	//  - VMADDR_CID_HOST: refers to other processes on the host.
   732  	CID   uint32
   733  	Port  uint32
   734  	Flags uint8
   735  	raw   RawSockaddrVM
   736  }
   737  
   738  func (sa *SockaddrVM) sockaddr() (unsafe.Pointer, _Socklen, error) {
   739  	sa.raw.Family = AF_VSOCK
   740  	sa.raw.Port = sa.Port
   741  	sa.raw.Cid = sa.CID
   742  	sa.raw.Flags = sa.Flags
   743  
   744  	return unsafe.Pointer(&sa.raw), SizeofSockaddrVM, nil
   745  }
   746  
   747  type SockaddrXDP struct {
   748  	Flags        uint16
   749  	Ifindex      uint32
   750  	QueueID      uint32
   751  	SharedUmemFD uint32
   752  	raw          RawSockaddrXDP
   753  }
   754  
   755  func (sa *SockaddrXDP) sockaddr() (unsafe.Pointer, _Socklen, error) {
   756  	sa.raw.Family = AF_XDP
   757  	sa.raw.Flags = sa.Flags
   758  	sa.raw.Ifindex = sa.Ifindex
   759  	sa.raw.Queue_id = sa.QueueID
   760  	sa.raw.Shared_umem_fd = sa.SharedUmemFD
   761  
   762  	return unsafe.Pointer(&sa.raw), SizeofSockaddrXDP, nil
   763  }
   764  
   765  // This constant mirrors the #define of PX_PROTO_OE in
   766  // linux/if_pppox.h. We're defining this by hand here instead of
   767  // autogenerating through mkerrors.sh because including
   768  // linux/if_pppox.h causes some declaration conflicts with other
   769  // includes (linux/if_pppox.h includes linux/in.h, which conflicts
   770  // with netinet/in.h). Given that we only need a single zero constant
   771  // out of that file, it's cleaner to just define it by hand here.
   772  const px_proto_oe = 0
   773  
   774  type SockaddrPPPoE struct {
   775  	SID    uint16
   776  	Remote []byte
   777  	Dev    string
   778  	raw    RawSockaddrPPPoX
   779  }
   780  
   781  func (sa *SockaddrPPPoE) sockaddr() (unsafe.Pointer, _Socklen, error) {
   782  	if len(sa.Remote) != 6 {
   783  		return nil, 0, EINVAL
   784  	}
   785  	if len(sa.Dev) > IFNAMSIZ-1 {
   786  		return nil, 0, EINVAL
   787  	}
   788  
   789  	*(*uint16)(unsafe.Pointer(&sa.raw[0])) = AF_PPPOX
   790  	// This next field is in host-endian byte order. We can't use the
   791  	// same unsafe pointer cast as above, because this value is not
   792  	// 32-bit aligned and some architectures don't allow unaligned
   793  	// access.
   794  	//
   795  	// However, the value of px_proto_oe is 0, so we can use
   796  	// encoding/binary helpers to write the bytes without worrying
   797  	// about the ordering.
   798  	binary.BigEndian.PutUint32(sa.raw[2:6], px_proto_oe)
   799  	// This field is deliberately big-endian, unlike the previous
   800  	// one. The kernel expects SID to be in network byte order.
   801  	binary.BigEndian.PutUint16(sa.raw[6:8], sa.SID)
   802  	copy(sa.raw[8:14], sa.Remote)
   803  	for i := 14; i < 14+IFNAMSIZ; i++ {
   804  		sa.raw[i] = 0
   805  	}
   806  	copy(sa.raw[14:], sa.Dev)
   807  	return unsafe.Pointer(&sa.raw), SizeofSockaddrPPPoX, nil
   808  }
   809  
   810  // SockaddrTIPC implements the Sockaddr interface for AF_TIPC type sockets.
   811  // For more information on TIPC, see: http://tipc.sourceforge.net/.
   812  type SockaddrTIPC struct {
   813  	// Scope is the publication scopes when binding service/service range.
   814  	// Should be set to TIPC_CLUSTER_SCOPE or TIPC_NODE_SCOPE.
   815  	Scope int
   816  
   817  	// Addr is the type of address used to manipulate a socket. Addr must be
   818  	// one of:
   819  	//  - *TIPCSocketAddr: "id" variant in the C addr union
   820  	//  - *TIPCServiceRange: "nameseq" variant in the C addr union
   821  	//  - *TIPCServiceName: "name" variant in the C addr union
   822  	//
   823  	// If nil, EINVAL will be returned when the structure is used.
   824  	Addr TIPCAddr
   825  
   826  	raw RawSockaddrTIPC
   827  }
   828  
   829  // TIPCAddr is implemented by types that can be used as an address for
   830  // SockaddrTIPC. It is only implemented by *TIPCSocketAddr, *TIPCServiceRange,
   831  // and *TIPCServiceName.
   832  type TIPCAddr interface {
   833  	tipcAddrtype() uint8
   834  	tipcAddr() [12]byte
   835  }
   836  
   837  func (sa *TIPCSocketAddr) tipcAddr() [12]byte {
   838  	var out [12]byte
   839  	copy(out[:], (*(*[unsafe.Sizeof(TIPCSocketAddr{})]byte)(unsafe.Pointer(sa)))[:])
   840  	return out
   841  }
   842  
   843  func (sa *TIPCSocketAddr) tipcAddrtype() uint8 { return TIPC_SOCKET_ADDR }
   844  
   845  func (sa *TIPCServiceRange) tipcAddr() [12]byte {
   846  	var out [12]byte
   847  	copy(out[:], (*(*[unsafe.Sizeof(TIPCServiceRange{})]byte)(unsafe.Pointer(sa)))[:])
   848  	return out
   849  }
   850  
   851  func (sa *TIPCServiceRange) tipcAddrtype() uint8 { return TIPC_SERVICE_RANGE }
   852  
   853  func (sa *TIPCServiceName) tipcAddr() [12]byte {
   854  	var out [12]byte
   855  	copy(out[:], (*(*[unsafe.Sizeof(TIPCServiceName{})]byte)(unsafe.Pointer(sa)))[:])
   856  	return out
   857  }
   858  
   859  func (sa *TIPCServiceName) tipcAddrtype() uint8 { return TIPC_SERVICE_ADDR }
   860  
   861  func (sa *SockaddrTIPC) sockaddr() (unsafe.Pointer, _Socklen, error) {
   862  	if sa.Addr == nil {
   863  		return nil, 0, EINVAL
   864  	}
   865  	sa.raw.Family = AF_TIPC
   866  	sa.raw.Scope = int8(sa.Scope)
   867  	sa.raw.Addrtype = sa.Addr.tipcAddrtype()
   868  	sa.raw.Addr = sa.Addr.tipcAddr()
   869  	return unsafe.Pointer(&sa.raw), SizeofSockaddrTIPC, nil
   870  }
   871  
// SockaddrL2TPIP implements the Sockaddr interface for IPPROTO_L2TP/AF_INET sockets.
type SockaddrL2TPIP struct {
	// Addr is copied into the Addr field of the raw sockaddr.
	Addr [4]byte
	// ConnId is copied into the Conn_id field of the raw sockaddr.
	ConnId uint32
	// raw is the raw form of the address, filled in by sockaddr.
	raw RawSockaddrL2TPIP
}
   878  
   879  func (sa *SockaddrL2TPIP) sockaddr() (unsafe.Pointer, _Socklen, error) {
   880  	sa.raw.Family = AF_INET
   881  	sa.raw.Conn_id = sa.ConnId
   882  	sa.raw.Addr = sa.Addr
   883  	return unsafe.Pointer(&sa.raw), SizeofSockaddrL2TPIP, nil
   884  }
   885  
// SockaddrL2TPIP6 implements the Sockaddr interface for IPPROTO_L2TP/AF_INET6 sockets.
type SockaddrL2TPIP6 struct {
	// Addr is copied into the Addr field of the raw sockaddr.
	Addr [16]byte
	// ZoneId is copied into the Scope_id field of the raw sockaddr.
	ZoneId uint32
	// ConnId is copied into the Conn_id field of the raw sockaddr.
	ConnId uint32
	// raw is the raw form of the address, filled in by sockaddr.
	raw RawSockaddrL2TPIP6
}
   893  
   894  func (sa *SockaddrL2TPIP6) sockaddr() (unsafe.Pointer, _Socklen, error) {
   895  	sa.raw.Family = AF_INET6
   896  	sa.raw.Conn_id = sa.ConnId
   897  	sa.raw.Scope_id = sa.ZoneId
   898  	sa.raw.Addr = sa.Addr
   899  	return unsafe.Pointer(&sa.raw), SizeofSockaddrL2TPIP6, nil
   900  }
   901  
// SockaddrIUCV implements the Sockaddr interface for AF_IUCV sockets.
type SockaddrIUCV struct {
	// UserID is written to the User_id field of the raw sockaddr; it may be
	// at most 8 bytes long.
	UserID string
	// Name is written to the Name field of the raw sockaddr; it may be at
	// most 8 bytes long.
	Name string
	// raw is the raw form of the address, filled in by sockaddr.
	raw RawSockaddrIUCV
}
   908  
   909  func (sa *SockaddrIUCV) sockaddr() (unsafe.Pointer, _Socklen, error) {
   910  	sa.raw.Family = AF_IUCV
   911  	// These are EBCDIC encoded by the kernel, but we still need to pad them
   912  	// with blanks. Initializing with blanks allows the caller to feed in either
   913  	// a padded or an unpadded string.
   914  	for i := 0; i < 8; i++ {
   915  		sa.raw.Nodeid[i] = ' '
   916  		sa.raw.User_id[i] = ' '
   917  		sa.raw.Name[i] = ' '
   918  	}
   919  	if len(sa.UserID) > 8 || len(sa.Name) > 8 {
   920  		return nil, 0, EINVAL
   921  	}
   922  	for i, b := range []byte(sa.UserID[:]) {
   923  		sa.raw.User_id[i] = int8(b)
   924  	}
   925  	for i, b := range []byte(sa.Name[:]) {
   926  		sa.raw.Name[i] = int8(b)
   927  	}
   928  	return unsafe.Pointer(&sa.raw), SizeofSockaddrIUCV, nil
   929  }
   930  
// SockaddrNFC is the socket address used for AF_NFC sockets with the
// NFC_SOCKPROTO_RAW protocol.
type SockaddrNFC struct {
	// DeviceIdx is copied into the Dev_idx field of the raw sockaddr.
	DeviceIdx uint32
	// TargetIdx is copied into the Target_idx field of the raw sockaddr.
	TargetIdx uint32
	// NFCProtocol is copied into the Nfc_protocol field of the raw sockaddr.
	NFCProtocol uint32
	// raw is the raw form of the address, filled in by sockaddr.
	raw RawSockaddrNFC
}
   937  
   938  func (sa *SockaddrNFC) sockaddr() (unsafe.Pointer, _Socklen, error) {
   939  	sa.raw.Sa_family = AF_NFC
   940  	sa.raw.Dev_idx = sa.DeviceIdx
   941  	sa.raw.Target_idx = sa.TargetIdx
   942  	sa.raw.Nfc_protocol = sa.NFCProtocol
   943  	return unsafe.Pointer(&sa.raw), SizeofSockaddrNFC, nil
   944  }
   945  
// SockaddrNFCLLCP is the socket address used for AF_NFC sockets with the
// NFC_SOCKPROTO_LLCP protocol.
type SockaddrNFCLLCP struct {
	// DeviceIdx is copied into the Dev_idx field of the raw sockaddr.
	DeviceIdx uint32
	// TargetIdx is copied into the Target_idx field of the raw sockaddr.
	TargetIdx uint32
	// NFCProtocol is copied into the Nfc_protocol field of the raw sockaddr.
	NFCProtocol uint32
	// DestinationSAP is copied into the Dsap field of the raw sockaddr.
	DestinationSAP uint8
	// SourceSAP is copied into the Ssap field of the raw sockaddr.
	SourceSAP uint8
	// ServiceName is copied into the Service_name field of the raw
	// sockaddr; it must not be longer than that field.
	ServiceName string
	// raw is the raw form of the address, filled in by sockaddr.
	raw RawSockaddrNFCLLCP
}
   955  
   956  func (sa *SockaddrNFCLLCP) sockaddr() (unsafe.Pointer, _Socklen, error) {
   957  	sa.raw.Sa_family = AF_NFC
   958  	sa.raw.Dev_idx = sa.DeviceIdx
   959  	sa.raw.Target_idx = sa.TargetIdx
   960  	sa.raw.Nfc_protocol = sa.NFCProtocol
   961  	sa.raw.Dsap = sa.DestinationSAP
   962  	sa.raw.Ssap = sa.SourceSAP
   963  	if len(sa.ServiceName) > len(sa.raw.Service_name) {
   964  		return nil, 0, EINVAL
   965  	}
   966  	copy(sa.raw.Service_name[:], sa.ServiceName)
   967  	sa.raw.SetServiceNameLen(len(sa.ServiceName))
   968  	return unsafe.Pointer(&sa.raw), SizeofSockaddrNFCLLCP, nil
   969  }
   970  
   971  var socketProtocol = func(fd int) (int, error) {
   972  	return GetsockoptInt(fd, SOL_SOCKET, SO_PROTOCOL)
   973  }
   974  
// anyToSockaddr converts the raw socket address in rsa into the typed
// Sockaddr implementation selected by rsa.Addr.Family.
//
// For AF_INET, AF_INET6, AF_BLUETOOTH, AF_CAN and AF_NFC the family alone
// does not determine the concrete type, so the protocol of fd is looked up
// through socketProtocol; an error from that lookup is returned unchanged.
//
// EAFNOSUPPORT is returned for families not handled here and for
// AF_BLUETOOTH protocols other than BTPROTO_L2CAP and BTPROTO_RFCOMM. EINVAL
// is returned for an AF_PPPOX address whose protocol field is not
// px_proto_oe, for an unknown TIPC address type, for an unknown AF_NFC
// protocol, and for an NFC LLCP address whose service name length exceeds
// the Service_name field.
//
// Some results alias the memory of rsa instead of copying it
// (SockaddrPPPoE.Remote and SockaddrTIPC.Addr), and the leading NUL of an
// abstract AF_UNIX path is overwritten in rsa itself.
func anyToSockaddr(fd int, rsa *RawSockaddrAny) (Sockaddr, error) {
	switch rsa.Addr.Family {
	case AF_NETLINK:
		pp := (*RawSockaddrNetlink)(unsafe.Pointer(rsa))
		sa := new(SockaddrNetlink)
		sa.Family = pp.Family
		sa.Pad = pp.Pad
		sa.Pid = pp.Pid
		sa.Groups = pp.Groups
		return sa, nil

	case AF_PACKET:
		pp := (*RawSockaddrLinklayer)(unsafe.Pointer(rsa))
		sa := new(SockaddrLinklayer)
		sa.Protocol = pp.Protocol
		sa.Ifindex = int(pp.Ifindex)
		sa.Hatype = pp.Hatype
		sa.Pkttype = pp.Pkttype
		sa.Halen = pp.Halen
		sa.Addr = pp.Addr
		return sa, nil

	case AF_UNIX:
		pp := (*RawSockaddrUnix)(unsafe.Pointer(rsa))
		sa := new(SockaddrUnix)
		if pp.Path[0] == 0 {
			// "Abstract" Unix domain socket.
			// Rewrite leading NUL as @ for textual display.
			// (This is the standard convention.)
			// Not friendly to overwrite in place,
			// but the callers below don't care.
			pp.Path[0] = '@'
		}

		// Assume path ends at NUL.
		// This is not technically the Linux semantics for
		// abstract Unix domain sockets--they are supposed
		// to be uninterpreted fixed-size binary blobs--but
		// everyone uses this convention.
		n := 0
		for n < len(pp.Path) && pp.Path[n] != 0 {
			n++
		}
		sa.Name = string(unsafe.Slice((*byte)(unsafe.Pointer(&pp.Path[0])), n))
		return sa, nil

	case AF_INET:
		proto, err := socketProtocol(fd)
		if err != nil {
			return nil, err
		}

		switch proto {
		case IPPROTO_L2TP:
			pp := (*RawSockaddrL2TPIP)(unsafe.Pointer(rsa))
			sa := new(SockaddrL2TPIP)
			sa.ConnId = pp.Conn_id
			sa.Addr = pp.Addr
			return sa, nil
		default:
			pp := (*RawSockaddrInet4)(unsafe.Pointer(rsa))
			sa := new(SockaddrInet4)
			// Decode the port bytewise, most significant byte first, so
			// the result does not depend on host byte order.
			p := (*[2]byte)(unsafe.Pointer(&pp.Port))
			sa.Port = int(p[0])<<8 + int(p[1])
			sa.Addr = pp.Addr
			return sa, nil
		}

	case AF_INET6:
		proto, err := socketProtocol(fd)
		if err != nil {
			return nil, err
		}

		switch proto {
		case IPPROTO_L2TP:
			pp := (*RawSockaddrL2TPIP6)(unsafe.Pointer(rsa))
			sa := new(SockaddrL2TPIP6)
			sa.ConnId = pp.Conn_id
			sa.ZoneId = pp.Scope_id
			sa.Addr = pp.Addr
			return sa, nil
		default:
			pp := (*RawSockaddrInet6)(unsafe.Pointer(rsa))
			sa := new(SockaddrInet6)
			// Decode the port bytewise, most significant byte first, so
			// the result does not depend on host byte order.
			p := (*[2]byte)(unsafe.Pointer(&pp.Port))
			sa.Port = int(p[0])<<8 + int(p[1])
			sa.ZoneId = pp.Scope_id
			sa.Addr = pp.Addr
			return sa, nil
		}

	case AF_VSOCK:
		pp := (*RawSockaddrVM)(unsafe.Pointer(rsa))
		sa := &SockaddrVM{
			CID:   pp.Cid,
			Port:  pp.Port,
			Flags: pp.Flags,
		}
		return sa, nil
	case AF_BLUETOOTH:
		proto, err := socketProtocol(fd)
		if err != nil {
			return nil, err
		}
		// only BTPROTO_L2CAP and BTPROTO_RFCOMM can accept connections
		// Any other protocol leaves the switch and reaches the
		// EAFNOSUPPORT return at the end of the function.
		switch proto {
		case BTPROTO_L2CAP:
			pp := (*RawSockaddrL2)(unsafe.Pointer(rsa))
			sa := &SockaddrL2{
				PSM:      pp.Psm,
				CID:      pp.Cid,
				Addr:     pp.Bdaddr,
				AddrType: pp.Bdaddr_type,
			}
			return sa, nil
		case BTPROTO_RFCOMM:
			pp := (*RawSockaddrRFCOMM)(unsafe.Pointer(rsa))
			sa := &SockaddrRFCOMM{
				Channel: pp.Channel,
				Addr:    pp.Bdaddr,
			}
			return sa, nil
		}
	case AF_XDP:
		pp := (*RawSockaddrXDP)(unsafe.Pointer(rsa))
		sa := &SockaddrXDP{
			Flags:        pp.Flags,
			Ifindex:      pp.Ifindex,
			QueueID:      pp.Queue_id,
			SharedUmemFD: pp.Shared_umem_fd,
		}
		return sa, nil
	case AF_PPPOX:
		pp := (*RawSockaddrPPPoX)(unsafe.Pointer(rsa))
		// Bytes 2-5 hold the protocol; only px_proto_oe is accepted.
		if binary.BigEndian.Uint32(pp[2:6]) != px_proto_oe {
			return nil, EINVAL
		}
		// Remote is a slice of pp and therefore shares memory with rsa.
		sa := &SockaddrPPPoE{
			SID:    binary.BigEndian.Uint16(pp[6:8]),
			Remote: pp[8:14],
		}
		// The device name runs from byte 14 up to the first NUL. Dev stays
		// empty if no NUL is found within IFNAMSIZ bytes.
		for i := 14; i < 14+IFNAMSIZ; i++ {
			if pp[i] == 0 {
				sa.Dev = string(pp[14:i])
				break
			}
		}
		return sa, nil
	case AF_TIPC:
		pp := (*RawSockaddrTIPC)(unsafe.Pointer(rsa))

		sa := &SockaddrTIPC{
			Scope: int(pp.Scope),
		}

		// Determine which union variant is present in pp.Addr by checking
		// pp.Addrtype. The resulting pointer refers to pp.Addr itself, so
		// sa.Addr shares memory with rsa.
		switch pp.Addrtype {
		case TIPC_SERVICE_RANGE:
			sa.Addr = (*TIPCServiceRange)(unsafe.Pointer(&pp.Addr))
		case TIPC_SERVICE_ADDR:
			sa.Addr = (*TIPCServiceName)(unsafe.Pointer(&pp.Addr))
		case TIPC_SOCKET_ADDR:
			sa.Addr = (*TIPCSocketAddr)(unsafe.Pointer(&pp.Addr))
		default:
			return nil, EINVAL
		}

		return sa, nil
	case AF_IUCV:
		pp := (*RawSockaddrIUCV)(unsafe.Pointer(rsa))

		var user [8]byte
		var name [8]byte

		// All 8 bytes of each field are copied verbatim; nothing is
		// trimmed from the resulting strings.
		for i := 0; i < 8; i++ {
			user[i] = byte(pp.User_id[i])
			name[i] = byte(pp.Name[i])
		}

		sa := &SockaddrIUCV{
			UserID: string(user[:]),
			Name:   string(name[:]),
		}
		return sa, nil

	case AF_CAN:
		proto, err := socketProtocol(fd)
		if err != nil {
			return nil, err
		}

		pp := (*RawSockaddrCAN)(unsafe.Pointer(rsa))

		switch proto {
		case CAN_J1939:
			sa := &SockaddrCANJ1939{
				Ifindex: int(pp.Ifindex),
			}
			// Copy bytes 0-7 of pp.Addr into Name, bytes 8-11 into PGN and
			// byte 12 into Addr, keeping the in-memory byte layout.
			name := (*[8]byte)(unsafe.Pointer(&sa.Name))
			for i := 0; i < 8; i++ {
				name[i] = pp.Addr[i]
			}
			pgn := (*[4]byte)(unsafe.Pointer(&sa.PGN))
			for i := 0; i < 4; i++ {
				pgn[i] = pp.Addr[i+8]
			}
			addr := (*[1]byte)(unsafe.Pointer(&sa.Addr))
			addr[0] = pp.Addr[12]
			return sa, nil
		default:
			sa := &SockaddrCAN{
				Ifindex: int(pp.Ifindex),
			}
			// Copy bytes 0-3 of pp.Addr into RxID and bytes 4-7 into TxID,
			// keeping the in-memory byte layout.
			rx := (*[4]byte)(unsafe.Pointer(&sa.RxID))
			for i := 0; i < 4; i++ {
				rx[i] = pp.Addr[i]
			}
			tx := (*[4]byte)(unsafe.Pointer(&sa.TxID))
			for i := 0; i < 4; i++ {
				tx[i] = pp.Addr[i+4]
			}
			return sa, nil
		}
	case AF_NFC:
		proto, err := socketProtocol(fd)
		if err != nil {
			return nil, err
		}
		switch proto {
		case NFC_SOCKPROTO_RAW:
			pp := (*RawSockaddrNFC)(unsafe.Pointer(rsa))
			sa := &SockaddrNFC{
				DeviceIdx:   pp.Dev_idx,
				TargetIdx:   pp.Target_idx,
				NFCProtocol: pp.Nfc_protocol,
			}
			return sa, nil
		case NFC_SOCKPROTO_LLCP:
			pp := (*RawSockaddrNFCLLCP)(unsafe.Pointer(rsa))
			// Reject a length that would slice past the end of
			// Service_name below.
			if uint64(pp.Service_name_len) > uint64(len(pp.Service_name)) {
				return nil, EINVAL
			}
			sa := &SockaddrNFCLLCP{
				DeviceIdx:      pp.Dev_idx,
				TargetIdx:      pp.Target_idx,
				NFCProtocol:    pp.Nfc_protocol,
				DestinationSAP: pp.Dsap,
				SourceSAP:      pp.Ssap,
				ServiceName:    string(pp.Service_name[:pp.Service_name_len]),
			}
			return sa, nil
		default:
			return nil, EINVAL
		}
	}
	return nil, EAFNOSUPPORT
}
  1234  
  1235  func Accept(fd int) (nfd int, sa Sockaddr, err error) {
  1236  	var rsa RawSockaddrAny
  1237  	var len _Socklen = SizeofSockaddrAny
  1238  	nfd, err = accept4(fd, &rsa, &len, 0)
  1239  	if err != nil {
  1240  		return
  1241  	}
  1242  	sa, err = anyToSockaddr(fd, &rsa)
  1243  	if err != nil {
  1244  		Close(nfd)
  1245  		nfd = 0
  1246  	}
  1247  	return
  1248  }
  1249  
  1250  func Accept4(fd int, flags int) (nfd int, sa Sockaddr, err error) {
  1251  	var rsa RawSockaddrAny
  1252  	var len _Socklen = SizeofSockaddrAny
  1253  	nfd, err = accept4(fd, &rsa, &len, flags)
  1254  	if err != nil {
  1255  		return
  1256  	}
  1257  	if len > SizeofSockaddrAny {
  1258  		panic("RawSockaddrAny too small")
  1259  	}
  1260  	sa, err = anyToSockaddr(fd, &rsa)
  1261  	if err != nil {
  1262  		Close(nfd)
  1263  		nfd = 0
  1264  	}
  1265  	return
  1266  }
  1267  
  1268  func Getsockname(fd int) (sa Sockaddr, err error) {
  1269  	var rsa RawSockaddrAny
  1270  	var len _Socklen = SizeofSockaddrAny
  1271  	if err = getsockname(fd, &rsa, &len); err != nil {
  1272  		return
  1273  	}
  1274  	return anyToSockaddr(fd, &rsa)
  1275  }
  1276  
  1277  func GetsockoptIPMreqn(fd, level, opt int) (*IPMreqn, error) {
  1278  	var value IPMreqn
  1279  	vallen := _Socklen(SizeofIPMreqn)
  1280  	err := getsockopt(fd, level, opt, unsafe.Pointer(&value), &vallen)
  1281  	return &value, err
  1282  }
  1283  
  1284  func GetsockoptUcred(fd, level, opt int) (*Ucred, error) {
  1285  	var value Ucred
  1286  	vallen := _Socklen(SizeofUcred)
  1287  	err := getsockopt(fd, level, opt, unsafe.Pointer(&value), &vallen)
  1288  	return &value, err
  1289  }
  1290  
  1291  func GetsockoptTCPInfo(fd, level, opt int) (*TCPInfo, error) {
  1292  	var value TCPInfo
  1293  	vallen := _Socklen(SizeofTCPInfo)
  1294  	err := getsockopt(fd, level, opt, unsafe.Pointer(&value), &vallen)
  1295  	return &value, err
  1296  }
  1297  
  1298  // GetsockoptString returns the string value of the socket option opt for the
  1299  // socket associated with fd at the given socket level.
  1300  func GetsockoptString(fd, level, opt int) (string, error) {
  1301  	buf := make([]byte, 256)
  1302  	vallen := _Socklen(len(buf))
  1303  	err := getsockopt(fd, level, opt, unsafe.Pointer(&buf[0]), &vallen)
  1304  	if err != nil {
  1305  		if err == ERANGE {
  1306  			buf = make([]byte, vallen)
  1307  			err = getsockopt(fd, level, opt, unsafe.Pointer(&buf[0]), &vallen)
  1308  		}
  1309  		if err != nil {
  1310  			return "", err
  1311  		}
  1312  	}
  1313  	return ByteSliceToString(buf[:vallen]), nil
  1314  }
  1315  
  1316  func GetsockoptTpacketStats(fd, level, opt int) (*TpacketStats, error) {
  1317  	var value TpacketStats
  1318  	vallen := _Socklen(SizeofTpacketStats)
  1319  	err := getsockopt(fd, level, opt, unsafe.Pointer(&value), &vallen)
  1320  	return &value, err
  1321  }
  1322  
  1323  func GetsockoptTpacketStatsV3(fd, level, opt int) (*TpacketStatsV3, error) {
  1324  	var value TpacketStatsV3
  1325  	vallen := _Socklen(SizeofTpacketStatsV3)
  1326  	err := getsockopt(fd, level, opt, unsafe.Pointer(&value), &vallen)
  1327  	return &value, err
  1328  }
  1329  
  1330  func SetsockoptIPMreqn(fd, level, opt int, mreq *IPMreqn) (err error) {
  1331  	return setsockopt(fd, level, opt, unsafe.Pointer(mreq), unsafe.Sizeof(*mreq))
  1332  }
  1333  
  1334  func SetsockoptPacketMreq(fd, level, opt int, mreq *PacketMreq) error {
  1335  	return setsockopt(fd, level, opt, unsafe.Pointer(mreq), unsafe.Sizeof(*mreq))
  1336  }
  1337  
  1338  // SetsockoptSockFprog attaches a classic BPF or an extended BPF program to a
  1339  // socket to filter incoming packets.  See 'man 7 socket' for usage information.
  1340  func SetsockoptSockFprog(fd, level, opt int, fprog *SockFprog) error {
  1341  	return setsockopt(fd, level, opt, unsafe.Pointer(fprog), unsafe.Sizeof(*fprog))
  1342  }
  1343  
  1344  func SetsockoptCanRawFilter(fd, level, opt int, filter []CanFilter) error {
  1345  	var p unsafe.Pointer
  1346  	if len(filter) > 0 {
  1347  		p = unsafe.Pointer(&filter[0])
  1348  	}
  1349  	return setsockopt(fd, level, opt, p, uintptr(len(filter)*SizeofCanFilter))
  1350  }
  1351  
  1352  func SetsockoptTpacketReq(fd, level, opt int, tp *TpacketReq) error {
  1353  	return setsockopt(fd, level, opt, unsafe.Pointer(tp), unsafe.Sizeof(*tp))
  1354  }
  1355  
  1356  func SetsockoptTpacketReq3(fd, level, opt int, tp *TpacketReq3) error {
  1357  	return setsockopt(fd, level, opt, unsafe.Pointer(tp), unsafe.Sizeof(*tp))
  1358  }
  1359  
  1360  func SetsockoptTCPRepairOpt(fd, level, opt int, o []TCPRepairOpt) (err error) {
  1361  	if len(o) == 0 {
  1362  		return EINVAL
  1363  	}
  1364  	return setsockopt(fd, level, opt, unsafe.Pointer(&o[0]), uintptr(SizeofTCPRepairOpt*len(o)))
  1365  }
  1366  
  1367  func SetsockoptTCPMD5Sig(fd, level, opt int, s *TCPMD5Sig) error {
  1368  	return setsockopt(fd, level, opt, unsafe.Pointer(s), unsafe.Sizeof(*s))
  1369  }
  1370  
  1371  // Keyctl Commands (http://man7.org/linux/man-pages/man2/keyctl.2.html)
  1372  
  1373  // KeyctlInt calls keyctl commands in which each argument is an int.
  1374  // These commands are KEYCTL_REVOKE, KEYCTL_CHOWN, KEYCTL_CLEAR, KEYCTL_LINK,
  1375  // KEYCTL_UNLINK, KEYCTL_NEGATE, KEYCTL_SET_REQKEY_KEYRING, KEYCTL_SET_TIMEOUT,
  1376  // KEYCTL_ASSUME_AUTHORITY, KEYCTL_SESSION_TO_PARENT, KEYCTL_REJECT,
  1377  // KEYCTL_INVALIDATE, and KEYCTL_GET_PERSISTENT.
  1378  //sys	KeyctlInt(cmd int, arg2 int, arg3 int, arg4 int, arg5 int) (ret int, err error) = SYS_KEYCTL
  1379  
  1380  // KeyctlBuffer calls keyctl commands in which the third and fourth
  1381  // arguments are a buffer and its length, respectively.
  1382  // These commands are KEYCTL_UPDATE, KEYCTL_READ, and KEYCTL_INSTANTIATE.
  1383  //sys	KeyctlBuffer(cmd int, arg2 int, buf []byte, arg5 int) (ret int, err error) = SYS_KEYCTL
  1384  
  1385  // KeyctlString calls keyctl commands which return a string.
  1386  // These commands are KEYCTL_DESCRIBE and KEYCTL_GET_SECURITY.
  1387  func KeyctlString(cmd int, id int) (string, error) {
  1388  	// We must loop as the string data may change in between the syscalls.
  1389  	// We could allocate a large buffer here to reduce the chance that the
  1390  	// syscall needs to be called twice; however, this is unnecessary as
  1391  	// the performance loss is negligible.
  1392  	var buffer []byte
  1393  	for {
  1394  		// Try to fill the buffer with data
  1395  		length, err := KeyctlBuffer(cmd, id, buffer, 0)
  1396  		if err != nil {
  1397  			return "", err
  1398  		}
  1399  
  1400  		// Check if the data was written
  1401  		if length <= len(buffer) {
  1402  			// Exclude the null terminator
  1403  			return string(buffer[:length-1]), nil
  1404  		}
  1405  
  1406  		// Make a bigger buffer if needed
  1407  		buffer = make([]byte, length)
  1408  	}
  1409  }
  1410  
  1411  // Keyctl commands with special signatures.
  1412  
  1413  // KeyctlGetKeyringID implements the KEYCTL_GET_KEYRING_ID command.
  1414  // See the full documentation at:
  1415  // http://man7.org/linux/man-pages/man3/keyctl_get_keyring_ID.3.html
  1416  func KeyctlGetKeyringID(id int, create bool) (ringid int, err error) {
  1417  	createInt := 0
  1418  	if create {
  1419  		createInt = 1
  1420  	}
  1421  	return KeyctlInt(KEYCTL_GET_KEYRING_ID, id, createInt, 0, 0)
  1422  }
  1423  
  1424  // KeyctlSetperm implements the KEYCTL_SETPERM command. The perm value is the
  1425  // key handle permission mask as described in the "keyctl setperm" section of
  1426  // http://man7.org/linux/man-pages/man1/keyctl.1.html.
  1427  // See the full documentation at:
  1428  // http://man7.org/linux/man-pages/man3/keyctl_setperm.3.html
  1429  func KeyctlSetperm(id int, perm uint32) error {
  1430  	_, err := KeyctlInt(KEYCTL_SETPERM, id, int(perm), 0, 0)
  1431  	return err
  1432  }
  1433  
  1434  //sys	keyctlJoin(cmd int, arg2 string) (ret int, err error) = SYS_KEYCTL
  1435  
  1436  // KeyctlJoinSessionKeyring implements the KEYCTL_JOIN_SESSION_KEYRING command.
  1437  // See the full documentation at:
  1438  // http://man7.org/linux/man-pages/man3/keyctl_join_session_keyring.3.html
  1439  func KeyctlJoinSessionKeyring(name string) (ringid int, err error) {
  1440  	return keyctlJoin(KEYCTL_JOIN_SESSION_KEYRING, name)
  1441  }
  1442  
  1443  //sys	keyctlSearch(cmd int, arg2 int, arg3 string, arg4 string, arg5 int) (ret int, err error) = SYS_KEYCTL
  1444  
  1445  // KeyctlSearch implements the KEYCTL_SEARCH command.
  1446  // See the full documentation at:
  1447  // http://man7.org/linux/man-pages/man3/keyctl_search.3.html
  1448  func KeyctlSearch(ringid int, keyType, description string, destRingid int) (id int, err error) {
  1449  	return keyctlSearch(KEYCTL_SEARCH, ringid, keyType, description, destRingid)
  1450  }
  1451  
  1452  //sys	keyctlIOV(cmd int, arg2 int, payload []Iovec, arg5 int) (err error) = SYS_KEYCTL
  1453  
  1454  // KeyctlInstantiateIOV implements the KEYCTL_INSTANTIATE_IOV command. This
  1455  // command is similar to KEYCTL_INSTANTIATE, except that the payload is a slice
  1456  // of Iovec (each of which represents a buffer) instead of a single buffer.
  1457  // See the full documentation at:
  1458  // http://man7.org/linux/man-pages/man3/keyctl_instantiate_iov.3.html
  1459  func KeyctlInstantiateIOV(id int, payload []Iovec, ringid int) error {
  1460  	return keyctlIOV(KEYCTL_INSTANTIATE_IOV, id, payload, ringid)
  1461  }
  1462  
  1463  //sys	keyctlDH(cmd int, arg2 *KeyctlDHParams, buf []byte) (ret int, err error) = SYS_KEYCTL
  1464  
  1465  // KeyctlDHCompute implements the KEYCTL_DH_COMPUTE command. This command
  1466  // computes a Diffie-Hellman shared secret based on the provide params. The
  1467  // secret is written to the provided buffer and the returned size is the number
  1468  // of bytes written (returning an error if there is insufficient space in the
  1469  // buffer). If a nil buffer is passed in, this function returns the minimum
  1470  // buffer length needed to store the appropriate data. Note that this differs
  1471  // from KEYCTL_READ's behavior which always returns the requested payload size.
  1472  // See the full documentation at:
  1473  // http://man7.org/linux/man-pages/man3/keyctl_dh_compute.3.html
  1474  func KeyctlDHCompute(params *KeyctlDHParams, buffer []byte) (size int, err error) {
  1475  	return keyctlDH(KEYCTL_DH_COMPUTE, params, buffer)
  1476  }
  1477  
  1478  // KeyctlRestrictKeyring implements the KEYCTL_RESTRICT_KEYRING command. This
  1479  // command limits the set of keys that can be linked to the keyring, regardless
  1480  // of keyring permissions. The command requires the "setattr" permission.
  1481  //
  1482  // When called with an empty keyType the command locks the keyring, preventing
  1483  // any further keys from being linked to the keyring.
  1484  //
  1485  // The "asymmetric" keyType defines restrictions requiring key payloads to be
  1486  // DER encoded X.509 certificates signed by keys in another keyring. Restrictions
  1487  // for "asymmetric" include "builtin_trusted", "builtin_and_secondary_trusted",
  1488  // "key_or_keyring:<key>", and "key_or_keyring:<key>:chain".
  1489  //
  1490  // As of Linux 4.12, only the "asymmetric" keyType defines type-specific
  1491  // restrictions.
  1492  //
  1493  // See the full documentation at:
  1494  // http://man7.org/linux/man-pages/man3/keyctl_restrict_keyring.3.html
  1495  // http://man7.org/linux/man-pages/man2/keyctl.2.html
  1496  func KeyctlRestrictKeyring(ringid int, keyType string, restriction string) error {
  1497  	if keyType == "" {
  1498  		return keyctlRestrictKeyring(KEYCTL_RESTRICT_KEYRING, ringid)
  1499  	}
  1500  	return keyctlRestrictKeyringByType(KEYCTL_RESTRICT_KEYRING, ringid, keyType, restriction)
  1501  }
  1502  
  1503  //sys	keyctlRestrictKeyringByType(cmd int, arg2 int, keyType string, restriction string) (err error) = SYS_KEYCTL
  1504  //sys	keyctlRestrictKeyring(cmd int, arg2 int) (err error) = SYS_KEYCTL
  1505  
  1506  func recvmsgRaw(fd int, iov []Iovec, oob []byte, flags int, rsa *RawSockaddrAny) (n, oobn int, recvflags int, err error) {
  1507  	var msg Msghdr
  1508  	msg.Name = (*byte)(unsafe.Pointer(rsa))
  1509  	msg.Namelen = uint32(SizeofSockaddrAny)
  1510  	var dummy byte
  1511  	if len(oob) > 0 {
  1512  		if emptyIovecs(iov) {
  1513  			var sockType int
  1514  			sockType, err = GetsockoptInt(fd, SOL_SOCKET, SO_TYPE)
  1515  			if err != nil {
  1516  				return
  1517  			}
  1518  			// receive at least one normal byte
  1519  			if sockType != SOCK_DGRAM {
  1520  				var iova [1]Iovec
  1521  				iova[0].Base = &dummy
  1522  				iova[0].SetLen(1)
  1523  				iov = iova[:]
  1524  			}
  1525  		}
  1526  		msg.Control = &oob[0]
  1527  		msg.SetControllen(len(oob))
  1528  	}
  1529  	if len(iov) > 0 {
  1530  		msg.Iov = &iov[0]
  1531  		msg.SetIovlen(len(iov))
  1532  	}
  1533  	if n, err = recvmsg(fd, &msg, flags); err != nil {
  1534  		return
  1535  	}
  1536  	oobn = int(msg.Controllen)
  1537  	recvflags = int(msg.Flags)
  1538  	return
  1539  }
  1540  
  1541  func sendmsgN(fd int, iov []Iovec, oob []byte, ptr unsafe.Pointer, salen _Socklen, flags int) (n int, err error) {
  1542  	var msg Msghdr
  1543  	msg.Name = (*byte)(ptr)
  1544  	msg.Namelen = uint32(salen)
  1545  	var dummy byte
  1546  	var empty bool
  1547  	if len(oob) > 0 {
  1548  		empty = emptyIovecs(iov)
  1549  		if empty {
  1550  			var sockType int
  1551  			sockType, err = GetsockoptInt(fd, SOL_SOCKET, SO_TYPE)
  1552  			if err != nil {
  1553  				return 0, err
  1554  			}
  1555  			// send at least one normal byte
  1556  			if sockType != SOCK_DGRAM {
  1557  				var iova [1]Iovec
  1558  				iova[0].Base = &dummy
  1559  				iova[0].SetLen(1)
  1560  				iov = iova[:]
  1561  			}
  1562  		}
  1563  		msg.Control = &oob[0]
  1564  		msg.SetControllen(len(oob))
  1565  	}
  1566  	if len(iov) > 0 {
  1567  		msg.Iov = &iov[0]
  1568  		msg.SetIovlen(len(iov))
  1569  	}
  1570  	if n, err = sendmsg(fd, &msg, flags); err != nil {
  1571  		return 0, err
  1572  	}
  1573  	if len(oob) > 0 && empty {
  1574  		n = 0
  1575  	}
  1576  	return n, nil
  1577  }
  1578  
  1579  // BindToDevice binds the socket associated with fd to device.
  1580  func BindToDevice(fd int, device string) (err error) {
  1581  	return SetsockoptString(fd, SOL_SOCKET, SO_BINDTODEVICE, device)
  1582  }
  1583  
  1584  //sys	ptrace(request int, pid int, addr uintptr, data uintptr) (err error)
  1585  //sys	ptracePtr(request int, pid int, addr uintptr, data unsafe.Pointer) (err error) = SYS_PTRACE
  1586  
  1587  func ptracePeek(req int, pid int, addr uintptr, out []byte) (count int, err error) {
  1588  	// The peek requests are machine-size oriented, so we wrap it
  1589  	// to retrieve arbitrary-length data.
  1590  
  1591  	// The ptrace syscall differs from glibc's ptrace.
  1592  	// Peeks returns the word in *data, not as the return value.
  1593  
  1594  	var buf [SizeofPtr]byte
  1595  
  1596  	// Leading edge. PEEKTEXT/PEEKDATA don't require aligned
  1597  	// access (PEEKUSER warns that it might), but if we don't
  1598  	// align our reads, we might straddle an unmapped page
  1599  	// boundary and not get the bytes leading up to the page
  1600  	// boundary.
  1601  	n := 0
  1602  	if addr%SizeofPtr != 0 {
  1603  		err = ptracePtr(req, pid, addr-addr%SizeofPtr, unsafe.Pointer(&buf[0]))
  1604  		if err != nil {
  1605  			return 0, err
  1606  		}
  1607  		n += copy(out, buf[addr%SizeofPtr:])
  1608  		out = out[n:]
  1609  	}
  1610  
  1611  	// Remainder.
  1612  	for len(out) > 0 {
  1613  		// We use an internal buffer to guarantee alignment.
  1614  		// It's not documented if this is necessary, but we're paranoid.
  1615  		err = ptracePtr(req, pid, addr+uintptr(n), unsafe.Pointer(&buf[0]))
  1616  		if err != nil {
  1617  			return n, err
  1618  		}
  1619  		copied := copy(out, buf[0:])
  1620  		n += copied
  1621  		out = out[copied:]
  1622  	}
  1623  
  1624  	return n, nil
  1625  }
  1626  
  1627  func PtracePeekText(pid int, addr uintptr, out []byte) (count int, err error) {
  1628  	return ptracePeek(PTRACE_PEEKTEXT, pid, addr, out)
  1629  }
  1630  
  1631  func PtracePeekData(pid int, addr uintptr, out []byte) (count int, err error) {
  1632  	return ptracePeek(PTRACE_PEEKDATA, pid, addr, out)
  1633  }
  1634  
  1635  func PtracePeekUser(pid int, addr uintptr, out []byte) (count int, err error) {
  1636  	return ptracePeek(PTRACE_PEEKUSR, pid, addr, out)
  1637  }
  1638  
  1639  func ptracePoke(pokeReq int, peekReq int, pid int, addr uintptr, data []byte) (count int, err error) {
  1640  	// As for ptracePeek, we need to align our accesses to deal
  1641  	// with the possibility of straddling an invalid page.
  1642  
  1643  	// Leading edge.
  1644  	n := 0
  1645  	if addr%SizeofPtr != 0 {
  1646  		var buf [SizeofPtr]byte
  1647  		err = ptracePtr(peekReq, pid, addr-addr%SizeofPtr, unsafe.Pointer(&buf[0]))
  1648  		if err != nil {
  1649  			return 0, err
  1650  		}
  1651  		n += copy(buf[addr%SizeofPtr:], data)
  1652  		word := *((*uintptr)(unsafe.Pointer(&buf[0])))
  1653  		err = ptrace(pokeReq, pid, addr-addr%SizeofPtr, word)
  1654  		if err != nil {
  1655  			return 0, err
  1656  		}
  1657  		data = data[n:]
  1658  	}
  1659  
  1660  	// Interior.
  1661  	for len(data) > SizeofPtr {
  1662  		word := *((*uintptr)(unsafe.Pointer(&data[0])))
  1663  		err = ptrace(pokeReq, pid, addr+uintptr(n), word)
  1664  		if err != nil {
  1665  			return n, err
  1666  		}
  1667  		n += SizeofPtr
  1668  		data = data[SizeofPtr:]
  1669  	}
  1670  
  1671  	// Trailing edge.
  1672  	if len(data) > 0 {
  1673  		var buf [SizeofPtr]byte
  1674  		err = ptracePtr(peekReq, pid, addr+uintptr(n), unsafe.Pointer(&buf[0]))
  1675  		if err != nil {
  1676  			return n, err
  1677  		}
  1678  		copy(buf[0:], data)
  1679  		word := *((*uintptr)(unsafe.Pointer(&buf[0])))
  1680  		err = ptrace(pokeReq, pid, addr+uintptr(n), word)
  1681  		if err != nil {
  1682  			return n, err
  1683  		}
  1684  		n += len(data)
  1685  	}
  1686  
  1687  	return n, nil
  1688  }
  1689  
  1690  func PtracePokeText(pid int, addr uintptr, data []byte) (count int, err error) {
  1691  	return ptracePoke(PTRACE_POKETEXT, PTRACE_PEEKTEXT, pid, addr, data)
  1692  }
  1693  
  1694  func PtracePokeData(pid int, addr uintptr, data []byte) (count int, err error) {
  1695  	return ptracePoke(PTRACE_POKEDATA, PTRACE_PEEKDATA, pid, addr, data)
  1696  }
  1697  
  1698  func PtracePokeUser(pid int, addr uintptr, data []byte) (count int, err error) {
  1699  	return ptracePoke(PTRACE_POKEUSR, PTRACE_PEEKUSR, pid, addr, data)
  1700  }
  1701  
// elfNT_PRSTATUS is a copy of the debug/elf.NT_PRSTATUS constant so
// x/sys/unix doesn't need to depend on debug/elf and thus
// compress/zlib, debug/dwarf, and other packages.
// It is passed as the addr argument of the PTRACE_GETREGSET and
// PTRACE_SETREGSET requests.
const elfNT_PRSTATUS = 1
  1706  
  1707  func PtraceGetRegs(pid int, regsout *PtraceRegs) (err error) {
  1708  	var iov Iovec
  1709  	iov.Base = (*byte)(unsafe.Pointer(regsout))
  1710  	iov.SetLen(int(unsafe.Sizeof(*regsout)))
  1711  	return ptracePtr(PTRACE_GETREGSET, pid, uintptr(elfNT_PRSTATUS), unsafe.Pointer(&iov))
  1712  }
  1713  
  1714  func PtraceSetRegs(pid int, regs *PtraceRegs) (err error) {
  1715  	var iov Iovec
  1716  	iov.Base = (*byte)(unsafe.Pointer(regs))
  1717  	iov.SetLen(int(unsafe.Sizeof(*regs)))
  1718  	return ptracePtr(PTRACE_SETREGSET, pid, uintptr(elfNT_PRSTATUS), unsafe.Pointer(&iov))
  1719  }
  1720  
  1721  func PtraceSetOptions(pid int, options int) (err error) {
  1722  	return ptrace(PTRACE_SETOPTIONS, pid, 0, uintptr(options))
  1723  }
  1724  
  1725  func PtraceGetEventMsg(pid int) (msg uint, err error) {
  1726  	var data _C_long
  1727  	err = ptracePtr(PTRACE_GETEVENTMSG, pid, 0, unsafe.Pointer(&data))
  1728  	msg = uint(data)
  1729  	return
  1730  }
  1731  
  1732  func PtraceCont(pid int, signal int) (err error) {
  1733  	return ptrace(PTRACE_CONT, pid, 0, uintptr(signal))
  1734  }
  1735  
  1736  func PtraceSyscall(pid int, signal int) (err error) {
  1737  	return ptrace(PTRACE_SYSCALL, pid, 0, uintptr(signal))
  1738  }
  1739  
  1740  func PtraceSingleStep(pid int) (err error) { return ptrace(PTRACE_SINGLESTEP, pid, 0, 0) }
  1741  
  1742  func PtraceInterrupt(pid int) (err error) { return ptrace(PTRACE_INTERRUPT, pid, 0, 0) }
  1743  
  1744  func PtraceAttach(pid int) (err error) { return ptrace(PTRACE_ATTACH, pid, 0, 0) }
  1745  
  1746  func PtraceSeize(pid int) (err error) { return ptrace(PTRACE_SEIZE, pid, 0, 0) }
  1747  
  1748  func PtraceDetach(pid int) (err error) { return ptrace(PTRACE_DETACH, pid, 0, 0) }
  1749  
  1750  //sys	reboot(magic1 uint, magic2 uint, cmd int, arg string) (err error)
  1751  
  1752  func Reboot(cmd int) (err error) {
  1753  	return reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, "")
  1754  }
  1755  
  1756  func direntIno(buf []byte) (uint64, bool) {
  1757  	return readInt(buf, unsafe.Offsetof(Dirent{}.Ino), unsafe.Sizeof(Dirent{}.Ino))
  1758  }
  1759  
  1760  func direntReclen(buf []byte) (uint64, bool) {
  1761  	return readInt(buf, unsafe.Offsetof(Dirent{}.Reclen), unsafe.Sizeof(Dirent{}.Reclen))
  1762  }
  1763  
  1764  func direntNamlen(buf []byte) (uint64, bool) {
  1765  	reclen, ok := direntReclen(buf)
  1766  	if !ok {
  1767  		return 0, false
  1768  	}
  1769  	return reclen - uint64(unsafe.Offsetof(Dirent{}.Name)), true
  1770  }
  1771  
  1772  //sys	mount(source string, target string, fstype string, flags uintptr, data *byte) (err error)
  1773  
  1774  func Mount(source string, target string, fstype string, flags uintptr, data string) (err error) {
  1775  	// Certain file systems get rather angry and EINVAL if you give
  1776  	// them an empty string of data, rather than NULL.
  1777  	if data == "" {
  1778  		return mount(source, target, fstype, flags, nil)
  1779  	}
  1780  	datap, err := BytePtrFromString(data)
  1781  	if err != nil {
  1782  		return err
  1783  	}
  1784  	return mount(source, target, fstype, flags, datap)
  1785  }
  1786  
  1787  //sys	mountSetattr(dirfd int, pathname string, flags uint, attr *MountAttr, size uintptr) (err error) = SYS_MOUNT_SETATTR
  1788  
  1789  // MountSetattr is a wrapper for mount_setattr(2).
  1790  // https://man7.org/linux/man-pages/man2/mount_setattr.2.html
  1791  //
  1792  // Requires kernel >= 5.12.
  1793  func MountSetattr(dirfd int, pathname string, flags uint, attr *MountAttr) error {
  1794  	return mountSetattr(dirfd, pathname, flags, attr, unsafe.Sizeof(*attr))
  1795  }
  1796  
  1797  func Sendfile(outfd int, infd int, offset *int64, count int) (written int, err error) {
  1798  	if raceenabled {
  1799  		raceReleaseMerge(unsafe.Pointer(&ioSync))
  1800  	}
  1801  	return sendfile(outfd, infd, offset, count)
  1802  }
  1803  
  1804  // Sendto
  1805  // Recvfrom
  1806  // Socketpair
  1807  
  1808  /*
  1809   * Direct access
  1810   */
  1811  //sys	Acct(path string) (err error)
  1812  //sys	AddKey(keyType string, description string, payload []byte, ringid int) (id int, err error)
  1813  //sys	Adjtimex(buf *Timex) (state int, err error)
  1814  //sysnb	Capget(hdr *CapUserHeader, data *CapUserData) (err error)
  1815  //sysnb	Capset(hdr *CapUserHeader, data *CapUserData) (err error)
  1816  //sys	Chdir(path string) (err error)
  1817  //sys	Chroot(path string) (err error)
  1818  //sys	ClockAdjtime(clockid int32, buf *Timex) (state int, err error)
  1819  //sys	ClockGetres(clockid int32, res *Timespec) (err error)
  1820  //sys	ClockGettime(clockid int32, time *Timespec) (err error)
  1821  //sys	ClockNanosleep(clockid int32, flags int, request *Timespec, remain *Timespec) (err error)
  1822  //sys	Close(fd int) (err error)
  1823  //sys	CloseRange(first uint, last uint, flags uint) (err error)
  1824  //sys	CopyFileRange(rfd int, roff *int64, wfd int, woff *int64, len int, flags int) (n int, err error)
  1825  //sys	DeleteModule(name string, flags int) (err error)
  1826  //sys	Dup(oldfd int) (fd int, err error)
  1827  
  1828  func Dup2(oldfd, newfd int) error {
  1829  	return Dup3(oldfd, newfd, 0)
  1830  }
  1831  
  1832  //sys	Dup3(oldfd int, newfd int, flags int) (err error)
  1833  //sysnb	EpollCreate1(flag int) (fd int, err error)
  1834  //sysnb	EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error)
  1835  //sys	Eventfd(initval uint, flags int) (fd int, err error) = SYS_EVENTFD2
  1836  //sys	Exit(code int) = SYS_EXIT_GROUP
  1837  //sys	Fallocate(fd int, mode uint32, off int64, len int64) (err error)
  1838  //sys	Fchdir(fd int) (err error)
  1839  //sys	Fchmod(fd int, mode uint32) (err error)
  1840  //sys	Fchownat(dirfd int, path string, uid int, gid int, flags int) (err error)
  1841  //sys	Fdatasync(fd int) (err error)
  1842  //sys	Fgetxattr(fd int, attr string, dest []byte) (sz int, err error)
  1843  //sys	FinitModule(fd int, params string, flags int) (err error)
  1844  //sys	Flistxattr(fd int, dest []byte) (sz int, err error)
  1845  //sys	Flock(fd int, how int) (err error)
  1846  //sys	Fremovexattr(fd int, attr string) (err error)
  1847  //sys	Fsetxattr(fd int, attr string, dest []byte, flags int) (err error)
  1848  //sys	Fsync(fd int) (err error)
  1849  //sys	Fsmount(fd int, flags int, mountAttrs int) (fsfd int, err error)
  1850  //sys	Fsopen(fsName string, flags int) (fd int, err error)
  1851  //sys	Fspick(dirfd int, pathName string, flags int) (fd int, err error)
  1852  //sys	Getdents(fd int, buf []byte) (n int, err error) = SYS_GETDENTS64
  1853  //sysnb	Getpgid(pid int) (pgid int, err error)
  1854  
  1855  func Getpgrp() (pid int) {
  1856  	pid, _ = Getpgid(0)
  1857  	return
  1858  }
  1859  
  1860  //sysnb	Getpid() (pid int)
  1861  //sysnb	Getppid() (ppid int)
  1862  //sys	Getpriority(which int, who int) (prio int, err error)
  1863  //sys	Getrandom(buf []byte, flags int) (n int, err error)
  1864  //sysnb	Getrusage(who int, rusage *Rusage) (err error)
  1865  //sysnb	Getsid(pid int) (sid int, err error)
  1866  //sysnb	Gettid() (tid int)
  1867  //sys	Getxattr(path string, attr string, dest []byte) (sz int, err error)
  1868  //sys	InitModule(moduleImage []byte, params string) (err error)
  1869  //sys	InotifyAddWatch(fd int, pathname string, mask uint32) (watchdesc int, err error)
  1870  //sysnb	InotifyInit1(flags int) (fd int, err error)
  1871  //sysnb	InotifyRmWatch(fd int, watchdesc uint32) (success int, err error)
  1872  //sysnb	Kill(pid int, sig syscall.Signal) (err error)
  1873  //sys	Klogctl(typ int, buf []byte) (n int, err error) = SYS_SYSLOG
  1874  //sys	Lgetxattr(path string, attr string, dest []byte) (sz int, err error)
  1875  //sys	Listxattr(path string, dest []byte) (sz int, err error)
  1876  //sys	Llistxattr(path string, dest []byte) (sz int, err error)
  1877  //sys	Lremovexattr(path string, attr string) (err error)
  1878  //sys	Lsetxattr(path string, attr string, data []byte, flags int) (err error)
  1879  //sys	MemfdCreate(name string, flags int) (fd int, err error)
  1880  //sys	Mkdirat(dirfd int, path string, mode uint32) (err error)
  1881  //sys	Mknodat(dirfd int, path string, mode uint32, dev int) (err error)
  1882  //sys	MoveMount(fromDirfd int, fromPathName string, toDirfd int, toPathName string, flags int) (err error)
  1883  //sys	Nanosleep(time *Timespec, leftover *Timespec) (err error)
  1884  //sys	OpenTree(dfd int, fileName string, flags uint) (r int, err error)
  1885  //sys	PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error)
  1886  //sys	PivotRoot(newroot string, putold string) (err error) = SYS_PIVOT_ROOT
  1887  //sys	Prctl(option int, arg2 uintptr, arg3 uintptr, arg4 uintptr, arg5 uintptr) (err error)
  1888  //sys	pselect6(nfd int, r *FdSet, w *FdSet, e *FdSet, timeout *Timespec, sigmask *sigset_argpack) (n int, err error)
  1889  //sys	read(fd int, p []byte) (n int, err error)
  1890  //sys	Removexattr(path string, attr string) (err error)
  1891  //sys	Renameat2(olddirfd int, oldpath string, newdirfd int, newpath string, flags uint) (err error)
  1892  //sys	RequestKey(keyType string, description string, callback string, destRingid int) (id int, err error)
  1893  //sys	Setdomainname(p []byte) (err error)
  1894  //sys	Sethostname(p []byte) (err error)
  1895  //sysnb	Setpgid(pid int, pgid int) (err error)
  1896  //sysnb	Setsid() (pid int, err error)
  1897  //sysnb	Settimeofday(tv *Timeval) (err error)
  1898  //sys	Setns(fd int, nstype int) (err error)
  1899  
// syscall_prlimit has no body here; the go:linkname directive below binds
// it to the unexported syscall.prlimit. Prlimit calls it.
//
//go:linkname syscall_prlimit syscall.prlimit
func syscall_prlimit(pid, resource int, newlimit, old *syscall.Rlimit) error
  1902  
  1903  func Prlimit(pid, resource int, newlimit, old *Rlimit) error {
  1904  	// Just call the syscall version, because as of Go 1.21
  1905  	// it will affect starting a new process.
  1906  	return syscall_prlimit(pid, resource, (*syscall.Rlimit)(newlimit), (*syscall.Rlimit)(old))
  1907  }
  1908  
  1909  // PrctlRetInt performs a prctl operation specified by option and further
  1910  // optional arguments arg2 through arg5 depending on option. It returns a
  1911  // non-negative integer that is returned by the prctl syscall.
  1912  func PrctlRetInt(option int, arg2 uintptr, arg3 uintptr, arg4 uintptr, arg5 uintptr) (int, error) {
  1913  	ret, _, err := Syscall6(SYS_PRCTL, uintptr(option), uintptr(arg2), uintptr(arg3), uintptr(arg4), uintptr(arg5), 0)
  1914  	if err != 0 {
  1915  		return 0, err
  1916  	}
  1917  	return int(ret), nil
  1918  }
  1919  
  1920  func Setuid(uid int) (err error) {
  1921  	return syscall.Setuid(uid)
  1922  }
  1923  
  1924  func Setgid(gid int) (err error) {
  1925  	return syscall.Setgid(gid)
  1926  }
  1927  
  1928  func Setreuid(ruid, euid int) (err error) {
  1929  	return syscall.Setreuid(ruid, euid)
  1930  }
  1931  
  1932  func Setregid(rgid, egid int) (err error) {
  1933  	return syscall.Setregid(rgid, egid)
  1934  }
  1935  
  1936  func Setresuid(ruid, euid, suid int) (err error) {
  1937  	return syscall.Setresuid(ruid, euid, suid)
  1938  }
  1939  
  1940  func Setresgid(rgid, egid, sgid int) (err error) {
  1941  	return syscall.Setresgid(rgid, egid, sgid)
  1942  }
  1943  
  1944  // SetfsgidRetGid sets fsgid for current thread and returns previous fsgid set.
  1945  // setfsgid(2) will return a non-nil error only if its caller lacks CAP_SETUID capability.
  1946  // If the call fails due to other reasons, current fsgid will be returned.
  1947  func SetfsgidRetGid(gid int) (int, error) {
  1948  	return setfsgid(gid)
  1949  }
  1950  
  1951  // SetfsuidRetUid sets fsuid for current thread and returns previous fsuid set.
  1952  // setfsgid(2) will return a non-nil error only if its caller lacks CAP_SETUID capability
  1953  // If the call fails due to other reasons, current fsuid will be returned.
  1954  func SetfsuidRetUid(uid int) (int, error) {
  1955  	return setfsuid(uid)
  1956  }
  1957  
  1958  func Setfsgid(gid int) error {
  1959  	_, err := setfsgid(gid)
  1960  	return err
  1961  }
  1962  
  1963  func Setfsuid(uid int) error {
  1964  	_, err := setfsuid(uid)
  1965  	return err
  1966  }
  1967  
  1968  func Signalfd(fd int, sigmask *Sigset_t, flags int) (newfd int, err error) {
  1969  	return signalfd(fd, sigmask, _C__NSIG/8, flags)
  1970  }
  1971  
  1972  //sys	Setpriority(which int, who int, prio int) (err error)
  1973  //sys	Setxattr(path string, attr string, data []byte, flags int) (err error)
  1974  //sys	signalfd(fd int, sigmask *Sigset_t, maskSize uintptr, flags int) (newfd int, err error) = SYS_SIGNALFD4
  1975  //sys	Statx(dirfd int, path string, flags int, mask int, stat *Statx_t) (err error)
  1976  //sys	Sync()
  1977  //sys	Syncfs(fd int) (err error)
  1978  //sysnb	Sysinfo(info *Sysinfo_t) (err error)
  1979  //sys	Tee(rfd int, wfd int, len int, flags int) (n int64, err error)
  1980  //sysnb	TimerfdCreate(clockid int, flags int) (fd int, err error)
  1981  //sysnb	TimerfdGettime(fd int, currValue *ItimerSpec) (err error)
  1982  //sysnb	TimerfdSettime(fd int, flags int, newValue *ItimerSpec, oldValue *ItimerSpec) (err error)
  1983  //sysnb	Tgkill(tgid int, tid int, sig syscall.Signal) (err error)
  1984  //sysnb	Times(tms *Tms) (ticks uintptr, err error)
  1985  //sysnb	Umask(mask int) (oldmask int)
  1986  //sysnb	Uname(buf *Utsname) (err error)
  1987  //sys	Unmount(target string, flags int) (err error) = SYS_UMOUNT2
  1988  //sys	Unshare(flags int) (err error)
  1989  //sys	write(fd int, p []byte) (n int, err error)
  1990  //sys	exitThread(code int) (err error) = SYS_EXIT
  1991  //sys	readv(fd int, iovs []Iovec) (n int, err error) = SYS_READV
  1992  //sys	writev(fd int, iovs []Iovec) (n int, err error) = SYS_WRITEV
  1993  //sys	preadv(fd int, iovs []Iovec, offs_l uintptr, offs_h uintptr) (n int, err error) = SYS_PREADV
  1994  //sys	pwritev(fd int, iovs []Iovec, offs_l uintptr, offs_h uintptr) (n int, err error) = SYS_PWRITEV
  1995  //sys	preadv2(fd int, iovs []Iovec, offs_l uintptr, offs_h uintptr, flags int) (n int, err error) = SYS_PREADV2
  1996  //sys	pwritev2(fd int, iovs []Iovec, offs_l uintptr, offs_h uintptr, flags int) (n int, err error) = SYS_PWRITEV2
  1997  
// minIovec is the size of the small initial allocation used by
// Readv, Writev, etc.
//
// This small allocation gets stack allocated, which lets the
// common use case of len(iovs) <= minIovec avoid more expensive
// heap allocations.
const minIovec = 8
  2005  
  2006  // appendBytes converts bs to Iovecs and appends them to vecs.
  2007  func appendBytes(vecs []Iovec, bs [][]byte) []Iovec {
  2008  	for _, b := range bs {
  2009  		var v Iovec
  2010  		v.SetLen(len(b))
  2011  		if len(b) > 0 {
  2012  			v.Base = &b[0]
  2013  		} else {
  2014  			v.Base = (*byte)(unsafe.Pointer(&_zero))
  2015  		}
  2016  		vecs = append(vecs, v)
  2017  	}
  2018  	return vecs
  2019  }
  2020  
  2021  // offs2lohi splits offs into its low and high order bits.
  2022  func offs2lohi(offs int64) (lo, hi uintptr) {
  2023  	const longBits = SizeofLong * 8
  2024  	return uintptr(offs), uintptr(uint64(offs) >> (longBits - 1) >> 1) // two shifts to avoid false positive in vet
  2025  }
  2026  
  2027  func Readv(fd int, iovs [][]byte) (n int, err error) {
  2028  	iovecs := make([]Iovec, 0, minIovec)
  2029  	iovecs = appendBytes(iovecs, iovs)
  2030  	n, err = readv(fd, iovecs)
  2031  	readvRacedetect(iovecs, n, err)
  2032  	return n, err
  2033  }
  2034  
  2035  func Preadv(fd int, iovs [][]byte, offset int64) (n int, err error) {
  2036  	iovecs := make([]Iovec, 0, minIovec)
  2037  	iovecs = appendBytes(iovecs, iovs)
  2038  	lo, hi := offs2lohi(offset)
  2039  	n, err = preadv(fd, iovecs, lo, hi)
  2040  	readvRacedetect(iovecs, n, err)
  2041  	return n, err
  2042  }
  2043  
  2044  func Preadv2(fd int, iovs [][]byte, offset int64, flags int) (n int, err error) {
  2045  	iovecs := make([]Iovec, 0, minIovec)
  2046  	iovecs = appendBytes(iovecs, iovs)
  2047  	lo, hi := offs2lohi(offset)
  2048  	n, err = preadv2(fd, iovecs, lo, hi, flags)
  2049  	readvRacedetect(iovecs, n, err)
  2050  	return n, err
  2051  }
  2052  
  2053  func readvRacedetect(iovecs []Iovec, n int, err error) {
  2054  	if !raceenabled {
  2055  		return
  2056  	}
  2057  	for i := 0; n > 0 && i < len(iovecs); i++ {
  2058  		m := int(iovecs[i].Len)
  2059  		if m > n {
  2060  			m = n
  2061  		}
  2062  		n -= m
  2063  		if m > 0 {
  2064  			raceWriteRange(unsafe.Pointer(iovecs[i].Base), m)
  2065  		}
  2066  	}
  2067  	if err == nil {
  2068  		raceAcquire(unsafe.Pointer(&ioSync))
  2069  	}
  2070  }
  2071  
  2072  func Writev(fd int, iovs [][]byte) (n int, err error) {
  2073  	iovecs := make([]Iovec, 0, minIovec)
  2074  	iovecs = appendBytes(iovecs, iovs)
  2075  	if raceenabled {
  2076  		raceReleaseMerge(unsafe.Pointer(&ioSync))
  2077  	}
  2078  	n, err = writev(fd, iovecs)
  2079  	writevRacedetect(iovecs, n)
  2080  	return n, err
  2081  }
  2082  
  2083  func Pwritev(fd int, iovs [][]byte, offset int64) (n int, err error) {
  2084  	iovecs := make([]Iovec, 0, minIovec)
  2085  	iovecs = appendBytes(iovecs, iovs)
  2086  	if raceenabled {
  2087  		raceReleaseMerge(unsafe.Pointer(&ioSync))
  2088  	}
  2089  	lo, hi := offs2lohi(offset)
  2090  	n, err = pwritev(fd, iovecs, lo, hi)
  2091  	writevRacedetect(iovecs, n)
  2092  	return n, err
  2093  }
  2094  
  2095  func Pwritev2(fd int, iovs [][]byte, offset int64, flags int) (n int, err error) {
  2096  	iovecs := make([]Iovec, 0, minIovec)
  2097  	iovecs = appendBytes(iovecs, iovs)
  2098  	if raceenabled {
  2099  		raceReleaseMerge(unsafe.Pointer(&ioSync))
  2100  	}
  2101  	lo, hi := offs2lohi(offset)
  2102  	n, err = pwritev2(fd, iovecs, lo, hi, flags)
  2103  	writevRacedetect(iovecs, n)
  2104  	return n, err
  2105  }
  2106  
  2107  func writevRacedetect(iovecs []Iovec, n int) {
  2108  	if !raceenabled {
  2109  		return
  2110  	}
  2111  	for i := 0; n > 0 && i < len(iovecs); i++ {
  2112  		m := int(iovecs[i].Len)
  2113  		if m > n {
  2114  			m = n
  2115  		}
  2116  		n -= m
  2117  		if m > 0 {
  2118  			raceReadRange(unsafe.Pointer(iovecs[i].Base), m)
  2119  		}
  2120  	}
  2121  }
  2122  
  2123  // mmap varies by architecture; see syscall_linux_*.go.
  2124  //sys	munmap(addr uintptr, length uintptr) (err error)
  2125  //sys	mremap(oldaddr uintptr, oldlength uintptr, newlength uintptr, flags int, newaddr uintptr) (xaddr uintptr, err error)
  2126  //sys	Madvise(b []byte, advice int) (err error)
  2127  //sys	Mprotect(b []byte, prot int) (err error)
  2128  //sys	Mlock(b []byte) (err error)
  2129  //sys	Mlockall(flags int) (err error)
  2130  //sys	Msync(b []byte, flags int) (err error)
  2131  //sys	Munlock(b []byte) (err error)
  2132  //sys	Munlockall() (err error)
  2133  
// Unexported aliases for the MREMAP_* flag constants.
// NOTE(review): presumably consumed by shared mremap code outside this
// chunk — confirm against the callers.
const (
	mremapFixed     = MREMAP_FIXED
	mremapDontunmap = MREMAP_DONTUNMAP
	mremapMaymove   = MREMAP_MAYMOVE
)
  2139  
  2140  // Vmsplice splices user pages from a slice of Iovecs into a pipe specified by fd,
  2141  // using the specified flags.
  2142  func Vmsplice(fd int, iovs []Iovec, flags int) (int, error) {
  2143  	var p unsafe.Pointer
  2144  	if len(iovs) > 0 {
  2145  		p = unsafe.Pointer(&iovs[0])
  2146  	}
  2147  
  2148  	n, _, errno := Syscall6(SYS_VMSPLICE, uintptr(fd), uintptr(p), uintptr(len(iovs)), uintptr(flags), 0, 0)
  2149  	if errno != 0 {
  2150  		return 0, syscall.Errno(errno)
  2151  	}
  2152  
  2153  	return int(n), nil
  2154  }
  2155  
  2156  func isGroupMember(gid int) bool {
  2157  	groups, err := Getgroups()
  2158  	if err != nil {
  2159  		return false
  2160  	}
  2161  
  2162  	for _, g := range groups {
  2163  		if g == gid {
  2164  			return true
  2165  		}
  2166  	}
  2167  	return false
  2168  }
  2169  
  2170  func isCapDacOverrideSet() bool {
  2171  	hdr := CapUserHeader{Version: LINUX_CAPABILITY_VERSION_3}
  2172  	data := [2]CapUserData{}
  2173  	err := Capget(&hdr, &data[0])
  2174  
  2175  	return err == nil && data[0].Effective&(1<<CAP_DAC_OVERRIDE) != 0
  2176  }
  2177  
  2178  //sys	faccessat(dirfd int, path string, mode uint32) (err error)
  2179  //sys	Faccessat2(dirfd int, path string, mode uint32, flags int) (err error)
  2180  
  2181  func Faccessat(dirfd int, path string, mode uint32, flags int) (err error) {
  2182  	if flags == 0 {
  2183  		return faccessat(dirfd, path, mode)
  2184  	}
  2185  
  2186  	if err := Faccessat2(dirfd, path, mode, flags); err != ENOSYS && err != EPERM {
  2187  		return err
  2188  	}
  2189  
  2190  	// The Linux kernel faccessat system call does not take any flags.
  2191  	// The glibc faccessat implements the flags itself; see
  2192  	// https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/faccessat.c;hb=HEAD
  2193  	// Because people naturally expect syscall.Faccessat to act
  2194  	// like C faccessat, we do the same.
  2195  
  2196  	if flags & ^(AT_SYMLINK_NOFOLLOW|AT_EACCESS) != 0 {
  2197  		return EINVAL
  2198  	}
  2199  
  2200  	var st Stat_t
  2201  	if err := Fstatat(dirfd, path, &st, flags&AT_SYMLINK_NOFOLLOW); err != nil {
  2202  		return err
  2203  	}
  2204  
  2205  	mode &= 7
  2206  	if mode == 0 {
  2207  		return nil
  2208  	}
  2209  
  2210  	var uid int
  2211  	if flags&AT_EACCESS != 0 {
  2212  		uid = Geteuid()
  2213  		if uid != 0 && isCapDacOverrideSet() {
  2214  			// If CAP_DAC_OVERRIDE is set, file access check is
  2215  			// done by the kernel in the same way as for root
  2216  			// (see generic_permission() in the Linux sources).
  2217  			uid = 0
  2218  		}
  2219  	} else {
  2220  		uid = Getuid()
  2221  	}
  2222  
  2223  	if uid == 0 {
  2224  		if mode&1 == 0 {
  2225  			// Root can read and write any file.
  2226  			return nil
  2227  		}
  2228  		if st.Mode&0111 != 0 {
  2229  			// Root can execute any file that anybody can execute.
  2230  			return nil
  2231  		}
  2232  		return EACCES
  2233  	}
  2234  
  2235  	var fmode uint32
  2236  	if uint32(uid) == st.Uid {
  2237  		fmode = (st.Mode >> 6) & 7
  2238  	} else {
  2239  		var gid int
  2240  		if flags&AT_EACCESS != 0 {
  2241  			gid = Getegid()
  2242  		} else {
  2243  			gid = Getgid()
  2244  		}
  2245  
  2246  		if uint32(gid) == st.Gid || isGroupMember(int(st.Gid)) {
  2247  			fmode = (st.Mode >> 3) & 7
  2248  		} else {
  2249  			fmode = st.Mode & 7
  2250  		}
  2251  	}
  2252  
  2253  	if fmode&mode == mode {
  2254  		return nil
  2255  	}
  2256  
  2257  	return EACCES
  2258  }
  2259  
  2260  //sys	nameToHandleAt(dirFD int, pathname string, fh *fileHandle, mountID *_C_int, flags int) (err error) = SYS_NAME_TO_HANDLE_AT
  2261  //sys	openByHandleAt(mountFD int, fh *fileHandle, flags int) (fd int, err error) = SYS_OPEN_BY_HANDLE_AT
  2262  
  2263  // fileHandle is the argument to nameToHandleAt and openByHandleAt. We
  2264  // originally tried to generate it via unix/linux/types.go with "type
  2265  // fileHandle C.struct_file_handle" but that generated empty structs
  2266  // for mips64 and mips64le. Instead, hard code it for now (it's the
  2267  // same everywhere else) until the mips64 generator issue is fixed.
  2268  type fileHandle struct {
  2269  	Bytes uint32
  2270  	Type  int32
  2271  }
  2272  
  2273  // FileHandle represents the C struct file_handle used by
  2274  // name_to_handle_at (see NameToHandleAt) and open_by_handle_at (see
  2275  // OpenByHandleAt).
  2276  type FileHandle struct {
  2277  	*fileHandle
  2278  }
  2279  
  2280  // NewFileHandle constructs a FileHandle.
  2281  func NewFileHandle(handleType int32, handle []byte) FileHandle {
  2282  	const hdrSize = unsafe.Sizeof(fileHandle{})
  2283  	buf := make([]byte, hdrSize+uintptr(len(handle)))
  2284  	copy(buf[hdrSize:], handle)
  2285  	fh := (*fileHandle)(unsafe.Pointer(&buf[0]))
  2286  	fh.Type = handleType
  2287  	fh.Bytes = uint32(len(handle))
  2288  	return FileHandle{fh}
  2289  }
  2290  
  2291  func (fh *FileHandle) Size() int   { return int(fh.fileHandle.Bytes) }
  2292  func (fh *FileHandle) Type() int32 { return fh.fileHandle.Type }
  2293  func (fh *FileHandle) Bytes() []byte {
  2294  	n := fh.Size()
  2295  	if n == 0 {
  2296  		return nil
  2297  	}
  2298  	return unsafe.Slice((*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(&fh.fileHandle.Type))+4)), n)
  2299  }
  2300  
  2301  // NameToHandleAt wraps the name_to_handle_at system call; it obtains
  2302  // a handle for a path name.
  2303  func NameToHandleAt(dirfd int, path string, flags int) (handle FileHandle, mountID int, err error) {
  2304  	var mid _C_int
  2305  	// Try first with a small buffer, assuming the handle will
  2306  	// only be 32 bytes.
  2307  	size := uint32(32 + unsafe.Sizeof(fileHandle{}))
  2308  	didResize := false
  2309  	for {
  2310  		buf := make([]byte, size)
  2311  		fh := (*fileHandle)(unsafe.Pointer(&buf[0]))
  2312  		fh.Bytes = size - uint32(unsafe.Sizeof(fileHandle{}))
  2313  		err = nameToHandleAt(dirfd, path, fh, &mid, flags)
  2314  		if err == EOVERFLOW {
  2315  			if didResize {
  2316  				// We shouldn't need to resize more than once
  2317  				return
  2318  			}
  2319  			didResize = true
  2320  			size = fh.Bytes + uint32(unsafe.Sizeof(fileHandle{}))
  2321  			continue
  2322  		}
  2323  		if err != nil {
  2324  			return
  2325  		}
  2326  		return FileHandle{fh}, int(mid), nil
  2327  	}
  2328  }
  2329  
  2330  // OpenByHandleAt wraps the open_by_handle_at system call; it opens a
  2331  // file via a handle as previously returned by NameToHandleAt.
  2332  func OpenByHandleAt(mountFD int, handle FileHandle, flags int) (fd int, err error) {
  2333  	return openByHandleAt(mountFD, handle.fileHandle, flags)
  2334  }
  2335  
  2336  // Klogset wraps the sys_syslog system call; it sets console_loglevel to
  2337  // the value specified by arg and passes a dummy pointer to bufp.
  2338  func Klogset(typ int, arg int) (err error) {
  2339  	var p unsafe.Pointer
  2340  	_, _, errno := Syscall(SYS_SYSLOG, uintptr(typ), uintptr(p), uintptr(arg))
  2341  	if errno != 0 {
  2342  		return errnoErr(errno)
  2343  	}
  2344  	return nil
  2345  }
  2346  
// RemoteIovec is Iovec with the pointer replaced with an integer.
// It is used for ProcessVMReadv and ProcessVMWritev, where the pointer
// refers to a location in a different process' address space, which
// would confuse the Go garbage collector.
type RemoteIovec struct {
	Base uintptr // address in the other process, held as an integer rather than a Go pointer
	Len  int     // presumably the length in bytes of the region at Base, mirroring Iovec.Len — confirm
}
  2355  
  2356  //sys	ProcessVMReadv(pid int, localIov []Iovec, remoteIov []RemoteIovec, flags uint) (n int, err error) = SYS_PROCESS_VM_READV
  2357  //sys	ProcessVMWritev(pid int, localIov []Iovec, remoteIov []RemoteIovec, flags uint) (n int, err error) = SYS_PROCESS_VM_WRITEV
  2358  
  2359  //sys	PidfdOpen(pid int, flags int) (fd int, err error) = SYS_PIDFD_OPEN
  2360  //sys	PidfdGetfd(pidfd int, targetfd int, flags int) (fd int, err error) = SYS_PIDFD_GETFD
  2361  //sys	PidfdSendSignal(pidfd int, sig Signal, info *Siginfo, flags int) (err error) = SYS_PIDFD_SEND_SIGNAL
  2362  
  2363  //sys	shmat(id int, addr uintptr, flag int) (ret uintptr, err error)
  2364  //sys	shmctl(id int, cmd int, buf *SysvShmDesc) (result int, err error)
  2365  //sys	shmdt(addr uintptr) (err error)
  2366  //sys	shmget(key int, size int, flag int) (id int, err error)
  2367  
  2368  //sys	getitimer(which int, currValue *Itimerval) (err error)
  2369  //sys	setitimer(which int, newValue *Itimerval, oldValue *Itimerval) (err error)
  2370  
  2371  // MakeItimerval creates an Itimerval from interval and value durations.
  2372  func MakeItimerval(interval, value time.Duration) Itimerval {
  2373  	return Itimerval{
  2374  		Interval: NsecToTimeval(interval.Nanoseconds()),
  2375  		Value:    NsecToTimeval(value.Nanoseconds()),
  2376  	}
  2377  }
  2378  
// ItimerWhich is a value which may be passed to the which parameter for
// Getitimer and Setitimer.
type ItimerWhich int
  2382  
// Possible which values for Getitimer and Setitimer.
const (
	ItimerReal    ItimerWhich = ITIMER_REAL    // typed form of ITIMER_REAL
	ItimerVirtual ItimerWhich = ITIMER_VIRTUAL // typed form of ITIMER_VIRTUAL
	ItimerProf    ItimerWhich = ITIMER_PROF    // typed form of ITIMER_PROF
)
  2389  
  2390  // Getitimer wraps getitimer(2) to return the current value of the timer
  2391  // specified by which.
  2392  func Getitimer(which ItimerWhich) (Itimerval, error) {
  2393  	var it Itimerval
  2394  	if err := getitimer(int(which), &it); err != nil {
  2395  		return Itimerval{}, err
  2396  	}
  2397  
  2398  	return it, nil
  2399  }
  2400  
  2401  // Setitimer wraps setitimer(2) to arm or disarm the timer specified by which.
  2402  // It returns the previous value of the timer.
  2403  //
  2404  // If the Itimerval argument is the zero value, the timer will be disarmed.
  2405  func Setitimer(which ItimerWhich, it Itimerval) (Itimerval, error) {
  2406  	var prev Itimerval
  2407  	if err := setitimer(int(which), &it, &prev); err != nil {
  2408  		return Itimerval{}, err
  2409  	}
  2410  
  2411  	return prev, nil
  2412  }
  2413  
  2414  //sysnb	rtSigprocmask(how int, set *Sigset_t, oldset *Sigset_t, sigsetsize uintptr) (err error) = SYS_RT_SIGPROCMASK
  2415  
  2416  func PthreadSigmask(how int, set, oldset *Sigset_t) error {
  2417  	if oldset != nil {
  2418  		// Explicitly clear in case Sigset_t is larger than _C__NSIG.
  2419  		*oldset = Sigset_t{}
  2420  	}
  2421  	return rtSigprocmask(how, set, oldset, _C__NSIG/8)
  2422  }
  2423  
  2424  //sysnb	getresuid(ruid *_C_int, euid *_C_int, suid *_C_int)
  2425  //sysnb	getresgid(rgid *_C_int, egid *_C_int, sgid *_C_int)
  2426  
  2427  func Getresuid() (ruid, euid, suid int) {
  2428  	var r, e, s _C_int
  2429  	getresuid(&r, &e, &s)
  2430  	return int(r), int(e), int(s)
  2431  }
  2432  
  2433  func Getresgid() (rgid, egid, sgid int) {
  2434  	var r, e, s _C_int
  2435  	getresgid(&r, &e, &s)
  2436  	return int(r), int(e), int(s)
  2437  }
  2438  
  2439  // Pselect is a wrapper around the Linux pselect6 system call.
  2440  // This version does not modify the timeout argument.
  2441  func Pselect(nfd int, r *FdSet, w *FdSet, e *FdSet, timeout *Timespec, sigmask *Sigset_t) (n int, err error) {
  2442  	// Per https://man7.org/linux/man-pages/man2/select.2.html#NOTES,
  2443  	// The Linux pselect6() system call modifies its timeout argument.
  2444  	// [Not modifying the argument] is the behavior required by POSIX.1-2001.
  2445  	var mutableTimeout *Timespec
  2446  	if timeout != nil {
  2447  		mutableTimeout = new(Timespec)
  2448  		*mutableTimeout = *timeout
  2449  	}
  2450  
  2451  	// The final argument of the pselect6() system call is not a
  2452  	// sigset_t * pointer, but is instead a structure
  2453  	var kernelMask *sigset_argpack
  2454  	if sigmask != nil {
  2455  		wordBits := 32 << (^uintptr(0) >> 63) // see math.intSize
  2456  
  2457  		// A sigset stores one bit per signal,
  2458  		// offset by 1 (because signal 0 does not exist).
  2459  		// So the number of words needed is ⌈__C_NSIG - 1 / wordBits⌉.
  2460  		sigsetWords := (_C__NSIG - 1 + wordBits - 1) / (wordBits)
  2461  
  2462  		sigsetBytes := uintptr(sigsetWords * (wordBits / 8))
  2463  		kernelMask = &sigset_argpack{
  2464  			ss:    sigmask,
  2465  			ssLen: sigsetBytes,
  2466  		}
  2467  	}
  2468  
  2469  	return pselect6(nfd, r, w, e, mutableTimeout, kernelMask)
  2470  }
  2471  
  2472  //sys	schedSetattr(pid int, attr *SchedAttr, flags uint) (err error)
  2473  //sys	schedGetattr(pid int, attr *SchedAttr, size uint, flags uint) (err error)
  2474  
  2475  // SchedSetAttr is a wrapper for sched_setattr(2) syscall.
  2476  // https://man7.org/linux/man-pages/man2/sched_setattr.2.html
  2477  func SchedSetAttr(pid int, attr *SchedAttr, flags uint) error {
  2478  	if attr == nil {
  2479  		return EINVAL
  2480  	}
  2481  	attr.Size = SizeofSchedAttr
  2482  	return schedSetattr(pid, attr, flags)
  2483  }
  2484  
  2485  // SchedGetAttr is a wrapper for sched_getattr(2) syscall.
  2486  // https://man7.org/linux/man-pages/man2/sched_getattr.2.html
  2487  func SchedGetAttr(pid int, flags uint) (*SchedAttr, error) {
  2488  	attr := &SchedAttr{}
  2489  	if err := schedGetattr(pid, attr, SizeofSchedAttr, flags); err != nil {
  2490  		return nil, err
  2491  	}
  2492  	return attr, nil
  2493  }
  2494  
  2495  //sys	Cachestat(fd uint, crange *CachestatRange, cstat *Cachestat_t, flags uint) (err error)
  2496  

View as plain text