#!/usr/bin/env perl
# mdf --- df wrapper for better, portable display
# Author: Noah Friedman <friedman@splode.com>
# Created: 2001-01-11
# Public domain.

# $Id: mdf,v 1.23 2018/10/22 23:32:23 friedman Exp $

use strict;
use warnings qw(all);
no  warnings 'qw';  # don't complain about commas in qw lists.

use Getopt::Long;
use Pod::Usage;
use POSIX;
use Symbol;
use bignum;

(my $progname = $0) =~ s|.*/||;

my $version = "2.0.2";

# DF_* = fields of arrays created by get_df
# MI_* = fields of arrays created by parse_mountinfo
use constant
  { DF_FS    => 0,     MI_ID  => 0,
    DF_TOTAL => 1,     MI_PID => 1,
    DF_USED  => 2,     MI_DEV => 2,
    DF_AVAIL => 3,     MI_RT  => 3,
    DF_PCT   => 4,     MI_MPT => 4,
    DF_MPT   => 5,     MI_TAG => 6,
  };

my %mult
  = ( b      => 512,          human => 1024,
      blk    => 512,          si    => 1000,
      blks   => 512,
      blocks => 512,

      k      => 1024,         t   => 1024 ** 4,    z   => 1024 ** 7,
      kib    => 1024,         tib => 1024 ** 4,    zib => 1024 ** 7,
      kb     => 1000,         tb  => 1000 ** 4,    zb  => 1000 ** 7,

      m      => 1024 ** 2,    p   => 1024 ** 5,    y   => 1024 ** 8,
      mib    => 1024 ** 2,    pib => 1024 ** 5,    yib => 1024 ** 8,
      mb     => 1000 ** 2,    pb  => 1000 ** 5,    yb  => 1000 ** 8,

      g      => 1024 ** 3,    e   => 1024 ** 6,
      gib    => 1024 ** 3,    eib => 1024 ** 6,
      gb     => 1000 ** 3,    eb  => 1000 ** 6,
    );

my @unit = (qw(B K M G T P E Z Y));

my %sortkey =
  ( dev        => 0,         size       => 1,
    device     => q(dev),    used       => 2,
    '/dev'     => q(dev),    avail      => 3,

    pct        => 4,         mnt        => 5,
    percent    => q(pct),    mount      => q(mnt),
    '%'        => q(pct),    mountpoint => q(mnt),
    'use%'     => q(pct),    fs         => q(mnt),
    'used%'    => q(pct),    filesystem => q(mnt),
    'cap'      => q(pct),
    'capacity' => q(pct),
  );

my @rjustify = qw(0 1 1 1 1 0);

our %opt = ( blocksize => 'human',
             sort      => [],
           );

my ($u_system, $u_nodename, $u_release, $u_version, $u_machine) = uname ();

sub dnsdomain
{
  local $_ = $u_nodename;

  unless (/\./)
    {
      use Socket;
      my $addr  = gethostbyname ($_);
      return unless defined $addr;
      $_ = gethostbyaddr ($addr, AF_INET);
    }
  return unless defined $_ && /\./;

  s/^[^.]+\.//;
  return $_;
}

sub xcat
{
  open (my $fh, $_[0]) || die "$!";
  local $/ = undef;
  return <$fh>;
}

sub next_in_path
{
  my $this = shift || $0;
  my @st_me = stat $0;
  (my $prog = $this) =~ s=.*/==;
  (my $dir  = $this) =~ s=/[^/]+$==;

  my @p = split (/:/, $ENV{PATH});
  if ($prog ne $dir)
    {
      # First search for instances of program later in path than this one.
      # If nothing is found, wrap around to the beginning of the path.
      # But never select this program file.
      my $re_dir = quotemeta ($dir);
      (my $afterpath = $ENV{PATH}) =~ s=(?:^|.*:)$re_dir(?::|$)==;
      unshift @p, split (/:/, $afterpath);
    }

  map { $_ = "." if $_ eq "";
        my $f = "$_/$prog";

        if (-f $f && -x _)
          {
            my @st = stat _;
            return $f unless $st[0] == $st_me[0] && $st[1] == $st_me[1];
          }
      } @p;

  die "$progname: cannot find an instance of $prog in PATH other than $this\n";
}

sub compact_devmapper
{
  local $_ = shift;
  return $_ unless m=^/dev/mapper/=;
  # Hyphens within volume groups or volume names are doubled under
  # /dev/mapper; convert the undoubled hyphens to / before undoubling.
  s=([^-])-([^-])=$1/$2=g;
  s=--=-=g;
  s=mapper/==;
  return $_;
}

# Return an array of array of fields
sub fieldarray { map { [split (/\s+/, $_)] } split (/[\r\n]+/, $_[0]) }

my %df_cmd = ( default => 'df',
               hpux    => 'bdf',
             );

my %df_arg = ( default => [qw(-a -k)],
               aix     => [qw(-k -P)],
               hpux    => [],
               linux   => [qw(--all --block-size=1)],
             );

my %df_per = ( darwin  => [qw(-t nodevfs,fdesc)],
               freebsd => [qw(-t nodevfs,fdescfs,linprocfs,linsysfs,mqueuefs,procfs,tmpfs)],
               linux   => [qw(-x tmpfs -x devtmpfs -x ramfs)],
               netbsd  => [qw(-t nokernfs,procfs,ptyfs)],
             );

sub get_df
{
  my $dfcmd  =   $df_cmd{$^O} || $df_cmd{default};
  my @dfargs = @{$df_arg{$^O} || $df_arg{default}};

  my $df = ($progname eq $dfcmd ? next_in_path ($0) : next_in_path ($dfcmd));
  push @dfargs, @{$df_per{$^O}} if $opt{persistent} && $df_per{$^O};
  push @dfargs, "-l" if $opt{local};
  push @dfargs, "-i" if $opt{inodes};

  my $data = qx{ $df @dfargs @_ };
  exit ($? >> 8) unless $data;

  my $dnsdomain = dnsdomain ();
  $data =~ s=\.$dnsdomain:/=:/=gmo if defined $dnsdomain;
  $data =~ s/\n\s+/ /g; # join continuation lines

  my $linuxp = ($^O eq 'linux');
  map { my @elt  =  split (/\s+/, $_, 6);
        $elt[0]  =  compact_devmapper ($elt[0]);
        for my $n (1..3)
          {
            $elt[$n]  =  0 if $elt[$n] eq '-';
            $elt[$n] *=  1024 unless $linuxp;
          }
        $elt[4]  =~ s/%//g;
        $elt[5]  = "" unless @elt > 5;
        \@elt
      } split (/[\r\n]+/, $data);
}

sub mountinfo
{
  my $minfo = $_[0] || "/proc/self/mountinfo";
  return unless -f $minfo;
  my @minfo = fieldarray (xcat ($minfo));

  # Unique-ifying field.  If tag field is `-', it means there are no
  # tags, so use the device name instead.
  my $uf = $minfo[0][MI_TAG] eq '-' ? MI_TAG + 2 : MI_TAG;

  # Further distinguish device by the first tag id, since e.g. nfs mounts
  # from the same server might happen to be separate exported mountpoints
  # on what is (local to the server) the same filesystem.
  map { $_->[MI_DEV] .= '-' . $_->[$uf] } @minfo;
  return @minfo;
}

# (0) mount ID:  unique identifier of the mount (may be reused after umount)
# (1) parent ID:  ID of parent (or of self for the top of the mount tree)
# (2) major:minor:  value of st_dev for files on filesystem
# (3) root:  root of the mount within the filesystem
# (4) mount point:  mount point relative to the process's root
# (5-) varies depending on kernel version
# (6) first tag, for recent-ish linux kernels; usually "shared:n".
sub parse_mountinfo
{
  my %root;
  my %stdev;
  map { if ($_->[MI_RT] eq "/")
          {
            my $dev = $_->[MI_DEV];
            my $mpt = $_->[MI_MPT];

            $root{$dev} = $mpt unless defined $root{$dev};

            $stdev{$dev} ||= [];
            push @{$stdev{$dev}}, $mpt;
          }
      } @_;

  # %notfirst is a list of mountpoints which are actually the same literal
  # mount appearing in multiple places, usually because they are atop
  # another bind mount.  This is typical for e.g. fuse mounts which might
  # appear on top of the original local fs mountpoint, and a bind mount of
  # a portion of that local fs somewhere else where the fuse mount is also
  # visible.  For example:
  #
  #          # mount /dev/sda1 /foo
  #          # mount -o bind /foo/bar /bar
  #          # sshfs rhost:/ /bar/rhost
  #
  #  Now "rhost" will appear as a mount on /foo/bar/rhost and /bar/rhost,
  #  but they are literally the same mount, not two separate mounts that
  #  just happen to specify the same origin.
  my %notfirst;
  while (my ($dev, $list) = each %stdev)
    {
      next unless @$list > 1;
      my $first = shift @$list;
      map { $notfirst{$_} = $first } @$list;
    }

  my %mdev;
  map { ($mdev{$_->[MI_MPT]} = $root{$_->[MI_DEV]} . $_->[MI_RT]) =~ s=^//=/=
          if defined $root{$_->[MI_DEV]} && $_->[MI_RT] ne "/";
      } @_;

  return (\%mdev, \%notfirst);
}

# Determines which mount entries are duplicates because they appear under a
# directory that is bind-mounted in multiple places.
sub duplicate_binds
{
  my $m = shift;
  my %mdev = %$m;

  my %dup;
  my %seen;
  map { if ($_->[MI_RT] ne "/")
          {
            my $key  = $_->[MI_DEV] . " " . $_->[MI_RT];
            my $this = $_->[MI_MPT];

            if ($seen{$key} && $this ne $seen{$key})
              {
                while ($this ne "")
                  {
                    if (exists $mdev{$this})
                      {
                        $dup{$this} = $_->[MI_RT];
                        last;
                      }
                    $this =~ s=/[^/]*$==;
                  }
              }
            else { $seen{$key} = $_->[MI_MPT] }
          }
      } @_;

  map { printf STDERR "dup: %s => %s\n", $_, $dup{$_} } sort keys %dup;

  return %dup;
}

sub round
{
  my ($n, $places, $roundup) = @_;
  my $shift = ($places ? 10**$places : ($n < 10 ? 10 : 1));
  $n *= $shift;

  my $int  = int ($n);
  my $frac = $n - $int;

  my $round;
  if    ($frac < .5)    { $round = $int     }
  elsif ($frac > .5)    { $round = $int + 1 }
  elsif ($int & 1 == 1) { $round = $int + 1 } # round odd up
  else                  { $round = $int     } # round even down

  return $round / $shift;
}

sub expand_unit
{
  local $_ = shift;

  /^(\d*)([a-z]*)$/i;
  my $n = $1 || 1;
  if ($2)
    {
      my $u = lc $2;
      die "$progname: $2: invalid unit\n" unless defined $mult{$u};

      if ($opt{si})
        {
          if ($opt{blocksize} eq 'human')
            { $u = 'si' }
          elsif ($mult{$u} != 512)
            {
              $u .= "b" unless $u =~ /b$/;
              $u =~ s/i//;
            }
        }
      return $n * $mult{$u};
    }
  return $n;
}

sub to_unit
{
  my $val = shift;
  return $val unless $val;

  if ($opt{blocksize} eq 'human' || $opt{blocksize} eq 'si')
    {
      my $factor = expand_unit ($opt{blocksize});
      my $n = 0;
      while ($val >= $factor)
        {
          $val /= $factor;
          $n++;
        }
      return round ($val, @_) . $unit[$n];
    }
  elsif ($opt{header_unit})
    {
      $opt{blocksize} =~ /^(\d*)([a-z]*)$/i;
      my $unit = $2 ? expand_unit ($2) : 1024;
      # if x == 2^n, only nth bit in x is set.
      # subtracting 1 flips all bits via a borrow; the logical AND is zero.
      # If x != 2^n, x-1 will flip all bits up to and including the first 1,
      # but will not negate the entire value and & will not produce zero.
      my $non_pow2 = $unit & ($unit - 1);
      $opt{si} = 1 if $non_pow2;

      my $factor = $non_pow2 ? 1000 : 1024;
      my $n = 0;
      while ($val >= $factor && $val % $factor == 0)
        {
          $val /= $factor;
          $n++;
        }
      return join ("", round ($val, @_), $unit[$n],
                   ($n == 0 ? "" : ($opt{si} ? "B" : "iB")));
    }
  else
    {
      $val /= expand_unit ($opt{blocksize});
    }
  return round ($val, @_);
}

sub make_format
{
  my $fmt = join ("  ", map { $_ ? "%%%ds" : "%%-%ds" } @rjustify) . "\n";
  my @len = map { length $_ } @{shift @_};
  map { for (my $i = 0; $i < @len; $i++)
          {
            my $l = length $_->[$i];
            $len[$i] = $l if $l > $len[$i];
          }
      } @_;

  sprintf ($fmt, @len);
}

sub sortrows
{
  return @_ unless $opt{sort};
  my @field = @{$opt{sort}};

  my @sorted = sort { my $p = 0;
                      map { $p = ("$a->[$_]$b->[$_]" =~ /^\d+$/
                                  ? $a->[$_] <=> $b->[$_]
                                  : $a->[$_] cmp $b->[$_]
                                 ) unless $p } @field;
                      $p
                    } @_;
  @sorted = reverse @sorted if $opt{reverse_sort};
  return @sorted;
}

sub parse_options
{
  local *ARGV = \@{$_[0]}; # modify our local arglist, not real ARGV.
  my $help; # no init; perl 5.8 will treat as REF() instead of SCALAR()

  my $parser = Getopt::Long::Parser->new;
  $parser->configure (qw(bundling autoabbrev gnu_compat no_ignorecase));

  my $succ = $parser->getoptions
    ("help"                     => sub { $help = 3 },
     "usage"                    => sub { $help = 1 },
     'V|version',               => \$opt{show_version},

     "H|no-header"              => \$opt{noheader},
     "a|all"                    => \$opt{all},
     "A|bind-mounts"            => \$opt{showbinds},
     "l|local"                  => \$opt{local},
     "p|persistent"             => \$opt{persistent},

     "i|inodes"                 => sub { $opt{inodes}    = 1;
                                         $opt{blocksize} = 1
                                           if $opt{blocksize} eq 'human';
                                       },

     "r|reverse"                => \$opt{reverse_sort},
     "s|sort=s"                 => sub { push @{$opt{sort}}, $_[1] },

     "si"                       =>      \$opt{si},
     "B|blocksize|block-size=s" =>      \$opt{blocksize},
     "k"                        => sub { $opt{blocksize} = '1k'    },
     "m"                        => sub { $opt{blocksize} = '1m'    },
     "g"                        => sub { $opt{blocksize} = '1g'    },
     "h|human-readable"         => sub { $opt{blocksize} = 'human' },
    );

  $help ||= 0; # but if not set yet, we need to set it here now.
  pod2usage (-exitstatus => 1, -verbose => 0)         unless $succ;
  pod2usage (-exitstatus => 0, -verbose => $help - 1) if $help > 0;

  if ($opt{show_version})
    {
      print "$progname version ", $version, "\n";
      print "This program is public domain.\n";
      exit (0);
    }

  if (@{$opt{sort}})
    {
      my @sortby = map { split (/[\s,]+/, $_) } @{$opt{sort}};
      map { my $orig = $_;
            $_ = $sortkey{lc $_} while defined $sortkey{lc $_};
            die "$progname: $orig: unknown sort key\n" unless /^\d+$/;
          } @sortby;
      $opt{sort} = \@sortby;
    }
  else
    { delete $opt{sort} }

  # On linux, if there are any fs args remaining, enable all so that bind-
  # and non-persistent mounts will be displayed.
  $opt{all} = 1 if @ARGV && $^O eq 'linux';
}

sub main
{
  parse_options (\@_);

  my @df = get_df (@_);
  shift @df; # discard header

  # Later distributions link /etc/mtab to /proc/mounts,
  # which has a dummy "rootfs" entry.
  # Until kernel 3.19.  Dummy entry seems to have gone away.
  # (Or is it some change to dracut?  Who knows.)
  shift @df  if ($df[0]->[DF_FS] eq 'rootfs');

  # Either way move file root fs entry to top.
  for (my $i = 0; $i < @df; $i++)
    {
      next unless $df[$i]->[DF_MPT] eq "/";
      unshift @df, splice (@df, $i, 1);
      last;
    }

  my @mountinfo = mountinfo ();
  my ($mdev, $notfirst) = parse_mountinfo (@mountinfo);
  # Do not show zero-size pseudofilesystems by default.
  @df = grep { $_->[DF_TOTAL]                } @df unless $opt{all};
  @df = grep { ! (   exists     $mdev->{$_->[DF_MPT]}
                  || exists $notfirst->{$_->[DF_MPT]})
             } @df unless $opt{all} || $opt{showbinds};
  if ($opt{showbinds} && !$opt{all})
    {
      # dup is a table of entries which are redundant with some other entry,
      # but not of entries which themselves appear multiple times.
      # That can happen if an identical mount is made twice.
      # We still want to show that mount one time.
      my %dup = duplicate_binds ($mdev, @mountinfo);
      my %seen;
      @df = grep { my $dir = $_->[DF_MPT];
                   my $uniq = !(exists $dup{$dir} || exists $seen{$dir});
                   $seen{$dir} = undef;
                   $uniq;
                 } @df;
    }

  # Fix bind mount "devices"
  map { $_->[DF_FS] = $mdev->{$_->[DF_MPT]} if $mdev->{$_->[DF_MPT]} } @df;
  # Sort before we process numbers
  @df = sortrows (@df) if $opt{sort};

  map { $_->[DF_TOTAL] = to_unit ($_->[DF_TOTAL]);
        $_->[DF_USED]  = to_unit ($_->[DF_USED]);
        $_->[DF_AVAIL] = to_unit ($_->[DF_AVAIL]);
        $_->[DF_PCT]  .= "%" unless $_->[DF_PCT] eq "-";
      } @df;

  unless ($opt{noheader})
    {
      my @hdr = ($opt{inodes}
                 ? (qw(Filesystem Inodes IUsed IFree IUse% Mountpoint))
                 : (qw(Filesystem   Size  Used Avail  Use% Mountpoint)));

      if ($opt{blocksize} ne 'human'
          && !($opt{inodes} && $opt{blocksize} == 1))
        {
          local $opt{header_unit} = 1;
          $hdr[1] = to_unit (expand_unit ($opt{blocksize}), 1);
          $hdr[1] =~ s/B$// if $opt{inodes};
          $hdr[1] .= $opt{inodes} ? "-inodes" : "-blocks";
        }
      $hdr[1] .= "(SI)" if $opt{si};
      unshift @df, \@hdr;
    }

  my $fmt = make_format (@df);
  map { printf($fmt, @$_) } @df;
}

main (@ARGV);


__END__

=head1 NAME

mdf - wrapper for df to provide more consistent formatting

=head1 SYNOPSIS

 mdf  {--help}   (more verbose)    {-V|--version}
      {--usage}  (less verbose)

      {-a|--all}                   {-B|--blocksize|--block-size  SIZE}
      {-A|--bind-mounts}           {-h|--human-readable}
      {-l|--local}                 {--si}
                                   {-k|-m|-g}

      {-H|--no-header}             {-s|--sort  KEY1,KEY2,...}
      {-i|--inodes}                {-r|--reverse}

      {file ...}

=head1 OPTIONS

=over 8

=item B<--help>, B<--usage>

Display usage information and exit.
The B<--help> option is more verbose than B<--usage>.

=item B<--version>

Output version information and exit.

=item B<-a>, B<--all>

Include dummy file systems and (linux only) C<bind> mounts.

=item B<-A>, B<--bind-mounts>  (linux only)

Display bind mounts.  That is, display additional mountpoints of all or
parts of a filesystem whose primary mount is already displayed.

In this case the "device" column for the mount is the path from whence the
mount was made.

=item B<-l>, B<--local>

Limit listing to local file systems.

=item B<-B>, B<--block-size=>I<SIZE>

Scale sizes by I<SIZE> before printing them.

I<SIZE> is an integer and optional case-insensitive unit
(example: C<10m> is 10*1024*1024).
Units are:

=over 4

=over 2

=item Powers of 1024: C<k>, C<m>, C<g>, C<t>, C<p>, C<e>, C<z>, C<y>

=item Powers of 1024: C<KiB>, C<MiB>, ...

=item Powers of 1000: C<kb>, C<mb>, ...

=back

=back

For example C<-B m> or C<-B 1024kib> prints sizes in units of 1,048,576 bytes.

=item B<-h>, B<--human-readable>

Print sizes in human readable format (e.g., C<1K>, C<234M>, C<2G>).

=item B<--si>

Likewise, but use powers of 1000, not 1024.

=item B<-H>, B<--no-header>

Do not display column headers.

=item B<-i>, B<--inodes>

List inode information instead of block usage.

=item B<-k>, B<-m>, B<-g>

Like C<--block-size=1K>, C<--block-size=1M>, or C<--block-size=1G>, respectively.

=item B<-s>, B<--sort=>I<K1,K2,...>

Sort rows by comparing columns I<K1>, I<K2>, etc.
That is, compare column I<K1> or each row and if they
are equivalent, move on to compare column I<K2>.

Columns known to be numeric are compared numerically.
(E.g. so that C<2> isn't considered greater than C<10>.)
Other columns are sorted lexicographically ordering to current locale.

Recognized column names (and number) include:

=over 4

=over 2

=item 0 : C<dev>, C<device>, C</dev>

=item 1 : C<size>

=item 2 : C<used>

=item 3 : C<avail>

=item 4 : C<pct>, C<percent>, C<%>, C<used%>, C<cap>, C<capacity>

=item 5 : C<mnt>, C<mount>, C<mountpoint>, C<fs>, C<filesystem>

=back

=back

=item B<-r>, B<--reverse>

Reverse sorting order specified by B<--sort>.

=back

=cut

# mdf ends here