#!/usr/bin/perl
# Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

use strict;
use File::Copy;
use File::Spec;
use File::Basename;
use File::Which;
use Cwd 'realpath';
use Getopt::Std;
use List::Util qw(max);
use URI::Encode;

my $extract_range_specifier;
my $extract_pid;
my $extract_file;
my $output_file;
my $output_path;
my $extract_offset;
my $extract_size;
my $pid_running;
my $verbose=0;
my $error=0;
my $output_to_stdout=0;

sub usage {
  print("Usage: $0 [-o|v|h] URI... \n");
  print("  URIs can be read from STDIN, one per line.\n");
  print("  From the URIs specified, extracts code objects into files named: ");
  print("<executable_name>-[pid<number>]-offset<number>-size<number>.co\n\n");
  print("Options:\n");
  print("  -o <path> \tPath for output. If \"-\" specified, code object is printed to STDOUT.\n");
  print("  -v        \tVerbose output to STDOUT.\n");
  print("  -h        \tShow this help message.\n");
  print("\nURI syntax:\n");
  print("\tcode_object_uri ::== file_uri | memory_uri\n");
  print("\tfile_uri        ::== \"file://\" extract_file [ range_specifier ]\n");
  print("\tmemory_uri      ::== \"memory://\" process_id range_specifier\n");
  print("\trange_specifier ::== range_delimiter range_attribute [\"&\" range_attribute]\n");
  print("\trange_delimiter ::== \"#\" | \"?\"\n");
  print("\trange_attribute ::== [\"offset=\" number | \"size=\" number ]\n");
  print("\textract_file    ::== URI_ENCODED_OS_FILE_PATH\n");
  print("\tprocess_id      ::== DECIMAL_NUMBER\n");
  print("\tnumber          ::== HEX_NUMBER \| DECIMAL_NUMBER \| OCTAL_NUMBER\n\n");
  print("\tExample: file://dir1/dir2/hello_world#offset=133&size=14472 \n");
  print("\t         memory://1234#offset=0x20000&size=3000\n\n");
  print("  NOTES:\n\n");
  print("\tWhen specifying a URI in a shell command you will need to escape the \'&\' character in the range_specifier.\n");
  print("\tIf \"size=\" is not specified, the default is the remainder of the file from the given offset.\n\n");

  exit($error);
}

# Process options
my %options=();
getopts('vho:', \%options);

if (defined $options{h}) {
  usage();
}

if (defined $options{v}) {
  $verbose = 1;
}

if (defined $options{o}) {
  $output_path = $options{o};
  if ($output_path eq "-") {
    $output_to_stdout=1;
  } else {
    (-d $output_path) || die("Error: Path \'$output_path\' cannot be found.\n");
  }
}

# push STDIN to ARGV array.
push @ARGV, <STDIN> unless -t STDIN;

# error check: enough arguments presented.
if ($#ARGV < 0) {
  print(STDERR "Error: No arguments.\n"); $error++;
  usage();
}

# error check: command dd is available.
my $dd_cmd = which("dd");
(-f $dd_cmd) || die("Error: Can't find dd command\n");

foreach my $uri_str(@ARGV) {
  chomp $uri_str;

  my ($uri_protocol, $specs) = split(/:\/\//,$uri_str);
  my $obj_uri_encode = URI::Encode->new();
  my $decoded_extract_file;
  my $file_size;

  if (lc($uri_protocol) eq "file") {
    # expect file path
    ($extract_file, $extract_range_specifier) = split(/[#,?]/,$specs);

    # decode the file name. URIs may have file/path names with non-alphanumeric characters, which will be encoded with %.  We need to decode these.
    $decoded_extract_file = $obj_uri_encode->decode($extract_file);

    # verify file exists:
    if (! -e $decoded_extract_file) {
      print(STDERR "Error: can't find file: $decoded_extract_file\n"); $error++;
      next;
    }

    # use the output_path is specified, otherwise use current working dir.
    if ($output_path ne "") {
      $output_file = File::Spec->catfile($output_path, basename($decoded_extract_file));
    } else {
      $output_file = basename($decoded_extract_file);
    }

  } elsif ( lc($uri_protocol) eq "memory") {
    # expect memory specifier
    ($extract_pid, $extract_range_specifier) = split(/[#,?]/,$specs);

    # verify pid is currently running
    $pid_running = kill 0, $extract_pid;
    if (! $pid_running) {
      print(STDERR "Error: PID: $extract_pid is NOT running\n"); $error++;
      next;
    }

    # get pid filename:
    $extract_file = "/proc/$extract_pid/mem";
   
    # verify file exists:
    if (! -e $extract_file) {
      print(STDERR "Error: can't find file: $extract_file\n"); $error++;
      next;
    }

    # for extracting from a pid, make the output file in the current dir/path with the pid value as a name.
    $output_file = "pid${extract_pid}";

    # need to set $decoded_extract_file, because later we use this for other checks.
    $decoded_extract_file = $extract_file;
  } else {
    # error, unrecognized Code Object URI
    print(STDERR "Error: \'$uri_protocol\' is not recognized as a supported code object URI.\n"); $error++;
    next;
  }

  # it is valid to not give a range specifier in a URI, in which case the entire code object will be extracted.
  if ($extract_range_specifier ne "") {
    my @tokens;
    my $str;
    my $value;
    my $size_specified = 0;

    @tokens = split(/[&]/,$extract_range_specifier);
    foreach (@tokens) {
      ($str,$value) = split(/=/,$_);
      if ($str eq "size") {
        $extract_size=$value;
        $size_specified = 1;
      } elsif ($str eq "offset") {
        $extract_offset=$value;
      }
    }

    if ($size_specified != 1) {
      # "size" not specified.  default to rest of file (total size - offset)
      $extract_size = -s $decoded_extract_file;
      $extract_size -= $extract_offset;
    }

  } else {
    # Error if URI is a memory request, and we have no range_specifier.
    if ($pid_running) {
      print(STDERR "Error: must specify a Range Specifier (offset and size) for a memory URI: $uri_str\n"); $error++;
      next;
    }

    $extract_offset = 0;
    $extract_size = -s $decoded_extract_file;
  }

  # We should have at least a valid size to extract; ignore cases with size=0.
  if ($extract_size != 0) {
    print("Reading input file \"$extract_file\" ...\n") if ($verbose);

    # only if this is a File URI.
    if (lc($uri_protocol) eq "file") {
      # verify that offset+size does not exceed file size:
      my $file_size = -s $decoded_extract_file;
      my $size = int($extract_offset) + int($extract_size);
      if ( $size > $file_size ) {
        print(STDERR "Error: requested offset($extract_offset) + size($extract_size) exceeds file size($file_size) for file \"$decoded_extract_file\".\n"); $error++;
        next;
      }
    }

    open(INPUT_FP, "<", $decoded_extract_file) || die $!;
    binmode INPUT_FP;

    # extract the code object
    my $co_filename;
    if (!$output_to_stdout) {
      $co_filename = "of=\'${output_file}-offset${extract_offset}-size${extract_size}.co\'";
    }

    my $dd_cmd_str = "$dd_cmd if=\'$decoded_extract_file\' $co_filename skip=$extract_offset count=$extract_size bs=1 status=none";

    print("DD Command: $dd_cmd_str\n") if ($verbose);

    my $dd_ret = system($dd_cmd_str);
    if ($dd_ret != 0) {
       print(STDERR "Error: DD command ($dd_cmd_str)  failed with RC: $dd_ret\n"); $error++;
    }

    print("Extract request:  file: $extract_file offset: $extract_offset size: $extract_size\n") if ($verbose);
  } else {
    print("Warning: trying to extract from $extract_file at offset=$extract_offset with size=0.  Nothing to extract.\n") if ($verbose);
  }

} # end of for each (URI) argument

exit($error);
