#!/usr/bin/perl -w

use strict;
use List::Util qw[min max];

unless($ARGV[5]){
print <<EOF;
	Generating distal/proximal annotation for  
	Usage: perl 0.define.proximal.distal.pl gencode_annotations(gtf) input_file output_file proximal/distal distance_threshold(bp) overlapping_percentage mode 
	distance_threshold - proximal:distance around TSS, default 2500, meaning +-2.5kb of TSS
			   - distal: at least distance from any TSS, default 10kb. 
	overlapping_percentage: the minimum percentage of overlap to define element overlapping with proximal/distal regions
	mode : 0 - truncate element to only include part falling into proximal/distal regions. 
	       1 -  to include whole element if the element falling within proximal/distal regions (>=overlapping_percentage) 

EOF
exit;
}

$| =1 ;

my ($gencode,$input,$output,$prox_dis,$overlap,$mode);
my $cut = "";
if (scalar @ARGV == 7){
	 ($gencode,$input,$output,$prox_dis,$cut,$overlap,$mode)=@ARGV;
}else{
	 ($gencode,$input,$output,$prox_dis,$overlap,$mode) = @ARGV;
}

if ($prox_dis ne "proximal" && $prox_dis ne "distal"){
	die "proximal/distal should be either proximal or distal...\n";
}
my $type= '"transcript"';
my $region = "";

unless ($overlap =~ /\d/ && $overlap <= 1  && $overlap >= 0){
	die "overlapping_percentage should be between 0~1 ...\n";
}
unless ($mode == 1 || $mode ==0){
	die "specify proper mode ...\n";
}
my $prox_cut = 2500;
my $dis_cut = 10000;
if ($cut =~ /\d+/){
	my $prox_cut = $cut;
	my $dis_cut = $cut;
}

open(IN, "awk '\$3 == $type' $gencode | ")||die;
while(<IN>){
	my ($chr,$strand,$start,$end) = (split)[0,6,3,4];
	$chr =~ s/^chr//i;
	$chr =~ s/^/chr/;
	if ($strand eq "+"){
		if ($prox_dis eq "proximal"){
			$region .= join("\t",$chr,max(0,$start-$prox_cut-1),$start+$prox_cut)."\n";	
		}else{
			$region .= join("\t",$chr,max(0,$start-$dis_cut-1),$start + $dis_cut)."\n";
		}
	}elsif($strand eq "-"){
		if ($prox_dis eq "proximal"){
			$region .= join("\t",$chr,max(0,$end-$prox_cut-1),$end+$prox_cut)."\n";
                }else{
			$region .= join("\t",$chr,max(0,$end-$dis_cut-1),$end + $dis_cut)."\n";
		}
	}
}
close IN;
open(O,">tmp");
print O $region;


if ($prox_dis eq "proximal"){
	if ($mode ==0 ){  #truncate
		` intersectBed -f $overlap -a $input -b tmp -u > $output`;
	}else{
		` intersectBed -f $overlap -a $input -b tmp -wa  -u > $output`;
	}
}else{
	if ($mode ==0){ 
		`intersectBed -f 1-$overlap -a $input -b tmp -wa -v | subtractBed  -a stdin -b $region > $output`;
	}else{
		`intersectBed -f 1-$overlap -a $input -b tmp -wa -v > $output`;
	}
}
unlink("tmp");
