## Creates a .ptt (protein table) file and a .fasta file for each entry of a multi-entry GenBank flat file (.gbff).
## DATE-15/6/12
## Report bugs: kcm.eid@gmail.com


use strict;
use warnings;

# Converts a GenBank flat file that may contain several entries into, for
# each entry, a protein table "<name>.ptt" and a sequence file "<name>.fasta".
# <name> is the chromosome number found in the DEFINITION text; when there is
# none, the VERSION identifier is used instead.
#
# Usage: convertGBFF.pl <GBFF file>
#
# Each PTT record has the tab-separated columns
#   location, strand, length, PID, gene, locus tag, COG, product
# and is preceded by two header lines: "<version>|:1-<bp> <definition>" and
# "<n> proteins".

if (@ARGV < 1) {
    print STDERR "insufficient Arguements,";
    print "\nUSAGE: convertGBFF.pl <GBFF file>";
    exit 1;    # nothing to process without an input file
}

my $gbFile = $ARGV[0];
open(my $gb_fh, '<', $gbFile) or die "Cannt read $gbFile: $!";

# State of the entry currently being read.
my ($definition, $version, $dna, $bp) = ('', '', '', 0);
my $in_sequence   = 0;    # true between ORIGIN and the closing "//"
my $in_definition = 0;    # true while DEFINITION continuation lines may follow
my $pending_location;     # gene location that wraps onto following lines
my @records;              # finished PTT lines of the current entry
my %gene = new_gene();    # gene feature currently being collected
my @ptt_files;            # every PTT file written, for the final report

# Closes the current entry: stores the last gene, writes the output files
# and resets all per-entry state.
my $finish_entry = sub {
    push @records, gene_record(\%gene);
    my $ptt_file = write_entry($definition, $version, $bp, $dna, \@records);
    push @ptt_files, $ptt_file if defined $ptt_file;

    ($definition, $version, $dna, $bp) = ('', '', '', 0);
    ($in_sequence, $in_definition) = (0, 0);
    undef $pending_location;
    @records = ();
    %gene    = new_gene();
};

print "\n\nProcessing File. . .\n";
while (my $line = <$gb_fh>) {
    chomp $line;
    $line =~ s/\r$//;    # tolerate files with CRLF line endings

    # DEFINITION may wrap; continuation lines are indented.
    if ($in_definition) {
        if ($line =~ /^\s+(\S.*)$/) {
            $definition .= " $1";
            next;
        }
        $in_definition = 0;
    }

    # A long location (e.g. join(...)) may wrap; keep reading until the
    # parentheses balance or a line that cannot be a continuation appears.
    if (defined $pending_location) {
        if ($line =~ /^\s+([^\/\s]\S*)\s*$/) {
            $pending_location .= $1;
            next if !location_complete($pending_location);
            @gene{qw(strand location)} = parse_location($pending_location);
            undef $pending_location;
            next;
        }
        # Not a continuation: use what was collected, then handle this line.
        @gene{qw(strand location)} = parse_location($pending_location);
        undef $pending_location;
    }

    if ($line =~ /^LOCUS/) {
        my @fields = split /\s+/, $line;
        $bp = defined $fields[2] ? $fields[2] : 0;
    }
    elsif ($line =~ /^DEFINITION\s*(.*)$/) {
        # Keep only the description text, not the keyword itself.
        $definition    = $1;
        $in_definition = 1;
    }
    elsif ($line =~ /^VERSION/) {
        my @fields = split /\s+/, $line;
        $version = defined $fields[1] ? $fields[1] : '';
    }
    elsif ($line =~ /^ORIGIN/) {
        $in_sequence = 1;
    }
    elsif ($in_sequence and $line =~ /\d+\s+[\w\s]+/) {
        # Sequence line: drop the position counter and the spacing.
        (my $bases = $line) =~ s/[\s\d]+//g;
        $dna .= uc $bases;
    }
    elsif ($line =~ /^\s{5}gene\s+(\S+)/) {
        # A feature key sits in column 6, so this cannot match wrapped
        # qualifier text that merely contains the word "gene".
        my $location = $1;
        push @records, gene_record(\%gene);    # previous gene is complete
        %gene = new_gene();
        if (location_complete($location)) {
            @gene{qw(strand location)} = parse_location($location);
        }
        else {
            $pending_location = $location;
        }
    }
    elsif ($line =~ /^\s+\/locus_tag="([^"]+)"/) {
        $gene{synonym} = $1;
    }
    elsif ($line =~ /^\s+\/gene="([^"]+)"/) {
        $gene{name} = $1;
    }
    elsif ($line =~ /^\s+\/product="(.+)"/) {
        # Only single-line product values are recognised.
        $gene{product} = $1;
    }
    elsif ($line =~ /^\s+\/db_xref="GI:(\d+)"/) {
        $gene{pid} = $1;
    }
    elsif ($line =~ m{^//}) {
        $finish_entry->();
    }
}
close($gb_fh);

# A final entry that lacks its "//" terminator is still written out.
if ($version ne '' or $definition ne '' or $dna ne ''
    or @records or $gene{location} ne '-') {
    $finish_entry->();
}

print "\nPTT file exported to $_.." for @ptt_files;
print "\nJob completed!!!!!!!!!!!!!";

# Returns the key/value list of an empty gene: every PTT column is '-'.
sub new_gene {
    return (
        location => '-',
        strand   => '-',
        pid      => '-',
        name     => '-',
        synonym  => '-',
        cog      => '-',
        product  => '-',
    );
}

# True when the location text has as many ')' as '(' and so does not
# continue on the next line.
sub location_complete {
    my ($text) = @_;
    return ($text =~ tr/(//) == ($text =~ tr/)//);
}

# Turns a feature location into (strand, "start..end").
# "complement(" anywhere selects the minus strand; partial markers (< >) are
# dropped, and for join(...) the first and last coordinate are used.
# Returns a location of '-' when the text holds no coordinate at all.
sub parse_location {
    my ($text) = @_;
    my $strand = ($text =~ /complement\(/) ? '-' : '+';

    # Remove "ACCESSION.N:" prefixes so their digits are not read as positions.
    (my $local = $text) =~ s/[\w.]+://g;
    my @coords = $local =~ /(\d+)/g;

    return ($strand, '-') if !@coords;
    return ($strand, "$coords[0]..$coords[-1]");
}

# Builds the PTT line for a gene. Returns an empty list when the gene has no
# location or no locus tag, so the result can be pushed unconditionally.
sub gene_record {
    my ($gene_ref) = @_;
    return if $gene_ref->{location} eq '-' or $gene_ref->{synonym} eq '-';

    my ($start, $end) = split /\.\./, $gene_ref->{location};
    # Length in codons, minus one for the stop codon.
    my $length = (($end - $start + 1) / 3) - 1;

    return join("\t",
        $gene_ref->{location}, $gene_ref->{strand}, $length,
        $gene_ref->{pid},      $gene_ref->{name},   $gene_ref->{synonym},
        $gene_ref->{cog},      $gene_ref->{product}) . "\n";
}

# Picks the base name of the output files: the chromosome number from the
# definition, otherwise the version identifier made safe for a file name.
sub output_basename {
    my ($definition, $version) = @_;
    return $1 if $definition =~ /chromosome\s+(\d+)/;

    (my $safe = $version) =~ s/[^\w.\-]/_/g;
    return $safe ne '' ? $safe : 'unknown';
}

# Writes the files of one entry.
# The PTT file is written in a single pass (header first, then the records)
# and only when the entry has records; the FASTA file is always written.
# Both files are truncated, so running the script again does not duplicate data.
# Returns the PTT file name, or undef when no PTT file was written.
# Dies when a file cannot be opened or closed.
sub write_entry {
    my ($definition, $version, $bp, $dna, $records) = @_;
    my $base   = output_basename($definition, $version);
    my $header = "$version|:1-$bp $definition";

    my $ptt_file;
    if (@$records) {
        $ptt_file = "$base.ptt";
        open(my $ptt_fh, '>', $ptt_file)
            or die "cannt write ptt file $ptt_file: $!";
        print {$ptt_fh} "$header\n", scalar(@$records), " proteins\n", @$records;
        close($ptt_fh) or die "cannt write ptt file $ptt_file: $!";
    }

    my $fasta_file = "$base.fasta";
    open(my $fasta_fh, '>', $fasta_file)
        or die "cannt write fasta file!!! $fasta_file: $!";
    print {$fasta_fh} ">$header\n$dna\n";
    close($fasta_fh) or die "cannt write fasta file!!! $fasta_file: $!";

    return $ptt_file;
}