#!/usr/bin/env perl

# $Id: merge.pl 1306 2009-02-24 13:24:37Z pierre $

# Copyright (c) 2006 Pierre Senellart <pierre@senellart.com>
# 
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the
# following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
# NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
# USE OR OTHER DEALINGS IN THE SOFTWARE.

use strict;
use warnings;

# Merge the reindexed edge file into an adjacency list with multiplicities.
#
# Usage: merge.pl [SUFFIX]
#
# Reads   index[.SUFFIX]          only its newline count is used; it is
#                                 printed as the number of nodes
#         edges_reindex[.SUFFIX]  lines of the form "origin target target ..."
#                                 (fields separated by single spaces)
# Writes  edge_list[.SUFFIX]      the node count, the literal line
#                                 "with values", then one line
#                                 "origin target,count target,count ..."
#                                 for every origin having at least one link.

my $suffix = @ARGV ? ".$ARGV[0]" : "";

my $index_file = "index$suffix";
my $edges_file = "edges_reindex$suffix";
my $out_file   = "edge_list$suffix";

$ENV{"LC_ALL"}="C"; # Otherwise sort behaves weirdly

# List-form pipe open: sort is exec'ed directly, without a shell, so that
# metacharacters in the user-supplied suffix cannot be interpreted as shell
# syntax.
open(my $edges, '-|', 'sort', '-n', $edges_file)
  or die "Cannot run sort on $edges_file: $!\n";

# Count the lines of the index file in fixed-size chunks.
my $nb_nodes = 0;
open(my $index, '<', $index_file) or die "Cannot open $index_file: $!\n";
my $buffer;
while (1) {
  my $read = sysread $index, $buffer, 4096;
  # sysread returns undef on error and 0 at end of file; tell them apart.
  die "Cannot read $index_file: $!\n" unless defined $read;
  last unless $read;
  $nb_nodes += ($buffer =~ tr/\n//);
}
close $index;

open(my $out, '>', $out_file) or die "Cannot open $out_file for writing: $!\n";

print {$out} "$nb_nodes\nwith values\n";

# Lines sharing an origin are adjacent once sorted, so accumulate the links
# of the current origin and flush them whenever the origin changes.
# $origin stays undefined until the first valid line has been read.
my $origin;
my @links;
while (my $line = <$edges>) {
  chomp $line;
  my ($new_origin, @new_links) = split / /, $line;

  # Skip lines without an origin (blank lines, or lines starting with a
  # space): sort -n puts them first and they would otherwise compare
  # numerically equal to node 0 and swallow its links.
  next unless defined $new_origin && length $new_origin;

  if (defined $origin && $origin == $new_origin) {
    push @links, @new_links;
    next;
  }

  print_links($out, $origin, \@links) if defined $origin;

  $origin = $new_origin;
  @links  = @new_links;
}

# One more time for the last origin...
print_links($out, $origin, \@links) if defined $origin;

# Closing a pipe waits for the child; a false return means either an I/O
# error ($! set) or a non-zero exit of sort (wait status in $?).
unless (close $edges) {
  die $! ? "Error reading from sort: $!\n"
         : "sort failed on $edges_file (wait status $?)\n";
}

# Buffered write errors only surface when the handle is closed.
close $out or die "Cannot close $out_file: $!\n";

# Print one line "origin target,count target,count ..." to $fh, where the
# distinct targets are sorted numerically and count is the number of times
# the target occurs in @$links. Prints nothing when @$links is empty.
sub print_links {
  my ($fh, $node, $targets) = @_;
  return unless @$targets;

  # Remove duplicates while counting occurrences
  my %count;
  my @unique = sort { $a <=> $b } grep { !$count{$_}++ } @$targets;

  print {$fh} $node, ' ', join(' ', map { "$_,$count{$_}" } @unique), "\n";
}

