#!/usr/bin/perl
#
my $spike_thresh2 = 400;
my $pos_pattern = "%.5f";
my @seq = ();
my %nodes = ();
my %nodepos = ();
my $inway = 0;

my $DEBUG = 0;

# Calculate the (square of the) distance between two nodes
# it's not the geographically correct way, but a good estimation
sub dist2 {
    my ($p1, $p2) = @_;
    # No square root ... 
    # it's wrong anyway, so we can easily live without this detail :)
    return (${$p1}{lat}-${$p2}{lat})**2 + (${$p1}{lon}-${$p2}{lon})**2;
}

# Add node information to hash
sub add_node {
    my ($id, $lat, $lon) = @_;

    # shorten the lat/lon for lookup
    my $lats = sprintf($pos_pattern, $lat);
    my $lons = sprintf($pos_pattern, $lon);
    # lookup existing node in vicinity
    if (defined($nodepos{$lats})) {
	if (defined(${$nodepos{$lats}}{$lons})) {
	    print STDERR "Match at $lat, $lon ($id) => $lats, $lons (" . ${$nodepos{$lats}}{$lons} . ")\n" if $DEBUG;
	    $nodes{$id} = { repl => ${$nodepos{$lats}}{$lons} };
	} else {
	    ${$nodepos{$lats}}{$lons} = $id;
	}
    } else {
	$nodepos{$lats} = { $lons => $id };
    }

    # Exit if duplicate
    return 0 if defined(${$nodes{$id}}{repl});

    # This is a real node
    $nodes{$id} = { lat => $lat, lon => $lon };
    return 1;
}



#### Main ####

while (<>) {
    # e.g.  <node id='-1' visible='true' lat='57.772092' lon='26.021256' />
    if (/<node .*id=['"]([^'"]*)['"] .*lat=['"]([^'"]*)['"] .*lon=['"]([^'"]*)['"]/) {
	# If node is unique pass through
	print $_ if add_node($1, $2, $3);
    } elsif (!$inway && /<way .*id=['"]([^'"]*)['"]/) {
	$inway = $1;
	print $_;
    } elsif ($inway) {
	# We expect the <nd .*> elements right after <way ...>
	# e.g. <nd ref='-1' />
	if (/<nd ref=['"]([^'"]*)['"]/) {
	    # find replacement node
	    if (defined(${$nodes{$1}}{repl})) {
		print STDERR "Replacing node in way ($inway): $1 -> " . ${$nodes{$1}}{repl} . "\n" if $DEBUG;
		push @seq, ${$nodes{$1}}{repl};
	    } else {
		push @seq, $1;
	    }
	    # remove duplicate of same node
	    if ($#seq > 0) {
		if ($seq[$#seq-1] == $seq[$#seq]) {
		    print STDERR "Found duplicate node in way ($inway): " . $seq[$#seq] . "\n" if $DEBUG;
		    pop @seq;
		}
	    }
	    # find spike between first and third or second and forth node
	    if ($#seq > 1) {
		if (dist2($nodes{$seq[$#seq-2]}, $nodes{$seq[$#seq-1]}) > 
			    $spike_thresh2 * dist2($nodes{$seq[$#seq-2]}, $nodes{$seq[$#seq]})) {
		    print STDERR "Found spike in way ($inway): " . $seq[$#seq-2] . ", " . $seq[$#seq-1] . ", " . $seq[$#seq] . "\n" if $DEBUG;
		    my $a = pop(@seq);
		    my $spike = pop(@seq);
		    ${$nodes{$n}}{used} -= 1;
		    # Remove one instance of same node
		    if ($seq[$#seq] == $a) {
			print STDERR "Found duplicate node after spike in way ($inway): $a\n" if $DEBUG;
		    } else {
			push @seq, $a;
		    }
		}
	    }
	} else {
	    # Ways need at least 2 nodes
	    if ($#seq > 0) {
		while ((my $n = shift(@seq))) {
		    ${$nodes{$n}}{used} += 1;	    # mark node as used
		    print "    <nd ref='$n' />\n";
		}
	    } else {
		@seq = ();
		print STDERR "Way without nodes: $inway\n";
	    }
	    $inway = 0; # if /<\/way/;
	    print $_;
	}
    } else {
	print $_;
    }
}

foreach $k (keys %nodes) {
    if ((!defined(${$nodes{$k}}{used}) || ${$nodes{$k}}{used} == 0) && !defined(${$nodes{$k}}{repl})) {
	print STDERR "Node without reference: $k\n";
    }
}
