416 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable file
		
	
	
	
	
			
		
		
	
	
			416 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable file
		
	
	
	
	
#!/usr/bin/perl
 | 
						|
#
 | 
						|
# Copyright 2008-2009 Peter Krefting <peter@softwolves.pp.se>
 | 
						|
#
 | 
						|
# ------------------------------------------------------------------------
 | 
						|
#
 | 
						|
# This program is free software; you can redistribute it and/or modify
 | 
						|
# it under the terms of the GNU General Public License as published by
 | 
						|
# the Free Software Foundation.
 | 
						|
#
 | 
						|
# This program is distributed in the hope that it will be useful,
 | 
						|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
						|
# GNU General Public License for more details.
 | 
						|
#
 | 
						|
# You should have received a copy of the GNU General Public License
 | 
						|
# along with this program; if not, see <http://www.gnu.org/licenses/>.
 | 
						|
#
 | 
						|
# ------------------------------------------------------------------------
 | 
						|
 | 
						|
=pod
 | 
						|
 | 
						|
=head1 NAME
 | 
						|
 | 
						|
import-directories - Import bits and pieces to Git.
 | 
						|
 | 
						|
=head1 SYNOPSIS
 | 
						|
 | 
						|
B<import-directories.perl> F<configfile> F<outputfile>
 | 
						|
 | 
						|
=head1 DESCRIPTION
 | 
						|
 | 
						|
Script to import arbitrary projects version controlled by the "copy the
 | 
						|
source directory to a new location and edit it there"-version controlled
 | 
						|
projects into version control. Handles projects with arbitrary branching
 | 
						|
and version trees, taking a file describing the inputs and generating a
 | 
						|
file compatible with the L<git-fast-import(1)> format.
 | 
						|
 | 
						|
=head1 CONFIGURATION FILE
 | 
						|
 | 
						|
=head2 Format
 | 
						|
 | 
						|
The configuration file is based on the standard I<.ini> format.
 | 
						|
 | 
						|
 ; Comments start with semi-colons
 | 
						|
 [section]
 | 
						|
 key=value
 | 
						|
 | 
						|
Please see below for information on how to escape special characters.
 | 
						|
 | 
						|
=head2 Global configuration
 | 
						|
 | 
						|
Global configuration is done in the B<[config]> section, which should be
 | 
						|
the first section in the file. Configuration can be changed by
 | 
						|
repeating configuration sections later on.
 | 
						|
 | 
						|
 [config]
 | 
						|
 ; configure conversion of CRLFs. "convert" means that all CRLFs
 | 
						|
 ; should be converted into LFs (suitable for the core.autocrlf
 | 
						|
 ; setting set to true in Git). "none" means that all data is
 | 
						|
 ; treated as binary.
 | 
						|
 crlf=convert
 | 
						|
 | 
						|
=head2 Revision configuration
 | 
						|
 | 
						|
Each revision that is to be imported is described in three
 | 
						|
sections. Revisions should be defined in topological order, so
 | 
						|
that a revision's parent has always been defined when a new revision
 | 
						|
is introduced. All the sections for one revision must be defined
 | 
						|
before defining the next revision.
 | 
						|
 | 
						|
Each revision is assigned a unique numerical identifier. The
 | 
						|
numbers do not need to be consecutive, nor monotonically
 | 
						|
increasing.
 | 
						|
 | 
						|
For instance, if your configuration file contains only the two
 | 
						|
revisions 4711 and 42, where 4711 is the initial commit, the
 | 
						|
only requirement is that 4711 is completely defined before 42.
 | 
						|
 | 
						|
=pod
 | 
						|
 | 
						|
=head3 Revision description section
 | 
						|
 | 
						|
A section whose section name is just an integer gives meta-data
 | 
						|
about the revision.
 | 
						|
 | 
						|
 [3]
 | 
						|
 ; author sets the author of the revisions
 | 
						|
 author=Peter Krefting <peter@softwolves.pp.se>
 | 
						|
 ; branch sets the branch that the revision should be committed to
 | 
						|
 branch=master
 | 
						|
 ; parent describes the revision that is the parent of this commit
 | 
						|
 ; (optional)
 | 
						|
 parent=1
 | 
						|
 ; merges describes a revision that is merged into this commit
 | 
						|
 ; (optional; can be repeated)
 | 
						|
 merges=2
 | 
						|
 ; selects one file to take the timestamp from
 | 
						|
 ; (optional; if unspecified, the most recent file from the .files
 | 
						|
 ;  section is used)
 | 
						|
 timestamp=3/source.c
 | 
						|
 | 
						|
=head3 Revision contents section
 | 
						|
 | 
						|
A section whose section name is an integer followed by B<.files>
 | 
						|
describe all the files included in this revision. If a file that
 | 
						|
was available previously is not included in this revision, it will
 | 
						|
be removed.
 | 
						|
 | 
						|
If an on-disk revision is incomplete, you can point to files from
 | 
						|
a previous revision. There are no restrictions on where the source
 | 
						|
files are located, nor on their names.
 | 
						|
 | 
						|
 [3.files]
 | 
						|
 ; the key is the path inside the repository, the value is the path
 | 
						|
 ; as seen from the importer script.
 | 
						|
 source.c=ver-3.00/source.c
 | 
						|
 source.h=ver-2.99/source.h
 | 
						|
 readme.txt=ver-3.00/introduction to the project.txt
 | 
						|
 | 
						|
File names are treated as byte strings (but please see below on
 | 
						|
quoting rules), and should be stored in the configuration file in
 | 
						|
the encoding that should be used in the generated repository.
 | 
						|
 | 
						|
=head3 Revision commit message section
 | 
						|
 | 
						|
A section whose section name is an integer followed by B<.message>
 | 
						|
gives the commit message. This section is read verbatim, up until
 | 
						|
the beginning of the next section. As such, a commit message may not
 | 
						|
contain a line that begins with an opening square bracket ("[") and
 | 
						|
ends with a closing square bracket ("]"), unless they are surrounded
 | 
						|
by whitespace or other characters.
 | 
						|
 | 
						|
 [3.message]
 | 
						|
 Implement foobar.
 | 
						|
 ; trailing blank lines are ignored.
 | 
						|
 | 
						|
=cut
 | 
						|
 | 
						|
# Globals
 | 
						|
use strict;
 | 
						|
use warnings;
 | 
						|
use integer;
 | 
						|
my $crlfmode = 0;
 | 
						|
my @revs;
 | 
						|
my (%revmap, %message, %files, %author, %branch, %parent, %merges, %time, %timesource);
 | 
						|
my $sectiontype = 0;
 | 
						|
my $rev = 0;
 | 
						|
my $mark = 1;
 | 
						|
 | 
						|
# Check command line
 | 
						|
if ($#ARGV < 1 || $ARGV[0] =~ /^--?h/)
 | 
						|
{
 | 
						|
    exec('perldoc', $0);
 | 
						|
    exit 1;
 | 
						|
}
 | 
						|
 | 
						|
# Open configuration
 | 
						|
my $config = $ARGV[0];
 | 
						|
open CFG, '<', $config or die "Cannot open configuration file \"$config\": ";
 | 
						|
 | 
						|
# Open output
 | 
						|
my $output = $ARGV[1];
 | 
						|
open OUT, '>', $output or die "Cannot create output file \"$output\": ";
 | 
						|
binmode OUT;
 | 
						|
 | 
						|
LINE: while (my $line = <CFG>)
 | 
						|
{
 | 
						|
	$line =~ s/\r?\n$//;
 | 
						|
	next LINE if $sectiontype != 4 && $line eq '';
 | 
						|
	next LINE if $line =~ /^;/;
 | 
						|
	my $oldsectiontype = $sectiontype;
 | 
						|
	my $oldrev = $rev;
 | 
						|
 | 
						|
	# Sections
 | 
						|
	if ($line =~ m"^\[(config|(\d+)(|\.files|\.message))\]$")
 | 
						|
	{
 | 
						|
		if ($1 eq 'config')
 | 
						|
		{
 | 
						|
			$sectiontype = 1;
 | 
						|
		}
 | 
						|
		elsif ($3 eq '')
 | 
						|
		{
 | 
						|
			$sectiontype = 2;
 | 
						|
			$rev = $2;
 | 
						|
			# Create a new revision
 | 
						|
			die "Duplicate rev: $line\n " if defined $revmap{$rev};
 | 
						|
			print "Reading revision $rev\n";
 | 
						|
			push @revs, $rev;
 | 
						|
			$revmap{$rev} = $mark ++;
 | 
						|
			$time{$revmap{$rev}} = 0;
 | 
						|
		}
 | 
						|
		elsif ($3 eq '.files')
 | 
						|
		{
 | 
						|
			$sectiontype = 3;
 | 
						|
			$rev = $2;
 | 
						|
			die "Revision mismatch: $line\n " unless $rev == $oldrev;
 | 
						|
		}
 | 
						|
		elsif ($3 eq '.message')
 | 
						|
		{
 | 
						|
			$sectiontype = 4;
 | 
						|
			$rev = $2;
 | 
						|
			die "Revision mismatch: $line\n " unless $rev == $oldrev;
 | 
						|
		}
 | 
						|
		else
 | 
						|
		{
 | 
						|
			die "Internal parse error: $line\n ";
 | 
						|
		}
 | 
						|
		next LINE;
 | 
						|
	}
 | 
						|
 | 
						|
	# Parse data
 | 
						|
	if ($sectiontype != 4)
 | 
						|
	{
 | 
						|
		# Key and value
 | 
						|
		if ($line =~ m"^\s*([^\s].*=.*[^\s])\s*$")
 | 
						|
		{
 | 
						|
			my ($key, $value) = &parsekeyvaluepair($1);
 | 
						|
			# Global configuration
 | 
						|
			if (1 == $sectiontype)
 | 
						|
			{
 | 
						|
				if ($key eq 'crlf')
 | 
						|
				{
 | 
						|
					$crlfmode = 1, next LINE if $value eq 'convert';
 | 
						|
					$crlfmode = 0, next LINE if $value eq 'none';
 | 
						|
				}
 | 
						|
				die "Unknown configuration option: $line\n ";
 | 
						|
			}
 | 
						|
			# Revision specification
 | 
						|
			if (2 == $sectiontype)
 | 
						|
			{
 | 
						|
				my $current = $revmap{$rev};
 | 
						|
				$author{$current} = $value, next LINE if $key eq 'author';
 | 
						|
				$branch{$current} = $value, next LINE if $key eq 'branch';
 | 
						|
				$parent{$current} = $value, next LINE if $key eq 'parent';
 | 
						|
				$timesource{$current} = $value, next LINE if $key eq 'timestamp';
 | 
						|
				push(@{$merges{$current}}, $value), next LINE if $key eq 'merges';
 | 
						|
				die "Unknown revision option: $line\n ";
 | 
						|
			}
 | 
						|
			# Filespecs
 | 
						|
			if (3 == $sectiontype)
 | 
						|
			{
 | 
						|
				# Add the file and create a marker
 | 
						|
				die "File not found: $line\n " unless -f $value;
 | 
						|
				my $current = $revmap{$rev};
 | 
						|
				${$files{$current}}{$key} = $mark;
 | 
						|
				my $time = &fileblob($value, $crlfmode, $mark ++);
 | 
						|
 | 
						|
				# Update revision timestamp if more recent than other
 | 
						|
				# files seen, or if this is the file we have selected
 | 
						|
				# to take the time stamp from using the "timestamp"
 | 
						|
				# directive.
 | 
						|
				if ((defined $timesource{$current} && $timesource{$current} eq $value)
 | 
						|
				    || $time > $time{$current})
 | 
						|
				{
 | 
						|
					$time{$current} = $time;
 | 
						|
				}
 | 
						|
			}
 | 
						|
		}
 | 
						|
		else
 | 
						|
		{
 | 
						|
			die "Parse error: $line\n ";
 | 
						|
		}
 | 
						|
	}
 | 
						|
	else
 | 
						|
	{
 | 
						|
		# Commit message
 | 
						|
		my $current = $revmap{$rev};
 | 
						|
		if (defined $message{$current})
 | 
						|
		{
 | 
						|
			$message{$current} .= "\n";
 | 
						|
		}
 | 
						|
		$message{$current} .= $line;
 | 
						|
	}
 | 
						|
}
 | 
						|
close CFG;
 | 
						|
 | 
						|
# Start spewing out data for git-fast-import
 | 
						|
foreach my $commit (@revs)
 | 
						|
{
 | 
						|
	# Progress
 | 
						|
	print OUT "progress Creating revision $commit\n";
 | 
						|
 | 
						|
	# Create commit header
 | 
						|
	my $mark = $revmap{$commit};
 | 
						|
 | 
						|
	# Branch and commit id
 | 
						|
	print OUT "commit refs/heads/", $branch{$mark}, "\nmark :", $mark, "\n";
 | 
						|
 | 
						|
	# Author and timestamp
 | 
						|
	die "No timestamp defined for $commit (no files?)\n" unless defined $time{$mark};
 | 
						|
	print OUT "committer ", $author{$mark}, " ", $time{$mark}, " +0100\n";
 | 
						|
 | 
						|
	# Commit message
 | 
						|
	die "No message defined for $commit\n" unless defined $message{$mark};
 | 
						|
	my $message = $message{$mark};
 | 
						|
	$message =~ s/\n$//; # Kill trailing empty line
 | 
						|
	print OUT "data ", length($message), "\n", $message, "\n";
 | 
						|
 | 
						|
	# Parent and any merges
 | 
						|
	print OUT "from :", $revmap{$parent{$mark}}, "\n" if defined $parent{$mark};
 | 
						|
	if (defined $merges{$mark})
 | 
						|
	{
 | 
						|
		foreach my $merge (@{$merges{$mark}})
 | 
						|
		{
 | 
						|
			print OUT "merge :", $revmap{$merge}, "\n";
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	# Output file marks
 | 
						|
	print OUT "deleteall\n"; # start from scratch
 | 
						|
	foreach my $file (sort keys %{$files{$mark}})
 | 
						|
	{
 | 
						|
		print OUT "M 644 :", ${$files{$mark}}{$file}, " $file\n";
 | 
						|
	}
 | 
						|
	print OUT "\n";
 | 
						|
}
 | 
						|
 | 
						|
# Create one file blob
 | 
						|
sub fileblob
 | 
						|
{
 | 
						|
	my ($filename, $crlfmode, $mark) = @_;
 | 
						|
 | 
						|
	# Import the file
 | 
						|
	print OUT "progress Importing $filename\nblob\nmark :$mark\n";
 | 
						|
	open FILE, '<', $filename or die "Cannot read $filename\n ";
 | 
						|
	binmode FILE;
 | 
						|
	my ($size, $mtime) = (stat(FILE))[7,9];
 | 
						|
	my $file;
 | 
						|
	read FILE, $file, $size;
 | 
						|
	close FILE;
 | 
						|
	$file =~ s/\r\n/\n/g if $crlfmode;
 | 
						|
	print OUT "data ", length($file), "\n", $file, "\n";
 | 
						|
 | 
						|
	return $mtime;
 | 
						|
}
 | 
						|
 | 
						|
# Parse a key=value pair
 | 
						|
sub parsekeyvaluepair
 | 
						|
{
 | 
						|
=pod
 | 
						|
 | 
						|
=head2 Escaping special characters
 | 
						|
 | 
						|
Key and value strings may be enclosed in quotes, in which case
 | 
						|
whitespace inside the quotes is preserved. Additionally, an equal
 | 
						|
sign may be included in the key by preceding it with a backslash.
 | 
						|
For example:
 | 
						|
 | 
						|
 "key1 "=value1
 | 
						|
 key2=" value2"
 | 
						|
 key\=3=value3
 | 
						|
 key4=value=4
 | 
						|
 "key5""=value5
 | 
						|
 | 
						|
Here the first key is "key1 " (note the trailing white-space) and the
 | 
						|
second value is " value2" (note the leading white-space). The third
 | 
						|
key contains an equal sign "key=3" and so does the fourth value, which
 | 
						|
does not need to be escaped. The fifth key contains a trailing quote,
 | 
						|
which does not need to be escaped since it is inside a surrounding
 | 
						|
quote.
 | 
						|
 | 
						|
=cut
 | 
						|
	my $pair = shift;
 | 
						|
 | 
						|
	# Separate key and value by the first non-quoted equal sign
 | 
						|
	my ($key, $value);
 | 
						|
	if ($pair =~ /^(.*[^\\])=(.*)$/)
 | 
						|
	{
 | 
						|
		($key, $value) = ($1, $2)
 | 
						|
	}
 | 
						|
	else
 | 
						|
	{
 | 
						|
		die "Parse error: $pair\n ";
 | 
						|
	}
 | 
						|
 | 
						|
	# Unquote and unescape the key and value separately
 | 
						|
	return (&unescape($key), &unescape($value));
 | 
						|
}
 | 
						|
 | 
						|
# Unquote and unescape
 | 
						|
sub unescape
 | 
						|
{
 | 
						|
	my $string = shift;
 | 
						|
 | 
						|
	# First remove enclosing quotes. Backslash before the trailing
 | 
						|
	# quote leaves both.
 | 
						|
	if ($string =~ /^"(.*[^\\])"$/)
 | 
						|
	{
 | 
						|
		$string = $1;
 | 
						|
	}
 | 
						|
 | 
						|
	# Second remove any backslashes inside the unquoted string.
 | 
						|
	# For later: Handle special sequences like \t ?
 | 
						|
	$string =~ s/\\(.)/$1/g;
 | 
						|
 | 
						|
	return $string;
 | 
						|
}
 | 
						|
 | 
						|
__END__
 | 
						|
 | 
						|
=pod
 | 
						|
 | 
						|
=head1 EXAMPLES
 | 
						|
 | 
						|
B<import-directories.perl> F<project.import>
 | 
						|
 | 
						|
=head1 AUTHOR
 | 
						|
 | 
						|
Copyright 2008-2009 Peter Krefting E<lt>peter@softwolves.pp.se>
 | 
						|
 | 
						|
This program is free software; you can redistribute it and/or modify
 | 
						|
it under the terms of the GNU General Public License as published by
 | 
						|
the Free Software Foundation.
 | 
						|
 | 
						|
=cut
 |