gitscraper: Provide way to get timestamps for files.

When scraping files from the repository, allow passing an additional list of files for which the timestamp of the last change is retrieved. Since this is slow (the log needs to be traversed, because a blob doesn't know which commits reference it), the timestamp is retrieved only for the specified files. Change-Id: I61a2ef812ac18ffb5898c01ce070046d4ca00a15
author: Dominik Riebeling <Dominik.Riebeling@gmail.com> 2012-04-29 11:38:23 +0200
committer: Dominik Riebeling <Dominik.Riebeling@gmail.com> 2012-04-29 11:48:07 +0200
commit: 91cf4a77687e650ff92e8129424955cea692d875 (patch)
tree: 8508e26117afa9c968b1ed0c60606c3cf4a81932
parent: a8b597e620e4936d9039f10050326b6feded7905 (diff)
1 files changed, 30 insertions, 7 deletions
diff --git a/utils/common/gitscraper.py b/utils/common/gitscraper.py
index 85a8467617..2ced9ec07e 100755
--- a/utils/common/gitscraper.py
+++ b/utils/common/gitscraper.py
@@ -42,8 +42,8 @@ def get_refs(repo):
     @return Dict matching hashes to each ref.
     '''
     print("Getting list of refs")
-    output = subprocess.Popen(["git", "show-ref"], stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE, cwd=repo)
+    output = subprocess.Popen(["git", "show-ref", "--abbrev"],
+            stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo)
     cmdout = output.communicate()
     refs = {}
 
@@ -99,6 +99,21 @@ def get_lstree(repo, start, filterlist=[]):
     return objects
 
 
+def get_file_timestamp(repo, tree, filename):
+    '''Get timestamp for a file.
+    @param repo Path to repository root.
+    @param tree Hash of tree to use.
+    @param filename Filename in tree
+    @return Timestamp as string.
+    '''
+    output = subprocess.Popen(
+            ["git", "log", "--format=%ai", "-n", "1", tree, filename],
+            stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=repo)
+    cmdout = output.communicate()
+
+    return cmdout[0].decode().rstrip()
+
+
 def get_object(repo, blob, destfile):
     '''Get an identified object from the repository.
     @param repo Path to repository root.
@@ -138,7 +153,7 @@ def describe_treehash(repo, treehash):
     return cmdout[0].rstrip()
 
 
-def scrape_files(repo, treehash, filelist, dest=""):
+def scrape_files(repo, treehash, filelist, dest="", timestamp_files=[]):
     '''Scrape list of files from repository.
     @param repo Path to repository root.
     @param treehash Hash identifying the tree.
@@ -146,17 +161,23 @@ def scrape_files(repo, treehash, filelist, dest=""):
     @param dest Destination path for files. Files will get retrieved with full
                 path from the repository, and the folder structure will get
                 created below dest as necessary.
-    @return Destination path.
+    @param timestamp_files List of files to also get the last modified date.
+                           WARNING: this is SLOW!
+    @return Destination path, filename:timestamp dict.
     '''
     print("Scraping files from repository")
 
     if dest == "":
         dest = tempfile.mkdtemp()
     treeobjects = get_lstree(repo, treehash, filelist)
+    timestamps = {}
     for obj in treeobjects:
         get_object(repo, treeobjects[obj], os.path.join(dest.encode(), obj))
+        for f in timestamp_files:
+            if obj.find(f) == 0:
+                timestamps[obj] = get_file_timestamp(repo, treehash, obj)
 
-    return dest
+    return [dest, timestamps]
 
 
 def archive_files(repo, treehash, filelist, basename, tmpfolder="",
@@ -182,7 +203,7 @@ def archive_files(repo, treehash, filelist, basename, tmpfolder="",
     else:
         temp_remove = False
     workfolder = scrape_files(repo, treehash, filelist,
-            os.path.join(tmpfolder, basename))
+            os.path.join(tmpfolder, basename))[0]
     if basename is "":
         return ""
     print("Archiving files from repository")
@@ -192,11 +213,13 @@ def archive_files(repo, treehash, filelist, basename, tmpfolder="",
             os.path.join(os.getcwd(), basename + ".7z"), basename],
             cwd=tmpfolder, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         output.communicate()
-    else:
+    elif archive == "tbz":
         outfile = basename + ".tar.bz2"
         tf = tarfile.open(outfile, "w:bz2")
         tf.add(workfolder, basename)
         tf.close()
+    else:
+        print("Files not archived")
     if tmpfolder != workfolder:
         shutil.rmtree(workfolder)
     if temp_remove:
author	Dominik Riebeling <Dominik.Riebeling@gmail.com>	2012-04-29 11:38:23 +0200
committer	Dominik Riebeling <Dominik.Riebeling@gmail.com>	2012-04-29 11:48:07 +0200
commit	91cf4a77687e650ff92e8129424955cea692d875 (patch)
tree	8508e26117afa9c968b1ed0c60606c3cf4a81932
parent	a8b597e620e4936d9039f10050326b6feded7905 (diff)