diff --git a/cli/Program.fs b/cli/Program.fs index 2fc0429..7e60d87 100644 --- a/cli/Program.fs +++ b/cli/Program.fs @@ -40,6 +40,21 @@ type AppCommand() = // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +type DumpRepoObjectSettings() = + inherit AppSettings() + [" )>] + member val ObjName = "" with get, set + +type DumpRepoObjectCommand() = + inherit Command() + override this.Execute( ctx, settings ) = + if settings.ObjName = "" then + failwith "Missing object name." + dumpRepoObject settings.RepoDir settings.ObjName + 0 + +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + type DumpPackFileSettings() = inherit AppSettings() [" )>] @@ -85,6 +100,9 @@ let main argv = let app = CommandApp() app.Configure( fun cfg -> cfg.SetApplicationName( System.AppDomain.CurrentDomain.FriendlyName ) |> ignore + cfg.AddCommand( "dump-object" ).WithDescription( + "Dump an object in a git repo." + ) |> ignore cfg.AddCommand( "dump-packfile" ).WithDescription( "Dump a pack file." ) |> ignore diff --git a/git-guts/GitGuts.fs b/git-guts/GitGuts.fs index 1d9d126..7271818 100644 --- a/git-guts/GitGuts.fs +++ b/git-guts/GitGuts.fs @@ -53,6 +53,16 @@ module GitGuts = buf + let decompressEntireStream (inp: Stream) = + // skip the header + inp.ReadByte() |> ignore + inp.ReadByte() |> ignore + // decompress the rest of the input stream + use zstream = new DeflateStream( inp, CompressionMode.Decompress ) + use buf = new MemoryStream() + zstream.CopyTo( buf ) + buf.ToArray() + let readUntil (inp: Stream) (endByte: byte) = // read bytes until the specified end byte is seen let rec readBytes () = seq { diff --git a/git-guts/GitObject.fs b/git-guts/GitObject.fs index fd014ac..9c4e111 100644 --- a/git-guts/GitObject.fs +++ b/git-guts/GitObject.fs @@ -92,14 +92,28 @@ type TagGitObject( objData ) = // -------------------------------------------------------------------- +// This is used to hold the raw data for objects extracted from a pack. +type ObjRec = { + objType: int + objData: byte[] +} + [] module GitObject = - let makeGitObject objType objData = + let makeGitObject objRec = // create a GitObject-derived object + match objRec.objType with + | 1 -> (CommitGitObject objRec.objData) :> GitObject + | 2 -> (TreeGitObject objRec.objData) :> GitObject + | 3 -> (BlobGitObject objRec.objData) :> GitObject + | 4 -> (TagGitObject objRec.objData) :> GitObject + | _ -> failwithf "Unknown object type: %d" objRec.objType + + let parseObjType objType = match objType with - | 1 -> (CommitGitObject objData) :> GitObject - | 2 -> (TreeGitObject objData) :> GitObject - | 3 -> (BlobGitObject objData) :> GitObject - | 4 -> (TagGitObject objData) :> GitObject - | _ -> failwithf "Unknown object type: %d" objType + | "commit" -> 1 + | "tree" -> 2 + | "blob" -> 3 + | "tag" -> 4 + | _ -> failwithf "Unknown object type: %s" objType diff --git a/git-guts/GitRepo.fs b/git-guts/GitRepo.fs new file mode 100644 index 0000000..22306c4 --- /dev/null +++ b/git-guts/GitRepo.fs @@ -0,0 +1,69 @@ +namespace git_guts + +open System.IO +open System.Text +open System.Collections.Generic + +// -------------------------------------------------------------------- + +[] +module GitRepo = + + let rec _findRepoObjRec (repoDir: string) (objName: string) = + + // check if the object is loose + let fname = Path.Join( repoDir, sprintf ".git/objects/%s/%s" objName.[0..1] objName.[2..] ) + if File.Exists( fname ) then + + // yup - get it directly from there + use inp = new FileStream( fname, FileMode.Open, FileAccess.Read, FileShare.Read ) + let data = decompressEntireStream inp + + // extract the object type and size + let pos = Seq.findIndex (fun byt -> byt = 0uy) data + let header = Encoding.ASCII.GetString( data, 0, pos ) + let words = header.Split( ' ' ) + if words.Length <> 2 then + failwithf "Unexpected loose object header: %s" header + let objType = parseObjType words.[0] + let objSize = int( words.[1] ) + + // get the object data + let objData = data.[ pos+1 .. ] + if objData.Length <> objSize then + failwithf "Unexpected object data size: %d/%d" objData.Length objSize + + // return the object + Some { objType=objType; objData=objData } + + else + + // nope - check all the packs in the repo + let fnames = findRepoPacks repoDir + try + fnames |> Seq.map (fun fname -> _readPackObjRec fname objName _findRepoObjRec) |> Seq.find (fun obj -> obj.IsSome) + with + | :? KeyNotFoundException -> None + + let findRepoObject repoDir objName = + // find the specified object in the repo + let objRec = _findRepoObjRec repoDir objName + if objRec.IsNone then + None + else + Some( makeGitObject objRec.Value ) + + let dumpRepoObject repoDir objName = + // find and dump the specified repo object + let obj = findRepoObject repoDir objName + if obj.IsNone then + failwith "Object not found." + obj.Value.dumpObj() + + let dumpPackFile fname = + // FUDGE! This is a wrapper function that passes in the recursively-called _findRepoObjRec function :-/ + _dumpPackFile fname _findRepoObjRec + + let dumpPackObject fname objName = + // FUDGE! This is a wrapper function that passes in the recursively-called _findRepoObjRec function :-/ + _dumpPackObject fname objName _findRepoObjRec diff --git a/git-guts/Pack.fs b/git-guts/Pack.fs index d192bf0..b39fe9e 100644 --- a/git-guts/Pack.fs +++ b/git-guts/Pack.fs @@ -8,7 +8,7 @@ open System.Collections.Generic [] module Pack = - let dumpPackFile fname = + let internal _dumpPackFile fname findRepoObjRecFunc = // initialize if not ( File.Exists fname ) then @@ -17,13 +17,13 @@ module Pack = // figure out what to do let extn = Path.GetExtension( fname ).ToLower() if extn = ".pack" then - _dumpPackDataFile fname + _dumpPackDataFile fname findRepoObjRecFunc else if extn = ".idx" then _dumpPackIndexFile fname else failwithf "Unknown pack file extension: %s" extn - let readPackObject fname objName :GitObject option = + let _readPackObjRec fname objName findRepoObjRecFunc = let fpos = _findObjInPack fname objName if fpos.IsNone then None @@ -32,17 +32,24 @@ module Pack = let fname2 = changeExtn fname ".pack" use inp = new FileStream( fname2, FileMode.Open, FileAccess.Read, FileShare.Read ) inp.Seek( int64( fpos.Value ), SeekOrigin.Begin ) |> ignore - let objType, objData, fpos2 = _readPackObject inp - Some ( makeGitObject objType objData ) + let objRec, fpos2 = _readPackObjRec inp findRepoObjRecFunc + Some objRec - let dumpPackObject fname objName = + let readPackObject fname objName findRepoObjRecFunc :GitObject option = + let objRec = _readPackObjRec fname objName findRepoObjRecFunc + if objRec.IsNone then + None + else + Some ( makeGitObject objRec.Value ) + + let _dumpPackObject fname objName findRepoObjRecFunc = // initialize if not ( File.Exists fname ) then failwithf "Can't find pack file: %s" fname // find the specified pack object - let obj = readPackObject fname objName + let obj = readPackObject fname objName findRepoObjRecFunc if obj.IsNone then failwith "Object not found." obj.Value.dumpObj() diff --git a/git-guts/PackData.fs b/git-guts/PackData.fs index 01eb8fd..2dcbe1f 100644 --- a/git-guts/PackData.fs +++ b/git-guts/PackData.fs @@ -69,7 +69,7 @@ module PackData = objData - let rec internal _readPackObject (inp: Stream) = + let rec internal _readPackObjRec (inp: Stream) findRepoObjRecFunc = // remember where the object starts in the pack data let fpos = inp.Position @@ -91,23 +91,21 @@ module PackData = let objSize = getShiftedBytes (byt &&& 0x8F) 0 |> Seq.fold foldShiftedByte 0 // read the object data - let mutable objType2 = -1 - let mutable objData = null - let obj = + let objRec = match objType with | 1 | 2 | 3 | 4 -> // commit, tree, blob, tag - objData <- decompressStream inp objSize - objType2 <- objType + let objData = decompressStream inp objSize + { objType=objType; objData=objData } | 6 -> // ofs_delta - let deltaObjAndType = _readOfsDeltaObj inp fpos objSize - objData <- snd deltaObjAndType - objType2 <- fst deltaObjAndType + _readOfsDeltaObjRec inp fpos objSize findRepoObjRecFunc + | 7 -> // ref_delta + _readRefDeltaObjRec inp fpos objSize findRepoObjRecFunc | _ -> failwithf "Unknown object type: %d" objType - ( objType2, objData, fpos ) + ( objRec, fpos ) - and private _readOfsDeltaObj (inp: Stream) fpos objSize = + and private _readOfsDeltaObjRec (inp: Stream) fpos objSize findRepoObjRecFunc = // read the base object offset let offset = int64( readVliBe inp true ) @@ -117,24 +115,43 @@ module PackData = // IMPORTANT: The base object could itself be delta'fied. let prevPos = inp.Position inp.Seek( baseObjOffset, SeekOrigin.Begin ) |> ignore - let baseObjType, baseObjData, fpos = _readPackObject inp + let baseObjRec, fpos = _readPackObjRec inp findRepoObjRecFunc inp.Seek( prevPos, SeekOrigin.Begin ) |> ignore // reconstruct the delta'fied object - let objData = _makeDeltaObj inp baseObjData objSize + let objData = _makeDeltaObj inp baseObjRec.objData objSize - ( baseObjType, objData ) + { objType=baseObjRec.objType; objData=objData } - let private _readPackObjects (inp: Stream) = seq { + and private _readRefDeltaObjRec (inp: Stream) fpos objSize (findRepoObjRecFunc: string -> string -> ObjRec option) = + + // read the base object name + let baseObjName = readObjName inp + + // get the base object + let fstream = inp :?> FileStream // NOTE: REF_DELTA's will only work with repo's stored on disk. + let dname = Path.GetDirectoryName( fstream.Name ) + let repoDir = Path.GetFullPath( Path.Join( dname, "../../.." ) ) + // FUDGE! The algorithm for reconstructing objects from base object(s) is inherently recursive, + // and since there isn't a way to forward declare a function, everyone has to pass around + // a reference to the _findRepoObjRec function, so that we can use it here :-/ + let baseObjRec = findRepoObjRecFunc repoDir baseObjName + + // reconstruct the delta'fied object + let objData = _makeDeltaObj inp baseObjRec.Value.objData objSize + + { objType=baseObjRec.Value.objType; objData=objData } + + let private _readPackObjects (inp: Stream) findRepoObjRecFunc = seq { // read each object let endPos = inp.Length - 20L // nb: we ignore the 20-byte checksum at the end while inp.Position < endPos do - let objType, objData, fpos = _readPackObject inp - let obj = makeGitObject objType objData - yield obj, fpos, objData + let objRec, fpos = _readPackObjRec inp findRepoObjRecFunc + let obj = makeGitObject objRec + yield obj, objRec, fpos } - let internal _dumpPackDataFile fname = + let internal _dumpPackDataFile fname findRepoObjRecFunc = // initialize use inp = new FileStream( fname, FileMode.Open, FileAccess.Read, FileShare.Read ) @@ -143,11 +160,11 @@ module PackData = let version, nObjs = _readPackDataHeader inp // dump each object - _readPackObjects inp |> Seq.iteri ( fun objNo row -> - let obj, fpos, objData = row + _readPackObjects inp findRepoObjRecFunc |> Seq.iteri ( fun objNo row -> + let obj, objRec, fpos = row AnsiConsole.MarkupLine( makeHeader ( sprintf "OBJECT %d: %s" objNo obj.objType ) - ( sprintf "(fpos=0x%x, size=%d)" fpos objData.Length ) + ( sprintf "(fpos=0x%x, size=%d)" fpos objRec.objData.Length ) ) printfn "" obj.dumpObj() diff --git a/git-guts/git-guts.fsproj b/git-guts/git-guts.fsproj index b4e7acb..126cd6b 100644 --- a/git-guts/git-guts.fsproj +++ b/git-guts/git-guts.fsproj @@ -13,6 +13,7 @@ + diff --git a/git-guts/tests/TestPacks.fs b/git-guts/tests/TestPacks.fs index 3db6788..68e6b12 100644 --- a/git-guts/tests/TestPacks.fs +++ b/git-guts/tests/TestPacks.fs @@ -54,7 +54,7 @@ type TestPacks () = let nObjs = readNboInt inp for objNo = 0 to nObjs-1 do let objName, _, _ = readPackIndexObject inp objNo nObjs - let obj = readPackObject packDataFname objName + let obj = findRepoObject gitTestRepo.repoDir objName Assert.IsTrue( obj.IsSome ) )