Allow any object (loose or packed) to be retrieved from a repo.

master
Pacman Ghost 2 years ago
parent 716d146a51
commit 0aab5db4a3
  1. 18
      cli/Program.fs
  2. 10
      git-guts/GitGuts.fs
  3. 26
      git-guts/GitObject.fs
  4. 69
      git-guts/GitRepo.fs
  5. 21
      git-guts/Pack.fs
  6. 61
      git-guts/PackData.fs
  7. 1
      git-guts/git-guts.fsproj
  8. 2
      git-guts/tests/TestPacks.fs

@ -40,6 +40,21 @@ type AppCommand() =
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Command-line settings for the "dump-object" command; extends the shared AppSettings.
type DumpRepoObjectSettings() =
inherit AppSettings()
// Name of the repo object to dump, bound to the -o / --obj option. Defaults to "" ,
// which the command treats as "not supplied".
[<CommandOption( "-o|--obj <OBJ-NAME>" )>]
member val ObjName = "" with get, set
/// CLI command that dumps a single object (looked up by name) from a git repo.
type DumpRepoObjectCommand() =
    inherit Command<DumpRepoObjectSettings>()

    /// Fails when no object name was given; otherwise dumps the object and returns exit code 0.
    override this.Execute( ctx, settings ) =
        match settings.ObjName with
        | "" ->
            failwith "Missing object name."
        | requestedName ->
            dumpRepoObject settings.RepoDir requestedName
            0
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
type DumpPackFileSettings() =
inherit AppSettings()
[<CommandOption( "-f|--pack-file <PACK-FILE>" )>]
@ -85,6 +100,9 @@ let main argv =
let app = CommandApp<AppCommand>()
app.Configure( fun cfg ->
cfg.SetApplicationName( System.AppDomain.CurrentDomain.FriendlyName ) |> ignore
cfg.AddCommand<DumpRepoObjectCommand>( "dump-object" ).WithDescription(
"Dump an object in a git repo."
) |> ignore
cfg.AddCommand<DumpPackFileCommand>( "dump-packfile" ).WithDescription(
"Dump a pack file."
) |> ignore

@ -53,6 +53,16 @@ module GitGuts =
buf
/// Reads a 2-byte header from `inp`, then inflates everything that follows and
/// returns the decompressed bytes.
/// Fails if the stream ends before the 2-byte header has been read.
/// NOTE(review): the header is presumably the zlib (RFC 1950) CMF/FLG pair, since this is
/// used for loose git objects - its contents are skipped, not validated.
let decompressEntireStream (inp: Stream) =
    // skip the header (ReadByte returns -1 once the end of the stream is reached)
    let hdr1 = inp.ReadByte()
    let hdr2 = inp.ReadByte()
    if hdr1 < 0 || hdr2 < 0 then
        failwith "Unexpected end of stream while reading the compression header."
    // decompress the rest of the input stream
    use zstream = new DeflateStream( inp, CompressionMode.Decompress )
    use buf = new MemoryStream()
    zstream.CopyTo( buf )
    buf.ToArray()
let readUntil (inp: Stream) (endByte: byte) =
// read bytes until the specified end byte is seen
let rec readBytes () = seq {

@ -92,14 +92,28 @@ type TagGitObject( objData ) =
// --------------------------------------------------------------------
// This is used to hold the raw data for objects extracted from a pack.
/// Raw (unparsed) object: a numeric type code plus the object's bytes.
type ObjRec =
    {
        /// Numeric object type code (makeGitObject accepts 1=commit, 2=tree, 3=blob, 4=tag).
        objType: int
        /// The object's content bytes.
        objData: byte[]
    }
[<AutoOpen>]
module GitObject =
let makeGitObject objType objData =
let makeGitObject objRec =
// create a GitObject-derived object
match objRec.objType with
| 1 -> (CommitGitObject objRec.objData) :> GitObject
| 2 -> (TreeGitObject objRec.objData) :> GitObject
| 3 -> (BlobGitObject objRec.objData) :> GitObject
| 4 -> (TagGitObject objRec.objData) :> GitObject
| _ -> failwithf "Unknown object type: %d" objRec.objType
let parseObjType objType =
match objType with
| 1 -> (CommitGitObject objData) :> GitObject
| 2 -> (TreeGitObject objData) :> GitObject
| 3 -> (BlobGitObject objData) :> GitObject
| 4 -> (TagGitObject objData) :> GitObject
| _ -> failwithf "Unknown object type: %d" objType
| "commit" -> 1
| "tree" -> 2
| "blob" -> 3
| "tag" -> 4
| _ -> failwithf "Unknown object type: %s" objType

@ -0,0 +1,69 @@
namespace git_guts
open System.IO
open System.Text
open System.Collections.Generic
// --------------------------------------------------------------------
[<AutoOpen>]
module GitRepo =

    /// Finds the raw record (type code + data) of the object called `objName` in the repo at `repoDir`.
    /// A loose object file under .git/objects is tried first; otherwise every pack reported by
    /// findRepoPacks is searched, in order, and the first hit is returned.
    /// Returns None if the object is neither loose nor in any pack.
    /// Fails if a loose object has a malformed header or its size doesn't match the header.
    let rec _findRepoObjRec (repoDir: string) (objName: string) =
        // check if the object is loose
        let fname = Path.Join( repoDir, sprintf ".git/objects/%s/%s" objName.[0..1] objName.[2..] )
        if File.Exists( fname ) then
            // yup - get it directly from there
            use inp = new FileStream( fname, FileMode.Open, FileAccess.Read, FileShare.Read )
            let data = decompressEntireStream inp
            // extract the object type and size (the header is "<type> <size>", terminated by a NUL byte)
            let pos =
                match Array.tryFindIndex (fun byt -> byt = 0uy) data with
                | Some idx -> idx
                | None -> failwithf "Missing header terminator in loose object: %s" fname
            let header = Encoding.ASCII.GetString( data, 0, pos )
            let words = header.Split( ' ' )
            if words.Length <> 2 then
                failwithf "Unexpected loose object header: %s" header
            let objType = parseObjType words.[0]
            let objSize = int( words.[1] )
            // get the object data
            let objData = data.[ pos+1 .. ]
            if objData.Length <> objSize then
                failwithf "Unexpected object data size: %d/%d" objData.Length objSize
            // return the object
            Some { objType=objType; objData=objData }
        else
            // nope - check all the packs in the repo, stopping at the first one that has the object
            // NOTE: We deliberately don't catch KeyNotFoundException here - doing so would also
            // hide errors raised while reading a pack, and report them as "object not found".
            findRepoPacks repoDir
            |> Seq.tryPick (fun packFname -> _readPackObjRec packFname objName _findRepoObjRec)

    /// Finds the specified object in the repo and wraps it as a GitObject (None if not found).
    let findRepoObject repoDir objName =
        _findRepoObjRec repoDir objName |> Option.map makeGitObject

    /// Finds and dumps the specified repo object; fails if it can't be found.
    let dumpRepoObject repoDir objName =
        match findRepoObject repoDir objName with
        | Some obj -> obj.dumpObj()
        | None -> failwith "Object not found."

    let dumpPackFile fname =
        // FUDGE! This is a wrapper function that passes in the recursively-called _findRepoObjRec function :-/
        _dumpPackFile fname _findRepoObjRec

    let dumpPackObject fname objName =
        // FUDGE! This is a wrapper function that passes in the recursively-called _findRepoObjRec function :-/
        _dumpPackObject fname objName _findRepoObjRec

@ -8,7 +8,7 @@ open System.Collections.Generic
[<AutoOpen>]
module Pack =
let dumpPackFile fname =
let internal _dumpPackFile fname findRepoObjRecFunc =
// initialize
if not ( File.Exists fname ) then
@ -17,13 +17,13 @@ module Pack =
// figure out what to do
let extn = Path.GetExtension( fname ).ToLower()
if extn = ".pack" then
_dumpPackDataFile fname
_dumpPackDataFile fname findRepoObjRecFunc
else if extn = ".idx" then
_dumpPackIndexFile fname
else
failwithf "Unknown pack file extension: %s" extn
let readPackObject fname objName :GitObject option =
let _readPackObjRec fname objName findRepoObjRecFunc =
let fpos = _findObjInPack fname objName
if fpos.IsNone then
None
@ -32,17 +32,24 @@ module Pack =
let fname2 = changeExtn fname ".pack"
use inp = new FileStream( fname2, FileMode.Open, FileAccess.Read, FileShare.Read )
inp.Seek( int64( fpos.Value ), SeekOrigin.Begin ) |> ignore
let objType, objData, fpos2 = _readPackObject inp
Some ( makeGitObject objType objData )
let objRec, fpos2 = _readPackObjRec inp findRepoObjRecFunc
Some objRec
let dumpPackObject fname objName =
let readPackObject fname objName findRepoObjRecFunc :GitObject option =
let objRec = _readPackObjRec fname objName findRepoObjRecFunc
if objRec.IsNone then
None
else
Some ( makeGitObject objRec.Value )
let _dumpPackObject fname objName findRepoObjRecFunc =
// initialize
if not ( File.Exists fname ) then
failwithf "Can't find pack file: %s" fname
// find the specified pack object
let obj = readPackObject fname objName
let obj = readPackObject fname objName findRepoObjRecFunc
if obj.IsNone then
failwith "Object not found."
obj.Value.dumpObj()

@ -69,7 +69,7 @@ module PackData =
objData
let rec internal _readPackObject (inp: Stream) =
let rec internal _readPackObjRec (inp: Stream) findRepoObjRecFunc =
// remember where the object starts in the pack data
let fpos = inp.Position
@ -91,23 +91,21 @@ module PackData =
let objSize = getShiftedBytes (byt &&& 0x8F) 0 |> Seq.fold foldShiftedByte 0
// read the object data
let mutable objType2 = -1
let mutable objData = null
let obj =
let objRec =
match objType with
| 1 | 2 | 3 | 4 -> // commit, tree, blob, tag
objData <- decompressStream inp objSize
objType2 <- objType
let objData = decompressStream inp objSize
{ objType=objType; objData=objData }
| 6 -> // ofs_delta
let deltaObjAndType = _readOfsDeltaObj inp fpos objSize
objData <- snd deltaObjAndType
objType2 <- fst deltaObjAndType
_readOfsDeltaObjRec inp fpos objSize findRepoObjRecFunc
| 7 -> // ref_delta
_readRefDeltaObjRec inp fpos objSize findRepoObjRecFunc
| _ ->
failwithf "Unknown object type: %d" objType
( objType2, objData, fpos )
( objRec, fpos )
and private _readOfsDeltaObj (inp: Stream) fpos objSize =
and private _readOfsDeltaObjRec (inp: Stream) fpos objSize findRepoObjRecFunc =
// read the base object offset
let offset = int64( readVliBe inp true )
@ -117,24 +115,43 @@ module PackData =
// IMPORTANT: The base object could itself be delta'fied.
let prevPos = inp.Position
inp.Seek( baseObjOffset, SeekOrigin.Begin ) |> ignore
let baseObjType, baseObjData, fpos = _readPackObject inp
let baseObjRec, fpos = _readPackObjRec inp findRepoObjRecFunc
inp.Seek( prevPos, SeekOrigin.Begin ) |> ignore
// reconstruct the delta'fied object
let objData = _makeDeltaObj inp baseObjData objSize
let objData = _makeDeltaObj inp baseObjRec.objData objSize
( baseObjType, objData )
{ objType=baseObjRec.objType; objData=objData }
let private _readPackObjects (inp: Stream) = seq {
/// Reconstructs a REF_DELTA object: reads the base object's name from `inp`, looks the base
/// object up in the enclosing repo via `findRepoObjRecFunc`, then applies the delta that follows.
/// The result carries the base object's type code.
/// Fails if `inp` is not a FileStream, or if the base object can't be found in the repo.
and private _readRefDeltaObjRec (inp: Stream) fpos objSize (findRepoObjRecFunc: string -> string -> ObjRec option) =
    // read the base object name
    let baseObjName = readObjName inp
    // get the base object
    // NOTE: REF_DELTA's will only work with repos stored on disk, since we need the
    // pack file's path to locate the repo it belongs to.
    let fstream =
        match inp with
        | :? FileStream as fs -> fs
        | _ -> failwith "REF_DELTA objects can only be read from a pack file stored on disk."
    let dname = Path.GetDirectoryName( fstream.Name )
    // the repo root is taken to be three levels above the directory holding the pack file
    let repoDir = Path.GetFullPath( Path.Join( dname, "../../.." ) )
    // FUDGE! The algorithm for reconstructing objects from base object(s) is inherently recursive,
    // and since there isn't a way to forward declare a function, everyone has to pass around
    // a reference to the _findRepoObjRec function, so that we can use it here :-/
    match findRepoObjRecFunc repoDir baseObjName with
    | None ->
        failwithf "Can't find base object: %s" baseObjName
    | Some baseObjRec ->
        // reconstruct the delta'fied object
        let objData = _makeDeltaObj inp baseObjRec.objData objSize
        { objType=baseObjRec.objType; objData=objData }
let private _readPackObjects (inp: Stream) findRepoObjRecFunc = seq {
// read each object
let endPos = inp.Length - 20L // nb: we ignore the 20-byte checksum at the end
while inp.Position < endPos do
let objType, objData, fpos = _readPackObject inp
let obj = makeGitObject objType objData
yield obj, fpos, objData
let objRec, fpos = _readPackObjRec inp findRepoObjRecFunc
let obj = makeGitObject objRec
yield obj, objRec, fpos
}
let internal _dumpPackDataFile fname =
let internal _dumpPackDataFile fname findRepoObjRecFunc =
// initialize
use inp = new FileStream( fname, FileMode.Open, FileAccess.Read, FileShare.Read )
@ -143,11 +160,11 @@ module PackData =
let version, nObjs = _readPackDataHeader inp
// dump each object
_readPackObjects inp |> Seq.iteri ( fun objNo row ->
let obj, fpos, objData = row
_readPackObjects inp findRepoObjRecFunc |> Seq.iteri ( fun objNo row ->
let obj, objRec, fpos = row
AnsiConsole.MarkupLine( makeHeader
( sprintf "OBJECT %d: %s" objNo obj.objType )
( sprintf "(fpos=0x%x, size=%d)" fpos objData.Length )
( sprintf "(fpos=0x%x, size=%d)" fpos objRec.objData.Length )
)
printfn ""
obj.dumpObj()

@ -13,6 +13,7 @@
<Compile Include="PackData.fs" />
<Compile Include="PackIndex.fs" />
<Compile Include="Pack.fs" />
<Compile Include="GitRepo.fs" />
</ItemGroup>
<ItemGroup>

@ -54,7 +54,7 @@ type TestPacks () =
let nObjs = readNboInt inp
for objNo = 0 to nObjs-1 do
let objName, _, _ = readPackIndexObject inp objNo nObjs
let obj = readPackObject packDataFname objName
let obj = findRepoObject gitTestRepo.repoDir objName
Assert.IsTrue( obj.IsSome )
)

Loading…
Cancel
Save