Compare commits

...

5 Commits

  1. 9
      README.md
  2. 76
      cli/Program.fs
  3. 17
      git-guts/GitGuts.fs
  4. 24
      git-guts/GitRepo.fs
  5. 93
      git-guts/Logs.fs
  6. 10
      git-guts/PackData.fs
  7. 106
      git-guts/PackIndex.fs
  8. 66
      git-guts/Refs.fs
  9. 309
      git-guts/StagingIndex.fs
  10. 107
      git-guts/Utils.fs
  11. 183
      git-guts/Verify.fs
  12. 4
      git-guts/git-guts.fsproj
  13. 41
      git-guts/tests/TestIndex.fs
  14. 39
      git-guts/tests/TestLogs.fs
  15. 9
      git-guts/tests/TestPacks.fs
  16. 48
      git-guts/tests/TestRefs.fs
  17. 49
      git-guts/tests/TestVerify.fs
  18. 53
      git-guts/tests/Utils.fs
  19. 0
      git-guts/tests/fixtures/empty.logs.txt
  20. 6
      git-guts/tests/fixtures/empty.refs-packed.txt
  21. 6
      git-guts/tests/fixtures/empty.refs.txt
  22. 0
      git-guts/tests/fixtures/empty.staging-index.txt
  23. BIN
      git-guts/tests/fixtures/empty.zip
  24. 57
      git-guts/tests/fixtures/full2.logs.txt
  25. 113
      git-guts/tests/fixtures/full2.pack-data.txt
  26. 43
      git-guts/tests/fixtures/full2.pack-index.txt
  27. 10
      git-guts/tests/fixtures/full2.refs-packed.txt
  28. 12
      git-guts/tests/fixtures/full2.refs.txt
  29. BIN
      git-guts/tests/fixtures/full2.zip
  30. 19
      git-guts/tests/fixtures/simple.logs.txt
  31. 42
      git-guts/tests/fixtures/simple.staging-index.txt
  32. 3
      git-guts/tests/tests.fsproj

@ -0,0 +1,9 @@
A small library that examines the internal files of a git repo.
It can handle:
- objects, stored loose or in packs (including ref and offset deltas)
- the staging index
- refs
- reflogs
Compile the `cli` project (a CLI wrapper around the main library), then run it with `--help` to get help.

@ -85,6 +85,65 @@ type DumpPackObjectCommand() =
dumpPackObject fname settings.ObjName
0
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Settings for the "dump-stagingindex" command (on top of whatever AppSettings declares).
type DumpStagingIndexSettings() =
inherit AppSettings()
// nb: -f/--full is off by default, and is passed to dumpStagingIndex as its fullDump argument
[<CommandOption( "-f|--full" )>]
member val FullDump = false with get, set
// Handler for the "dump-stagingindex" command: dumps the repo's staging index, then returns exit code 0.
type DumpStagingIndexCommand() =
inherit Command<DumpStagingIndexSettings>()
override this.Execute( ctx, settings ) =
dumpStagingIndex settings.RepoDir settings.FullDump
0
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Settings for the "dump-refs" command (nothing beyond what AppSettings declares).
type DumpRefsSettings() =
inherit AppSettings()
// Handler for the "dump-refs" command: dumps the repo's refs, then returns exit code 0.
type DumpRefsCommand() =
inherit Command<DumpRefsSettings>()
override this.Execute( ctx, settings ) =
dumpRefs settings.RepoDir
0
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Settings for the "dump-logs" command (nothing beyond what AppSettings declares).
type DumpLogsSettings() =
inherit AppSettings()
// Handler for the "dump-logs" command: dumps the repo's logs, then returns exit code 0.
type DumpLogsCommand() =
inherit Command<DumpLogsSettings>()
override this.Execute( ctx, settings ) =
dumpLogs settings.RepoDir
0
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Settings for the "verify-objects" command (on top of whatever AppSettings declares).
type VerifyObjectsSettings() =
inherit AppSettings()
// nb: -p/--progress is off by default, and is passed to verifyObjects as its second argument
[<CommandOption( "-p|--progress" )>]
member val Progress = false with get, set
// Handler for the "verify-objects" command: runs verifyObjects on the repo, then returns exit code 0.
type VerifyObjectsCommand() =
inherit Command<VerifyObjectsSettings>()
override this.Execute( ctx, settings ) =
verifyObjects settings.RepoDir settings.Progress
0
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Settings for the "verify-logs" command (nothing beyond what AppSettings declares).
type VerifyLogsSettings() =
inherit AppSettings()
// Handler for the "verify-logs" command: runs verifyLogs on the repo, then returns exit code 0.
type VerifyLogsCommand() =
inherit Command<VerifyLogsSettings>()
override this.Execute( ctx, settings ) =
verifyLogs settings.RepoDir
0
// --------------------------------------------------------------------
[<EntryPoint>]
@ -94,7 +153,7 @@ let main argv =
// colors (see ColorSystemDetector.Detect()), instead of checking if stdout is connected to a terminal,
// so we do that here.
if Console.IsOutputRedirected then
disableSpectreCapabilities
disableSpectreCapabilities ()
// parse the command-line arguments
let app = CommandApp<AppCommand>()
@ -109,5 +168,20 @@ let main argv =
cfg.AddCommand<DumpPackObjectCommand>( "dump-packobject" ).WithDescription(
"Dump a pack object."
) |> ignore
cfg.AddCommand<DumpStagingIndexCommand>( "dump-stagingindex" ).WithDescription(
"Dump the staging index."
) |> ignore
cfg.AddCommand<DumpRefsCommand>( "dump-refs" ).WithDescription(
"Dump references."
) |> ignore
cfg.AddCommand<DumpLogsCommand>( "dump-logs" ).WithDescription(
"Dump logs."
) |> ignore
cfg.AddCommand<VerifyObjectsCommand>( "verify-objects" ).WithDescription(
"Verify retrieval of objects from a git repo."
) |> ignore
cfg.AddCommand<VerifyLogsCommand>( "verify-logs" ).WithDescription(
"Verify logs in a git repo."
) |> ignore
)
app.Run( argv )

@ -26,8 +26,7 @@ module GitGuts =
// FUDGE! In Python, we ask zlib to decompress bytes from the stream until it's done, but in .NET,
// we have to tell it how many bytes of uncompressed data we want, which doesn't seem to work when
// the byte count is 0 :-/ I'm not sure if skipping bytes like this is right, but it'll do for now...
for i = 0 to 5 do
inp.ReadByte() |> ignore
readBytes inp 6 |> ignore
[||]
else
use zstream = new DeflateStream( inp, CompressionMode.Decompress, true )
@ -82,15 +81,17 @@ module GitGuts =
let readObjName (inp: Stream) =
// read an object name (20 raw bytes) and return it as a hex-string
let buf = Array.zeroCreate 20
inp.Read( buf, 0, 20 ) |> ignore
let buf = readBytes inp 20
Convert.ToHexString( buf ).ToLower()
let readNboInt (inp: Stream) =
let _readNboInt (inp: Stream) nBytes =
// read a network-byte-order int
let buf = Array.zeroCreate 4
inp.Read( buf, 0, 4 ) |> ignore
( int(buf.[0]) <<< 24 ) ||| ( int(buf.[1]) <<< 16 ) ||| ( int(buf.[2]) <<< 8 ) ||| int(buf.[3])
let getBytes = Seq.initInfinite ( fun n -> inp.ReadByte() )
let foldByte acc byt =
( acc <<< 8 ) ||| int( byt )
Seq.take nBytes getBytes |> Seq.fold foldByte 0
let readNboInt4 (inp: Stream) = _readNboInt inp 4
let readNboInt2 (inp: Stream) = _readNboInt inp 2
let readVliBe (inp: Stream) isOffsetEncoding =
// read a variable-length integer (big-endian)

@ -2,6 +2,7 @@ namespace git_guts
open System.IO
open System.Text
open System.Text.RegularExpressions
open System.Collections.Generic
// --------------------------------------------------------------------
@ -60,6 +61,29 @@ module GitRepo =
failwith "Object not found."
obj.Value.dumpObj()
let getObjNames repoDir = seq {
    // Return every object in the repo, as (object name, name of the file it was found in) pairs.
    // Loose objects are returned first, then the objects in each pack. No de-duplication is done,
    // so an object that is found in more than one place will be returned more than once.

    // return all loose objects in the repo
    let dname = Path.Join( repoDir, ".git/objects" )
    let regex = Regex( @"/([0-9a-f]{2})/([0-9a-f]{38}$)" )
    for fname in Directory.GetFiles( dname, "*", SearchOption.AllDirectories ) do
        // nb: we normalize the directory separators, so that the regex works on every platform
        let fname2 = fname.Replace( Path.DirectorySeparatorChar, '/' )
        let m = regex.Match( fname2 )
        if m.Success then
            // nb: the object name is the 2-char directory name + the 38-char file name
            let objName = m.Groups.[1].Value + m.Groups.[2].Value
            yield objName, fname2

    // return all objects in each pack
    for fname in findRepoPacks repoDir do
        // FUDGE! Can't yield from inside the onObject callback, so we collect the objects first :-/
        // NOTE: We collect into a ResizeArray, since appending to an immutable list copies
        // the entire list each time, which is quadratic in the number of objects in the pack.
        let objs = ResizeArray<string * string>()
        _readPackIndexFile fname ( fun fanout -> () ) ( fun objNo nObjs objName crc offset ->
            objs.Add( ( objName, fname ) )
        )
        yield! objs
}
let dumpPackFile fname =
// FUDGE! This is a wrapper function that passes in the recursively-called _findRepoObjRec function :-/
_dumpPackFile fname _findRepoObjRec

@ -0,0 +1,93 @@
namespace git_guts
open System
open System.IO
open System.Text.RegularExpressions
open Spectre.Console
// --------------------------------------------------------------------
type LogEntry =
    {
        entryType: string option
        prevRef: string option
        nextRef: string option
        userName: string
        userEmail: string
        tstamp: int * string // epoch timestamp + timezone
        msg: string option
    }

    member this.dumpLogEntry =
        // Write this log entry to the console, as 3 lines:
        //   <entry type>[: <message>]
        //   <user name> (<user email>) <timestamp> <timezone>
        //   [<prev ref> ]->[ <next ref>]

        // show the entry type and message
        let entryType =
            match this.entryType with
            | Some etype -> etype
            | None -> "(no type)"
        AnsiConsole.Markup( "[cyan]{0}[/]", entryType )
        match this.msg with
        | Some msg -> printf ": %s" msg
        | None -> ()
        printfn ""

        // show who made the change, and when
        let epochSecs, tzone = this.tstamp
        let dt = parseTimestamp epochSecs
        let tstamp = sprintf "%s %s" (dt.ToString "yyyy-MM-dd HH:mm:ss") tzone
        printfn "%s (%s) %s" this.userName this.userEmail tstamp

        // show how the ref changed (nb: either side may be missing)
        match this.prevRef with
        | Some prevRef -> AnsiConsole.Markup( "{0} ", objNameStr prevRef )
        | None -> ()
        printf "->"
        match this.nextRef with
        | Some nextRef -> AnsiConsole.Markup( " {0}", objNameStr nextRef )
        | None -> ()
        printfn ""
// --------------------------------------------------------------------
[<AutoOpen>]
module Logs =
let internal _findLogFiles repoDir = seq {
    // Return a (ref, file name) pair for every file under the repo's logs directory, where the ref
    // is the file's path relative to that directory. Nothing is returned if the directory doesn't exist.
    let logsDir = Path.Join( repoDir, "/.git/logs" )
    if Directory.Exists( logsDir ) then
        let logsDirPrefix = logsDir + string Path.DirectorySeparatorChar
        let logFiles = Directory.GetFiles( logsDir, "*", SearchOption.AllDirectories )
        for logFile in logFiles do
            // nb: this should never happen, since we only searched under the logs directory
            if not ( logFile.StartsWith( logsDirPrefix ) ) then
                failwithf "Unexpected log filename: %s" logFile
            yield logFile.Substring( logsDirPrefix.Length ), logFile
}
let internal _readLogFile fname = seq {
    // Parse a log file, returning a LogEntry for each line. Each line is expected to look like this:
    //   <prev ref> <next ref> <user name> <<user email>> <epoch timestamp> <timezone>[ <entry type>[: <message>]]
    // Fails if a line can't be parsed.
    let regex = Regex( @"^([0-9a-f]{40}) ([0-9a-f]{40}) (.+?) \<(.+?)\> (\d+) ([+-]\d{4})(\s+[^:]+)?" )
    for line in File.ReadLines( fname ) do
        let line2 = line.Trim()
        let m = regex.Match( line2 )
        if not m.Success then
            failwithf "Couldn't parse log line: %s" line2
        let groups = m.Groups
        let prevRef, nextRef = groups.[1].Value, groups.[2].Value
        let userName, userEmail = groups.[3].Value, groups.[4].Value
        let tstamp, tzone = groups.[5].Value, groups.[6].Value
        let entryType = if groups.[7].Success then Some( groups.[7].Value.Trim() ) else None
        let msg =
            // NOTE: Anything left over after the match is a 2-character separator, then the message.
            let matchLen = groups.[0].Length
            if matchLen >= line2.Length then
                None
            else if matchLen + 2 >= line2.Length then
                // NOTE: There's nothing after the separator (e.g. the line ends with a bare ":"),
                // so we have to be careful not to read past the end of the line.
                Some ""
            else
                Some ( line2.Substring( matchLen + 2 ) )
        // nb: an all-zeroes object name means "no ref"
        let checkRef ref =
            if ref = "0000000000000000000000000000000000000000" then None else Some ref
        yield {
            entryType = entryType
            prevRef = checkRef prevRef; nextRef = checkRef nextRef
            userName = userName; userEmail = userEmail
            tstamp = ( int(tstamp), tzone )
            msg = msg
        }
}
let dumpLogs repoDir =
    // Dump every log file in the repo, ordered by ref (so that the output is stable).
    let logFiles = _findLogFiles repoDir |> Seq.sortBy fst |> Seq.indexed
    for logNo, (ref, fname) in logFiles do
        // nb: consecutive logs are separated by a blank line
        if logNo <> 0 then
            printfn ""
        AnsiConsole.MarkupLine( makeHeader ref "" )
        // nb: each entry is preceded by a blank line
        for entry in _readLogFile fname do
            printfn ""
            entry.dumpLogEntry

@ -14,14 +14,13 @@ module PackData =
let private _readPackDataHeader (inp: Stream) =
// read the header
let buf = Array.zeroCreate 4
inp.Read( buf, 0, 4 ) |> ignore
let buf = readBytes inp 4
if (Encoding.ASCII.GetString buf) <> "PACK" then
failwithf "Incorrect magic number: %A" buf
let version = readNboInt inp
let version = readNboInt4 inp
if version <> 2 then
failwithf "Unsupported pack file version: %d" version
let nObjs = readNboInt inp
let nObjs = readNboInt4 inp
( version, nObjs )
@ -58,8 +57,7 @@ module PackData =
else
// add new data
let nBytes = byt &&& 0x7f
let buf = Array.zeroCreate nBytes
tdataStream.Read( buf, 0, nBytes ) |> ignore
let buf = readBytes tdataStream nBytes
yield! buf
yield! getBytes()
}

@ -16,11 +16,10 @@ module PackIndex =
// NOTE: v1 index files don't have a version number, and just start with the fanout table, but
// the magic number we check for here is an unreasonable value for fanout[0], so this is how
// we detect the different versions.
let buf = Array.zeroCreate 4
inp.Read( buf, 0, 4 ) |> ignore
let buf = readBytes inp 4
if buf <> [|0xffuy; 116uy; 79uy; 99uy|] then // nb: 0xff, then "tOc"
failwithf "Incorrect magic number: %A" buf
let version = readNboInt inp
let version = readNboInt4 inp
if version <> 2 then
// NOTE: Version 1 is also valid, but quite old, and we don't support it.
failwithf "Unsupported pack index version: %d" version
@ -39,13 +38,13 @@ module PackIndex =
// read the fanout table
let byte0 = Int32.Parse( objName.[0..1], Globalization.NumberStyles.AllowHexSpecifier )
inp.Seek( int64( 4*byte0 ), SeekOrigin.Current ) |> ignore
let endIndex = readNboInt inp
let endIndex = readNboInt4 inp
let startIndex =
if byte0 = 0 then
0
else
inp.Seek( -8L, SeekOrigin.Current ) |> ignore
readNboInt inp
readNboInt4 inp
// NOTE: We now have two indexes into the table of object names:
// - startIndex = index of an object name <= the name we're looking for
// - endIndex = index of an object name > the name we're looking for
@ -67,9 +66,9 @@ module PackIndex =
if midObjName = objName then
// yup - get the object's offset in the pack data
seekTo ( fposNames - 4 )
let nObjs = readNboInt inp // nb: this is the last value in the fanout table
let nObjs = readNboInt4 inp // nb: this is the last value in the fanout table
seekTo ( fposNames + 20*nObjs + 4*nObjs + 4*midIndex )
Some ( readNboInt inp )
Some ( readNboInt4 inp )
else
// nope - continue the binary search
if midObjName < objName then
@ -92,63 +91,80 @@ module PackIndex =
seekTo fposNames 20 objNo
let objName = readObjName inp
seekTo fposCrcs 4 objNo
let crc = readNboInt inp
let crc = readNboInt4 inp
seekTo fposOffsets 4 objNo
let offset = readNboInt inp
let offset = readNboInt4 inp
if offset &&& 0x80000000 <> 0 then
failwithf "Large offsets are not supported."
( objName, crc, offset )
let internal _dumpPackIndexFile fname =
let internal _readPackIndexFile fname onFanoutTable onObject =
// initialize
use inp = new FileStream( fname, FileMode.Open, FileAccess.Read, FileShare.Read )
let fname2 = changeExtn fname ".idx"
use inp = new FileStream( fname2, FileMode.Open, FileAccess.Read, FileShare.Read )
// read the header
let version = _readPackIndexHeader inp
// read the fanout table
let getFanoutVals = Seq.initInfinite (fun n -> readNboInt inp)
let getFanoutVals = Seq.initInfinite (fun n -> readNboInt4 inp)
let fanout = Seq.take 256 getFanoutVals |> Seq.toArray
onFanoutTable fanout
let nObjs = fanout.[255]
// dump the fanout table header
AnsiConsole.MarkupLine( "{0}", makeHeader "FANOUT" "" )
printfn ""
let fieldWidth = Math.Max( String.Format( "{0}", nObjs ).Length, 2 )
let fmt = String.Format( "{{0,{0}}}", fieldWidth )
printf " "
for col = 0 to 15 do
let iVal = String.Format( "{0:x2}", col )
printf " %s" ( String.Format( fmt, iVal ) )
printfn ""
printf " "
for col = 0 to 15 do
let ruler = String( '-', fieldWidth )
printf " %s" ruler
printfn ""
// dump the fanout table
for row = 0 to 15 do
printf "%02x:" (16 * row)
for col = 0 to 15 do
let fanoutVal = String.Format( fmt, fanout.[16*row+col] )
printf " %s" fanoutVal
printfn ""
// dump the objects
let fieldWidth2 = String.Format( "{0}", nObjs ).Length
printfn ""
let hdr = sprintf "OBJECTS (%d)" nObjs
AnsiConsole.MarkupLine( "{0}", makeHeader hdr "" )
printfn ""
let prefix = String( ' ', fieldWidth2 )
printfn "%s name crc offset" prefix
printfn "%s ---------------------------------------- -------- --------" prefix
let fmt = sprintf "{0,%d}: {1} {2:x8} 0x{3:x}" fieldWidth2
for objNo = 0 to nObjs-1 do
let objName, crc, offset = readPackIndexObject inp objNo nObjs
onObject objNo nObjs objName crc offset
let internal _dumpPackIndexFile fname =
// Dump a pack index file: first the fanout table (as a 16x16 grid), then one line for each object.
// The file is read by _readPackIndexFile, which calls back into the 2 functions passed in here.
_readPackIndexFile fname ( fun fanout ->
// dump the fanout table header
AnsiConsole.MarkupLine( "{0}", makeHeader "FANOUT" "" )
printfn ""
// nb: the last fanout value is used as the object count, and also sets the column width (min. 2)
let nObjs = fanout.[255]
let fieldWidth = Math.Max( String.Format( "{0}", nObjs ).Length, 2 )
let fmt = String.Format( "{{0,{0}}}", fieldWidth )
printf " "
// column headings (00-0f)
for col = 0 to 15 do
let iVal = String.Format( "{0:x2}", col )
printf " %s" ( String.Format( fmt, iVal ) )
printfn ""
printf " "
// ruler under the column headings
for col = 0 to 15 do
let ruler = String( '-', fieldWidth )
printf " %s" ruler
printfn ""
// dump the fanout table
for row = 0 to 15 do
// nb: each row is labelled with the index of its first fanout entry
printf "%02x:" (16 * row)
for col = 0 to 15 do
let fanoutVal = String.Format( fmt, fanout.[16*row+col] )
printf " %s" fanoutVal
printfn ""
) ( fun objNo nObjs objName crc offset ->
// nb: object numbers are right-aligned to the width of the object count
let fieldWidth = String.Format( "{0}", nObjs ).Length
if objNo = 0 then
// output the header
printfn ""
let hdr = sprintf "OBJECTS (%d)" nObjs
AnsiConsole.MarkupLine( "{0}", makeHeader hdr "" )
printfn ""
let prefix = String( ' ', fieldWidth )
printfn "%s name crc offset" prefix
printfn "%s ---------------------------------------- -------- --------" prefix
// dump the next object
let fmt = sprintf "{0,%d}: {1} {2:x8} 0x{3:x}" fieldWidth
AnsiConsole.MarkupLine( fmt,
objNo, objNameStr objName, crc, offset
)
)

@ -0,0 +1,66 @@
namespace git_guts
open System.IO
open System.Text
open Spectre.Console
// --------------------------------------------------------------------
[<AutoOpen>]
module Refs =
let private _findLooseRefs repoDir refType = seq {
    // Find the loose refs of the specified type (e.g. "heads" or "tags") in the git repo,
    // returning a (ref name, object name) pair for each one. Nothing is returned if the
    // directory for that type of ref doesn't exist.
    let refsDir = Path.Join( repoDir, ".git/refs", refType )
    if Directory.Exists refsDir then
        // NOTE: Ref names can contain slashes (e.g. "feature/foo"), in which case they are stored
        // in sub-directories, so we need to search recursively, and report each ref using its path
        // relative to the refs directory (always using forward-slashes, as git does).
        for fname in Directory.GetFiles( refsDir, "*", SearchOption.AllDirectories ) do
            let refName = Path.GetRelativePath( refsDir, fname ).Replace( Path.DirectorySeparatorChar, '/' )
            let objName = File.ReadAllText( fname, Encoding.ASCII ).Trim()
            yield ( refName, objName )
}
let private _findPackedRefs repoDir = seq {
    // Find the packed refs in the git repo, returning a (ref name, object name, target commit) tuple
    // for each one. The target commit is only set for annotated tags (it comes from the "^..." line
    // that follows them). Nothing is returned if there is no packed-refs file.
    let packedRefsFile = Path.Join( repoDir, ".git/packed-refs" )
    if File.Exists packedRefsFile then
        // NOTE: We can't return a ref as soon as we read it, because it might be an annotated tag,
        // in which case we need the next line to get its target commit, so we hold it back until
        // we've seen what comes after it.
        let mutable pending = None
        for rawLine in File.ReadLines( packedRefsFile ) do
            let entry = rawLine.Trim()
            // nb: blank lines and comments are ignored
            let isBlankOrComment = entry.Length = 0 || entry.[0] = '#'
            if not isBlankOrComment then
                if entry.[0] = '^' then
                    // the previous line is an annotated tag, this line is the target commit
                    let ref, objName = pending.Value
                    yield ( ref, objName, Some ( entry.Substring 1 ) )
                    pending <- None
                else
                    // the previous line was a normal ref - we can now return it to the caller
                    match pending with
                    | Some ( ref, objName ) -> yield ( ref, objName, None )
                    | None -> ()
                    // hold back the current line: <40-char object name> <ref name>
                    pending <- Some ( entry.Substring(41), entry.Substring(0,40) )
        // return the final ref (if there's one still being held back)
        match pending with
        | Some ( ref, objName ) -> yield ( ref, objName, None )
        | None -> ()
}
let dumpRefs repoDir =
// Dump the refs in the repo: the loose branches ("heads"), then the loose tags, then the packed refs.
// dump the loose refs
for refType in [| "heads"; "tags" |] do
AnsiConsole.MarkupLine( makeHeader refType "" )
// nb: the refs are sorted by name
let looseRefs = _findLooseRefs repoDir refType |> Seq.sortBy ( fun r -> fst r ) |> Seq.toList
if looseRefs.Length > 0 then
printfn ""
// each ref is shown as: <ref name> -> <object name>
for ref, objName in looseRefs do
AnsiConsole.MarkupLine( "{0} -> {1}", refStr ref, objNameStr objName )
printfn ""
// dump the packed refs
AnsiConsole.MarkupLine( makeHeader "packed refs" "" )
printfn ""
// nb: the refs are sorted by name
let packedRefs = _findPackedRefs repoDir |> Seq.sortBy ( fun (r, _, _) -> r )
for ref, objName, target in packedRefs do
AnsiConsole.Markup( "{0} -> {1}", refStr ref, objNameStr objName )
// nb: a target is only present for annotated tags, and is shown as a second hop
if target.IsSome then
AnsiConsole.Markup( " -> {0}", objNameStr target.Value )
printfn ""

@ -0,0 +1,309 @@
namespace git_guts
open System
open System.IO
open System.Text
open Spectre.Console
// --------------------------------------------------------------------
// This records a single entry in a repo's staging index.
type StagingIndexEntry =
    {
        ctime: int * int // nb: seconds + nanoseconds
        mtime: int * int // nb: seconds + nanoseconds
        dev: int
        ino: int
        objType: int
        perms: uint16
        uid: int
        gid: int
        fileSize: int
        objName: string
        flags: uint16
        extendedFlags: uint16 option
        path: byte[] // nb: the encoding is unknown
    }

    // the object types and flag bits we know the names of
    static member private _OBJECT_TYPES = Map[ ( 0x08, "regular file" ); ( 0x0a, "symlink" ); ( 0x0e, "gitlink" ) ]
    static member private _FLAG_NAMES = Map[ (0x8000u, "assume-valid"); (0x4000u, "extended") ]
    static member private _EXTENDED_FLAG_NAMES = Map[ (0x4000u, "skip-worktree"); (0x2000u, "intent-to-add") ]

    member private this._flagsStr =
        // return the StagingIndexEntry's flags as a string
        let mutable vals = []
        let flags = uint( this.flags )
        // NOTE: bitflagString returns either "<flag names> (0xNNNN)" or just "0xNNNN". We want
        // the flag names (if any) to appear with our other values, so we pull the 2 parts apart.
        let bflags = bitflagString flags 2 StagingIndexEntry._FLAG_NAMES
        if bflags.Length >= 10 then
            vals <- vals @ [ bflags.[ 0 .. bflags.Length-10 ] ]
        vals <- vals @ [ sprintf "stage=%d" ((flags &&& 0x3000u) >>> 12) ]
        // NOTE: The "name length" field is for the entry path name, not the object name.
        let namelen = flags &&& 0x0fffu
        vals <- vals @ [
            if namelen < 0xfffu then sprintf "namelen=%d" namelen else "namelen=0xFFF"
        ]
        let valsStr = String.Join( ", ", vals )
        let bflags2 = if bflags.Length >= 8 then bflags.Substring( bflags.Length-7, 6 ) else bflags
        sprintf "%s (%s)" valsStr bflags2

    member private this._xflagsStr =
        // return the StagingIndexEntry's extended flags as a string
        // NOTE: This must only be called if the entry has extended flags.
        let xflags = uint( this.extendedFlags.Value )
        bitflagString xflags 2 StagingIndexEntry._EXTENDED_FLAG_NAMES

    member this.dumpObj fullDump =
        // Dump the entry to the console. If fullDump is set, the file's owner, timestamps
        // and device/inode numbers are also shown.
        // NOTE: The encoding for the path is actually unknown :-/ Encoding.UTF8 uses replacement fallback.
        AnsiConsole.MarkupLine( "- path: {0}", pathStr (Encoding.UTF8.GetString( this.path )) )
        AnsiConsole.MarkupLine( "- name: {0}", objNameStr this.objName )
        printfn "- flags: %s" this._flagsStr
        if this.extendedFlags.IsSome then
            printfn " %s" this._xflagsStr
        // NOTE: We can be given object types we don't have a name for (e.g. entries that refer
        // to a directory), and we don't want to crash just because we can't describe them.
        let objTypeName =
            match Map.tryFind this.objType StagingIndexEntry._OBJECT_TYPES with
            | Some name -> name
            | None -> "unknown"
        printfn "- type: %s (%d)" objTypeName this.objType
        printfn "- size: %d" this.fileSize
        printfn "- perms: %s" (permsString this.perms)
        if fullDump then
            printfn "- uid: %d" this.uid
            printfn "- gid: %d" this.gid
            let makeTimeStr timeVal =
                // nb: the time is shown in the local timezone, followed by the raw <seconds>.<nanoseconds>
                let epoch = DateTime( 1970, 1, 1, 0, 0, 0, DateTimeKind.Utc )
                let dt = epoch.AddSeconds( float( fst timeVal ) )
                sprintf "%s (%d.%09d)" (dt.ToLocalTime().ToString("yyyy-MM-dd HH:mm:ss")) (fst timeVal) (snd timeVal)
            printfn "- ctime: %s" (makeTimeStr this.ctime)
            printfn "- mtime: %s" (makeTimeStr this.mtime)
            printfn "- dev: %d" this.dev
            printfn "- ino: %d" this.ino
// --------------------------------------------------------------------
[<AbstractClass>]
type StagingIndexExtension () =
// Base class for extensions stored in the staging index.
// the extension's signature (the derived classes in this file return "TREE" and "REUC")
abstract member extnSig: string
// dump the extension's contents to the console
abstract member dumpExtn: unit -> unit
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Used to hold a TREE extension.
type TreeExtensionEntry =
    {
        path: byte[] // nb: the encoding is unknown
        nEntries: int
        nSubTrees: int
        objName: string option
    }

    member this.dumpEntry =
        // Dump this TREE entry to the console (the object name is only shown if there is one).
        // NOTE: We don't actually know what encoding the path uses :-/ Encoding.UTF8 uses replacement fallback.
        let path = Encoding.UTF8.GetString this.path
        AnsiConsole.MarkupLine( "- path = {0}", pathStr path )
        printfn " - entries: %d" this.nEntries
        printfn " - subtrees: %d" this.nSubTrees
        match this.objName with
        | Some objName -> AnsiConsole.MarkupLine( " - name: {0}", objNameStr objName )
        | None -> ()
type TreeExtension( extnData: byte[] ) =
    inherit StagingIndexExtension ()

    override this.extnSig = "TREE"

    member val entries =
        // Parse the TREE extension data (this is done once, when the object is created).
        // Each entry is stored as:
        //   <path> NUL <entry count (ASCII)> SPACE <subtree count (ASCII)> LF [<object name>]
        use buf = new MemoryStream( extnData )
        [|
            while buf.Position < buf.Length do
                let path = readUntil buf 0uy
                let nEntries = int( Encoding.ASCII.GetString( readUntil buf 0x20uy ) )
                let nSubTrees = int( Encoding.ASCII.GetString( readUntil buf 0x0auy ) )
                // nb: there is no object name if the entry count is -1
                let objName = if nEntries <> -1 then Some( readObjName buf ) else None
                yield { path=path; nEntries=nEntries; nSubTrees=nSubTrees; objName=objName }
        |]

    override this.dumpExtn () =
        // dump each entry in the TREE extension
        this.entries |> Array.iter ( fun entry -> entry.dumpEntry )
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Used to hold a REUC extension.
type ReucExtensionEntry =
    {
        path: byte[] // nb: the encoding is unknown
        stages: ( string * string option )[] // perms + object name
    }

    member this.dumpEntry =
        // Dump this REUC entry to the console: the path, then a line for each stage (numbered from 1).
        // NOTE: We don't actually know what encoding the path uses :-/ Encoding.UTF8 uses replacement fallback.
        let path = Encoding.UTF8.GetString this.path
        AnsiConsole.MarkupLine( "- path = {0}", pathStr path )
        this.stages |> Array.iteri ( fun stageNo (perms, objName) ->
            // nb: the object name is optional, and is shown after the perms (if it's there)
            let objName2 =
                match objName with
                | Some name -> sprintf " %s" name
                | None -> ""
            AnsiConsole.MarkupLine( " - stage {0}: {1}{2}", stageNo+1, perms, objNameStr objName2 )
        )
type ReucExtension( extnData: byte[] ) =
    inherit StagingIndexExtension ()

    override this.extnSig = "REUC"

    member val entries =
        // Parse the REUC extension data (this is done once, when the object is created).
        // Each entry is stored as a path, then 3 modes, then an object name for each
        // mode that isn't "0".
        use buf = new MemoryStream( extnData )
        [|
            while buf.Position < buf.Length do
                let path = readUntil buf 0uy
                // NOTE: All 3 modes must be read before any of the object names.
                let modes = [| for _ in 1 .. 3 -> readString buf "ascii" |] // nb: these are ASCII octal numbers
                let objNames = modes |> Array.map ( fun mode ->
                    if mode = "0" then None else Some( readObjName buf )
                )
                let stages = Array.zip modes objNames
                yield { path=path; stages=stages }
        |]

    override this.dumpExtn () =
        // dump each entry in the REUC extension
        this.entries |> Array.iter ( fun entry -> entry.dumpEntry )
// --------------------------------------------------------------------
[<AutoOpen>]
module StagingIndex =
let private _readStagingIndexHeader (inp: Stream) =
    // Read the staging index header (the magic number, then the version), and return the version.
    // Fails if the magic number is wrong, or the version is not 2, 3 or 4.
    let magic = readBytes inp 4
    if (Encoding.ASCII.GetString magic) <> "DIRC" then
        failwithf "Incorrect magic number: %A" magic
    match readNboInt4 inp with
    | 2 | 3 | 4 as version -> version
    | version -> failwithf "Unexpected version: %d" version
let private _readStagingIndexEntry (inp: Stream) version =
    // Read the next staging index entry from the stream, returning the entry, and the file position
    // it started at. Fails for version 4 index files, since their path format is not supported.
    // NOTE: Entries usually represent a file, but can sometimes refer to a directory (if sparse checkout
    // is enabled in cone mode, and the sparse index extension is enabled), in which case:
    // - mode = 040000
    // - has SKIP_WORKTREE in the extended flags
    // - the path ends with a directory separator
    // IMPORTANT! We assume we're not in split index mode (the entry format is completely different).

    let readPath () =
        if version = 4 then
            // NOTE: The path format is completely different in v4, so for simplicity, we don't support it.
            failwith "Version 4 is not supported."
        readUntil inp 0uy

    // read the next staging index entry
    let fposStart = inp.Position
    let ctime = ( readNboInt4 inp, readNboInt4 inp )
    let mtime = ( readNboInt4 inp, readNboInt4 inp )
    let dev = readNboInt4 inp
    let ino = readNboInt4 inp
    // NOTE: The doco says that the mode field is a 32-bit value, but only accounts for 16 of them :-/
    // We take the object type from the top 4 bits of byte 2, and the permissions from the bottom 9 bits.
    let mode = readBytes inp 4
    let objType = int( mode.[2] &&& 0xf0uy ) >>> 4
    let perms = uint16( mode.[2] &&& 0x01uy ) <<< 8 ||| uint16(mode.[3])
    let uid = readNboInt4 inp
    let gid = readNboInt4 inp
    let fileSize = readNboInt4 inp
    let objName = readObjName inp
    // NOTE: The flags are laid out like this:
    //   0x8000 = assume-valid ; 0x4000 = extended ; 0x3000 = stage ; 0x0fff = length of the path name
    let flags = uint16( readNboInt2 inp )
    let extendedFlags =
        // NOTE: The extended flags are only present if the "extended" flag (0x4000) is set. We must
        // not check any of the lower 12 bits, since they are part of the path name length.
        if version >= 3 && flags &&& 0x4000us <> 0us then
            Some( uint16( readNboInt2 inp ) )
        else
            None
    let path = readPath ()

    // skip over the pad bytes (used to 8-align entries)
    if version <> 4 then
        while (inp.Position - fposStart) % 8L <> 0L do
            inp.ReadByte() |> ignore

    // create the StagingIndexEntry record
    let entry = {
        ctime=ctime; mtime=mtime
        dev=dev; ino=ino
        objType=objType
        perms=perms
        uid=uid; gid=gid
        fileSize=fileSize
        objName=objName
        flags=flags; extendedFlags=extendedFlags
        path=path
    }
    ( entry, fposStart )
let private _makeStagingIndexExtension (extnSig: byte[]) extnData =
    // Create the StagingIndexExtension-derived object that handles the specified extension signature.
    // Fails if the signature is not one we know about.
    match Encoding.ASCII.GetString( extnSig ) with
    | "TREE" -> (TreeExtension extnData) :> StagingIndexExtension
    | "REUC" -> (ReucExtension extnData) :> StagingIndexExtension
    | _ -> failwithf "Unknown extension sig: %A" extnSig
let private _readExtension inp =
    // Read the next extension from the stream (its signature, then its size, then its data),
    // returning the extension object, and its raw data.
    let extnSig = readBytes inp 4
    let extnData = readNboInt4 inp |> readBytes inp
    ( _makeStagingIndexExtension extnSig extnData, extnData )
let dumpStagingIndex repoDir fullDump =
// Dump the repo's staging index (.git/index): the header, then each entry, then each extension.
// fullDump is passed on to each entry's dumpObj. Fails if there is no staging index file.
// initialize
let fname = Path.Join( repoDir, ".git/index" )
if not ( File.Exists( fname ) ) then
failwith "Can't find the staging index file."
use inp = new FileStream( fname, FileMode.Open, FileAccess.Read, FileShare.Read )
// dump the header
// nb: the entry count is stored immediately after the version
let version = _readStagingIndexHeader inp
let nEntries = readNboInt4 inp
AnsiConsole.MarkupLine( makeHeader "HEADER" "" )
printfn ""
printfn "version: %d" version
printfn "entries: %d" nEntries
// dump the entries
printfn ""
AnsiConsole.MarkupLine( makeHeader "ENTRIES" "" )
for entryNo = 0 to nEntries-1 do
printfn ""
// nb: fpos is the file position the entry started at
let entry, fpos = _readStagingIndexEntry inp version
AnsiConsole.MarkupLine( sprintf "[cyan]Entry %d[/]: fpos=0x%x" entryNo fpos )
entry.dumpObj fullDump
// dump the extensions
// nb: extensions are read one after the other, until we reach the last 20 bytes of the file
let fposEnd = inp.Length - 20L // we ignore the checksum at the end of the file
Seq.initInfinite ( fun _ ->
if inp.Position < fposEnd then
let fposStart = inp.Position
let extn, extnData = _readExtension inp
Some ( extn, extnData, fposStart )
else
None
) |> Seq.takeWhile ( fun e -> e.IsSome ) |> Seq.map ( fun e -> e.Value ) |> Seq.iteri ( fun extnNo row ->
let extn, extnData, fpos = row
// nb: the section header is only output if there is at least one extension
if extnNo = 0 then
printfn ""
AnsiConsole.MarkupLine( makeHeader "EXTENSIONS" "" )
printfn ""
AnsiConsole.MarkupLine( "[cyan]{0}[/]: fpos=0x{1:x}, #bytes={2}",
extn.extnSig, fpos, extnData.Length
)
extn.dumpExtn ()
)

@ -3,11 +3,36 @@ namespace git_guts
open System
open System.Text
open System.IO
open System.Diagnostics
open Spectre.Console
// --------------------------------------------------------------------
type CaptureStdout () =
// Temporarily capture output sent to stdout.
// Capturing starts when the object is created, and stops when it is disposed. Both System.Console
// and Spectre's AnsiConsole are redirected, and both are restored to their previous outputs afterwards.
// set up a buffer to capture stdout
let _writer = new StringWriter()
// remember where output was going, so that we can restore things later
let _prevOut = System.Console.Out
let _prevAnsiConsole = AnsiConsole.Console.Profile.Out
do
System.Console.SetOut( _writer )
AnsiConsole.Console.Profile.Out <- new AnsiConsoleOutput( _writer )
interface IDisposable with
member this.Dispose() =
// clean up
// NOTE: The capture buffer is disposed here, so getOutput should be read before disposing.
System.Console.SetOut( _prevOut )
AnsiConsole.Console.Profile.Out <- _prevAnsiConsole
_writer.Dispose()
member this.getOutput =
// return the captured output
_writer.ToString()
// --------------------------------------------------------------------
type StringBuilderBuf () =
// Allow messages to be built up in a StringBuilder.
@ -31,11 +56,37 @@ type StringBuilderBuf () =
[<AutoOpen>]
module Utils =
let disableSpectreCapabilities =
let runGit repoDir cmd args =
    // Run a git command against the specified repo, and return what it wrote to stdout (as raw bytes).
    // The command line is: git --git-dir <repoDir>/.git <cmd> <args...>
    // Fails if git returns a non-zero exit code.
    let gitPath = "git" // nb: we assume this is on the PATH
    let gitDir = Path.Combine( repoDir, ".git" )
    let startInfo = ProcessStartInfo( FileName=gitPath, RedirectStandardOutput=true, UseShellExecute=false )
    // nb: using ArgumentList means we don't have to worry about quoting the arguments
    let addArg arg = startInfo.ArgumentList.Add( arg )
    Seq.iter addArg [| "--git-dir"; gitDir; cmd |]
    Seq.iter addArg args
    // NOTE: We dispose of the Process when we're done, otherwise its handles are leaked.
    use proc = Process.Start( startInfo )
    // NOTE: We read all of stdout *before* waiting for the process to exit, otherwise git could
    // block forever, trying to write to a pipe that nobody is reading from.
    use buf = new MemoryStream()
    proc.StandardOutput.BaseStream.CopyTo( buf )
    proc.WaitForExit()
    if proc.ExitCode <> 0 then
        failwithf "git failure: rc=%d" proc.ExitCode
    buf.ToArray()
let runGitText repoDir cmd args =
    // Run a git command (see runGit), and return its output decoded as UTF-8 text.
    let output: byte[] = runGit repoDir cmd args
    Encoding.UTF8.GetString( output )
let runGitGc repoDir =
    // Run "git gc" on the repo (nb: we're not interested in what it outputs).
    ignore ( runGit repoDir "gc" [] )
let disableSpectreCapabilities () =
// Turn off Spectre.Console's colors, ANSI sequences and links, and set a very large output width.
// NOTE: This takes a unit argument so that it runs each time it is called.
// disable colors (and other capabilities) in Spectre.Console
AnsiConsole.Profile.Capabilities.ColorSystem <- ColorSystem.NoColors
AnsiConsole.Profile.Capabilities.Ansi <- false
AnsiConsole.Profile.Capabilities.Links <- false
// FUDGE! Spectre.Console wraps output?!?!
// nb: presumably a huge width means lines never get long enough to be wrapped
AnsiConsole.Profile.Width <- 99999
let safeSpectreString (str: string) =
// escape characters that have meaning for Spectre
@ -117,8 +168,62 @@ module Utils =
// return the object name display string
"[yellow]" + objName + "[/]"
let refStr ref =
    // Wrap a ref name in the markup used to display refs (green).
    sprintf "[green]%s[/]" ref
let pathStr (path: string) =
    // Return the display string for a path: "(empty)" if there is no path, otherwise
    // the path (minus any leading "./") wrapped in the markup used to display paths (green).
    match path with
    | "" -> "(empty)"
    | _ ->
        // NOTE: We use StartsWith() to check for the prefix, since the path might be only 1 character
        // long, in which case trying to extract the first 2 characters would throw an exception.
        let path2 = if path.StartsWith( "./", StringComparison.Ordinal ) then path.Substring( 2 ) else path
        "[green]" + path2 + "[/]"
let bitflagString (flags: uint) nBytes (flagNames: Map<uint,string>) =
    // Format bitflags for display. The result is the flags in hex (zero-padded to 2 digits per byte),
    // e.g. "0x0001". If any of the named flags are set, their names (in order of flag value)
    // come first, and the hex value is put in parentheses, e.g. "extended (0x4000)".
    let hexStr = String.Format( sprintf "0x{0:x%d}" (2 * nBytes), flags )
    let setNames = [|
        for KeyValue( bflag, flagName ) in flagNames do
            if flags &&& bflag <> 0u then
                yield flagName
    |]
    match setNames with
    | [||] -> hexStr
    | _ -> sprintf "%s (%s)" (String.Join( ", ", setNames )) hexStr
let permsString (perms: uint16) =
    // Format file permission flags for display, e.g. "rw-r--r-- (0x1a4)".
    // Only the lowest 9 bits are shown as letters, but the hex value shows everything.
    let permNames = "rwxrwxrwx"
    let permChars =
        permNames
        |> Seq.mapi ( fun flagNo permName ->
            // nb: the first letter corresponds to the highest of the 9 bits
            let bmask = uint16( 1 <<< (8 - flagNo) )
            if perms &&& bmask <> 0us then permName else '-'
        )
        |> Seq.toArray
    sprintf "%s (0x%x)" (String( permChars )) perms
let readBytes (inp: Stream) nBytes =
    // Read exactly the specified number of bytes from the stream, and return them as an array.
    // Fails if the stream ends before that many bytes could be read.
    let buf: byte[] = Array.zeroCreate nBytes
    // NOTE: Stream.Read() is allowed to return fewer bytes than we asked for, even if the stream
    // hasn't ended yet, so we have to keep reading until we get everything, or the stream ends.
    let mutable nBytesRead = 0
    let mutable atEnd = false
    while nBytesRead < nBytes && not atEnd do
        let n = inp.Read( buf, nBytesRead, nBytes - nBytesRead )
        if n <= 0 then
            atEnd <- true
        else
            nBytesRead <- nBytesRead + n
    if nBytesRead <> nBytes then
        failwithf "Unexpected number of bytes read: %d/%d" nBytesRead nBytes
    buf
let parseTimestamp tstamp =
    // convert a count of seconds since 1970-01-01 00:00:00 UTC into a (UTC) DateTime
    let secondsSinceEpoch = float( tstamp )
    DateTime( 1970, 1, 1, 0, 0, 0, DateTimeKind.Utc ).AddSeconds( secondsSinceEpoch )
let plural n val1 val2 =
    // return "<n> <noun>", using val1 when n is exactly 1, and val2 otherwise
    let noun =
        match n with
        | 1 -> val1
        | _ -> val2
    sprintf "%d %s" n noun
let friendlyByteCount nBytes =
    // return a friendly byte-count string: a plain count below 1 KB, otherwise the
    // size in KB, MB or GB (1024-based), to one decimal place
    let kb, mb, gb = 1024L, 1024L * 1024L, 1024L * 1024L * 1024L
    let size = float( nBytes )
    match nBytes with
    | n when n < kb -> plural (int n) "byte" "bytes"
    | n when n < mb -> sprintf "%.1f KB" ( size / 1024.0 )
    | n when n < gb -> sprintf "%.1f MB" ( size / 1024.0 / 1024.0 )
    | _ -> sprintf "%.1f GB" ( size / 1024.0 / 1024.0 / 1024.0 )

@ -0,0 +1,183 @@
namespace git_guts
open System
open System.IO
open System.Text
open System.Text.RegularExpressions
// --------------------------------------------------------------------
[<AutoOpen>]
module VerifyObjects =
let private _adjustGitTreeOutput (objDump: string) =
    // Adjust git's output for TREE objects so that it matches what we output.
    // Each "<perms> <type> <object name> <path>" line is rewritten as "<perms> <object name> <path>",
    // with quoted paths decoded. Lines are joined with '\n', and trailing whitespace is trimmed
    // from the result. Fails if a line doesn't look like a tree entry.
    let regex = Regex( @"^(\d+) [a-z]+ ([0-9a-f]{40})\s+(.+)$" )
    let unquotePath (quoted: string) =
        // FUDGE! git outputs "unusual" paths as C-style quoted strings: UTF-8 bytes are written as
        // 3-digit octal escapes, and characters such as '"' and '\' are backslash-escaped. We rebuild
        // the raw bytes, then decode them as UTF-8.
        let bytes = ResizeArray<byte>()
        let mutable pos = 0
        while pos < quoted.Length do
            if quoted.[pos] <> '\\' then
                // copy everything up to the next escape sequence (or the end of the string)
                let next = quoted.IndexOf( '\\', pos )
                let endPos = if next < 0 then quoted.Length else next
                bytes.AddRange( Encoding.UTF8.GetBytes( quoted.Substring( pos, endPos - pos ) ) )
                pos <- endPos
            else
                if pos + 1 >= quoted.Length then
                    failwithf "Truncated escape sequence in path: %s" quoted
                let ch = quoted.[pos+1]
                if ch >= '0' && ch <= '7' then
                    // octal-encoded byte
                    if pos + 4 > quoted.Length then
                        failwithf "Truncated escape sequence in path: %s" quoted
                    bytes.Add( Convert.ToByte( quoted.Substring( pos+1, 3 ), 8 ) )
                    pos <- pos + 4
                else
                    // single-character C escape
                    let b =
                        match ch with
                        | 'a' -> 7uy
                        | 'b' -> 8uy
                        | 't' -> 9uy
                        | 'n' -> 10uy
                        | 'v' -> 11uy
                        | 'f' -> 12uy
                        | 'r' -> 13uy
                        | '"' | '\\' -> byte ch
                        | _ -> failwithf "Unknown escape sequence in path: %s" quoted
                    bytes.Add( b )
                    pos <- pos + 2
        Encoding.UTF8.GetString( bytes.ToArray() )
    let buf = StringBuilder()
    use reader = new StringReader( objDump )
    let mutable line = reader.ReadLine()
    while not (isNull line) do
        let m = regex.Match( line )
        if not m.Success then
            failwithf "Unexpected line in git tree output: %s" line
        let perms, objName = m.Groups.[1].Value, m.Groups.[2].Value
        let rawPath = m.Groups.[3].Value
        let path =
            // nb: a quoted path needs at least the 2 enclosing quotes
            if rawPath.Length >= 2 && rawPath.[0] = '"' && rawPath.[rawPath.Length-1] = '"' then
                unquotePath ( rawPath.Substring( 1, rawPath.Length-2 ) )
            else
                rawPath
        buf.AppendFormat( "{0} {1} {2}\n", perms, objName, path ) |> ignore
        line <- reader.ReadLine()
    buf.ToString().TrimEnd()
let verifyObjects repoDir progress =
    // Verify that we can retrieve every object in a repo.
    // - repoDir: the repo to check
    // - progress: if true, per-object progress messages are written to stderr
    // Fails on the first object that can't be found, or whose type or content doesn't match
    // what git reports (for a content mismatch, both versions are saved in the temp directory).
    // Also checks that looking up object names that can't exist finds nothing.
    // NOTE: This will iterate over every object in a repo, and compare what we retrieve with what
    // "git cat-file" returns. In particular, this will include *every* revision of *every* file,
    // so for large repos, it will take some time...

    // initialize
    disableSpectreCapabilities ()
    let mutable currPackFname = ""
    let mutable packObjCounts = Map[ ( "", 0 ) ] // nb: loose objects are counted under ""
    let onEndPackFile () =
        let nObjs = packObjCounts.[ currPackFname ]
        printfn "- Checked %s." ( plural nObjs "object" "objects" )

    // check each object in the repo
    printfn "Checking loose objects..." // nb: because getObjNames returns loose objects first
    for objName, fname in getObjNames repoDir do

        // check if we have a loose object or an object in a pack
        let packFname =
            if Path.GetExtension( fname ) = ".pack" then Path.GetFileName( fname ) else ""

        // check if we've started a new pack
        if packFname <> currPackFname then
            // yup - log the end of the current one
            onEndPackFile ()
            // prepare to start processing the new pack file
            currPackFname <- packFname
            packObjCounts <- packObjCounts.Add ( currPackFname, 0 )
            let fsize = FileInfo( fname ).Length
            printfn ""
            printfn "Checking pack file (%s): %s" (friendlyByteCount fsize) currPackFname

        // find the next object
        if progress then
            eprintfn "- Checking object: %s" objName
        let objRec = _findRepoObjRec repoDir objName
        if objRec.IsNone then
            failwithf "Can't find object: %s" objName
        let mutable objData = objRec.Value.objData
        let obj = makeGitObject objRec.Value
        packObjCounts <- packObjCounts.Add ( currPackFname, packObjCounts.[currPackFname]+1 )
        if progress then
            eprintfn " - Got %s: #bytes=%d" obj.objType objData.Length

        // check the object type
        let expectedObjType = ( runGitText repoDir "cat-file" [ "-t"; objName ] ).TrimEnd()
        if obj.objType <> expectedObjType then
            failwithf "Object type mismatch for %s: got \"%s\", expected \"%s\"." objName obj.objType expectedObjType

        // check the object data
        let mutable expectedObjData = ( runGit repoDir "cat-file" [ "-p"; objName ] )
        if obj.objType = "tree" then
            // nb: for trees, we compare our dump of the object with git's (adjusted) pretty-printed output
            objData <- Encoding.UTF8.GetBytes(
                using ( new CaptureStdout() ) ( fun cap ->
                    obj.dumpObj()
                    cap.getOutput.TrimEnd()
                )
            )
            expectedObjData <- Encoding.UTF8.GetBytes(
                _adjustGitTreeOutput ( Encoding.UTF8.GetString expectedObjData )
            )
        if objData <> expectedObjData then
            // save both versions, so that they can be compared
            let dname = Path.GetTempPath()
            File.WriteAllBytes( Path.Join( dname, "git-content.expected" ), expectedObjData )
            File.WriteAllBytes( Path.Join( dname, "git-content.actual" ), objData )
            failwithf "Object data mismatch for %s." objName

    onEndPackFile ()

    // NOTE: These functions generate object names that are invalid (they contain a non-hex character),
    // but they will always compare greater/less than a valid name, based on the first byte, which will
    // help test how we use the fanout table, and the binary search through the table of object names.
    let makeObjName1 byte0 =
        sprintf "%02x%s!" byte0 (String('0',37))
    let makeObjName2 byte0 =
        sprintf "%02x%sz" byte0 (String('f',37))
    // NOTE: Also test with an object name that appears in the middle of the range, for a given first byte.
    let makeObjName3 byte0 =
        sprintf "%02x%s" byte0 "80808080808080808080808080808080808080"

    // verify looking up unknown objects
    printfn ""
    printfn "Checking unknown objects..."
    let mutable nObjs = 0
    [| makeObjName1; makeObjName2; makeObjName3 |] |> Seq.iter ( fun makeObjName ->
        for byte0 = 0 to 255 do
            let objName = makeObjName byte0
            let obj = findRepoObject repoDir objName
            if obj.IsSome then
                failwithf "Unexpectedly found object: %s" objName
            nObjs <- nObjs + 1
    )
    // nb: the format string supplies the trailing period, so the plural form must not have one
    printfn "- Checked %s." ( plural nObjs "unknown object" "unknown objects" )
// --------------------------------------------------------------------
[<AutoOpen>]
module VerifyLogs =
let verifyLogs repoDir =
    // Verify reading each log file, by rebuilding the output of "git reflog show <ref>" from
    // the entries we read, and comparing it with what git itself outputs.
    // Fails on the first ref whose output doesn't match (both versions are saved in the temp directory).
    for ref, fname in _findLogFiles repoDir do

        printfn "Processing log file: %s" fname

        // run git to get the log entries for the current ref
        let ref2 =
            let ref2 = ref.Replace( Path.DirectorySeparatorChar, '/' )
            // nb: branches are passed to git (and shown in our output) without the "refs/heads/" prefix
            if ref2.Length >= 12 && ref2.Substring( 0, 11 ) = "refs/heads/" then
                ref2.Substring( 11 )
            else
                ref2
        let expected = ( runGitText repoDir "reflog" [| "show"; ref2 |] ).TrimEnd()

        // NOTE: git shows just enough of the object names for them to be unique, so we need
        // to figure out how much that is, so that we can generate the same output :-/
        // We assume the first line is a log entry, that starts with an abbreviated object name.
        let objNamePrefixLen = expected.IndexOf( ' ' )

        // extract the log entries for the current ref
        let buf = new StringBuilder()
        let mutable nLogEntries = 0
        _readLogFile fname |> Seq.rev |> Seq.iteri ( fun logEntryNo logEntry ->
            if logEntry.nextRef.IsSome then
                let objNamePrefix =
                    // nb: if we couldn't figure out the prefix length (e.g. git returned nothing), we use
                    // the full name, so that the problem is reported as a mismatch below
                    let objName = logEntry.nextRef.Value
                    if objNamePrefixLen >= 0 && objNamePrefixLen <= objName.Length then
                        objName.Substring( 0, objNamePrefixLen )
                    else
                        objName
                buf.AppendFormat( "{0} {1}@{{{2}}}: {3}", objNamePrefix, ref2, logEntryNo, logEntry.entryType.Value ) |> ignore
                if logEntry.msg.IsSome then
                    buf.AppendFormat( ": {0}", logEntry.msg.Value ) |> ignore
                // nb: AppendLine() would add the platform's line terminator, which is not always "\n"
                buf.Append( '\n' ) |> ignore
            nLogEntries <- nLogEntries + 1
        )
        let output = buf.ToString().TrimEnd()

        // compare what we extracted with the git output
        if output <> expected then
            // save both versions, so that they can be compared
            let dname = Path.GetTempPath()
            File.WriteAllText( Path.Join( dname, "git-log.expected" ), expected )
            File.WriteAllText( Path.Join( dname, "git-log.actual" ), output )
            failwithf "Mismatched output for ref: %s" ref2

        printfn "- Checked %s." ( plural nLogEntries "log entry" "log entries" )

@ -13,7 +13,11 @@
<Compile Include="PackData.fs" />
<Compile Include="PackIndex.fs" />
<Compile Include="Pack.fs" />
<Compile Include="StagingIndex.fs" />
<Compile Include="Refs.fs" />
<Compile Include="Logs.fs" />
<Compile Include="GitRepo.fs" />
<Compile Include="Verify.fs" />
</ItemGroup>
<ItemGroup>

@ -0,0 +1,41 @@
namespace tests
open System
open System.IO
open Microsoft.VisualStudio.TestTools.UnitTesting
open git_guts
// --------------------------------------------------------------------
[<TestClass>]
type TestStagingIndex () =
    // Tests for dumping the staging index.

    [<TestInitialize>]
    member this.init () =
        // prepare to run a test
        disableSpectreCapabilities ()

    [<TestMethod>]
    member this.TestDumpStagingIndex () =
        // Dump the staging index of each test repo, and compare the output with the
        // corresponding "<repo>.staging-index.txt" fixture.

        let doTest zipFname =

            // set up the test repo
            use gitTestRepo = new GitTestRepo( zipFname )
            runGitGc gitTestRepo.repoDir

            // dump the staging index
            using ( new CaptureStdout() ) ( fun cap ->
                dumpStagingIndex gitTestRepo.repoDir false
                let expectedFname =
                    let fname = Path.GetFileNameWithoutExtension( zipFname ) + ".staging-index.txt"
                    Path.Combine( __SOURCE_DIRECTORY__, "fixtures", fname )
                // nb: captured output is checked via the checkCapturedOutput helper (as in the other tests)
                checkCapturedOutput cap expectedFname
            )

        // run the tests
        // nb: dumping the staging index of the empty repo is expected to fail
        Assert.ThrowsException<Exception>( fun () ->
            doTest "empty.zip"
        ) |> ignore
        doTest "simple.zip"

@ -0,0 +1,39 @@
namespace tests
open System
open System.IO
open Microsoft.VisualStudio.TestTools.UnitTesting
open git_guts
// --------------------------------------------------------------------
[<TestClass>]
type TestLogs () =
    // Tests for dumping the logs.

    [<TestInitialize>]
    member this.init () =
        // prepare to run a test
        disableSpectreCapabilities ()

    [<TestMethod>]
    member this.TestDumpLogs () =
        // Dump the logs of each test repo, and compare the output with the
        // corresponding "<repo>.logs.txt" fixture.

        let fixturePath (zipFname: string) =
            // locate the file holding the expected output for a test repo
            let fname = Path.GetFileNameWithoutExtension( zipFname ) + ".logs.txt"
            Path.Combine( __SOURCE_DIRECTORY__, "fixtures", fname )

        let doTest (zipFname: string) =
            // set up the test repo, then dump the logs while capturing stdout
            use gitTestRepo = new GitTestRepo( zipFname )
            use cap = new CaptureStdout()
            dumpLogs gitTestRepo.repoDir
            checkCapturedOutput cap ( fixturePath zipFname )

        // run the tests
        [ "empty.zip"; "simple.zip"; "full2.zip" ] |> List.iter doTest

@ -13,7 +13,7 @@ type TestPacks () =
[<TestInitialize>]
member this.init () =
// prepare to run a test
disableSpectreCapabilities
disableSpectreCapabilities ()
[<TestMethod>]
member this.TestDumpPack () =
@ -35,7 +35,7 @@ type TestPacks () =
let expectedFname =
let fname = Path.GetFileNameWithoutExtension( zipFname ) + ".pack-data.txt"
Path.Combine( __SOURCE_DIRECTORY__, "fixtures", fname )
cap.checkOutput expectedFname
checkCapturedOutput cap expectedFname
)
// dump the pack index file
@ -45,13 +45,13 @@ type TestPacks () =
let expectedFname =
let fname = Path.GetFileNameWithoutExtension( zipFname ) + ".pack-index.txt"
Path.Combine( __SOURCE_DIRECTORY__, "fixtures", fname )
cap.checkOutput expectedFname
checkCapturedOutput cap expectedFname
)
// check that we can find each object name correctly
using ( new FileStream( packIndexFname, FileMode.Open, FileAccess.Read, FileShare.Read ) ) ( fun inp ->
inp.Seek( int64( 4 + 4 + 4*256 - 4 ), SeekOrigin.Begin ) |> ignore
let nObjs = readNboInt inp
let nObjs = readNboInt4 inp
for objNo = 0 to nObjs-1 do
let objName, _, _ = readPackIndexObject inp objNo nObjs
let obj = findRepoObject gitTestRepo.repoDir objName
@ -62,3 +62,4 @@ type TestPacks () =
doTest "simple.zip"
doTest "license.zip"
doTest "full.zip"
doTest "full2.zip"

@ -0,0 +1,48 @@
namespace tests
open System
open System.IO
open Microsoft.VisualStudio.TestTools.UnitTesting
open git_guts
// --------------------------------------------------------------------
[<TestClass>]
type TestRefs () =
    // Tests for dumping the refs.

    [<TestInitialize>]
    member this.init () =
        // prepare to run a test
        disableSpectreCapabilities ()

    [<TestMethod>]
    member this.TestDumpRefs () =
        // Dump the refs of each test repo, before and after garbage collection, and compare
        // the output with the corresponding fixtures.

        let doTest (zipFname: string) =

            // set up the test repo
            use gitTestRepo = new GitTestRepo( zipFname )

            let checkDump suffix =
                // dump the refs, and compare the output with the fixture that has the given suffix
                use cap = new CaptureStdout()
                dumpRefs gitTestRepo.repoDir
                let fname = Path.GetFileNameWithoutExtension( zipFname ) + suffix
                checkCapturedOutput cap ( Path.Combine( __SOURCE_DIRECTORY__, "fixtures", fname ) )

            // check the refs as they are in the test repo
            checkDump ".refs.txt"

            // move the loose objects to a pack, and check again
            runGitGc gitTestRepo.repoDir
            checkDump ".refs-packed.txt"

        // run the tests
        [ "empty.zip"; "full2.zip" ] |> List.iter doTest

@ -0,0 +1,49 @@
namespace tests
open System
open System.IO
open Microsoft.VisualStudio.TestTools.UnitTesting
open git_guts
// --------------------------------------------------------------------
[<TestClass>]
type TestVerify () =
    // Tests for the verification functions.

    [<TestInitialize>]
    member this.init () =
        // prepare to run a test
        disableSpectreCapabilities ()

    [<TestMethod>]
    member this.TestVerify () =
        // Run the object and log verification against every test repo, before and after
        // garbage collection.

        let runCaptured (check: unit -> unit) =
            // run a check while stdout is being captured (the captured output is not inspected)
            use cap = new CaptureStdout()
            check ()

        let doChecks repoDir =
            // verify retrieving objects from the repo
            runCaptured ( fun () -> verifyObjects repoDir false )
            // verify retrieving logs from the repo
            runCaptured ( fun () -> verifyLogs repoDir )

        let doTest zipFname =
            // set up the test repo
            use gitTestRepo = new GitTestRepo( zipFname )
            // do the checks
            doChecks gitTestRepo.repoDir
            // run garbage collection, and verify again
            runGitGc gitTestRepo.repoDir
            doChecks gitTestRepo.repoDir

        // run the tests (against every zip file in the fixtures directory)
        let dname = Path.Combine( __SOURCE_DIRECTORY__, "fixtures" )
        Directory.GetFiles( dname, "*.zip", SearchOption.AllDirectories )
        |> Array.iter ( fun fname -> doTest ( Path.GetFileName fname ) )

@ -4,11 +4,12 @@ open System
open System.Text
open System.IO
open System.IO.Compression
open System.Diagnostics
open Microsoft.VisualStudio.TestTools.UnitTesting
open Spectre.Console
open git_guts
// --------------------------------------------------------------------
type TempDir () =
@ -49,58 +50,16 @@ type GitTestRepo ( zipFname ) =
// --------------------------------------------------------------------
type CaptureStdout () =
// Temporarily capture output sent to stdout.
// set up a buffer to capture stdout
let _writer = new StringWriter()
let _prevOut = System.Console.Out
let _prevAnsiConsole = AnsiConsole.Console.Profile.Out
do
System.Console.SetOut( _writer )
AnsiConsole.Console.Profile.Out <- new AnsiConsoleOutput( _writer )
interface IDisposable with
member this.Dispose() =
// clean up
System.Console.SetOut( _prevOut )
AnsiConsole.Console.Profile.Out <- _prevAnsiConsole
_writer.Dispose()
[<AutoOpen>]
module Utils =
member this.checkOutput fname =
let checkCapturedOutput (cap: CaptureStdout) fname =
// compare the captured output with what's expected
let expected = File.ReadAllText( fname, Encoding.UTF8 )
let output = this.getOutput
let output = cap.getOutput
if output <> expected then
let fname2 = Path.Combine( Path.GetTempPath(), "captured-output.txt" )
File.WriteAllText( fname2, output, Encoding.UTF8 )
Assert.Fail(
sprintf "Captured output`mismatch: %s" ( Path.GetFileName( fname ) )
)
member this.getOutput =
// return the captured output
_writer.ToString()
// --------------------------------------------------------------------
[<AutoOpen>]
module Utils =
let runGit repoDir cmd args =
// run git and capture the output
let gitPath = "git" // nb: we assume this is on the PATH
let gitDir = Path.Combine( repoDir, ".git" )
let startInfo = ProcessStartInfo( FileName=gitPath, RedirectStandardOutput=true, UseShellExecute=false )
let addArg arg = startInfo.ArgumentList.Add( arg )
Seq.iter addArg [| "--git-dir"; gitDir; cmd |]
Seq.iter addArg args
let proc = Process.Start( startInfo )
let output = proc.StandardOutput.ReadToEnd()
proc.WaitForExit()
Assert.AreEqual( 0, proc.ExitCode )
output
let runGitGc repoDir =
// run git garbage collection
runGit repoDir "gc" [] |> ignore

@ -0,0 +1,6 @@
--- heads ----------------------------------------------------------------------
--- tags -----------------------------------------------------------------------
--- packed refs ----------------------------------------------------------------

@ -0,0 +1,6 @@
--- heads ----------------------------------------------------------------------
--- tags -----------------------------------------------------------------------
--- packed refs ----------------------------------------------------------------

Binary file not shown.

@ -0,0 +1,57 @@
--- HEAD -----------------------------------------------------------------------
commit (initial): Added a file with spaces in its path.
Taka (tutorial@git-guts) 2021-12-29 03:36:07 +0000
-> de483a0a2160dc698967033f2c351208e6da9066
commit: Added a binary file.
Taka (tutorial@git-guts) 2021-12-29 03:36:31 +0000
de483a0a2160dc698967033f2c351208e6da9066 -> 89bac892dbe3bc028ec2c361d836791f446ffc11
commit: Added an empty file.
Taka (tutorial@git-guts) 2021-12-29 03:36:46 +0000
89bac892dbe3bc028ec2c361d836791f446ffc11 -> fb407a730e3730eb07698f7ced84b596fe7fb7bd
commit: Added a file with Unicode (日本) in its name and content.
Taka (tutorial@git-guts) 2021-12-29 03:37:10 +0000
fb407a730e3730eb07698f7ced84b596fe7fb7bd -> 30c84cc603759f82470a119c5b5348001ab13500
checkout: moving from master to a-branch
Taka (tutorial@git-guts) 2021-12-29 03:52:26 +0000
30c84cc603759f82470a119c5b5348001ab13500 -> de483a0a2160dc698967033f2c351208e6da9066
commit: Changed the greeting.
Taka (tutorial@git-guts) 2021-12-29 03:53:14 +0000
de483a0a2160dc698967033f2c351208e6da9066 -> 06d4e9c2beb2c23a955bdb213d02ac63e15e3318
checkout: moving from a-branch to master
Taka (tutorial@git-guts) 2021-12-29 03:54:11 +0000
06d4e9c2beb2c23a955bdb213d02ac63e15e3318 -> 30c84cc603759f82470a119c5b5348001ab13500
--- refs/heads/a-branch --------------------------------------------------------
branch: Created from de483a0a2160dc698967033f2c351208e6da9066
Taka (tutorial@git-guts) 2021-12-29 03:52:26 +0000
-> de483a0a2160dc698967033f2c351208e6da9066
commit: Changed the greeting.
Taka (tutorial@git-guts) 2021-12-29 03:53:14 +0000
de483a0a2160dc698967033f2c351208e6da9066 -> 06d4e9c2beb2c23a955bdb213d02ac63e15e3318
--- refs/heads/master ----------------------------------------------------------
commit (initial): Added a file with spaces in its path.
Taka (tutorial@git-guts) 2021-12-29 03:36:07 +0000
-> de483a0a2160dc698967033f2c351208e6da9066
commit: Added a binary file.
Taka (tutorial@git-guts) 2021-12-29 03:36:31 +0000
de483a0a2160dc698967033f2c351208e6da9066 -> 89bac892dbe3bc028ec2c361d836791f446ffc11
commit: Added an empty file.
Taka (tutorial@git-guts) 2021-12-29 03:36:46 +0000
89bac892dbe3bc028ec2c361d836791f446ffc11 -> fb407a730e3730eb07698f7ced84b596fe7fb7bd
commit: Added a file with Unicode (日本) in its name and content.
Taka (tutorial@git-guts) 2021-12-29 03:37:10 +0000
fb407a730e3730eb07698f7ced84b596fe7fb7bd -> 30c84cc603759f82470a119c5b5348001ab13500

@ -0,0 +1,113 @@
--- OBJECT 0: commit (fpos=0xc, size=218) --------------------------------------
tree 36c4c87ea568a6728a769586ecaac54a0cbee508
parent de483a0a2160dc698967033f2c351208e6da9066
author Taka <tutorial@git-guts> 1640749994 +0000
committer Taka <tutorial@git-guts> 1640749994 +0000
Changed the greeting.
--- OBJECT 1: commit (fpos=0xa7, size=256) -------------------------------------
tree b2029d7668b35e4bd3d9f661b6b89028943124f3
parent fb407a730e3730eb07698f7ced84b596fe7fb7bd
author Taka <tutorial@git-guts> 1640749030 +0000
committer Taka <tutorial@git-guts> 1640749030 +0000
Added a file with Unicode (日本) in its name and content.
--- OBJECT 2: tag (fpos=0x16a, size=154) ---------------------------------------
object 06d4e9c2beb2c23a955bdb213d02ac63e15e3318
type commit
tag annotated-tag
tagger Taka <tutorial@git-guts> 1640750013 +0000
This is an annotated tag.
--- OBJECT 3: commit (fpos=0x1f2, size=217) ------------------------------------
tree dda83875e2c2e9d5f31615a53280bb7ecca3c3f9
parent 89bac892dbe3bc028ec2c361d836791f446ffc11
author Taka <tutorial@git-guts> 1640749006 +0000
committer Taka <tutorial@git-guts> 1640749006 +0000
Added an empty file.
--- OBJECT 4: commit (fpos=0x28c, size=217) ------------------------------------
tree 4794689f37a386016b4c5bd6e6ba757410c31e2f
parent de483a0a2160dc698967033f2c351208e6da9066
author Taka <tutorial@git-guts> 1640748991 +0000
committer Taka <tutorial@git-guts> 1640748991 +0000
Added a binary file.
--- OBJECT 5: commit (fpos=0x326, size=186) ------------------------------------
tree 92dfd7467382cbb75e4e808fe62e7e2bca44f9f2
author Taka <tutorial@git-guts> 1640748967 +0000
committer Taka <tutorial@git-guts> 1640748967 +0000
Added a file with spaces in its path.
--- OBJECT 6: tree (fpos=0x3ac, size=141) --------------------------------------
100644 818c71d03f435db011069584cda25c1f66af1a85 1x1.png
040000 7d9872b3f8a2d8f1eca570e34203e1a530242e06 a subdir
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 empty
100644 1072e024f685d47432c4d30abdbf4564eb42ba36 日本.txt
--- OBJECT 7: tree (fpos=0x43e, size=43) ---------------------------------------
100644 f75ba05f340c51065cbea2e1fdbfe5fe13144c97 hello world.txt
--- OBJECT 8: tree (fpos=0x474, size=35) ---------------------------------------
040000 f6fa0c3be2a987cc010b467aced34fdb7841e729 a subdir
--- OBJECT 9: tree (fpos=0x4a0, size=43) ---------------------------------------
100644 f967a9db1b6145568be7176f411f02dd174cc56c hello world.txt
--- OBJECT 10: tree (fpos=0x4d5, size=103) -------------------------------------
100644 818c71d03f435db011069584cda25c1f66af1a85 1x1.png
040000 7d9872b3f8a2d8f1eca570e34203e1a530242e06 a subdir
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 empty
--- OBJECT 11: tree (fpos=0x4e5, size=70) --------------------------------------
100644 818c71d03f435db011069584cda25c1f66af1a85 1x1.png
040000 7d9872b3f8a2d8f1eca570e34203e1a530242e06 a subdir
--- OBJECT 12: tree (fpos=0x4f3, size=35) --------------------------------------
040000 7d9872b3f8a2d8f1eca570e34203e1a530242e06 a subdir
--- OBJECT 13: blob (fpos=0x520, size=119) -------------------------------------
00000 | 89 50 4e 47 0d 0a 1a 0a 00 00 00 0d 49 48 44 52 | .PNG........IHDR
00010 | 00 00 00 01 00 00 00 01 08 02 00 00 00 90 77 53 | ..............wS
00020 | de 00 00 00 01 73 52 47 42 00 ae ce 1c e9 00 00 | .....sRGB.......
00030 | 00 04 67 41 4d 41 00 00 b1 8f 0b fc 61 05 00 00 | ..gAMA......a...
00040 | 00 09 70 48 59 73 00 00 0e c3 00 00 0e c3 01 c7 | ..pHYs..........
00050 | 6f a8 64 00 00 00 0c 49 44 41 54 18 57 63 f8 ff | o.d....IDAT.Wc..
00060 | ff 3f 00 05 fe 02 fe a7 35 81 84 00 00 00 00 49 | .?......5......I
00070 | 45 4e 44 ae 42 60 82 .. .. .. .. .. .. .. .. .. | END.B`.
--- OBJECT 14: blob (fpos=0x594, size=14) --------------------------------------
Hello, world.
--- OBJECT 15: blob (fpos=0x5ab, size=0) ---------------------------------------
--- OBJECT 16: blob (fpos=0x5b4, size=15) --------------------------------------
japan = 日本
--- OBJECT 17: blob (fpos=0x5cc, size=4) ---------------------------------------
Yo!

@ -0,0 +1,43 @@
--- FANOUT ---------------------------------------------------------------------
00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f
-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
00: 0 0 1 1 1 1 2 2 2 2 2 2 2 2 2 2
10: 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
20: 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
30: 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5
40: 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6
50: 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
60: 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
70: 6 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7
80: 7 8 8 8 8 8 8 8 8 9 9 9 9 9 9 9
90: 9 9 10 10 10 10 10 10 10 10 10 10 10 10 10 10
a0: 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
b0: 10 10 11 11 11 11 11 11 11 11 11 11 11 11 11 11
c0: 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11
d0: 11 11 11 11 11 11 11 11 11 11 11 11 11 12 13 13
e0: 13 13 13 13 13 13 14 14 14 14 14 14 14 14 14 14
f0: 14 14 14 14 14 14 15 16 16 17 17 18 18 18 18 18
--- OBJECTS (18) ---------------------------------------------------------------
name crc offset
---------------------------------------- -------- --------
0: 0214f2698f786600f169378e3001348b345fa3d8 084da0ff 0x16a
1: 06d4e9c2beb2c23a955bdb213d02ac63e15e3318 d68e100e 0xc
2: 1072e024f685d47432c4d30abdbf4564eb42ba36 4a9f022f 0x5b4
3: 30c84cc603759f82470a119c5b5348001ab13500 2e1dc193 0xa7
4: 36c4c87ea568a6728a769586ecaac54a0cbee508 c35e0867 0x474
5: 4794689f37a386016b4c5bd6e6ba757410c31e2f 36ec494e 0x4e5
6: 7d9872b3f8a2d8f1eca570e34203e1a530242e06 c78df9a1 0x43e
7: 818c71d03f435db011069584cda25c1f66af1a85 cddd5436 0x520
8: 89bac892dbe3bc028ec2c361d836791f446ffc11 e28588c0 0x28c
9: 92dfd7467382cbb75e4e808fe62e7e2bca44f9f2 e42ccf40 0x4f3
10: b2029d7668b35e4bd3d9f661b6b89028943124f3 7d1e7053 0x3ac
11: dda83875e2c2e9d5f31615a53280bb7ecca3c3f9 8f17452a 0x4d5
12: de483a0a2160dc698967033f2c351208e6da9066 62e6c92f 0x326
13: e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 6e760029 0x5ab
14: f6fa0c3be2a987cc010b467aced34fdb7841e729 0f842725 0x4a0
15: f75ba05f340c51065cbea2e1fdbfe5fe13144c97 cd6524d6 0x594
16: f967a9db1b6145568be7176f411f02dd174cc56c 358a7d04 0x5cc
17: fb407a730e3730eb07698f7ced84b596fe7fb7bd 3785ebe1 0x1f2

@ -0,0 +1,10 @@
--- heads ----------------------------------------------------------------------
--- tags -----------------------------------------------------------------------
--- packed refs ----------------------------------------------------------------
refs/heads/a-branch -> 06d4e9c2beb2c23a955bdb213d02ac63e15e3318
refs/heads/master -> 30c84cc603759f82470a119c5b5348001ab13500
refs/tags/annotated-tag -> 0214f2698f786600f169378e3001348b345fa3d8 -> 06d4e9c2beb2c23a955bdb213d02ac63e15e3318
refs/tags/lightweight-tag -> 30c84cc603759f82470a119c5b5348001ab13500

@ -0,0 +1,12 @@
--- heads ----------------------------------------------------------------------
a-branch -> 06d4e9c2beb2c23a955bdb213d02ac63e15e3318
master -> 30c84cc603759f82470a119c5b5348001ab13500
--- tags -----------------------------------------------------------------------
annotated-tag -> 0214f2698f786600f169378e3001348b345fa3d8
lightweight-tag -> 30c84cc603759f82470a119c5b5348001ab13500
--- packed refs ----------------------------------------------------------------

Binary file not shown.

@ -0,0 +1,19 @@
--- HEAD -----------------------------------------------------------------------
commit (initial): Added a greeting.
Taka (tutorial@git-guts) 2021-12-29 03:19:03 +0000
-> 2cccb4accf4fffcf9f6487a32b16750dfe298cc7
commit: Added 2 files in a sub-directory.
Taka (tutorial@git-guts) 2021-12-29 03:19:33 +0000
2cccb4accf4fffcf9f6487a32b16750dfe298cc7 -> 828139d586d32cbe03dc4b32cfefe42e678a3f2e
--- refs/heads/master ----------------------------------------------------------
commit (initial): Added a greeting.
Taka (tutorial@git-guts) 2021-12-29 03:19:03 +0000
-> 2cccb4accf4fffcf9f6487a32b16750dfe298cc7
commit: Added 2 files in a sub-directory.
Taka (tutorial@git-guts) 2021-12-29 03:19:33 +0000
2cccb4accf4fffcf9f6487a32b16750dfe298cc7 -> 828139d586d32cbe03dc4b32cfefe42e678a3f2e

@ -0,0 +1,42 @@
--- HEADER ---------------------------------------------------------------------
version: 2
entries: 3
--- ENTRIES --------------------------------------------------------------------
Entry 0: fpos=0xc
- path: hello.txt
- name: f75ba05f340c51065cbea2e1fdbfe5fe13144c97
- flags: stage=0, namelen=9 (0x0009)
- type: regular file (8)
- size: 14
- perms: rw-r--r-- (0x1a4)
Entry 1: fpos=0x54
- path: subdir/file1.txt
- name: 5c1170f2eaac6f78662a8cf899326a4b95c80dd2
- flags: stage=0, namelen=16 (0x0010)
- type: regular file (8)
- size: 16
- perms: rw-r--r-- (0x1a4)
Entry 2: fpos=0xa4
- path: subdir/file2.txt
- name: 3eac351c95c4facb0e99d156f14e3527e0f1c3e0
- flags: stage=0, namelen=16 (0x0010)
- type: regular file (8)
- size: 16
- perms: rw-r--r-- (0x1a4)
--- EXTENSIONS -----------------------------------------------------------------
TREE: fpos=0xf4, #bytes=56
- path = (empty)
- entries: 3
- subtrees: 1
- name: 533b8093315fa1071d9cfb2e1543677bea011f6e
- path = subdir
- entries: 2
- subtrees: 0
- name: f2a25c9255b37fb1e9491349524b532a86701bcc

@ -10,6 +10,9 @@
<ItemGroup>
<Compile Include="Utils.fs" />
<Compile Include="TestPacks.fs" />
<Compile Include="TestRefs.fs" />
<Compile Include="TestLogs.fs" />
<Compile Include="TestVerify.fs" />
<Compile Include="Program.fs" />
</ItemGroup>

Loading…
Cancel
Save