Added a script to dump the staging index.

master
Pacman Ghost 2 years ago
parent 0aab5db4a3
commit f83fc6aa5f
  1. 16
      cli/Program.fs
  2. 17
      git-guts/GitGuts.fs
  3. 10
      git-guts/PackData.fs
  4. 19
      git-guts/PackIndex.fs
  5. 309
      git-guts/StagingIndex.fs
  6. 30
      git-guts/Utils.fs
  7. 1
      git-guts/git-guts.fsproj
  8. 41
      git-guts/tests/TestIndex.fs
  9. 2
      git-guts/tests/TestPacks.fs
  10. 0
      git-guts/tests/fixtures/empty.staging-index.txt
  11. BIN
      git-guts/tests/fixtures/empty.zip
  12. 42
      git-guts/tests/fixtures/simple.staging-index.txt
  13. 1
      git-guts/tests/tests.fsproj

@ -85,6 +85,19 @@ type DumpPackObjectCommand() =
dumpPackObject fname settings.ObjName
0
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Command-line settings for the command that dumps the staging index.
type DumpStagingIndexSettings() =
    inherit AppSettings()

    // nb: off by default; the value is passed through to dumpStagingIndex as its "fullDump" argument
    [<CommandOption( "-f|--full" )>]
    member val FullDump : bool = false with get, set
// CLI command that dumps the staging index of the repo named in the settings.
type DumpStagingIndexCommand() =
    inherit Command<DumpStagingIndexSettings>()

    override _.Execute( _ctx, settings ) =
        // dump the staging index, then return 0 as the command's result
        let wantFullDump = settings.FullDump
        dumpStagingIndex settings.RepoDir wantFullDump
        0
// --------------------------------------------------------------------
[<EntryPoint>]
@ -109,5 +122,8 @@ let main argv =
cfg.AddCommand<DumpPackObjectCommand>( "dump-packobject" ).WithDescription(
"Dump a pack object."
) |> ignore
cfg.AddCommand<DumpStagingIndexCommand>( "dump-stagingindex" ).WithDescription(
"Dump the staging index."
) |> ignore
)
app.Run( argv )

@ -26,8 +26,7 @@ module GitGuts =
// FUDGE! In Python, we ask zlib to decompress bytes from the stream until it's done, but in .NET,
// we have to tell it how many bytes of uncompressed data we want, which doesn't seem to work when
// the byte count is 0 :-/ I'm not sure if skipping bytes like this is right, but it'll do for now...
for i = 0 to 5 do
inp.ReadByte() |> ignore
readBytes inp 6 |> ignore
[||]
else
use zstream = new DeflateStream( inp, CompressionMode.Decompress, true )
@ -82,15 +81,17 @@ module GitGuts =
let readObjName (inp: Stream) =
// read an object name (20 raw bytes) and return it as a hex-string
let buf = Array.zeroCreate 20
inp.Read( buf, 0, 20 ) |> ignore
let buf = readBytes inp 20
Convert.ToHexString( buf ).ToLower()
let readNboInt (inp: Stream) =
let _readNboInt (inp: Stream) nBytes =
// read a network-byte-order int
let buf = Array.zeroCreate 4
inp.Read( buf, 0, 4 ) |> ignore
( int(buf.[0]) <<< 24 ) ||| ( int(buf.[1]) <<< 16 ) ||| ( int(buf.[2]) <<< 8 ) ||| int(buf.[3])
let getBytes = Seq.initInfinite ( fun n -> inp.ReadByte() )
let foldByte acc byt =
( acc <<< 8 ) ||| int( byt )
Seq.take nBytes getBytes |> Seq.fold foldByte 0
let readNboInt4 (inp: Stream) = _readNboInt inp 4
let readNboInt2 (inp: Stream) = _readNboInt inp 2
let readVliBe (inp: Stream) isOffsetEncoding =
// read a variable-length integer (big-endian)

@ -14,14 +14,13 @@ module PackData =
let private _readPackDataHeader (inp: Stream) =
// read the header
let buf = Array.zeroCreate 4
inp.Read( buf, 0, 4 ) |> ignore
let buf = readBytes inp 4
if (Encoding.ASCII.GetString buf) <> "PACK" then
failwithf "Incorrect magic number: %A" buf
let version = readNboInt inp
let version = readNboInt4 inp
if version <> 2 then
failwithf "Unsupported pack file version: %d" version
let nObjs = readNboInt inp
let nObjs = readNboInt4 inp
( version, nObjs )
@ -58,8 +57,7 @@ module PackData =
else
// add new data
let nBytes = byt &&& 0x7f
let buf = Array.zeroCreate nBytes
tdataStream.Read( buf, 0, nBytes ) |> ignore
let buf = readBytes tdataStream nBytes
yield! buf
yield! getBytes()
}

@ -16,11 +16,10 @@ module PackIndex =
// NOTE: v1 index files don't have a version number, and just start with the fanout table, but
// the magic number we check for here is an unreasonable value for fanout[0], so this is how
// we detect the different versions.
let buf = Array.zeroCreate 4
inp.Read( buf, 0, 4 ) |> ignore
let buf = readBytes inp 4
if buf <> [|0xffuy; 116uy; 79uy; 99uy|] then // nb: 0xff, then "tOc"
failwithf "Incorrect magic number: %A" buf
let version = readNboInt inp
let version = readNboInt4 inp
if version <> 2 then
// NOTE: Version 1 is also valid, but quite old, and we don't support it.
failwithf "Unsupported pack index version: %d" version
@ -39,13 +38,13 @@ module PackIndex =
// read the fanout table
let byte0 = Int32.Parse( objName.[0..1], Globalization.NumberStyles.AllowHexSpecifier )
inp.Seek( int64( 4*byte0 ), SeekOrigin.Current ) |> ignore
let endIndex = readNboInt inp
let endIndex = readNboInt4 inp
let startIndex =
if byte0 = 0 then
0
else
inp.Seek( -8L, SeekOrigin.Current ) |> ignore
readNboInt inp
readNboInt4 inp
// NOTE: We now have two indexes into the table of object names:
// - startIndex = index of an object name <= the name we're looking for
// - endIndex = index of an object name > the name we're looking for
@ -67,9 +66,9 @@ module PackIndex =
if midObjName = objName then
// yup - get the object's offset in the pack data
seekTo ( fposNames - 4 )
let nObjs = readNboInt inp // nb: this is the last value in the fanout table
let nObjs = readNboInt4 inp // nb: this is the last value in the fanout table
seekTo ( fposNames + 20*nObjs + 4*nObjs + 4*midIndex )
Some ( readNboInt inp )
Some ( readNboInt4 inp )
else
// nope - continue the binary search
if midObjName < objName then
@ -92,9 +91,9 @@ module PackIndex =
seekTo fposNames 20 objNo
let objName = readObjName inp
seekTo fposCrcs 4 objNo
let crc = readNboInt inp
let crc = readNboInt4 inp
seekTo fposOffsets 4 objNo
let offset = readNboInt inp
let offset = readNboInt4 inp
if offset &&& 0x80000000 <> 0 then
failwithf "Large offsets are not supported."
@ -109,7 +108,7 @@ module PackIndex =
let version = _readPackIndexHeader inp
// read the fanout table
let getFanoutVals = Seq.initInfinite (fun n -> readNboInt inp)
let getFanoutVals = Seq.initInfinite (fun n -> readNboInt4 inp)
let fanout = Seq.take 256 getFanoutVals |> Seq.toArray
let nObjs = fanout.[255]

@ -0,0 +1,309 @@
namespace git_guts
open System
open System.IO
open System.Text
open Spectre.Console
// --------------------------------------------------------------------
// This records a single entry in a repo's staging index.
type StagingIndexEntry =
    {
        ctime: int * int // nb: seconds + nanoseconds
        mtime: int * int // nb: seconds + nanoseconds
        dev: int
        ino: int
        objType: int
        perms: uint16
        uid: int
        gid: int
        fileSize: int
        objName: string
        flags: uint16
        extendedFlags: uint16 option
        path: byte[] // nb: the encoding is unknown
    }

    // lookup tables used to describe the object type and the bitflags
    static member private _OBJECT_TYPES = Map[ ( 0x08, "regular file" ); ( 0x0a, "symlink" ); ( 0x0e, "gitlink" ) ]
    static member private _FLAG_NAMES = Map[ (0x8000u, "assume-valid"); (0x4000u, "extended") ]
    static member private _EXTENDED_FLAG_NAMES = Map[ (0x4000u, "skip-worktree"); (0x2000u, "intent-to-add") ]

    member private this._flagsStr =
        // return the StagingIndexEntry's flags as a string
        let flags = uint( this.flags )
        // NOTE: bitflagString returns either "0xNNNN", or "name1, name2 (0xNNNN)" if any named flags are set,
        // so the flag names (if any) are everything before the trailing " (0xNNNN)".
        let bflags = bitflagString flags 2 StagingIndexEntry._FLAG_NAMES
        let flagNameVals =
            if bflags.Length >= 10 then [ bflags.[ 0 .. bflags.Length-10 ] ] else []
        let stageVal = sprintf "stage=%d" ((flags &&& 0x3000u) >>> 12)
        // NOTE: The "name length" field is for the entry path name, not the object name.
        let namelen = flags &&& 0x0fffu
        let namelenVal =
            if namelen < 0xfffu then sprintf "namelen=%d" namelen else "namelen=0xFFF"
        let valsStr = String.Join( ", ", flagNameVals @ [ stageVal; namelenVal ] )
        // extract the "0xNNNN" part on its own
        let bflags2 = if bflags.Length >= 8 then bflags.Substring( bflags.Length-7, 6 ) else bflags
        sprintf "%s (%s)" valsStr bflags2

    member private this._xflagsStr =
        // return the StagingIndexEntry's extended flags as a string
        // nb: this must only be used when extendedFlags has a value
        let xflags = uint( this.extendedFlags.Value )
        bitflagString xflags 2 StagingIndexEntry._EXTENDED_FLAG_NAMES

    member this.dumpObj fullDump =
        // dump the StagingIndexEntry (fullDump also shows the uid/gid, timestamps, device and inode)
        // NOTE: The encoding for the path is actually unknown :-/ Encoding.UTF8 uses replacement fallback.
        AnsiConsole.MarkupLine( "- path: {0}", pathStr (Encoding.UTF8.GetString( this.path )) )
        AnsiConsole.MarkupLine( "- name: {0}", objNameStr this.objName )
        printfn "- flags: %s" this._flagsStr
        if this.extendedFlags.IsSome then
            printfn " %s" this._xflagsStr
        // NOTE: An entry can have an object type we don't have a name for (e.g. a directory entry in
        // a sparse index), so we fall back to a placeholder rather than fail with a KeyNotFoundException.
        let objTypeName =
            match Map.tryFind this.objType StagingIndexEntry._OBJECT_TYPES with
            | Some typeName -> typeName
            | None -> "unknown"
        printfn "- type: %s (%d)" objTypeName this.objType
        printfn "- size: %d" this.fileSize
        printfn "- perms: %s" (permsString this.perms)
        if fullDump then
            printfn "- uid: %d" this.uid
            printfn "- gid: %d" this.gid
            let makeTimeStr timeVal =
                // nb: the seconds are counted from the Unix epoch, and shown in local time
                let epoch = DateTime( 1970, 1, 1, 0, 0, 0, DateTimeKind.Utc )
                let dt = epoch.AddSeconds( float( fst timeVal ) )
                sprintf "%s (%d.%09d)" (dt.ToLocalTime().ToString("yyyy-MM-dd HH:mm:ss")) (fst timeVal) (snd timeVal)
            printfn "- ctime: %s" (makeTimeStr this.ctime)
            printfn "- mtime: %s" (makeTimeStr this.mtime)
            printfn "- dev: %d" this.dev
            printfn "- ino: %d" this.ino
// --------------------------------------------------------------------
// Base class for extensions stored in the staging index.
[<AbstractClass>]
type StagingIndexExtension () =

    // the extension's signature, as a string (e.g. "TREE")
    abstract extnSig: string

    // dump the extension's content
    abstract dumpExtn: unit -> unit
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Used to hold a TREE extension.
// A single entry parsed from the TREE extension's data.
type TreeExtensionEntry =
    {
        path: byte[] // nb: the encoding is unknown
        nEntries: int
        nSubTrees: int
        objName: string option
    }

    member this.dumpEntry =
        // dump the entry (the object name is only shown if there is one)
        // NOTE: The encoding for the path is actually unknown :-/ Encoding.UTF8 uses replacement fallback.
        let pathText = Encoding.UTF8.GetString this.path
        AnsiConsole.MarkupLine( "- path = {0}", pathStr pathText )
        printfn " - entries: %d" this.nEntries
        printfn " - subtrees: %d" this.nSubTrees
        match this.objName with
        | Some name -> AnsiConsole.MarkupLine( " - name: {0}", objNameStr name )
        | None -> ()
// Parses the raw data of a TREE extension into its entries.
type TreeExtension( extnData: byte[] ) =
    inherit StagingIndexExtension ()

    override this.extnSig = "TREE"

    member val entries =
        // parse the TREE extension data, one entry at a time, until the data has been used up
        use extnDataBuf = new MemoryStream( extnData )
        [|
            while extnDataBuf.Position < extnDataBuf.Length do
                // each entry is: NUL-terminated path, ASCII entry count + space, ASCII subtree count + newline
                let path = readUntil extnDataBuf 0uy
                let entryCount = int( Encoding.ASCII.GetString( readUntil extnDataBuf 0x20uy ) )
                let subTreeCount = int( Encoding.ASCII.GetString( readUntil extnDataBuf 0x0auy ) )
                // nb: an entry count of -1 means there is no object name to read
                let objName = if entryCount = -1 then None else Some( readObjName extnDataBuf )
                yield { path=path; nEntries=entryCount; nSubTrees=subTreeCount; objName=objName }
        |]

    override this.dumpExtn () =
        // dump the TREE extension
        this.entries |> Array.iter ( fun entry -> entry.dumpEntry )
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Used to hold a REUC extension.
// A single entry parsed from the REUC extension's data.
type ReucExtensionEntry =
    {
        path: byte[] // nb: the encoding is unknown
        stages: ( string * string option )[] // perms + object name
    }

    member this.dumpEntry =
        // dump the entry (stages are numbered from 1)
        // NOTE: The encoding for the path is actually unknown :-/ Encoding.UTF8 uses replacement fallback.
        let pathText = Encoding.UTF8.GetString this.path
        AnsiConsole.MarkupLine( "- path = {0}", pathStr pathText )
        this.stages |> Array.iteri ( fun stageIdx (perms, objName) ->
            // nb: the object name (if any) is shown after the perms, separated by a space
            let nameSuffix =
                match objName with
                | Some name -> sprintf " %s" name
                | None -> ""
            AnsiConsole.MarkupLine( " - stage {0}: {1}{2}", stageIdx+1, perms, objNameStr nameSuffix )
        )
// Parses the raw data of a REUC extension into its entries.
type ReucExtension( extnData: byte[] ) =
    inherit StagingIndexExtension ()

    override this.extnSig = "REUC"

    member val entries =
        // parse the REUC extension data, one entry at a time, until the data has been used up
        use extnDataBuf = new MemoryStream( extnData )
        [|
            while extnDataBuf.Position < extnDataBuf.Length do
                let path = readUntil extnDataBuf 0uy
                // the 3 modes are stored first (nb: these are ASCII octal numbers)...
                let modes = Array.init 3 ( fun _ -> readString extnDataBuf "ascii" )
                // ...followed by an object name for each mode that isn't "0"
                let objNames =
                    modes |> Array.map ( fun mode ->
                        if mode = "0" then None else Some( readObjName extnDataBuf )
                    )
                yield { path=path; stages=Array.zip modes objNames }
        |]

    override this.dumpExtn () =
        // dump the REUC extension
        this.entries |> Array.iter ( fun entry -> entry.dumpEntry )
// --------------------------------------------------------------------
[<AutoOpen>]
module StagingIndex =
let private _readStagingIndexHeader (inp: Stream) =
    // check the magic number at the start of the header
    let magic = readBytes inp 4
    if (Encoding.ASCII.GetString magic) <> "DIRC" then
        failwithf "Incorrect magic number: %A" magic
    // read the version number, and return it if it's one we recognize
    match readNboInt4 inp with
    | 2 | 3 | 4 as version -> version
    | version -> failwithf "Unexpected version: %d" version
let private _readStagingIndexEntry (inp: Stream) version =
    // Read the next entry from the staging index, and return it together with the file position
    // it started at. The stream must be positioned at the start of an entry.
    // NOTE: Entries usually represent a file, but can sometimes refer to a directory (if sparse checkout
    // is enabled in cone mode, and the sparse index extension is enabled), in which case:
    // - mode = 040000
    // - has SKIP_WORKTREE in the extended flags
    // - the path ends with a directory separator
    // IMPORTANT! We assume we're not in split index mode (the entry format is completely different).
    let readPath () =
        if version = 4 then
            // NOTE: The path format is completely different in v4, so for simplicity, we don't support it.
            failwith "Version 4 is not supported."
        readUntil inp 0uy
    // read the next staging index entry (nb: the fields must be read in exactly this order)
    let fposStart = inp.Position
    let ctime = ( readNboInt4 inp, readNboInt4 inp )
    let mtime = ( readNboInt4 inp, readNboInt4 inp )
    let dev = readNboInt4 inp
    let ino = readNboInt4 inp
    // NOTE: The doco says that the mode field is a 32-bit value, but only accounts for 16 of them :-/
    let mode = readBytes inp 4
    let objType = int( mode.[2] &&& 0xf0uy ) >>> 4
    let perms = uint16( mode.[2] &&& 0x01uy ) <<< 8 ||| uint16(mode.[3])
    let uid = readNboInt4 inp
    let gid = readNboInt4 inp
    let fileSize = readNboInt4 inp
    let objName = readObjName inp
    let flags = uint16( readNboInt2 inp )
    // NOTE: The "extended" flag is bit 0x4000 (0x8000 is "assume-valid", 0x3000 is the stage, and
    // the low 12 bits, which include 0x0400, are the name length), so that is the bit to test.
    let extendedFlagMask = 0x4000us
    let extendedFlags =
        if version >= 3 && flags &&& extendedFlagMask <> 0us then
            Some( uint16( readNboInt2 inp ) )
        else
            None
    let path = readPath ()
    // skip over the pad bytes (used to 8-align entries)
    if version <> 4 then
        while (inp.Position - fposStart) % 8L <> 0L do
            inp.ReadByte() |> ignore
    // create the StagingIndexEntry record
    let entry = {
        ctime=ctime; mtime=mtime
        dev=dev; ino=ino
        objType=objType
        perms=perms
        uid=uid; gid=gid
        fileSize=fileSize
        objName=objName
        flags=flags; extendedFlags=extendedFlags
        path=path
    }
    ( entry, fposStart )
let private _makeStagingIndexExtension (extnSig: byte[]) extnData =
    // create the StagingIndexExtension-derived object that corresponds to the signature bytes
    if extnSig = "TREE"B then
        TreeExtension( extnData ) :> StagingIndexExtension
    elif extnSig = "REUC"B then
        ReucExtension( extnData ) :> StagingIndexExtension
    else
        failwithf "Unknown extension sig: %A" extnSig
let private _readExtension inp =
    // an extension is stored as: 4-byte signature, 4-byte size (network byte order), then the data
    let extnSig = readBytes inp 4
    let payloadSize = readNboInt4 inp
    let payload = readBytes inp payloadSize
    // return the parsed extension, together with its raw data
    ( _makeStagingIndexExtension extnSig payload, payload )
let dumpStagingIndex repoDir fullDump =
    // Dump the header, entries and extensions of the repo's staging index (.git/index).

    // open the staging index file
    let fname = Path.Join( repoDir, ".git/index" )
    if not ( File.Exists( fname ) ) then
        failwith "Can't find the staging index file."
    use inp = new FileStream( fname, FileMode.Open, FileAccess.Read, FileShare.Read )

    // dump the header
    let version = _readStagingIndexHeader inp
    let nEntries = readNboInt4 inp
    AnsiConsole.MarkupLine( makeHeader "HEADER" "" )
    printfn ""
    printfn "version: %d" version
    printfn "entries: %d" nEntries

    // dump the entries
    printfn ""
    AnsiConsole.MarkupLine( makeHeader "ENTRIES" "" )
    for entryNo in 0 .. nEntries-1 do
        printfn ""
        let entry, fpos = _readStagingIndexEntry inp version
        AnsiConsole.MarkupLine( sprintf "[cyan]Entry %d[/]: fpos=0x%x" entryNo fpos )
        entry.dumpObj fullDump

    // dump the extensions (nb: the section header is only shown if there is at least one extension)
    let fposEnd = inp.Length - 20L // we ignore the checksum at the end of the file
    let mutable extnNo = 0
    while inp.Position < fposEnd do
        let fpos = inp.Position
        let extn, extnData = _readExtension inp
        if extnNo = 0 then
            printfn ""
            AnsiConsole.MarkupLine( makeHeader "EXTENSIONS" "" )
        printfn ""
        AnsiConsole.MarkupLine( "[cyan]{0}[/]: fpos=0x{1:x}, #bytes={2}",
            extn.extnSig, fpos, extnData.Length
        )
        extn.dumpExtn ()
        extnNo <- extnNo + 1

@ -122,3 +122,33 @@ module Utils =
match path with
| "" -> "(empty)"
| _ -> "[green]" + (if path.Substring(0,2) = "./" then path.Substring(2) else path) + "[/]"
let bitflagString (flags: uint) nBytes (flagNames: Map<uint,string>) =
    // Convert the bitflags to a formatted string: the flags as a zero-padded hex number (2 digits
    // per byte), preceded by the names of any flags that are set (ordered by their bitmask).
    let hexFormat = sprintf "0x{0:x%d}" (2 * nBytes)
    let hexStr = String.Format( hexFormat, flags )
    let setNames =
        [|
            for KeyValue( mask, name ) in flagNames do
                if flags &&& mask <> 0u then
                    yield name
        |]
    match setNames with
    | [||] -> hexStr
    | _ -> sprintf "%s (%s)" (String.Join( ", ", setNames )) hexStr
let permsString (perms: uint16) =
    // Convert the file permission flags to a formatted string e.g. "rw-r--r-- (0x1a4)".
    let permNames = "rwxrwxrwx"
    let pickChar flagNo permChar =
        // nb: the first character corresponds to bit 8, the last one to bit 0
        let bmask = uint16( 1 <<< (8 - flagNo) )
        if perms &&& bmask <> 0us then permChar else '-'
    let permsStr = String( permNames |> Seq.mapi pickChar |> Seq.toArray )
    sprintf "%s (0x%x)" permsStr perms
let readBytes (inp: Stream) nBytes =
    // Read exactly the specified number of bytes from the stream, and return them as an array.
    // Fails if the stream ends before that many bytes could be read.
    // NOTE: A single Stream.Read() call is allowed to return fewer bytes than were asked for, even
    // if more data will follow, so we keep reading until we have everything (or reach the end).
    let buf = Array.zeroCreate<byte> nBytes
    let mutable nBytesRead = 0
    let mutable atEnd = false
    while nBytesRead < nBytes && not atEnd do
        let nRead = inp.Read( buf, nBytesRead, nBytes - nBytesRead )
        if nRead <= 0 then
            atEnd <- true // nb: a return value of 0 means end-of-stream
        else
            nBytesRead <- nBytesRead + nRead
    if nBytesRead <> nBytes then
        failwithf "Unexpected number of bytes read: %d/%d" nBytesRead nBytes
    buf

@ -13,6 +13,7 @@
<Compile Include="PackData.fs" />
<Compile Include="PackIndex.fs" />
<Compile Include="Pack.fs" />
<Compile Include="StagingIndex.fs" />
<Compile Include="GitRepo.fs" />
</ItemGroup>

@ -0,0 +1,41 @@
namespace tests
open System
open System.IO
open Microsoft.VisualStudio.TestTools.UnitTesting
open git_guts
// --------------------------------------------------------------------
// Tests for dumping the staging index.
[<TestClass>]
type TestStagingIndex () =

    [<TestInitialize>]
    member this.init () =
        // prepare to run a test
        disableSpectreCapabilities

    [<TestMethod>]
    member this.TestDumpStagingIndex () =

        let doTest zipFname =
            // set up the test repo
            use gitTestRepo = new GitTestRepo( zipFname )
            runGitGc gitTestRepo.repoDir
            // dump the staging index, and compare the captured output with the fixture file
            // that has the same base name as the zip file
            use cap = new CaptureStdout()
            dumpStagingIndex gitTestRepo.repoDir false
            let baseName = Path.GetFileNameWithoutExtension( zipFname )
            let expectedFname =
                Path.Combine( __SOURCE_DIRECTORY__, "fixtures", baseName + ".staging-index.txt" )
            cap.checkOutput expectedFname

        // run the tests (nb: the dump is expected to fail for the empty repo)
        Assert.ThrowsException<Exception>( fun () ->
            doTest "empty.zip"
        ) |> ignore
        doTest "simple.zip"

@ -51,7 +51,7 @@ type TestPacks () =
// check that we can find each object name correctly
using ( new FileStream( packIndexFname, FileMode.Open, FileAccess.Read, FileShare.Read ) ) ( fun inp ->
inp.Seek( int64( 4 + 4 + 4*256 - 4 ), SeekOrigin.Begin ) |> ignore
let nObjs = readNboInt inp
let nObjs = readNboInt4 inp
for objNo = 0 to nObjs-1 do
let objName, _, _ = readPackIndexObject inp objNo nObjs
let obj = findRepoObject gitTestRepo.repoDir objName

Binary file not shown.

@ -0,0 +1,42 @@
--- HEADER ---------------------------------------------------------------------
version: 2
entries: 3
--- ENTRIES --------------------------------------------------------------------
Entry 0: fpos=0xc
- path: hello.txt
- name: f75ba05f340c51065cbea2e1fdbfe5fe13144c97
- flags: stage=0, namelen=9 (0x0009)
- type: regular file (8)
- size: 14
- perms: rw-r--r-- (0x1a4)
Entry 1: fpos=0x54
- path: subdir/file1.txt
- name: 5c1170f2eaac6f78662a8cf899326a4b95c80dd2
- flags: stage=0, namelen=16 (0x0010)
- type: regular file (8)
- size: 16
- perms: rw-r--r-- (0x1a4)
Entry 2: fpos=0xa4
- path: subdir/file2.txt
- name: 3eac351c95c4facb0e99d156f14e3527e0f1c3e0
- flags: stage=0, namelen=16 (0x0010)
- type: regular file (8)
- size: 16
- perms: rw-r--r-- (0x1a4)
--- EXTENSIONS -----------------------------------------------------------------
TREE: fpos=0xf4, #bytes=56
- path = (empty)
- entries: 3
- subtrees: 1
- name: 533b8093315fa1071d9cfb2e1543677bea011f6e
- path = subdir
- entries: 2
- subtrees: 0
- name: f2a25c9255b37fb1e9491349524b532a86701bcc

@ -10,6 +10,7 @@
<ItemGroup>
<Compile Include="Utils.fs" />
<Compile Include="TestPacks.fs" />
<Compile Include="TestIndex.fs" />
<Compile Include="Program.fs" />
</ItemGroup>

Loading…
Cancel
Save