Skip to content

Commit def0c7b

Browse files
carlosmnJ Wyman
authored and
J Wyman
committed
PoC for writing directly into the next stream
The filters are run in-line with the rest of the system, and libgit2 is set up to have its last filter/writestream write to the filesystem. Instead of using an intermediate buffer/file/pipe, we can make the writes from the user's Smude() or Clean() filter into their output stream propagate immediately into the next filter, leading up the chain to the filesystem, removing any intermediate buffering other than that which is necessary for the filters themselves to work. In the general case, we only have the one clean or smudge filter, which means that even in the case of out-of-band storage for large files, where a small blob can cause very large outputs, we would still write directly to the filesystem. by J Wyman (@whoisj): Buffered writes to next filter to minimize managed to native marshaling for performance optimizations by J Wyman (@whoisj): Split `WriteStream` into its own file and cleaned it up (checks, style, etc)
1 parent 4ac3557 commit def0c7b

File tree

3 files changed

+76
-81
lines changed

3 files changed

+76
-81
lines changed

LibGit2Sharp/Core/WriteStream.cs

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
using System;
2+
using System.IO;
3+
4+
namespace LibGit2Sharp.Core
5+
{
6+
class WriteStream : Stream
7+
{
8+
readonly GitWriteStream nextStream;
9+
readonly IntPtr nextPtr;
10+
11+
public WriteStream(GitWriteStream nextStream, IntPtr nextPtr)
12+
{
13+
this.nextStream = nextStream;
14+
this.nextPtr = nextPtr;
15+
}
16+
17+
public override bool CanWrite { get { return true; } }
18+
19+
public override bool CanRead { get { return false; } }
20+
21+
public override bool CanSeek { get { return false; } }
22+
23+
public override long Position
24+
{
25+
get { throw new NotImplementedException(); }
26+
set { throw new InvalidOperationException(); }
27+
}
28+
29+
public override long Length { get { throw new InvalidOperationException(); } }
30+
31+
public override void Flush()
32+
{
33+
}
34+
35+
public override void SetLength(long value)
36+
{
37+
throw new InvalidOperationException();
38+
}
39+
40+
public override int Read(byte[] buffer, int offset, int count)
41+
{
42+
throw new InvalidOperationException();
43+
}
44+
45+
public override long Seek(long offset, SeekOrigin origin)
46+
{
47+
throw new InvalidOperationException();
48+
}
49+
50+
public override void Write(byte[] buffer, int offset, int count)
51+
{
52+
unsafe
53+
{
54+
fixed (byte* bufferPtr = &buffer[offset])
55+
{
56+
if (nextStream.write(nextPtr, (IntPtr)bufferPtr, (UIntPtr)count) < 0)
57+
{
58+
throw new LibGit2SharpException("failed to write to next buffer");
59+
}
60+
}
61+
}
62+
}
63+
}
64+
}

LibGit2Sharp/Filter.cs

Lines changed: 11 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
using System;
22
using System.Collections.Generic;
3-
using System.Diagnostics;
43
using System.IO;
54
using System.Linq;
65
using System.Runtime.InteropServices;
@@ -17,6 +16,8 @@ public abstract class Filter : IEquatable<Filter>
1716
{
1817
private static readonly LambdaEqualityHelper<Filter> equalityHelper =
1918
new LambdaEqualityHelper<Filter>(x => x.Name, x => x.Attributes);
19+
// 64K is optimal buffer size per https://technet.microsoft.com/en-us/library/cc938632.aspx
20+
private const int BufferSize = 64 * 1024;
2021

2122
private readonly string name;
2223
private readonly IEnumerable<FilterAttributeEntry> attributes;
@@ -51,6 +52,7 @@ protected Filter(string name, IEnumerable<FilterAttributeEntry> attributes)
5152
private IntPtr thisPtr;
5253
private IntPtr nextPtr;
5354
private FilterSource filterSource;
55+
private Stream output;
5456

5557
/// <summary>
5658
/// The name that this filter was registered with
@@ -80,7 +82,7 @@ internal GitFilter GitFilter
8082
/// Complete callback on filter
8183
///
8284
/// This optional callback will be invoked when the upstream filter is
83-
/// closed. Gives the filter a change to perform any final actions or
85+
/// closed. Gives the filter a chance to perform any final actions or
8486
/// necissary clean up.
8587
/// </summary>
8688
/// <param name="path">The path of the file being filtered</param>
@@ -224,6 +226,7 @@ int StreamCreateCallback(out IntPtr git_writestream_out, GitFilter self, IntPtr
224226
nextStream = new GitWriteStream();
225227
Marshal.PtrToStructure(nextPtr, nextStream);
226228
filterSource = FilterSource.FromNativePtr(filterSourcePtr);
229+
output = new WriteStream(nextStream, nextPtr);
227230
}
228231
catch (Exception exception)
229232
{
@@ -254,20 +257,10 @@ int StreamCloseCallback(IntPtr stream)
254257
Ensure.ArgumentNotZeroIntPtr(stream, "stream");
255258
Ensure.ArgumentIsExpectedIntPtr(stream, thisPtr, "stream");
256259

257-
string tempFileName = Path.GetTempFileName();
258-
// Setup a file system backed write-to stream to work with this gives the runtime
259-
// somewhere to put bits if the amount of data could cause an OOM scenario.
260-
using (FileStream output = File.Open(tempFileName, FileMode.Open, FileAccess.Write, FileShare.ReadWrite))
261-
// Setup a septerate read-from stream on the same file system backing
262-
// a seperate stream helps avoid a flush to disk when reading the written content
263-
using (FileStream reader = File.Open(tempFileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
260+
using (BufferedStream outputBuffer = new BufferedStream(output, BufferSize))
264261
{
265-
Complete(filterSource.Path, filterSource.Root, output);
266-
output.Flush();
267-
WriteToNextFilter(reader);
262+
Complete(filterSource.Path, filterSource.Root, outputBuffer);
268263
}
269-
// clean up after outselves
270-
File.Delete(tempFileName);
271264
}
272265
catch (Exception exception)
273266
{
@@ -310,53 +303,22 @@ unsafe int StreamWriteCallback(IntPtr stream, IntPtr buffer, UIntPtr len)
310303

311304
string tempFileName = Path.GetTempFileName();
312305
using (UnmanagedMemoryStream input = new UnmanagedMemoryStream((byte*)buffer.ToPointer(), (long)len))
313-
// Setup a file system backed write-to stream to work with this gives the runtime
314-
// somewhere to put bits if the amount of data could cause an OOM scenario.
315-
using (FileStream output = File.Open(tempFileName, FileMode.Open, FileAccess.Write, FileShare.ReadWrite))
316-
// Setup a septerate read-from stream on the same file system backing
317-
// a seperate stream helps avoid a flush to disk when reading the written content
318-
using (FileStream reader = File.Open(tempFileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
306+
using (BufferedStream outputBuffer = new BufferedStream(output, BufferSize))
319307
{
320308
switch (filterSource.SourceMode)
321309
{
322310
case FilterMode.Clean:
323-
try
324-
{
325-
Clean(filterSource.Path, filterSource.Root, input, output);
326-
}
327-
catch (Exception exception)
328-
{
329-
Log.Write(LogLevel.Error, "Filter.StreamWriteCallback exception");
330-
Log.Write(LogLevel.Error, exception.ToString());
331-
Proxy.giterr_set_str(GitErrorCategory.Filter, exception);
332-
result = (int)GitErrorCode.Error;
333-
}
311+
Clean(filterSource.Path, filterSource.Root, input, outputBuffer);
334312
break;
335313

336314
case FilterMode.Smudge:
337-
try
338-
{
339-
Smudge(filterSource.Path, filterSource.Root, input, output);
340-
}
341-
catch (Exception exception)
342-
{
343-
Log.Write(LogLevel.Error, "Filter.StreamWriteCallback exception");
344-
Log.Write(LogLevel.Error, exception.ToString());
345-
Proxy.giterr_set_str(GitErrorCategory.Filter, exception);
346-
result = (int)GitErrorCode.Error;
347-
}
315+
Smudge(filterSource.Path, filterSource.Root, input, outputBuffer);
348316
break;
317+
349318
default:
350319
Proxy.giterr_set_str(GitErrorCategory.Filter, "Unexpected filter mode.");
351320
return (int)GitErrorCode.Ambiguous;
352321
}
353-
354-
if (result == (int)GitErrorCode.Ok)
355-
{
356-
// have to flush the write-to stream to enable the read stream to get access to the bits
357-
output.Flush();
358-
result = WriteToNextFilter(reader);
359-
}
360322
}
361323

362324
// clean up after outselves
@@ -372,37 +334,5 @@ unsafe int StreamWriteCallback(IntPtr stream, IntPtr buffer, UIntPtr len)
372334

373335
return result;
374336
}
375-
376-
private unsafe int WriteToNextFilter(Stream output)
377-
{
378-
// 64K is optimal buffer size per https://technet.microsoft.com/en-us/library/cc938632.aspx
379-
const int BufferSize = 64 * 1024;
380-
381-
Debug.Assert(output != null, "output parameter is null");
382-
Debug.Assert(output.CanRead, "output.CanRead parameter equals false");
383-
384-
int result = 0;
385-
byte[] bytes = new byte[BufferSize];
386-
IntPtr bytesPtr = Marshal.AllocHGlobal(BufferSize);
387-
try
388-
{
389-
int read = 0;
390-
while ((read = output.Read(bytes, 0, bytes.Length)) > 0)
391-
{
392-
Marshal.Copy(bytes, 0, bytesPtr, read);
393-
if ((result = nextStream.write(nextPtr, bytesPtr, (UIntPtr)read)) != (int)GitErrorCode.Ok)
394-
{
395-
Proxy.giterr_set_str(GitErrorCategory.Filter, "Filter write to next stream failed");
396-
break;
397-
}
398-
}
399-
}
400-
finally
401-
{
402-
Marshal.FreeHGlobal(bytesPtr);
403-
}
404-
405-
return result;
406-
}
407337
}
408338
}

LibGit2Sharp/LibGit2Sharp.csproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@
7575
<Compile Include="Core\Platform.cs" />
7676
<Compile Include="Core\Handles\ConflictIteratorSafeHandle.cs" />
7777
<Compile Include="Core\GitWriteStream.cs" />
78+
<Compile Include="Core\WriteStream.cs" />
7879
<Compile Include="DescribeOptions.cs" />
7980
<Compile Include="DescribeStrategy.cs" />
8081
<Compile Include="Core\GitDescribeFormatOptions.cs" />

0 commit comments

Comments
 (0)