From 212591d3b4754fd115263351bf8d0670b4607639 Mon Sep 17 00:00:00 2001 From: Sam Harwell Date: Fri, 6 Nov 2020 10:08:20 -0800 Subject: [PATCH 1/3] Use shared storage for module names in gcdump --- src/MemoryGraph/graph.cs | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/src/MemoryGraph/graph.cs b/src/MemoryGraph/graph.cs index 0225eb884..efc5eb6ab 100644 --- a/src/MemoryGraph/graph.cs +++ b/src/MemoryGraph/graph.cs @@ -500,13 +500,38 @@ public virtual void ToStream(Serializer serializer) { serializer.Write(m_totalSize); serializer.Write((int)RootIndex); - // Write out the Types + + // Write out the module names for types + var moduleNames = new Dictionary<string, int>(); + foreach (var type in m_types) + { + if (type.ModuleName is null) + continue; + + if (!moduleNames.ContainsKey(type.ModuleName)) + { + // Index 0 is implicitly null, so start with 1 for the first non-null value + moduleNames.Add(type.ModuleName, moduleNames.Count + 1); + } + } + + serializer.Write(moduleNames.Count); + foreach (var pair in moduleNames) + { + // Dictionary iterates in insertion order + serializer.Write(pair.Key); + } + + // Write out the Types serializer.Write(m_types.Count); for (int i = 0; i < m_types.Count; i++) { serializer.Write(m_types[i].Name); serializer.Write(m_types[i].Size); - serializer.Write(m_types[i].ModuleName); + if (m_types[i].ModuleName is null) + serializer.Write(0); + else + serializer.Write(moduleNames[m_types[i].ModuleName]); } // Write out the Nodes @@ -558,6 +583,14 @@ public void FromStream(Deserializer deserializer) deserializer.Read(out m_totalSize); RootIndex = (NodeIndex)deserializer.ReadInt(); + // Read in the module names + var moduleNamesCount = deserializer.ReadInt(); + var moduleNames = new string[moduleNamesCount + 1]; + for (int i = 0; i < moduleNamesCount; i++) + { + moduleNames[i + 1] = deserializer.ReadString(); + } + // Read in the Types TypeInfo info = new TypeInfo(); int typeCount = 
deserializer.ReadInt(); @@ -566,7 +599,7 @@ public void FromStream(Deserializer deserializer) { deserializer.Read(out info.Name); deserializer.Read(out info.Size); - deserializer.Read(out info.ModuleName); + info.ModuleName = moduleNames[deserializer.ReadInt()]; m_types.Add(info); } From 571685df7bf1f5343280553717e58d861e615004 Mon Sep 17 00:00:00 2001 From: Sam Harwell Date: Fri, 6 Nov 2020 10:22:31 -0800 Subject: [PATCH 2/3] Apply differential compression when writing m_nodes --- .../SegmentedMemoryStreamReader.cs | 6 +++++- .../SegmentedMemoryStreamWriter.cs | 2 +- src/MemoryGraph/graph.cs | 20 +++++++++++++++---- 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/src/FastSerialization/SegmentedMemoryStreamReader.cs b/src/FastSerialization/SegmentedMemoryStreamReader.cs index 5239d2aeb..6b47ff76d 100644 --- a/src/FastSerialization/SegmentedMemoryStreamReader.cs +++ b/src/FastSerialization/SegmentedMemoryStreamReader.cs @@ -6,7 +6,7 @@ namespace FastSerialization { - public class SegmentedMemoryStreamReader + public class SegmentedMemoryStreamReader : IStreamReader { const int BlockCopyCapacity = 10 * 1024 * 1024; @@ -127,6 +127,10 @@ public string ReadString() } return sb.ToString(); } + void IStreamReader.Read(byte[] data, int offset, int length) + { + throw new NotImplementedException(); + } /// <summary> /// Implementation of IStreamReader /// </summary> diff --git a/src/FastSerialization/SegmentedMemoryStreamWriter.cs b/src/FastSerialization/SegmentedMemoryStreamWriter.cs index 7094be52a..ea4adb55d 100644 --- a/src/FastSerialization/SegmentedMemoryStreamWriter.cs +++ b/src/FastSerialization/SegmentedMemoryStreamWriter.cs @@ -6,7 +6,7 @@ namespace FastSerialization { - public class SegmentedMemoryStreamWriter + public class SegmentedMemoryStreamWriter : IStreamWriter { public SegmentedMemoryStreamWriter() : this(64) { } public SegmentedMemoryStreamWriter(int initialSize) diff --git a/src/MemoryGraph/graph.cs b/src/MemoryGraph/graph.cs index efc5eb6ab..7988fe90e 100644 
--- a/src/MemoryGraph/graph.cs +++ b/src/MemoryGraph/graph.cs @@ -536,9 +536,14 @@ public virtual void ToStream(Serializer serializer) // Write out the Nodes serializer.Write(m_nodes.Count); + int previousLabel = 0; for (int i = 0; i < m_nodes.Count; i++) { - serializer.Write((int)m_nodes[i]); + // Apply differential compression to the label, and then write it as a compressed integer + int currentLabel = (int)m_nodes[i]; + int difference = unchecked(currentLabel - previousLabel); + Node.WriteCompressedInt(serializer.Writer, difference); + previousLabel = currentLabel; } // Write out the Blob stream. @@ -607,9 +612,14 @@ public void FromStream(Deserializer deserializer) int nodeCount = deserializer.ReadInt(); m_nodes = new SegmentedList<StreamLabel>(SegmentSize, nodeCount); + int previousLabel = 0; for (int i = 0; i < nodeCount; i++) { - m_nodes.Add((StreamLabel)(uint)deserializer.ReadInt()); + // Read the label as a compressed differential integer + int difference = Node.ReadCompressedInt(deserializer.Reader); + int currentLabel = unchecked(previousLabel + difference); + m_nodes.Add((StreamLabel)currentLabel); + previousLabel = currentLabel; } // Read in the Blob stream. @@ -869,7 +879,8 @@ protected internal Node(Graph graph) } // Node information is stored in a compressed form because we have alot of them. 
- internal static int ReadCompressedInt(SegmentedMemoryStreamReader reader) + internal static int ReadCompressedInt<T>(T reader) + where T : IStreamReader { int ret = 0; byte b = reader.ReadByte(); @@ -910,7 +921,8 @@ internal static int ReadCompressedInt(SegmentedMemoryStreamReader reader) return ret; } - internal static void WriteCompressedInt(SegmentedMemoryStreamWriter writer, int value) + internal static void WriteCompressedInt<T>(T writer, int value) + where T : IStreamWriter { if (value << 25 >> 25 == value) { From 3a46016aaf046867873c34c1764de0544c380fdb Mon Sep 17 00:00:00 2001 From: Sam Harwell Date: Fri, 6 Nov 2020 10:32:38 -0800 Subject: [PATCH 3/3] Apply differential compression when writing m_nodeAddresses --- src/MemoryGraph/MemoryGraph.cs | 91 ++++++++++++++++++++++++++++++++-- 1 file changed, 87 insertions(+), 4 deletions(-) diff --git a/src/MemoryGraph/MemoryGraph.cs b/src/MemoryGraph/MemoryGraph.cs index 7e5ee2e95..0c1ec0f55 100644 --- a/src/MemoryGraph/MemoryGraph.cs +++ b/src/MemoryGraph/MemoryGraph.cs @@ -120,12 +120,77 @@ void IFastSerializable.ToStream(Serializer serializer) base.ToStream(serializer); // Write out the Memory addresses of each object serializer.Write(m_nodeAddresses.Count); - for (int i = 0; i < m_nodeAddresses.Count; i++) + + // Write m_nodeAddresses as a sequence of groups of addresses near each other. The following assumptions are + // made for this process: + // + // 1. It is common for multiple objects to have addresses within ushort.MaxValue of each other + // 2. It is common for an element of m_nodeAddresses to have the address 0 + // 3. The address at index 'i+1' will never be the same value as index 'i', unless that value is 0 + // + // Assumption (3) allows '0' values in m_nodeAddresses to be written with the differential value '0', which + // is efficient and does not interrupt the grouping of a segment of otherwise-similar addresses. 
+ int offset = 0; + foreach (var pair in GroupNodeAddresses(m_nodeAddresses)) { - serializer.Write((long)m_nodeAddresses[i]); + // A group is written as: + // + // 1. Int32: The number of elements in the group + // 2. Int64: The address of the first element in the group + // 3. UInt16 (repeat N times, where N = #Group - 1): + // a. 0, if the nth element of the group has the address 0 + // b. Otherwise, the offset of the nth relative to the address of the first element in the group + serializer.Write(pair.Value); + serializer.Write((long)pair.Key); + Debug.Assert(pair.Key == m_nodeAddresses[offset]); + + for (int i = 1; i < pair.Value; i++) + { + Address current = m_nodeAddresses[i + offset]; + if (current == 0) + { + serializer.Write((short)0); + continue; + } + + ushort relativeAddress = (ushort)(current - pair.Key); + serializer.Write(unchecked((short)relativeAddress)); + } + + offset += pair.Value; } serializer.WriteTagged(Is64Bit); + + IEnumerable<KeyValuePair<Address, int>> GroupNodeAddresses(SegmentedList<Address>
nodeAddresses) + { + if (nodeAddresses.Count == 0) + yield break; + + var baseAddress = nodeAddresses[0]; + var startIndex = 0; + for (int i = 1; i < nodeAddresses.Count; i++) + { + var current = nodeAddresses[i]; + if (current == 0) + { + continue; + } + + if (unchecked(current - baseAddress) <= ushort.MaxValue) + { + continue; + } + + var count = i - startIndex; + yield return new KeyValuePair<Address, int>(baseAddress, count); + + baseAddress = current; + startIndex = i; + } + + yield return new KeyValuePair<Address, int>(baseAddress, nodeAddresses.Count - startIndex); + } } void IFastSerializable.FromStream(Deserializer deserializer) @@ -135,9 +200,27 @@ void IFastSerializable.FromStream(Deserializer deserializer) int addressCount = deserializer.ReadInt(); m_nodeAddresses = new SegmentedList<Address>
(SegmentSize, addressCount); - for (int i = 0; i < addressCount; i++) + // See ToStream above for a description of the differential compression process + int offset = 0; + while (offset < addressCount) { - m_nodeAddresses.Add((Address)deserializer.ReadInt64()); + int groupCount = deserializer.ReadInt(); + Address baseAddress = (Address)deserializer.ReadInt64(); + m_nodeAddresses.Add(baseAddress); + for (int i = 1; i < groupCount; i++) + { + ushort relativeAddress = unchecked((ushort)deserializer.ReadInt16()); + if (relativeAddress == 0) + { + m_nodeAddresses.Add(0); + } + else + { + m_nodeAddresses.Add(baseAddress + relativeAddress); + } + } + + offset += groupCount; } bool is64bit = false;