/breezy/3.0

To get this branch, use:
bzr branch https://code.breezy-vcs.org/breezy/3.0

« back to all changes in this revision

Viewing changes to breezy/plugins/fastimport/processors/info_processor.py

  • Committer: Breezy landing bot
  • Author(s): Jelmer Vernooij
  • Date: 2018-05-07 12:03:11 UTC
  • mfrom: (6929.13.6 fast-import-cmds)
  • Revision ID: breezy.the.bot@gmail.com-20180507120311-x2v52qsrqj93zi52
Remove the fast-import-{query,info,filter} commands.

Merged from https://code.launchpad.net/~jelmer/brz/fast-import-cmds/+merge/342461

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# Copyright (C) 2008 Canonical Ltd
2
 
#
3
 
# This program is free software; you can redistribute it and/or modify
4
 
# it under the terms of the GNU General Public License as published by
5
 
# the Free Software Foundation; either version 2 of the License, or
6
 
# (at your option) any later version.
7
 
#
8
 
# This program is distributed in the hope that it will be useful,
9
 
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
 
# GNU General Public License for more details.
12
 
#
13
 
# You should have received a copy of the GNU General Public License
14
 
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
15
 
 
16
 
"""Import processor that dumps stats about the input (and doesn't import)."""
17
 
 
18
 
from __future__ import absolute_import
19
 
 
20
 
from .. import (
21
 
    reftracker,
22
 
    )
23
 
from ..helpers import (
24
 
    invert_dict,
25
 
    invert_dictset,
26
 
    )
27
 
from fastimport import (
28
 
    commands,
29
 
    processor,
30
 
    )
31
 
import stat
32
 
 
33
 
 
34
 
class InfoProcessor(processor.ImportProcessor):
    """An import processor that dumps statistics about the input.

    No changes to the current repository are made.

    As well as providing useful information about an import
    stream before importing it, this processor is useful for
    benchmarking the speed at which data can be extracted from
    the source.
    """

    def __init__(self, params=None, verbose=0, outf=None):
        processor.ImportProcessor.__init__(self, params, verbose,
            outf=outf)

    def pre_process(self):
        """Initialise all statistics before the stream is processed."""
        # Per-command and per-file-command counters, keyed by command name.
        self.cmd_counts = {}
        for cmd in commands.COMMAND_NAMES:
            self.cmd_counts[cmd] = 0
        self.file_cmd_counts = {}
        for fc in commands.FILE_COMMAND_NAMES:
            self.file_cmd_counts[fc] = 0
        # Map from parent count -> number of commits with that many parents.
        self.parent_counts = {}
        self.max_parent_count = 0
        self.committers = set()
        self.separate_authors_found = False
        self.symlinks_found = False
        self.executables_found = False
        self.sha_blob_references = False
        self.lightweight_tags = 0
        # Blob usage tracking: each usage class maps to a set of blob ids.
        self.blobs = {}
        for usage in ['new', 'used', 'unknown', 'unmarked']:
            self.blobs[usage] = set()
        self.blob_ref_counts = {}
        # Head tracking
        self.reftracker = reftracker.RefTracker()
        # Stuff to cache: a map from mark to # of times that mark is merged
        self.merges = {}
        # Stuff to cache: these are maps from mark to sets
        self.rename_old_paths = {}
        self.copy_source_paths = {}

    def post_process(self):
        """Dump the accumulated statistics to self.outf."""
        cmd_names = commands.COMMAND_NAMES
        fc_names = commands.FILE_COMMAND_NAMES
        self._dump_stats_group("Command counts",
            [(c, self.cmd_counts[c]) for c in cmd_names], str)
        self._dump_stats_group("File command counts",
            [(c, self.file_cmd_counts[c]) for c in fc_names], str)

        # Commit stats
        if self.cmd_counts['commit']:
            p_items = []
            for i in range(self.max_parent_count + 1):
                if i in self.parent_counts:
                    count = self.parent_counts[i]
                    p_items.append(("parents-%d" % i, count))
            merges_count = len(self.merges)
            p_items.append(('total revisions merged', merges_count))
            flags = {
                'separate authors found': self.separate_authors_found,
                'executables': self.executables_found,
                'symlinks': self.symlinks_found,
                'blobs referenced by SHA': self.sha_blob_references,
                }
            self._dump_stats_group("Parent counts", p_items, str)
            self._dump_stats_group("Commit analysis", flags.items(), _found)
            heads = invert_dictset(self.reftracker.heads)
            self._dump_stats_group("Head analysis", heads.items(), None,
                                    _iterable_as_config_list)
            # note("\t%d\t%s" % (len(self.committers), 'unique committers'))
            self._dump_stats_group("Merges", self.merges.items(), None)
            # We only show the rename old path and copy source paths when -vv
            # (verbose=2) is specified. The output here for mysql's data can't
            # be parsed currently so this bit of code needs more work anyhow ..
            if self.verbose >= 2:
                self._dump_stats_group("Rename old paths",
                    self.rename_old_paths.items(), len,
                    _iterable_as_config_list)
                self._dump_stats_group("Copy source paths",
                    self.copy_source_paths.items(), len,
                    _iterable_as_config_list)

        # Blob stats
        if self.cmd_counts['blob']:
            # In verbose mode, don't list every blob used
            if self.verbose:
                del self.blobs['used']
            self._dump_stats_group("Blob usage tracking",
                self.blobs.items(), len, _iterable_as_config_list)
        if self.blob_ref_counts:
            blobs_by_count = invert_dict(self.blob_ref_counts)
            blob_items = sorted(blobs_by_count.items())
            self._dump_stats_group("Blob reference counts",
                blob_items, len, _iterable_as_config_list)

        # Other stats
        if self.cmd_counts['reset']:
            reset_stats = {
                'lightweight tags': self.lightweight_tags,
                }
            self._dump_stats_group("Reset analysis", reset_stats.items())

    def _dump_stats_group(self, title, items, normal_formatter=None,
        verbose_formatter=None):
        """Dump a statistics group.

        In verbose mode, do so as a config file so
        that other processors can load the information if they want to.
        :param normal_formatter: the callable to apply to the value
          before displaying it in normal mode
        :param verbose_formatter: the callable to apply to the value
          before displaying it in verbose mode
        """
        if self.verbose:
            self.outf.write("[%s]\n" % (title,))
            for name, value in items:
                if verbose_formatter is not None:
                    value = verbose_formatter(value)
                # Config-file keys must not contain spaces.
                # Was `type(name) == str` — isinstance is the idiomatic check.
                if isinstance(name, str):
                    name = name.replace(' ', '-')
                self.outf.write("%s = %s\n" % (name, value))
            self.outf.write("\n")
        else:
            self.outf.write("%s:\n" % (title,))
            for name, value in items:
                if normal_formatter is not None:
                    value = normal_formatter(value)
                self.outf.write("\t%s\t%s\n" % (value, name))

    def progress_handler(self, cmd):
        """Process a ProgressCommand."""
        self.cmd_counts[cmd.name] += 1

    def blob_handler(self, cmd):
        """Process a BlobCommand."""
        self.cmd_counts[cmd.name] += 1
        if cmd.mark is None:
            self.blobs['unmarked'].add(cmd.id)
        else:
            self.blobs['new'].add(cmd.id)
            # Marks can be re-used so remove it from used if already there.
            # Note: we definitely do NOT want to remove it from multi if
            # it's already in that set.
            try:
                self.blobs['used'].remove(cmd.id)
            except KeyError:
                pass

    def checkpoint_handler(self, cmd):
        """Process a CheckpointCommand."""
        self.cmd_counts[cmd.name] += 1

    def commit_handler(self, cmd):
        """Process a CommitCommand."""
        self.cmd_counts[cmd.name] += 1
        self.committers.add(cmd.committer)
        if cmd.author is not None:
            self.separate_authors_found = True
        for fc in cmd.iter_files():
            self.file_cmd_counts[fc.name] += 1
            if isinstance(fc, commands.FileModifyCommand):
                # Was `0111` (Python 2 octal literal — a SyntaxError on
                # Python 3); any executable bit set marks the file executable.
                if fc.mode & 0o111:
                    self.executables_found = True
                if stat.S_ISLNK(fc.mode):
                    self.symlinks_found = True
                if fc.dataref is not None:
                    if fc.dataref[0] == ':':
                        self._track_blob(fc.dataref)
                    else:
                        self.sha_blob_references = True
            elif isinstance(fc, commands.FileRenameCommand):
                self.rename_old_paths.setdefault(cmd.id, set()).add(fc.old_path)
            elif isinstance(fc, commands.FileCopyCommand):
                self.copy_source_paths.setdefault(cmd.id, set()).add(fc.src_path)

        # Track the heads
        parents = self.reftracker.track_heads(cmd)

        # Track the parent counts.
        # Was `self.parent_counts.has_key(...)` — removed in Python 3.
        parent_count = len(parents)
        if parent_count in self.parent_counts:
            self.parent_counts[parent_count] += 1
        else:
            self.parent_counts[parent_count] = 1
            if parent_count > self.max_parent_count:
                self.max_parent_count = parent_count

        # Remember the merges
        if cmd.merges:
            #self.merges.setdefault(cmd.ref, set()).update(cmd.merges)
            for merge in cmd.merges:
                if merge in self.merges:
                    self.merges[merge] += 1
                else:
                    self.merges[merge] = 1

    def reset_handler(self, cmd):
        """Process a ResetCommand."""
        self.cmd_counts[cmd.name] += 1
        if cmd.ref.startswith('refs/tags/'):
            self.lightweight_tags += 1
        else:
            if cmd.from_ is not None:
                self.reftracker.track_heads_for_ref(
                    cmd.ref, cmd.from_)

    def tag_handler(self, cmd):
        """Process a TagCommand."""
        self.cmd_counts[cmd.name] += 1

    def feature_handler(self, cmd):
        """Process a FeatureCommand."""
        self.cmd_counts[cmd.name] += 1
        feature = cmd.feature_name
        if feature not in commands.FEATURE_NAMES:
            self.warning("feature %s is not supported - parsing may fail"
                % (feature,))

    def _track_blob(self, mark):
        # Classify a blob reference: bump its count if already multi-referenced,
        # promote new->used on second sight, otherwise record it as unknown.
        if mark in self.blob_ref_counts:
            self.blob_ref_counts[mark] += 1
        elif mark in self.blobs['used']:
            self.blob_ref_counts[mark] = 2
            self.blobs['used'].remove(mark)
        elif mark in self.blobs['new']:
            self.blobs['used'].add(mark)
            self.blobs['new'].remove(mark)
        else:
            self.blobs['unknown'].add(mark)
268
 
 
269
 
def _found(b):
270
 
    """Format a found boolean as a string."""
271
 
    return ['no', 'found'][b]
272
 
 
273
 
def _iterable_as_config_list(s):
274
 
    """Format an iterable as a sequence of comma-separated strings.
275
 
    
276
 
    To match what ConfigObj expects, a single item list has a trailing comma.
277
 
    """
278
 
    items = sorted(s)
279
 
    if len(items) == 1:
280
 
        return "%s," % (items[0],)
281
 
    else:
282
 
        return ", ".join(items)