webgraph_swh

Python bindings for Software Heritage graph access.

from ._webgraph_swh import *

__doc__ = _webgraph_swh.__doc__
if hasattr(_webgraph_swh, "__all__"):
    __all__ = _webgraph_swh.__all__
class SwhGraph:

A bidirectional Software Heritage graph with node properties.

Loads the graph and all available properties (maps, persons, strings, timestamps) from the given base path. Node IDs are integers in the half-open range [0, num_nodes).

SwhGraph(path: str)
def num_nodes(self) -> int:

Return the number of nodes in the graph.

def num_arcs(self) -> int:

Return the number of arcs in the graph.

def outdegree(self, node: int) -> int:

Return the number of successors of the given node.

Raises IndexError if node is out of range.

def indegree(self, node: int) -> int:

Return the number of predecessors of the given node.

Raises IndexError if node is out of range.

def predecessors(self, node: int) -> PySuccessorsIterator:

Return an iterator over the predecessors of the given node.

Raises IndexError if node is out of range.

def successors(self, node: int) -> PySuccessorsIterator:

Return an iterator over the successors of the given node.

Raises IndexError if node is out of range.

def committer_id(self, node: int) -> int | None:

Return the committer person ID, or None if not available.

Raises IndexError if node is out of range.

def author_id(self, node: int) -> int | None:

Return the author person ID, or None if not available.

Raises IndexError if node is out of range.

def node_type(self, node: int) -> PyNodeType:

Return the node type as a PyNodeType enum value.

Raises IndexError if node is out of range.

def committer_timestamp(self, node: int) -> int | None:

Return the committer timestamp (seconds since epoch), or None.

Raises IndexError if node is out of range.

def author_timestamp(self, node: int) -> int | None:

Return the author timestamp (seconds since epoch), or None.

Raises IndexError if node is out of range.

def swhid(self, node: int) -> str:

Return the SWHID of the given node as a string.

Raises IndexError if node is out of range.

def message(self, node: int) -> str | None:

Return the commit/tag message, or None if not available.

Raises IndexError if node is out of range.

def tag_name(self, node: int) -> str | None:

Return the tag name, or None if not a release or not available.

Raises IndexError if node is out of range.

def outdegrees(self) -> numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.uint32]]:

Return a numpy uint32 array of outdegrees for all nodes, computed in parallel. The array is indexed by node ID.

def indegrees(self) -> numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.uint32]]:

Return a numpy uint32 array of indegrees for all nodes, computed in parallel. The array is indexed by node ID.

def top_k_out(self, k: int) -> numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.uint64]]:

Return a (k, 2) array: column 0 = node IDs, column 1 = outdegrees.

def top_k_in(self, k: int) -> numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.uint64]]:

Return a (k, 2) array: column 0 = node IDs, column 1 = indegrees.

def subgraph(self, node_types: str) -> FilteredSwhGraph:

Return a FilteredSwhGraph restricted to the given node types.

The constraint string is a comma-separated list of type abbreviations (cnt, dir, ori, rel, rev, snp) or * for all types.

Example::

    revrel = g.subgraph("rev,rel")
def forward_graph(self) -> BvGraph:

Load the forward BvGraph from the same base path.

Returns a webgraph.BvGraph instance.

def backward_graph(self) -> BvGraph:

Load the backward (transposed) BvGraph from the same base path.

Returns a webgraph.BvGraph instance.

basepath: str

The base path from which the graph was loaded.

class FilteredSwhGraph:

A view of an SwhGraph restricted to specific node types.

Created by SwhGraph.subgraph(). Node IDs are not renumbered.

def num_nodes(self) -> int:

Return the number of nodes in the underlying (unfiltered) graph.

def outdegree(self, node: int) -> int:

Return the number of successors matching the node-type constraint.

Raises IndexError if node is out of range, or ValueError if the node does not match the constraint.

def indegree(self, node: int) -> int:

Return the number of predecessors matching the node-type constraint.

Raises IndexError if node is out of range, or ValueError if the node does not match the constraint.

def successors(self, node: int) -> PySuccessorsIterator:

Return an iterator over successors matching the node-type constraint.

Raises IndexError if node is out of range, or ValueError if the node does not match the constraint.

def predecessors(self, node: int) -> PySuccessorsIterator:

Return an iterator over predecessors matching the node-type constraint.

Raises IndexError if node is out of range, or ValueError if the node does not match the constraint.

def outdegrees(self) -> numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.uint32]]:

Return a numpy array of filtered outdegrees for all nodes, computed in parallel.

Nodes not matching the constraint have degree 0, so that array[node_id] is the filtered outdegree for matching nodes.

def indegrees(self) -> numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.uint32]]:

Return a numpy array of filtered indegrees for all nodes, computed in parallel.

Nodes not matching the constraint have degree 0, so that array[node_id] is the filtered indegree for matching nodes.

def committer_id(self, node: int) -> int | None:

Return the committer person ID, or None if not available.

Raises IndexError if node is out of range, or ValueError if the node does not match the constraint.

def author_id(self, node: int) -> int | None:

Return the author person ID, or None if not available.

Raises IndexError if node is out of range, or ValueError if the node does not match the constraint.

def node_type(self, node: int) -> PyNodeType:

Return the node type as a PyNodeType enum value.

Raises IndexError if node is out of range, or ValueError if the node does not match the constraint.

def committer_timestamp(self, node: int) -> int | None:

Return the committer timestamp (seconds since epoch), or None.

Raises IndexError if node is out of range, or ValueError if the node does not match the constraint.

def author_timestamp(self, node: int) -> int | None:

Return the author timestamp (seconds since epoch), or None.

Raises IndexError if node is out of range, or ValueError if the node does not match the constraint.

def swhid(self, node: int) -> str:

Return the SWHID of the given node as a string.

Raises IndexError if node is out of range, or ValueError if the node does not match the constraint.

def message(self, node: int) -> str | None:

Return the commit/tag message, or None if not available.

Raises IndexError if node is out of range, or ValueError if the node does not match the constraint.

def tag_name(self, node: int) -> str | None:

Return the tag name, or None if not a release or not available.

Raises IndexError if node is out of range, or ValueError if the node does not match the constraint.

def top_k_out(self, k: int) -> numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.uint64]]:

Return a (k, 2) array: column 0 = node IDs, column 1 = outdegrees.

def top_k_in(self, k: int) -> numpy.ndarray[tuple[typing.Any, ...], numpy.dtype[numpy.uint64]]:

Return a (k, 2) array: column 0 = node IDs, column 1 = indegrees.

def has_node(self, node: int) -> bool:

Return whether the given node matches the node-type constraint.

class ContributorNamesMap:

Sparse map from contributor IDs to display names.

ContributorNamesMap(path: str)
def get_name(self, contributor_id: int) -> str | None:

Return the display name for the given contributor ID, or None.

class PyNodeType:

SWH node types.

Integer values match the encoding used in the SWH graph: Content=0, Directory=1, Origin=2, Release=3, Revision=4, Snapshot=5.

Content = PyNodeType.Content
Directory = PyNodeType.Directory
Origin = PyNodeType.Origin
Release = PyNodeType.Release
Revision = PyNodeType.Revision
Snapshot = PyNodeType.Snapshot
class PySuccessorsIterator:

Iterator over node IDs (successors or predecessors).