Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
r""" Centrality
This module is meant for all functions related to centrality in networks.
.. csv-table:: :class: contentstable :widths: 30, 70 :delim: |
:func:`centrality_betweenness` | Return the centrality betweenness of `G` :func:`centrality_closeness_top_k` | Return the k most closeness central vertices of `G`
Functions --------- """ from __future__ import print_function, absolute_import
from libc.string cimport memset from libc.stdint cimport uint32_t from cysignals.memory cimport check_allocarray, sig_free from cysignals.signals cimport sig_check
include "sage/data_structures/bitset.pxi" from sage.graphs.base.static_sparse_graph cimport * from sage.libs.gmp.mpq cimport * from sage.rings.rational cimport Rational from sage.ext.memory_allocator cimport MemoryAllocator
ctypedef fused numerical_type: mpq_t double
cimport cython
r""" Return the centrality betweenness of `G`
The centrality betweenness of a vertex `v\in G` is defined by:
.. MATH::
c(v) = \sum_{s\neq v \neq t} \frac{\#\{\text{shortest } st-\text{paths containing v}\}} {\#\{\text{shortest } st-\text{paths}\}}
For more information, see the :wikipedia:`Betweenness_centrality`.
INPUT:
- ``G`` -- a (di)graph
- ``exact`` (boolean, default: ``False``) -- whether to compute over rationals or on ``double`` C variables.
- ``normalize`` (boolean; default: ``True``) -- whether to renormalize the values by dividing them by `\binom {n-1} 2` (for graphs) or `2\binom {n-1} 2` (for digraphs).
ALGORITHM:
To compute `c(v)`, we fix `s` and define `c_s(v)` as the centrality of `v` *due to* `s`, obtained from the formula above by running the sum over `t` only. We obtain `c(v)=\sum_{s\neq v} c_s(v)`.
For every vertex `s`, we compute the value of `c_s(v)` for all `v`, using the following remark (see [Brandes01]_):
Let `v_1,...,v_k` be the out-neighbors of `v` such that `dist(s,v_i)=dist(s,v)+1`. Then
.. MATH::
c_s(v) = \sum_{1\leq i \leq k} c_s(v_i) \frac{\#\{\text{shortest } sv_i-\text{paths}\}} {\#\{\text{shortest } sv -\text{paths}\}}
The number of shortest paths between `s` and every other vertex can be computed with a slightly modified BFS. While running this BFS we can also store the list of the vertices `v_1,...,v_k` associated with each `v`.
EXAMPLES::
sage: from sage.graphs.centrality import centrality_betweenness sage: centrality_betweenness(digraphs.Circuit(6)) # abs tol 1e-10 {0: 0.5, 1: 0.5, 2: 0.5, 3: 0.5, 4: 0.5, 5: 0.5} sage: centrality_betweenness(graphs.CycleGraph(6)) # abs tol 1e-10 {0: 0.2, 1: 0.2, 2: 0.2, 3: 0.2, 4: 0.2, 5: 0.2}
Exact computations::
sage: graphs.PetersenGraph().centrality_betweenness(exact=True) {0: 1/12, 1: 1/12, 2: 1/12, 3: 1/12, 4: 1/12, 5: 1/12, 6: 1/12, 7: 1/12, 8: 1/12, 9: 1/12}
TESTS:
Compare with NetworkX::
sage: import networkx sage: g = graphs.RandomGNP(100,.2) sage: nw = networkx.betweenness_centrality(g.networkx_graph(copy=False)) sage: sg = centrality_betweenness(g) sage: max(abs(nw[x]-sg[x]) for x in g) # abs tol 1e-10 0
Stupid cases::
sage: centrality_betweenness(Graph()) {} sage: centrality_betweenness(Graph(2)) {0: 0.0, 1: 0.0} sage: centrality_betweenness(Graph(2),exact=1) {0: 0, 1: 0}
REFERENCES:
.. [Brandes01] Ulrik Brandes, A faster algorithm for betweenness centrality, Journal of Mathematical Sociology 25.2 (2001): 163-177, http://www.inf.uni-konstanz.de/algo/publications/b-fabc-01.pdf """ else:
@cython.cdivision(True) r""" Return the centrality betweenness of G (C implementation)
INPUT:
- ``G`` -- a graph
- ``_`` -- this variable is ignored, only its type matters. If it is of type `mpq_t` then computations are made on `Q`, if it is ``double`` the computations are made on ``double``.
- ``normalize`` (boolean; default: ``True``) -- whether to renormalize the values by dividing them by `\binom {n-1} 2` (for graphs) or `2\binom {n-1} 2` (for digraphs).
For more information, see the documentation of ``centrality_betweenness``. """ # Trivial case
# A copy of G, for faster neighbor enumeration cdef short_digraph g
# A second copy, to remember the edges used during the BFS (see doc) cdef short_digraph bfs_dag
cdef bitset_t seen # Vertices whose neighbors have been explored cdef bitset_t next_layer # Unexplored neighbors of vertices in 'seen'
cdef uint32_t * queue # BFS queue cdef uint32_t * degrees # degree[v] = nb of vertices which discovered v
cdef numerical_type * n_paths_from_source cdef numerical_type * betweenness_source cdef numerical_type * betweenness cdef numerical_type mpq_tmp
cdef int layer_current_beginning cdef int layer_current_end cdef int layer_next_end
cdef int source,i,j,u,v cdef uint32_t * p_tmp
if numerical_type is mpq_t:
if numerical_type is double: else:
if numerical_type is double: else:
# initialize data
# The number of shortest paths from 'source' to every other vertex. # # It is a BFS. The graph is explored layer by layer.
# Looking for all non-discovered neighbors of some vertex of the # current layer.
# List the neighbors of u
# Is it a new vertex ?
# Is it the first time we see it ?
# update the count of paths and the BFS dag. if numerical_type is double: else:
# 'next_layer' becomes 'current_layer'
# Compute the betweenness from the number of paths # # We enumerate vertices in reverse order of discovery. if numerical_type is double: else:
# update betweenness from betweenness_source if numerical_type is double: else:
if numerical_type is double: else:
finally:
else:
cdef void _estimate_reachable_vertices_dir(short_digraph g, int* reachL, int* reachU): r""" For each vertex ``v``, bounds the number of vertices reachable from ``v``.
The lower bound is stored in reachL[v], while the upper bound is stored in reachU[v]. These two arrays must be pre-allocated and they must have size at least `n`, where `n` is the number of nodes of `g`.
The estimate works as follows: first, we compute the graph of strongly connected components `\mathcal{G=(V,E)}`, then, for each SCC C, we set:
.. MATH::
L(C)=|C|+\max_{(C,C') \in \mathcal{E}}L(C') \\ U(C)=|C|+\max_{(C,C') \in \mathcal{E}}L(C')
By analyzing strongly connected components in reverse topological order, we are sure that, as soon as we process component `C`, all components `C'` appearing on the right hand side have already been processed. A further improvement on these bounds is obtained by exactly computing the number of vertices reachable from the biggest strongly connected component, and handle this component separately.
Then, for each vertex ``v``, we set ``reachL[v]=L(C)``, where ``C`` is the strongly connected component containing ``v``.
INPUT:
``g`` (short_digraph): the input graph;
OUTPUT:
``reachL``, ``reachU``: two arrays that should be allocated outside this function and that should have size at least ``g.n``. At the end, ``reachL[v]`` (resp., ``reachU[v]``) will contain the lower (resp., upper) bound on the number of reachable vertices from ``v``. """ cdef short_digraph sccgraph
# We need uint64_t because these values may become much bigger than g.n, # up to g.n^2, during the computation. Only at the end, we set reachL and # reachU as the maximum between g.n and the computed value (so that they # can be converted to int without overflow).
# Variables used in BFS from the largest strongly connected component cdef uint32_t startq, endq cdef uint32_t *neigh_start cdef uint32_t *neigh_end
# Compute scc_sizes
# Compute maxscc maxscc = i
# BFS to compute number of reachable vertices for the biggest SCC.
w = neigh_start[0] if not reached[w]: reached[w] = 1 nreach_maxscc += scc_sizes[w] q[endq] = w endq += 1 neigh_start += 1
# Dynamic programming to estimate number of reachable vertices for other # SCCs
# Note that this might become much bigger than g.n, up to g.n*g.n. # Hence we used uint64_t, and only at the end we take the minimum # between this value and g.n (since g.n is an upper bound on # the number of reachable vertices).
cdef void _compute_reachable_vertices_undir(short_digraph g, int* reachable): r""" For each vertex ``v``, computes the number of vertices reachable from ``v``.
The number of vertices reachable from ``v`` (which is the size of the connected component containing ``v``) is stored in variable ``reachable[v]``. The array ``reachable`` is assumed to be allocated outside this function, and it is assumed to have size at least ``g.n``. """ cdef int i
cdef int v, w cdef uint32_t *neigh_start cdef uint32_t *neigh_end cdef uint32_t startq, endq cdef list currentcc
# BFS from i
cdef void _sort_vertices_degree(short_digraph g, int *sorted_verts): r""" Sorts vertices in decreasing order of degree.
Uses counting sort, since degrees are between `0` and `n-1`: the running time is then `O(n)`. """ cdef int d, v
# Otherwise, segmentation fault return
r""" Computes the k vertices with largest closeness centrality.
The algorithm is based on performing a breadth-first-search (BFS) from each vertex, and to use bounds in order to cut these BFSes as soon as possible. If k is small, it is much more efficient than computing all centralities with :meth:`~sage.graphs.generic_graph.GenericGraph.centrality_closeness`. Conversely, if k is close to the number of nodes, the running-time is approximately the same (it might even be a bit longer, because more computations are needed). For more information, see [BCM15]_. The algorithm does not work on weighted graphs.
INPUT:
- ``G`` a Sage Graph or DiGraph;
- ``k`` (integer, default: 1): the algorithm will return the ``k`` vertices with largest closeness centrality. This value should be between 1 and the number of vertices with positive (out)degree, because the closeness centrality is not defined for vertices with (out)degree 0. If ``k`` is bigger than this value, the output will contain all vertices of positive (out)degree.
- ``verbose`` (integer, default: 0): an integer defining how "verbose" the algorithm should be. If 0, nothing is printed, if 1, we print only the performance ratio at the end of the algorithm, if 2, we print partial results every 1000 visits, if 3, we print partial results after every visit.
OUTPUT:
An ordered list of ``k`` pairs ``(closv, v)``, where ``v`` is one of the ``k`` most central vertices, and ``closv`` is its closeness centrality. If ``k`` is bigger than the number of vertices with positive (out)degree, the list might be smaller.
REFERENCES:
.. [BCM15] Michele Borassi, Pierluigi Crescenzi, and Andrea Marino, *Fast and Simple Computation of Top-k Closeness Centralities*. :arxiv:`1507.01490`
EXAMPLES::
sage: from sage.graphs.centrality import centrality_closeness_top_k sage: g = graphs.PathGraph(10) sage: centrality_closeness_top_k(g, 4, 1) Final performance ratio: 0.711111111111 [(0.36, 5), (0.36, 4), (0.3333333333333333, 6), (0.3333333333333333, 3)] sage: g = digraphs.Path(10) sage: centrality_closeness_top_k(g, 5, 1) Final performance ratio: 0.422222222222 [(0.2, 0), (0.19753086419753085, 1), (0.19444444444444442, 2), (0.19047619047619047, 3), (0.18518518518518517, 4)]
TESTS:
If ``k`` or ``verbose`` is not an integer::
sage: from sage.graphs.centrality import centrality_closeness_top_k sage: g = digraphs.Path(10) sage: centrality_closeness_top_k(g, 'abc', 1) Traceback (most recent call last): ... TypeError: an integer is required sage: centrality_closeness_top_k(g, 1, 'abc') Traceback (most recent call last): ... TypeError: an integer is required
If ``k`` is bigger than the number of nodes::
sage: from sage.graphs.centrality import centrality_closeness_top_k sage: g = graphs.PathGraph(5) sage: centrality_closeness_top_k(g, 10, 0) [(0.6666666666666666, 2), (0.5714285714285714, 3), (0.5714285714285714, 1), (0.4, 4), (0.4, 0)]
Empty graph::
sage: from sage.graphs.centrality import centrality_closeness_top_k sage: g = Graph() sage: centrality_closeness_top_k(g, 10, 0) [] sage: g = Graph(10) sage: centrality_closeness_top_k(g, 10, 0) []
The result is correct::
sage: from sage.graphs.centrality import centrality_closeness_top_k sage: import random sage: n = 20 sage: m = random.randint(1, n*(n-1) / 2) sage: k = random.randint(1, n) sage: g = graphs.RandomGNM(n,m) sage: topk = centrality_closeness_top_k(g, k) sage: centr = g.centrality_closeness(algorithm='BFS') sage: sorted_centr = sorted(centr.values(), reverse=True) sage: assert(len(topk)==min(k, len(sorted_centr))) sage: for i in range(len(topk)): ....: assert(abs(topk[i][0] - sorted_centr[i]) < 1e-12)
Directed case::
sage: from sage.graphs.centrality import centrality_closeness_top_k sage: import random sage: n = 20 sage: m = random.randint(1, n*(n-1)) sage: k = random.randint(1, n) sage: g = digraphs.RandomDirectedGNM(n,m) sage: topk = centrality_closeness_top_k(g, k) sage: centr = g.centrality_closeness(algorithm='BFS') sage: sorted_centr = sorted(centr.values(), reverse=True) sage: assert(len(topk)==min(k, len(sorted_centr))) sage: for i in range(len(topk)): ....: assert(abs(topk[i][0] - sorted_centr[i]) < 1e-12) """
return []
cdef short_digraph sd # Copying the whole graph to obtain the list of neighbors quicker than by # calling out_neighbors. This data structure is well documented in the # module sage.graphs.base.static_sparse_graph cdef int *reachU cdef int d, nd, x, v, w cdef long f, gamma cdef double tildefL, tildefU cdef bint stopped cdef uint32_t * p_tmp
else:
# We start a BFSCut from x:
# We reset variable seen:
# We are at level 0, and gamma is the number of arcs exiting level 0 # (hence, deg(x)).
# The graph is explored layer by layer.
# We update our estimate of the farness of v. # The estimate sets distance d+1 to gamma vertices (which is an # upper bound on the number of vertices at distance d+1 from v), # and distance d+2 to all other vertices reachable from x.
# Looking for all non-discovered neighbors of some vertex of the # current layer.
# List the neighbors of u
# Is it a new vertex ? farness[x] = n stopped = True break # 'next_layer' becomes 'current_layer'
print("Visit {} from {}:".format(nvis, x)) print(" Lower bound: {}".format(1 / kth)) print(" Perf. ratio: {}".format(visited / (nvis * <double> (sd.neighbors[sd.n]-sd.edges))))
|