8000 Reinsertion fix, add max_num_vertices argument by thomas0299 · Pull Request #10 · NetworkDismantling/review · GitHub
[go: up one dir, main page]
Skip to content

Reinsertion fix, add max_num_vertices argument #10

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ You can find them in their sub-folders.
#### All algorithms except FINDER and GDM

```bash
conda create -n dismantling python=3.9 boost boost-cpp graph-tool dill tqdm numpy scipy pandas seaborn matplotlib -c anaconda -c conda-forge
conda create -n dismantling python=3.9 boost boost-cpp graph-tool dill tqdm numpy scipy pandas seaborn matplotlib parse pyyaml -c anaconda -c conda-forge

conda activate dismantling
```
Expand All @@ -154,7 +154,7 @@ Please refer to the [PyTorch installation matrix](https://pytorch.org/get-starte

Just as an example, if your host machine supports CUDA (11.8), the command should look like the following:
```bash
conda create --name gdm boost boost-cpp graph-tool dill tqdm numpy scipy pandas seaborn matplotlib python pytorch torchvision torchaudio pytorch-cuda=11.8 pyg -c pyg -c pytorch -c nvidia -c conda-forge -c anaconda
conda create --name gdm boost boost-cpp graph-tool dill tqdm numpy scipy pandas seaborn matplotlib python parse pyyaml pytorch torchvision torchaudio pytorch-cuda=11.8 pyg -c pyg -c pytorch -c nvidia -c conda-forge -c anaconda

conda activate gdm
```
Expand Down
2 changes: 1 addition & 1 deletion network_dismantling/CoreGDM/core_network_dismantler.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ def test(args, model, early_stopping_dict: dict = None, networks_provider=None,
"rem_num": np.inf,
})

removals, prediction_time, dismantle_time = dismantler(network=network.copy(),
removals, prediction_time, dismantle_time, _ = dismantler(network=network.copy(),
node_generator=predictor,
generator_args=generator_args,
stop_condition=stop_condition,
Expand Down
2 changes: 1 addition & 1 deletion network_dismantling/GDM/network_dismantler.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def test(args, model, networks_provider, print_model=True, logger=logging.getLog
f"Aiming to reach LCC size {stop_condition} ({stop_condition * 100 / network_size:.3f}%)"
)

removals, prediction_time, dismantle_time = dismantler(network, predictor, generator_args, stop_condition)
removals, prediction_time, dismantle_time, _ = dismantler(network, predictor, generator_args, stop_condition)

peak_slcc = max(removals, key=itemgetter(4))

Expand Down
1 change: 1 addition & 0 deletions network_dismantling/GDM/python_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,7 @@ def _GDM(
# df=new_df_runs,
test_networks={f"{network_name}": network},
logger=logger,
threshold=args.threshold,
)
except FileNotFoundError as e:
raise RuntimeError(
Expand Down
52 changes: 28 additions & 24 deletions network_dismantling/GDM/reinsert.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# You should have received a copy of the GNU General Public License
# along with GDM. If not, see <http://www.gnu.org/licenses/>.

import re
import logging
from ast import literal_eval
from errno import ENOSPC
Expand Down Expand Up @@ -292,9 +293,9 @@ def main(

run.drop(run_columns, inplace=True, errors="ignore")
# Get the removals
removals = run.pop("removals")
removals = literal_eval(removals)

original_removals = run.pop("removals")
original_removals = re.sub(r'np\.float64\(([^)]+)\)', r'\1', original_removals)
original_removals = literal_eval(original_removals)
# Remove the columns that are not needed
run.drop(run_columns,
inplace=True,
Expand All @@ -316,18 +317,18 @@ def main(

if threshold is None:
threshold = run.get("threshold",
removals[-1][3],
original_removals[-1][3],
)

stop_condition = int(np.ceil(threshold * network.num_vertices()))
generator_args = {
"removals": list(map(itemgetter(1), removals)),
"removals": list(map(itemgetter(1), original_removals)),
"stop_condition": stop_condition,
"network_name": network_name,
"logger": logger,
}

removals, _, _ = dismantler(
removals, _, _, last_lcc_size = dismantler(
network=network.copy(),
predictor=predictor,
generator_args=generator_args,
Expand All @@ -336,22 +337,6 @@ def main(
logger=logger,
)

peak_slcc = max(removals, key=itemgetter(4))

_run = {
"network": network_name,
"removals": removals,
"slcc_peak_at": peak_slcc[0],
"lcc_size_at_peak": peak_slcc[3],
"slcc_size_at_peak": peak_slcc[4],
"r_auc": simpson(list(r[3] for r in removals), dx=1),
"rem_num": len(removals),
"threshold": threshold,
}

for key, value in _run.items():
run[key] = value

# Check if something is wrong with the removals
if removals[-1][2] == 0:
# for removal in removals:
Expand All @@ -368,10 +353,29 @@ def main(
last_valid_index = i
else:
break

logger.error(f"Last valid index: {last_valid_index}: {removals[last_valid_index]}")
raise RuntimeError(f"Had to remove too many nodes ({len(removals)})")
removals = removals[:last_valid_index+1]
# raise RuntimeError(f"Had to remove too many nodes ({len(removals)})")

if last_lcc_size > stop_condition: # careful size of LCC not actual %, stop condition is number of nodes not threshold
raise RuntimeError(f"Reinsertion did not ensure a proper dismantling with {last_lcc_size=} > {stop_condition=}.")

peak_slcc = max(removals, key=itemgetter(4))

_run = {
"network": network_name,
"removals": removals,
"slcc_peak_at": peak_slcc[0],
"lcc_size_at_peak": peak_slcc[3],
"slcc_size_at_peak": peak_slcc[4],
"r_auc": simpson(list(r[3] for r in removals), dx=1),
"rem_num": len(removals),
"threshold": threshold,
}

for key, value in _run.items():
run[key] = value

all_runs.append(run)

run_df = pd.DataFrame(data=[run], columns=network_df.columns)
Expand Down
2 changes: 1 addition & 1 deletion network_dismantling/GDM/training_data_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def training_data_extractor(g, threshold=None,
degree = g.get_out_degrees(vertices)

logger.debug("Computing Max degree")
max_degree = np.max(degree)
max_degree = np.max(degree) if len(degree) > 0 else 1

logger.debug("Normalizing the degree")
degree = np.divide(degree, max_degree)
Expand Down
10 changes: 5 additions & 5 deletions network_dismantling/common/dismantlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def threshold_dismantler(
logger.debug("EARLY STOPPING")
break

return removals, None, None
return removals, None, None, None


# TODO REMOVE THIS FROM THE REVIEW. IT IS NOT USED!
Expand Down Expand Up @@ -183,7 +183,7 @@ def kcore_lcc_threshold_dismantler(
# print("EARLY STOPPING")
break

return removals, None, None
return removals, None, None, None


def lcc_threshold_dismantler(
Expand Down Expand Up @@ -270,7 +270,7 @@ def lcc_threshold_dismantler(
generator.close()
break

return removals, None, None
return removals, None, None, None


def lcc_peak_dismantler(
Expand Down Expand Up @@ -380,7 +380,7 @@ def lcc_peak_dismantler(

generator.close()

return removals, None, None # prediction_time, dismantle_time
return removals, None, None, None # prediction_time, dismantle_time


def enqueued(original_function=None,
Expand Down Expand Up @@ -454,7 +454,7 @@ def wrapper(
generator_args["sorting_function"] = function

# kwargs["generator_args"] = generator_args
removals, prediction_time, dismantle_time = dismantler(
removals, prediction_time, dismantle_time, _ = dismantler(
network=network,
predictor=predictor,
generator_args=generator_args,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def _threshold_dismantler(

del predictions_dict

return removals, prediction_time, dismantle_time
return removals, prediction_time, dismantle_time, lcc_size


def lcc_threshold_dismantler(
Expand Down Expand Up @@ -227,7 +227,7 @@ def _iterative_threshold_dismantler(network, predictor, generator_args, stop_con
f"{network_name}: iterative external dismantler returned in {dismantle_time}s"
)

return removals, None, None
return removals, None, None, lcc_size


def iterative_threshold_dismantler(network, predictor, generator_args, stop_condition):
Expand Down
11 changes: 11 additions & 0 deletions network_dismantling/dismantler.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,9 @@ def main(args, logger=logging.getLogger("dummy")):
f"Dismantling {network_name} according to {display_name}. "
f"Aiming to LCC size {stop_condition} ({stop_condition / network_size:.3f})"
)

if network_size > args.max_num_vertices:
continue
# logger.debug(f"dismantling_method_kwargs: {dismantling_method_kwargs}")

generator_args["executor"] = executor
Expand Down Expand Up @@ -657,6 +660,14 @@ def get_df_columns():
help="Number of jobs.",
)

parser.add_argument(
"-mnv",
"--max_num_vertices",
type=int,
default=np.inf,
help="Filter for network size based on number of vertices.",
)

# parser.add_argument(
# "-sa",
# "--simultaneous_access",
Expand Down
2 changes: 1 addition & 1 deletion network_dismantling/heuristics/dismantler.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def main(args):
"STATIC" if mode is True else "DYNAMIC") + " " + display_name,
stop_condition,
stop_condition / network_size))
removals, prediction_time, dismantle_time = dismantler(network=network.copy(),
removals, prediction_time, dismantle_time, _ = dismantler(network=network.copy(),
predictor=generator,
generator_args=generator_args,
stop_condition=stop_condition,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ def main(
"logger": logger,
}

removals, _, _ = dismantler(
removals, _, _, _ = dismantler(
network=network.copy(),
predictor=predictor,
generator_args=generator_args,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ def main(
"logger": logger,
}

removals, _, _ = dismantler(
removals, _, _, _ = dismantler(
network=network.copy(),
predictor=predictor,
generator_args=generator_args,
Expand Down
0