<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>https://docs.scinet.utoronto.ca/api.php?action=feedcontributions&amp;feedformat=atom&amp;user=Pinto</id>
	<title>SciNet Users Documentation - User contributions [en]</title>
	<link rel="self" type="application/atom+xml" href="https://docs.scinet.utoronto.ca/api.php?action=feedcontributions&amp;feedformat=atom&amp;user=Pinto"/>
	<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php/Special:Contributions/Pinto"/>
	<updated>2026-05-06T01:26:21Z</updated>
	<subtitle>User contributions</subtitle>
	<generator>MediaWiki 1.35.12</generator>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7643</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7643"/>
		<updated>2026-04-07T02:12:20Z</updated>

		<summary type="html">&lt;p&gt;Pinto: /* System Status */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Up3 | Trillium|https://docs.alliancecan.ca/wiki/Trillium_Quickstart}}&lt;br /&gt;
|{{Up3 | OnDemand|https://docs.alliancecan.ca/wiki/Trillium_Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Up | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | HPSS|HPSS}}&lt;br /&gt;
|{{Up | Balam|Balam}}&lt;br /&gt;
|{{Up | S4H | S4H}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | Teach|Teach}}&lt;br /&gt;
|{{Up3 | File system|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Storage}}&lt;br /&gt;
|{{Up3 | External Network|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Logging_in}} &lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''Mon Apr 06, 2026, 10:00 pm:''' We will have to reschedule the HPSS update; this attempt didn't work as expected.&lt;br /&gt;
&lt;br /&gt;
'''Mon Apr 06, 2026, 8:00 pm:''' HPSS scheduled maintenance: update of HPSS to v11.3_u4 and hsi-htar to v11.3_u1 (bug fixes)&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [https://docs.alliancecan.ca/wiki/Trillium_Quickstart Trillium Quickstart]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7640</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7640"/>
		<updated>2026-04-07T02:12:01Z</updated>

		<summary type="html">&lt;p&gt;Pinto: /* System Status */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Up3 | Trillium|https://docs.alliancecan.ca/wiki/Trillium_Quickstart}}&lt;br /&gt;
|{{Up3 | OnDemand|https://docs.alliancecan.ca/wiki/Trillium_Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{partial | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{down | HPSS|HPSS}}&lt;br /&gt;
|{{Up | Balam|Balam}}&lt;br /&gt;
|{{Up | S4H | S4H}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | Teach|Teach}}&lt;br /&gt;
|{{Up3 | File system|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Storage}}&lt;br /&gt;
|{{Up3 | External Network|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Logging_in}} &lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''Mon Apr 06, 2026, 10:00 pm:''' We will have to reschedule the HPSS update; this attempt didn't work as expected.&lt;br /&gt;
&lt;br /&gt;
'''Mon Apr 06, 2026, 8:00 pm:''' HPSS scheduled maintenance: update of HPSS to v11.3_u4 and hsi-htar to v11.3_u1 (bug fixes)&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [https://docs.alliancecan.ca/wiki/Trillium_Quickstart Trillium Quickstart]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7637</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7637"/>
		<updated>2026-04-06T13:14:32Z</updated>

		<summary type="html">&lt;p&gt;Pinto: /* System Status */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Up3 | Trillium|https://docs.alliancecan.ca/wiki/Trillium_Quickstart}}&lt;br /&gt;
|{{Up3 | OnDemand|https://docs.alliancecan.ca/wiki/Trillium_Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{partial | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{down | HPSS|HPSS}}&lt;br /&gt;
|{{Up | Balam|Balam}}&lt;br /&gt;
|{{Up | S4H | S4H}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | Teach|Teach}}&lt;br /&gt;
|{{Up3 | File system|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Storage}}&lt;br /&gt;
|{{Up3 | External Network|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Logging_in}} &lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''Mon Apr 06, 2026, 8:00 pm:''' HPSS scheduled maintenance: update of HPSS to v11.3_u4 and hsi-htar to v11.3_u1 (bug fixes)&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [https://docs.alliancecan.ca/wiki/Trillium_Quickstart Trillium Quickstart]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7628</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7628"/>
		<updated>2026-04-02T17:54:58Z</updated>

		<summary type="html">&lt;p&gt;Pinto: &lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Up3 | Trillium|https://docs.alliancecan.ca/wiki/Trillium_Quickstart}}&lt;br /&gt;
|{{Up3 | OnDemand|https://docs.alliancecan.ca/wiki/Trillium_Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Up | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | HPSS|HPSS}}&lt;br /&gt;
|{{Up | Balam|Balam}}&lt;br /&gt;
|{{Up | S4H | S4H}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | Teach|Teach}}&lt;br /&gt;
|{{Up3 | File system|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Storage}}&lt;br /&gt;
|{{Up3 | External Network|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Logging_in}} &lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
'''Mon Apr 06, 2026, 8:00 pm:''' HPSS scheduled maintenance: update of HPSS to v11.3_u4 and hsi-htar to v11.3_u1 (bug fixes)&lt;br /&gt;
&lt;br /&gt;
'''Wed Mar 25, 2026, 5:00 pm:''' Trillium is operational again.&lt;br /&gt;
&lt;br /&gt;
'''Wed Mar 25, 2026, 9:00 am:''' Teach is operational again.&lt;br /&gt;
&lt;br /&gt;
'''Tue Mar 24, 2026, 8:45 pm:''' Open OnDemand is operational again.&lt;br /&gt;
&lt;br /&gt;
'''Tue Mar 24, 2026, 1:00 pm:''' External connectivity is back. &lt;br /&gt;
&lt;br /&gt;
'''Tue Mar 24, 2026, 12:05 pm:''' External connectivity to the data centre was lost. &lt;br /&gt;
&lt;br /&gt;
'''Tue Mar 24, 2026, 7:00 am:''' Maintenance has started.&lt;br /&gt;
&lt;br /&gt;
'''Mon Mar 16, 2026, 1:30 pm:''' Recovering. Almost all systems are up again. Please resubmit any jobs that crashed.&lt;br /&gt;
&lt;br /&gt;
'''Mon Mar 16, 2026, 12:00 pm:''' A power glitch at the data centre caused compute nodes to go down.&lt;br /&gt;
&lt;br /&gt;
'''Thu Mar 12, 2026, 4:15 pm:''' Connections to Trillium are operational again.&lt;br /&gt;
&lt;br /&gt;
'''Thu Mar 12, 2026, 1:00 pm:''' We've had some login issues, particularly for Trillium-GPU. We're investigating.&lt;br /&gt;
&lt;br /&gt;
'''Downtime Announcement:'''  The winter cooling tower maintenance for the SciNet data centre will take place on March 24 and 25, 2026, starting at 7:00 a.m. on the 24th.  All SciNet systems (Trillium, OnDemand, Balam, S4H, Teach, as well as hosted equipment) will have their compute nodes shut down. Login nodes, file systems, and the HPSS system will remain available, and&lt;br /&gt;
jobs will be held in the queue until maintenance is complete.  Starting 7am on Mar 23, users are encouraged to submit small and short jobs that may be scheduled before the maintenance begins.&lt;br /&gt;
&lt;br /&gt;
'''Fri Feb 20, 2026, 11:35 pm:''' Power glitch, ~480 compute nodes rebooted. Regional power quality has been quite poor lately ([https://www.yorkregion.com/news/road-salt-blamed-for-power-outages/article_1a36d25d-5f97-56ee-a0c7-c49c7b732d38.html 1],&lt;br /&gt;
[https://www.yorkregion.com/news/power-company-executive-responds-to-york-region-outages/article_c4d072e7-2892-5c9c-8deb-ac5e1936779c.html 2]).&lt;br /&gt;
&lt;br /&gt;
'''Thu Feb 19, 2026, 3:00 pm:''' Systems restored. Please report issues to support@scinet.utoronto.ca.&lt;br /&gt;
&lt;br /&gt;
'''Tue Feb 17, 2026, 8:40 am:''' Power outage at the data centre.  Cooling issues have developed as a result.  Major systems (Trillium, S4H) are expected to be down until sometime Thursday. Login nodes and file systems will remain accessible.&lt;br /&gt;
&lt;br /&gt;
'''Mon Feb 16, 2026, 8:40 pm:''' Electricity is unstable in the data centre area due to severe snowfall.&lt;br /&gt;
&lt;br /&gt;
'''Thu Jan 29, 2026, 1:40 pm:''' All services are operational again.&lt;br /&gt;
&lt;br /&gt;
'''Thu Jan 29, 2026, 12:00 pm:''' The Trillium and Open OnDemand compute nodes are operational again. We are still working on bringing Balam, Neptune and S4H nodes up again.&lt;br /&gt;
&lt;br /&gt;
'''Thu Jan 29, 2026, 10:00 am:''' There was a power glitch at the data centre overnight. The login nodes are accessible but the compute nodes are down.  &lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [https://docs.alliancecan.ca/wiki/Trillium_Quickstart Trillium Quickstart]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7625</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7625"/>
		<updated>2026-03-26T13:51:50Z</updated>

		<summary type="html">&lt;p&gt;Pinto: /* System Status */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Up3 | Trillium|https://docs.alliancecan.ca/wiki/Trillium_Quickstart}}&lt;br /&gt;
|{{Up3 | OnDemand|https://docs.alliancecan.ca/wiki/Trillium_Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Up | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | HPSS|HPSS}}&lt;br /&gt;
|{{Up | Balam|Balam}}&lt;br /&gt;
|{{Up | S4H | S4H}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | Teach|Teach}}&lt;br /&gt;
|{{Up3 | File system|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Storage}}&lt;br /&gt;
|{{Up3 | External Network|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Logging_in}} &lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''Wed Mar 25, 2026, 5:00 pm:''' Trillium is operational again.&lt;br /&gt;
&lt;br /&gt;
'''Wed Mar 25, 2026, 9:00 am:''' Teach is operational again.&lt;br /&gt;
&lt;br /&gt;
'''Tue Mar 24, 2026, 8:45 pm:''' Open OnDemand is operational again.&lt;br /&gt;
&lt;br /&gt;
'''Tue Mar 24, 2026, 1:00 pm:''' External connectivity is back. &lt;br /&gt;
&lt;br /&gt;
'''Tue Mar 24, 2026, 12:05 pm:''' External connectivity to the data centre was lost. &lt;br /&gt;
&lt;br /&gt;
'''Tue Mar 24, 2026, 7:00 am:''' Maintenance has started.&lt;br /&gt;
&lt;br /&gt;
'''Mon Mar 16, 2026, 1:30 pm:''' Recovering. Almost all systems are up again. Please resubmit any jobs that crashed.&lt;br /&gt;
&lt;br /&gt;
'''Mon Mar 16, 2026, 12:00 pm:''' A power glitch at the data centre caused compute nodes to go down.&lt;br /&gt;
&lt;br /&gt;
'''Thu Mar 12, 2026, 4:15 pm:''' Connections to Trillium are operational again.&lt;br /&gt;
&lt;br /&gt;
'''Thu Mar 12, 2026, 1:00 pm:''' We've had some login issues, particularly for Trillium-GPU. We're investigating.&lt;br /&gt;
&lt;br /&gt;
'''Downtime Announcement:'''  The winter cooling tower maintenance for the SciNet data centre will take place on March 24 and 25, 2026, starting at 7:00 a.m. on the 24th.  All SciNet systems (Trillium, OnDemand, Balam, S4H, Teach, as well as hosted equipment) will have their compute nodes shut down. Login nodes, file systems, and the HPSS system will remain available, and&lt;br /&gt;
jobs will be held in the queue until maintenance is complete.  Starting 7am on Mar 23, users are encouraged to submit small and short jobs that may be scheduled before the maintenance begins.&lt;br /&gt;
&lt;br /&gt;
'''Fri Feb 20, 2026, 11:35 pm:''' Power glitch, ~480 compute nodes rebooted. Regional power quality has been quite poor lately ([https://www.yorkregion.com/news/road-salt-blamed-for-power-outages/article_1a36d25d-5f97-56ee-a0c7-c49c7b732d38.html 1],&lt;br /&gt;
[https://www.yorkregion.com/news/power-company-executive-responds-to-york-region-outages/article_c4d072e7-2892-5c9c-8deb-ac5e1936779c.html 2]).&lt;br /&gt;
&lt;br /&gt;
'''Thu Feb 19, 2026, 3:00 pm:''' Systems restored. Please report issues to support@scinet.utoronto.ca.&lt;br /&gt;
&lt;br /&gt;
'''Tue Feb 17, 2026, 8:40 am:''' Power outage at the data centre.  Cooling issues have developed as a result.  Major systems (Trillium, S4H) are expected to be down until sometime Thursday. Login nodes and file systems will remain accessible.&lt;br /&gt;
&lt;br /&gt;
'''Mon Feb 16, 2026, 8:40 pm:''' Electricity is unstable in the data centre area due to severe snowfall.&lt;br /&gt;
&lt;br /&gt;
'''Thu Jan 29, 2026, 1:40 pm:''' All services are operational again.&lt;br /&gt;
&lt;br /&gt;
'''Thu Jan 29, 2026, 12:00 pm:''' The Trillium and Open OnDemand compute nodes are operational again. We are still working on bringing Balam, Neptune and S4H nodes up again.&lt;br /&gt;
&lt;br /&gt;
'''Thu Jan 29, 2026, 10:00 am:''' There was a power glitch at the data centre overnight. The login nodes are accessible but the compute nodes are down.  &lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [https://docs.alliancecan.ca/wiki/Trillium_Quickstart Trillium Quickstart]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7493</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7493"/>
		<updated>2026-01-17T04:12:02Z</updated>

		<summary type="html">&lt;p&gt;Pinto: &lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Up3 | Trillium|https://docs.alliancecan.ca/wiki/Trillium_Quickstart}}&lt;br /&gt;
|{{Up3 | OnDemand|https://docs.alliancecan.ca/wiki/Trillium_Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Up | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | HPSS|HPSS}}&lt;br /&gt;
|{{Up | Balam|Balam}}&lt;br /&gt;
|{{Up | S4H | S4H}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | Teach|Teach}}&lt;br /&gt;
|{{Up3 | File system|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Storage}}&lt;br /&gt;
|{{Up3 | External Network|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Logging_in}} &lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''Thu Jan 16, 2026, 11:00 pm:''' HPSS is back online, and accessible via alliancecan#hpss Globus endpoint. &lt;br /&gt;
&lt;br /&gt;
'''Thu Jan 15, 2026, 10:00 pm:''' HPSS will undergo maintenance on Friday morning, Jan 16, 2026, including the alliancecan#hpss Globus endpoint.&lt;br /&gt;
&lt;br /&gt;
'''Tue Jan 6, 2026, 10:15 am:''' OnDemand has been fixed and is working again.&lt;br /&gt;
&lt;br /&gt;
'''Mon Jan 5, 2026, 9:00 pm:''' The authentication mechanism of OnDemand is not working.&lt;br /&gt;
&lt;br /&gt;
'''Wed Dec 31, 2025, 12:40 pm:''' We believe the problem has now been resolved.  Please let us know if you still experience login problems or aborted jobs.&lt;br /&gt;
&lt;br /&gt;
'''Tue Dec 30, 2025, 2:10 pm:''' We are experiencing problems with authentication, resulting in failed logins, OOD errors, and aborted jobs (with &amp;quot;prolog error&amp;quot;).  Please bear with us, as we are very short-staffed during the holiday break.  We will post updates here.&lt;br /&gt;
&lt;br /&gt;
'''Tue Dec 3, 2025, 11:30 am:''' Open OnDemand is fully operational again.&lt;br /&gt;
&lt;br /&gt;
'''Sat Nov 29, 2025, 00:40 am:''' There has been a problem with the water chiller. Some systems are offline.&lt;br /&gt;
&lt;br /&gt;
'''Wed Nov 5, 2025, 12:55 pm:''' Balam is back online.&lt;br /&gt;
&lt;br /&gt;
'''Wed Nov 5, 2025, 10:00 am:''' Open OnDemand is back online.&lt;br /&gt;
&lt;br /&gt;
'''Tue Nov 4, 2025, 11:00 pm:''' Most of the work is done; data movers, Globus, and HPSS are back online. Remaining services will be worked on tomorrow.&lt;br /&gt;
&lt;br /&gt;
'''Tue Nov 4, 2025, 8:30 am:''' Scheduled network maintenance. Trillium cluster is *not* affected.&lt;br /&gt;
&lt;br /&gt;
'''Tue Oct 21, 2025, 5:30 pm:''' Balam maintenance finished.&lt;br /&gt;
&lt;br /&gt;
'''Tue Oct 21, 2025, 7:00 am:''' Balam maintenance day.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 15, 2025, 3:55 pm:''' Trillium inbound connections through trillium.alliancecan.ca or trillium.scinet.utoronto.ca are working again.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 15, 2025, 3:05 pm:''' Trillium is experiencing external network issues affecting incoming traffic. Please try: ssh USERNAME@tri-login01.scinet.utoronto.ca in the meantime.&lt;br /&gt;
 &lt;br /&gt;
'''Thu Oct 06, 2025, 8:00 pm:''' HPSS is fully functional. You may submit archive jobs from trillium login nodes, datamovers and robots.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 03, 2025, 6:30 pm:''' HPSS is back online, and already accessible via the alliancecan#hpss Globus endpoint. The directory tree now follows that of the other Alliance clusters. We're still working on job submission via Slurm.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 01, 2025, 0:00 am:''' Niagara compute nodes are now unavailable for regular users. The login nodes will remain available for a while to allow a few last data transfers, although transfers from the Niagara file systems to Trillium are best done on nia-dm1.scinet.utoronto.ca.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 01, 2025, 9:30 am:''' HPSS is down for scheduled maintenance, including the alliancecan#hpss Globus endpoint.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [https://docs.alliancecan.ca/wiki/Trillium_Quickstart Trillium Quickstart]&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7490</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7490"/>
		<updated>2026-01-16T03:08:19Z</updated>

		<summary type="html">&lt;p&gt;Pinto: /* System Status */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Up3 | Trillium|https://docs.alliancecan.ca/wiki/Trillium_Quickstart}}&lt;br /&gt;
|{{Up3 | OnDemand|https://docs.alliancecan.ca/wiki/Trillium_Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Up | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down | HPSS|HPSS}}&lt;br /&gt;
|{{Up | Balam|Balam}}&lt;br /&gt;
|{{Up | S4H | S4H}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | Teach|Teach}}&lt;br /&gt;
|{{Up3 | File system|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Storage}}&lt;br /&gt;
|{{Up3 | External Network|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Logging_in}} &lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''Thu Jan 15, 2026, 10:00 pm:''' HPSS will undergo maintenance on Friday morning, Jan 16, 2026, including the alliancecan#hpss Globus endpoint.&lt;br /&gt;
&lt;br /&gt;
'''Tue Jan 6, 2026, 10:15 am:''' OnDemand has been fixed and is working again.&lt;br /&gt;
&lt;br /&gt;
'''Mon Jan 5, 2026, 9:00 pm:''' The authentication mechanism of OnDemand is not working.&lt;br /&gt;
&lt;br /&gt;
'''Wed Dec 31, 2025, 12:40 pm:''' We believe the problem has now been resolved.  Please let us know if you still experience login problems or aborted jobs.&lt;br /&gt;
&lt;br /&gt;
'''Tue Dec 30, 2025, 2:10 pm:''' We are experiencing problems with authentication, resulting in failed logins, OOD errors, and aborted jobs (with &amp;quot;prolog error&amp;quot;).  Please bear with us, as we are very short-staffed during the holiday break.  We will post updates here.&lt;br /&gt;
&lt;br /&gt;
'''Tue Dec 3, 2025, 11:30 am:''' Open OnDemand is fully operational again.&lt;br /&gt;
&lt;br /&gt;
'''Sat Nov 29, 2025, 00:40 am:''' There has been a problem with the water chiller. Some systems are offline.&lt;br /&gt;
&lt;br /&gt;
'''Wed Nov 5, 2025, 12:55 pm:''' Balam is back online.&lt;br /&gt;
&lt;br /&gt;
'''Wed Nov 5, 2025, 10:00 am:''' Open OnDemand is back online.&lt;br /&gt;
&lt;br /&gt;
'''Tue Nov 4, 2025, 11:00 pm:''' Most of the work is done; data movers, Globus, and HPSS are back online. Remaining services will be worked on tomorrow.&lt;br /&gt;
&lt;br /&gt;
'''Tue Nov 4, 2025, 8:30 am:''' Scheduled network maintenance. Trillium cluster is *not* affected.&lt;br /&gt;
&lt;br /&gt;
'''Tue Oct 21, 2025, 5:30 pm:''' Balam maintenance finished.&lt;br /&gt;
&lt;br /&gt;
'''Tue Oct 21, 2025, 7:00 am:''' Balam maintenance day.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 15, 2025, 3:55 pm:''' Trillium inbound connections through trillium.alliancecan.ca or trillium.scinet.utoronto.ca are working again.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 15, 2025, 3:05 pm:''' Trillium is experiencing external network issues affecting incoming traffic. Please try: ssh USERNAME@tri-login01.scinet.utoronto.ca in the meantime.&lt;br /&gt;
 &lt;br /&gt;
'''Thu Oct 06, 2025, 8:00 pm:''' HPSS is fully functional. You may submit archive jobs from trillium login nodes, datamovers and robots.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 03, 2025, 6:30 pm:''' HPSS is back online, and already accessible via the alliancecan#hpss Globus endpoint. The directory tree now follows that of the other Alliance clusters. We're still working on job submission via Slurm.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 01, 2025, 0:00 am:''' Niagara compute nodes are now unavailable for regular users. The login nodes will remain available for a while to allow a few last data transfers, although transfers from the Niagara file systems to Trillium are best done on nia-dm1.scinet.utoronto.ca.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 01, 2025, 9:30 am:''' HPSS is down for scheduled maintenance, including the alliancecan#hpss Globus endpoint.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [https://docs.alliancecan.ca/wiki/Trillium_Quickstart Trillium Quickstart]&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7487</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7487"/>
		<updated>2026-01-16T03:07:26Z</updated>

		<summary type="html">&lt;p&gt;Pinto: &lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Up3 | Trillium|https://docs.alliancecan.ca/wiki/Trillium_Quickstart}}&lt;br /&gt;
|{{Up3 | OnDemand|https://docs.alliancecan.ca/wiki/Trillium_Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Up | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down | HPSS|HPSS}}&lt;br /&gt;
|{{Up | Balam|Balam}}&lt;br /&gt;
|{{Up | S4H | S4H}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | Teach|Teach}}&lt;br /&gt;
|{{Up3 | File system|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Storage}}&lt;br /&gt;
|{{Up3 | External Network|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Logging_in}} &lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''Thu Jan 15, 2026, 10:00 pm:''' HPSS will undergo maintenance on Friday morning, Jan 16, 2026.&lt;br /&gt;
&lt;br /&gt;
'''Tue Jan 6, 2026, 10:15 am:''' OnDemand has been fixed and is working again.&lt;br /&gt;
&lt;br /&gt;
'''Mon Jan 5, 2026, 9:00 pm:''' The authentication mechanism of OnDemand is not working.&lt;br /&gt;
&lt;br /&gt;
'''Wed Dec 31, 2025, 12:40 pm:''' We believe the problem has now been resolved.  Please let us know if you still experience login problems or aborted jobs.&lt;br /&gt;
&lt;br /&gt;
'''Tue Dec 30, 2025, 2:10 pm:''' We are experiencing problems with authentication, resulting in failed logins, OOD errors, and aborted jobs (with &amp;quot;prolog error&amp;quot;).  Please bear with us, as we are very short-staffed during the holiday break.  We will post updates here.&lt;br /&gt;
&lt;br /&gt;
'''Tue Dec 3, 2025, 11:30 am:''' Open OnDemand is fully operational again.&lt;br /&gt;
&lt;br /&gt;
'''Sat Nov 29, 2025, 00:40 am:''' There has been a problem with the water chiller. Some systems are offline.&lt;br /&gt;
&lt;br /&gt;
'''Wed Nov 5, 2025, 12:55 pm:''' Balam is back online.&lt;br /&gt;
&lt;br /&gt;
'''Wed Nov 5, 2025, 10:00 am:''' Open OnDemand is back online.&lt;br /&gt;
&lt;br /&gt;
'''Tue Nov 4, 2025, 11:00 pm:''' Most of the work is done; data movers, Globus, and HPSS are back online. Remaining services will be worked on tomorrow.&lt;br /&gt;
&lt;br /&gt;
'''Tue Nov 4, 2025, 8:30 am:''' Scheduled network maintenance. Trillium cluster is *not* affected.&lt;br /&gt;
&lt;br /&gt;
'''Tue Oct 21, 2025, 5:30 pm:''' Balam maintenance finished.&lt;br /&gt;
&lt;br /&gt;
'''Tue Oct 21, 2025, 7:00 am:''' Balam maintenance day.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 15, 2025, 3:55 pm:''' Trillium inbound connections through trillium.alliancecan.ca or trillium.scinet.utoronto.ca are working again.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 15, 2025, 3:05 pm:''' Trillium is experiencing external network issues affecting incoming traffic. Please try: ssh USERNAME@tri-login01.scinet.utoronto.ca in the meantime.&lt;br /&gt;
 &lt;br /&gt;
'''Thu Oct 06, 2025, 8:00 pm:''' HPSS is fully functional. You may submit archive jobs from trillium login nodes, datamovers and robots.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 03, 2025, 6:30 pm:''' HPSS is back online, and already accessible via the alliancecan#hpss Globus endpoint. The directory tree now follows that of the other Alliance clusters. We're still working on job submission via Slurm.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 01, 2025, 0:00 am:''' Niagara compute nodes are now unavailable for regular users. The login nodes will remain available for a while to allow a few last data transfers, although transfers from the Niagara file systems to Trillium are best done on nia-dm1.scinet.utoronto.ca.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 01, 2025, 9:30 am:''' HPSS is down for scheduled maintenance, including the alliancecan#hpss Globus endpoint.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [https://docs.alliancecan.ca/wiki/Trillium_Quickstart Trillium Quickstart]&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=HPSS&amp;diff=7454</id>
		<title>HPSS</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=HPSS&amp;diff=7454"/>
		<updated>2025-12-24T00:28:13Z</updated>

		<summary type="html">&lt;p&gt;Pinto: /* HTAR */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{|align=right&lt;br /&gt;
|align=center|'''Topology Overview'''&lt;br /&gt;
|-&lt;br /&gt;
|[[Image:HPSS-overview.jpg|right|x600px]]&lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
== High Performance Storage System ==&lt;br /&gt;
&lt;br /&gt;
The High Performance Storage System ([https://www.hpss-collaboration.org/index.shtml HPSS] [https://en.wikipedia.org/wiki/High_Performance_Storage_System wikipedia]) is a tape-backed hierarchical storage system that provides a significant portion of the allocated storage space at SciNet. It is a repository for archiving data that is not being actively used. Data can be returned to the active shared parallel filesystem on Trillium (&amp;quot;VAST&amp;quot;) when it is needed.  &lt;br /&gt;
&lt;br /&gt;
In the context of the {{Alliance}} Resource Allocations, it is called 'nearline'.&lt;br /&gt;
&lt;br /&gt;
Since this system is intended for large data storage, it is accessible only to groups who have been awarded storage space at SciNet beyond 5TB in the yearly RAC resource allocation round. However, upon request, any user may be granted access to HPSS, up to 2TB per group, so that you may get familiar with the system (just email support@scinet.utoronto.ca).&lt;br /&gt;
&lt;br /&gt;
Access to and transfer of data into and out of HPSS are done under the control of the user, whose interaction is expected to be scripted and submitted as a batch job, using one or more of the following utilities:&lt;br /&gt;
* [https://www.racf.bnl.gov/Facility/HPSS/Documentation/HSI/doc_intro.html HSI] is a client with an ftp-like functionality which can be used to archive and retrieve large files. It is also useful for browsing the contents of HPSS.&lt;br /&gt;
* [https://www.sdcc.bnl.gov/sites/default/files/2021-09/htar.txt HTAR] is a utility that creates tar formatted archives directly into HPSS. It also creates a separate index file (.idx) that can be accessed and browsed quickly.&lt;br /&gt;
* [https://support.scinet.utoronto.ca/wiki/index.php/ISH ISH] is a TUI utility that can perform an inventory of the files and directories in your tarballs.&lt;br /&gt;
&lt;br /&gt;
We're currently running HPSS v 11.2.&lt;br /&gt;
&lt;br /&gt;
== Why should I use and trust HPSS? ==&lt;br /&gt;
* HPSS is a 25-year-old collaboration between IBM and the DoE labs in the US, and is used by about 45 facilities in the [http://www.top500.org “Top 500”] HPC list (plus some black sites).&lt;br /&gt;
* Over 2.5 ExaBytes of combined storage worldwide.&lt;br /&gt;
* The top 3 sites in the world reported (fall 2017) having 360PB, 220PB and 125PB in production (ECMWF, UKMO and BNL).&lt;br /&gt;
* Environment Canada also adopted HPSS in 2017 to store Nav Canada data as well as to serve as their own archive; it currently has 2 x 100PB of capacity installed.&lt;br /&gt;
* The SciNet HPSS system has been providing nearline capacity for important research data in Canada since early 2011, already at the 10PB level in 2018.&lt;br /&gt;
* Very reliable, with data redundancy and data insurance built in (dual copies of everything are kept on tape at SciNet).&lt;br /&gt;
* Data on cache and tapes can be geo-distributed for further resilience and HA.&lt;br /&gt;
* Highly scalable; since the hardware and software upgrades in 2019, aggregate ingest and recall performance at SciNet is ~150 TB/day.&lt;br /&gt;
* The HSI/HTAR clients are also very reliable and are used at several HPSS sites. ISH was written at SciNet.&lt;br /&gt;
* [[Media:HPSS_rationale_SNUG.pdf|HPSS fits well with the Storage Capacity Expansion Plan at SciNet]] (pdf presentation)&lt;br /&gt;
&lt;br /&gt;
== Guidelines ==&lt;br /&gt;
* A large portion of the storage capacity of HPSS is provided on tape -- a medium that is not suited for storing small files. Files smaller than ~200MB should be grouped into tarballs with '''tar''' or '''htar'''.&lt;br /&gt;
* If you intend to use '''HSI''' or '''Globus''' to ingest material, you should favor tarballs over expanded directory trees. &amp;lt;font color=red&amp;gt;Your average file size in the archive should be over 1GB/file&amp;lt;/font&amp;gt;.&lt;br /&gt;
* Optimal performance for aggregated transfers and allocation on tapes is obtained with [[Why not tarballs too large |&amp;lt;font color=red&amp;gt;tarballs of size 1TB or less&amp;lt;/font&amp;gt;]], whether ingested by htar or hsi ([[Why not tarballs too large |&amp;lt;font color=red&amp;gt;for good reasons&amp;lt;/font&amp;gt;]]); a short size check is sketched right after this list.&lt;br /&gt;
* We strongly urge that you use the sample scripts we are providing as the basis for your job submissions.&lt;br /&gt;
* Make sure to check the exit codes of each of the steps in your scripts, and check any returned logs for errors after any data transfer or tarball creation process.&lt;br /&gt;
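&lt;br /&gt;
As a rough, purely illustrative check (not an official SciNet script; the directory name is just the sample used elsewhere on this page), you can gauge whether a directory is a reasonable candidate for a single tarball before submitting an archive job:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
# Illustrative pre-flight check only: estimate the total size and the&lt;br /&gt;
# number of files, so that tarballs stay at or below ~1TB and you avoid&lt;br /&gt;
# archiving large numbers of tiny files.&lt;br /&gt;
du -sh $SCRATCH/workarea/finished-job1/&lt;br /&gt;
find $SCRATCH/workarea/finished-job1/ -type f | wc -l&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;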
&lt;br /&gt;
== &amp;lt;font color=red&amp;gt;'''#### New to the HPSS/nearline/archive System? ####'''&amp;lt;/font&amp;gt; ==&lt;br /&gt;
&lt;br /&gt;
First, nearline, archive, and HPSS all mean the same thing at SciNet.  HPSS is the true name of this storage system.&lt;br /&gt;
&lt;br /&gt;
Second, HPSS is not a regular file system, and as such is not mounted on the regular login, compute, and datamover nodes. Interaction with the HPSS system is therefore different from using simple copy, move, and tar commands.&lt;br /&gt;
&lt;br /&gt;
Before trying to use the system, you need to confirm you are part of a group that already has an HPSS RAC allocation; otherwise, email SciNet support and request an HPSS account (or else you will get &amp;quot;Error - authentication/initialization failed&amp;quot; messages and exit code 71).&lt;br /&gt;
&lt;br /&gt;
THIS set of instructions on the wiki is the best and most condensed &amp;quot;manual&amp;quot; we have on how to use HPSS. It may seem a bit overwhelming at first, because of all the job script templates we make available below (they are here so you don't have to think &lt;br /&gt;
too much, just copy and paste), but if you approach the index at the top as a &amp;quot;case switch&amp;quot; mechanism for what you intend to do, everything falls into place.&lt;br /&gt;
&lt;br /&gt;
== The 3 ways to access HPSS ==&lt;br /&gt;
* Submitting jobs to the [[HPSS#Access_Through_the_Queue_System | Trillium archive partitions]] (archiveshort or archivelong), and using HTAR or HSI on the archive02 node. These tools are optimized for speed and scale, to efficiently deal with small files, and are very resilient to timeouts due to slow tape access. That is our recommendation for large transfers.&lt;br /&gt;
* Using Globus, which can be a very good trade-off between the efficiency of HSI/HTAR and the convenience of a web GUI. You may transfer data between two endpoints, for instance (see the minimal command-line sketch just after this list):&lt;br /&gt;
  alliancecan#trillium&lt;br /&gt;
  alliancecan#hpss&lt;br /&gt;
* Submitting an interactive job to the [[HPSS#Access_Through_the_Queue_System | VFS partition]] (vfsshort), and using standard linux tools (cp, rsync, ls, cd, ...) to access the VFS based /archive mount point on the vfs02 node. We don't recommend this way, unless you are transferring only a handful of files, or just navigating the naming-space (directory tree similar to the other Trillium file systems).&lt;br /&gt;
  /archive&lt;br /&gt;
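&lt;br /&gt;
Most users will drive Globus through its web interface, but if you prefer the command line, a minimal sketch with the [https://docs.globus.org/cli/ Globus CLI] might look like the following. This is an illustration only, not an official SciNet recipe: the UUIDs and paths are placeholders that you would have to look up and adjust yourself.&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
# Illustrative only: find the endpoint UUIDs, then request a transfer.&lt;br /&gt;
globus login&lt;br /&gt;
globus endpoint search &amp;quot;alliancecan#trillium&amp;quot;&lt;br /&gt;
globus endpoint search &amp;quot;alliancecan#hpss&amp;quot;&lt;br /&gt;
# Substitute the UUIDs returned above and your own paths:&lt;br /&gt;
globus transfer &amp;quot;TRILLIUM_UUID:/path/on/trillium/finished-job1.tar&amp;quot; \&lt;br /&gt;
                &amp;quot;HPSS_UUID:/path/in/hpss/finished-job1.tar&amp;quot; \&lt;br /&gt;
                --label &amp;quot;archive finished-job1&amp;quot;&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;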
&lt;br /&gt;
Try this sequence:&lt;br /&gt;
&lt;br /&gt;
1) [https://docs.scinet.utoronto.ca/index.php/HPSS#Access_Through_an_Interactive_HSI_session take a look around HPSS using an interactive HSI session]&lt;br /&gt;
&lt;br /&gt;
(most linux shell commands have an equivalent in HPSS)&lt;br /&gt;
&lt;br /&gt;
2) [https://docs.scinet.utoronto.ca/index.php/HPSS#Sample_tarball_create archive a small test directory using HTAR]&lt;br /&gt;
&lt;br /&gt;
2a) use step 1) to see what happened&lt;br /&gt;
&lt;br /&gt;
3) [https://docs.scinet.utoronto.ca/index.php/HPSS#Sample_data_offload archive a file using hsi]&lt;br /&gt;
&lt;br /&gt;
3a) use step 1) to see what happened&lt;br /&gt;
&lt;br /&gt;
4) [https://docs.scinet.utoronto.ca/index.php/HPSS#Sample_transferring_directories archive a small test directory using HSI]&lt;br /&gt;
&lt;br /&gt;
4a) use step 1) to see what happened&lt;br /&gt;
&lt;br /&gt;
5) now try the other cases and so on. In a couple of hours you'll be in pretty good shape.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
== Access Through the Queue System  ==&lt;br /&gt;
All access to the archive system is done through the [https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Submitting_jobs_to_the_scheduler Trillium queue system].&lt;br /&gt;
&lt;br /&gt;
* Job submissions should be done to the 'archivelong', 'archiveshort' or 'vfsshort' partitions.&lt;br /&gt;
* Short jobs are limited to 1H walltime by default. Long jobs (&amp;gt; 1H) are limited to 72H walltime.&lt;br /&gt;
* Users are limited to only 2 long jobs and 2 short jobs at the same time, and 10 jobs total in each queue.&lt;br /&gt;
* There can only be 5 long jobs running at any given time overall. Remaining submissions will be placed on hold for the time being. So far we have not seen a need for an overall limit on short jobs.&lt;br /&gt;
* Jobs to the 'archivelong' and 'archiveshort' partitions must use [[HPSS#HTAR | htar]] or [[HPSS#HSI | hsi]]. These tools are optimized for speed and scale, to efficiently deal with small files, and are very resilient to timeouts due to slow tape access.&lt;br /&gt;
* The 'vfsshort' partition can only be used for interactive jobs, so that you can navigate the VFS based /archive mount point (Virtual File System), and/or make small file transfers to/from HPSS using standard linux tools (ls, cd, cp, rsync, etc). This access is always limited to 1 hour.&lt;br /&gt;
&lt;br /&gt;
You can submit your job like this:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
 sbatch jobscript.sh (to 'archivelong' or 'archiveshort')&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
And you can get an interactive session like this:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
 salloc -p archiveshort&lt;br /&gt;
    OR&lt;br /&gt;
 salloc -p vfsshort&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
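&lt;br /&gt;
In the vfsshort case, once the interactive session starts you can use ordinary linux commands on the VFS based /archive mount point. A purely illustrative sequence (the paths are placeholders for your own group and user directories) might be:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
salloc -p vfsshort -t 1:00:00&lt;br /&gt;
# once on the vfs node, /archive is mounted and standard tools work:&lt;br /&gt;
ls /archive/&amp;lt;group&amp;gt;/&amp;lt;user&amp;gt;/&lt;br /&gt;
cp /archive/&amp;lt;group&amp;gt;/&amp;lt;user&amp;gt;/notes.txt $SCRATCH/&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;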
&lt;br /&gt;
[https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Submitting_jobs_to_the_scheduler You may learn about SLURM in more detail here.]&lt;br /&gt;
&lt;br /&gt;
The status of pending jobs can be monitored with squeue specifying the archive partition:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
squeue -p archiveshort&lt;br /&gt;
  OR&lt;br /&gt;
squeue -p archivelong&lt;br /&gt;
  OR&lt;br /&gt;
squeue -p vfsshort&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
== Access Through an Interactive HSI session  ==&lt;br /&gt;
* You may want to acquire an interactive shell, start an HSI session and navigate the archive naming-space. Keep in mind, you're restricted to 1H.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
pinto@tri-login02:~$ salloc -p archiveshort -t 1:00:00&lt;br /&gt;
salloc: Granted job allocation 50918&lt;br /&gt;
salloc: Waiting for resource configuration&lt;br /&gt;
salloc: Nodes hpss-archive02-ib are ready for job&lt;br /&gt;
hpss-archive02-ib:~$&lt;br /&gt;
&lt;br /&gt;
hpss-archive02-ib:~$ hsi    (DON'T FORGET TO START HSI)&lt;br /&gt;
******************************************************************&lt;br /&gt;
*     Welcome to HPSS@SciNet - High Perfomance Storage System    *&lt;br /&gt;
*                                                                * &lt;br /&gt;
*            INFO: THIS IS THE NEW 7.5.1 HPSS SYSTEM!            *&lt;br /&gt;
*                                                                *&lt;br /&gt;
*        Contact Information: support@scinet.utoronto.ca         *&lt;br /&gt;
*  NOTE: do not transfer SMALL FILES with HSI. Use HTAR instead  *&lt;br /&gt;
*              CHECK THE INTEGRITY OF YOUR TARBALLS              *&lt;br /&gt;
****************************************************************** &lt;br /&gt;
[HSI]/archive/scinet/pinto-&amp;gt; ls&lt;br /&gt;
&lt;br /&gt;
[HSI]/archive/scinet/pinto-&amp;gt; cd &amp;lt;some directory&amp;gt;&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
NOTE: the VFS based /archive mount point is not visible on the archive02 node with the standard linux prompt. You must use HSI (or get an interactive session on the vfsshort partition).&lt;br /&gt;
&lt;br /&gt;
=== Scripted File Transfers ===&lt;br /&gt;
File transfers in and out of the HPSS should be scripted into jobs and submitted to the ''archivelong'' partition or the ''archiveshort'' partition. See generic example below:&lt;br /&gt;
&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash -l&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong &lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J htar_create_tarball_in_hpss&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
echo &amp;quot;Creating a htar of finished-job1/ directory tree into HPSS&amp;quot;&lt;br /&gt;
trap &amp;quot;echo 'Job script not completed';exit 129&amp;quot; TERM INT&lt;br /&gt;
# Note that your initial directory in HPSS will be $ARCHIVE&lt;br /&gt;
 &lt;br /&gt;
DEST=$ARCHIVE/finished-job1.tar&lt;br /&gt;
 &lt;br /&gt;
# htar WILL overwrite an existing file with the same name so check beforehand.&lt;br /&gt;
&lt;br /&gt;
hsi ls $DEST &amp;amp;&amp;gt; /dev/null&lt;br /&gt;
status=$?&lt;br /&gt;
&lt;br /&gt;
if [ $status == 0 ]; then   &lt;br /&gt;
    echo &amp;quot;File $DEST already exists. Nothing has been done&amp;quot;&lt;br /&gt;
    exit 1&lt;br /&gt;
fi&lt;br /&gt;
&lt;br /&gt;
cd $SCRATCH/workarea/ &lt;br /&gt;
htar -Humask=0137 -cpf $ARCHIVE/finished-job1.tar finished-job1/ &lt;br /&gt;
status=$?&lt;br /&gt;
 &lt;br /&gt;
trap - TERM INT&lt;br /&gt;
 &lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HTAR returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
'''Note:''' Always trap the execution of your jobs for abnormal terminations, and be sure to return the exit code.&lt;br /&gt;
&lt;br /&gt;
=== Job Dependencies ===&lt;br /&gt;
&lt;br /&gt;
Typically data will be recalled to /scratch when it is needed for analysis. Job dependencies can be constructed so that analysis jobs wait in the queue for data recalls before starting. The sbatch flag is&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
--dependency=&amp;lt;type:JOBID&amp;gt;&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
where JOBID is the job number of the archive recalling job that must finish successfully before the analysis job can start.&lt;br /&gt;
&lt;br /&gt;
Here is a shortcut for generating the dependency (look up the [https://docs.scinet.utoronto.ca/index.php/HPSS#Sample_data_recall data-recall.sh samples]):&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
hpss-archive02-ib:~$ sbatch -d afterok:$(sbatch --parsable data-recall.sh) job-to-work-on-recalled-data.sh&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
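&lt;br /&gt;
For illustration only, the second script in that one-liner can be a perfectly ordinary compute job; the hypothetical sketch below (the recalled directory, the analysis command, and the requested resources are placeholders) just shows that nothing HPSS-specific is needed once the recall has finished:&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash -l&lt;br /&gt;
#SBATCH -t 1:00:00&lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J analyse_recalled_data&lt;br /&gt;
&lt;br /&gt;
# Hypothetical analysis job: it assumes the preceding data-recall.sh job&lt;br /&gt;
# restored finished-job1/ under $SCRATCH/workarea/.&lt;br /&gt;
cd $SCRATCH/workarea/finished-job1 || exit 1&lt;br /&gt;
&lt;br /&gt;
# Placeholder for your own analysis command:&lt;br /&gt;
./run_analysis.sh&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;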
&lt;br /&gt;
== HTAR ==&lt;br /&gt;
''' Please aggregate small files (&amp;lt;~200MB) into tarballs or htar files. '''&lt;br /&gt;
&lt;br /&gt;
''' [[Why not tarballs too large |&amp;lt;font color=red&amp;gt;Keep your tarballs to size 1TB or less&amp;lt;/font&amp;gt;]], whether ingested by htar or hsi ([[Why not tarballs too large | &amp;lt;font color=red&amp;gt;WHY?&amp;lt;/font&amp;gt;]])'''&lt;br /&gt;
&lt;br /&gt;
HTAR is a utility used for aggregating a set of files and directories. It uses a sophisticated multithreaded buffering scheme to write files directly from VAST into HPSS, creating an archive file that conforms to the POSIX TAR specification, and thereby achieves a high rate of performance. HTAR does not do gzip compression; however, it has a built-in checksum algorithm.&lt;br /&gt;
&lt;br /&gt;
'''Caution'''&lt;br /&gt;
* Unlike with cput/cget in HSI, &amp;quot;prompt before overwrite&amp;quot; is not the default behaviour with (h)tar. Be careful not to unintentionally overwrite a previous htar destination file in HPSS. A similar situation can occur when extracting material back into VAST and overwriting the originals. Be sure to double-check the logic in your scripts.&lt;br /&gt;
* Check the HTAR exit code and log file before removing any files from the VAST active filesystems.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
=== HTAR Usage ===&lt;br /&gt;
* To write the ''file1'' and ''file2'' files to a new archive called ''files.tar'' in the default HPSS home directory, and preserve mask attributes (-p), enter:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
    htar -cpf files.tar file1 file2&lt;br /&gt;
OR&lt;br /&gt;
    htar -cpf $ARCHIVE/files.tar file1 file2&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
* To write a ''subdirA'' to a new archive called ''subdirA.tar'' in the default HPSS home directory, enter:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
    htar -cpf subdirA.tar subdirA/&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
* To extract all files from the archive file called ''proj1.tar'' in HPSS into the ''project1/src'' directory in VAST, and use the time of extraction as the modification time, enter:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
    cd  project1/src&lt;br /&gt;
    htar -xpmf proj1.tar&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
* To display the names of the files in the ''out.tar'' archive file within the HPSS home directory, enter (the out.tar.idx file will be queried):&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
    htar -vtf out.tar&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
* To ensure that both the htar and the .idx files have read permission for other members of your group, use the umask option:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
    htar -Humask=0137 ....&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
For more details please check the '''[http://www.mgleicher.us/GEL/htar/ HTAR - Introduction]''' or the '''[http://www.mgleicher.us/GEL/htar/htar_man_page.html HTAR Man Page]''' online&lt;br /&gt;
&lt;br /&gt;
 &lt;br /&gt;
==== Sample tarball create ====&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash -l&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong &lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J htar_create_tarball_in_hpss&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
trap &amp;quot;echo 'Job script not completed';exit 129&amp;quot; TERM INT&lt;br /&gt;
# Note that your initial directory in HPSS will be $ARCHIVE&lt;br /&gt;
&lt;br /&gt;
DEST=$ARCHIVE/finished-job1.tar&lt;br /&gt;
&lt;br /&gt;
# htar WILL overwrite an existing file with the same name so check beforehand.&lt;br /&gt;
 &lt;br /&gt;
hsi ls $DEST &amp;amp;&amp;gt; /dev/null&lt;br /&gt;
status=$?&lt;br /&gt;
 &lt;br /&gt;
if [ $status == 0 ]; then   &lt;br /&gt;
    echo &amp;quot;File $DEST already exists. Nothing has been done&amp;quot;&lt;br /&gt;
    exit 1&lt;br /&gt;
fi&lt;br /&gt;
&lt;br /&gt;
cd $SCRATCH/workarea/ &lt;br /&gt;
htar -Humask=0137 -cpf $DEST finished-job1/ &lt;br /&gt;
status=$?&lt;br /&gt;
 &lt;br /&gt;
trap - TERM INT&lt;br /&gt;
 &lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HTAR returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
'''Note:''' If you attempt to start a transfer with any files larger than 68GB, the whole HTAR session will fail, and you'll get a notification listing all those files so that you can transfer them with HSI instead (a sketch follows the error listing below). &lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
----------------------------------------&lt;br /&gt;
INFO: File too large for htar to handle: finished-job1/file1 (86567185745 bytes)&lt;br /&gt;
INFO: File too large for htar to handle: finished-job1/file2 (71857244579 bytes)&lt;br /&gt;
ERROR: 2 oversize member files found - please correct and retry&lt;br /&gt;
ERROR: [FATAL] error(s) generating filename list &lt;br /&gt;
HTAR: HTAR FAILED&lt;br /&gt;
###WARNING  htar returned non-zero exit status&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
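Such oversize files can be stored individually with HSI; a minimal sketch, using the file names from the error listing above and run from the same working directory as the htar command:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
    hsi cput finished-job1/file1 : $ARCHIVE/file1&lt;br /&gt;
    hsi cput finished-job1/file2 : $ARCHIVE/file2&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;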
&lt;br /&gt;
==== Sample tarball list ====&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash -l&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong &lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J htar_list_tarball_in_hpss&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
trap &amp;quot;echo 'Job script not completed';exit 129&amp;quot; TERM INT&lt;br /&gt;
# Note that your initial directory in HPSS will be $ARCHIVE&lt;br /&gt;
&lt;br /&gt;
DEST=$ARCHIVE/finished-job1.tar&lt;br /&gt;
&lt;br /&gt;
htar -tvf $DEST&lt;br /&gt;
status=$?&lt;br /&gt;
&lt;br /&gt;
trap - TERM INT&lt;br /&gt;
 &lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HTAR returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
==== Sample tarball extract ====&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong &lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J htar_extract_tarball_from_hpss&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
 &lt;br /&gt;
trap &amp;quot;echo 'Job script not completed';exit 129&amp;quot; TERM INT&lt;br /&gt;
# Note that your initial directory in HPSS will be $ARCHIVE&lt;br /&gt;
 &lt;br /&gt;
cd $SCRATCH/recalled-from-hpss&lt;br /&gt;
htar -xpmf $ARCHIVE/finished-job1.tar&lt;br /&gt;
status=$?&lt;br /&gt;
 &lt;br /&gt;
trap - TERM INT&lt;br /&gt;
 &lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HTAR returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
== HSI ==&lt;br /&gt;
&lt;br /&gt;
HSI is likely to be the primary client with which most users will interact with HPSS. It provides an FTP-like interface for archiving and retrieving tarballs or [https://docs.scinet.utoronto.ca/index.php/HPSS#Sample_transferring_directories directory trees]. In addition, it provides a number of shell-like commands that are useful for examining and manipulating the contents of HPSS. The most commonly used commands are:&lt;br /&gt;
{|border=&amp;quot;1&amp;quot; cellpadding=&amp;quot;10&amp;quot; cellspacing=&amp;quot;0&amp;quot;&lt;br /&gt;
|-&lt;br /&gt;
  | cput &lt;br /&gt;
  | Conditionally saves or replaces a VASTpath file to HPSSpath if the VAST version is new or has been updated&lt;br /&gt;
 cput [options] VASTpath [: HPSSpath]&lt;br /&gt;
|-&lt;br /&gt;
  | cget &lt;br /&gt;
  | Conditionally retrieves a copy of a file from HPSS to VAST only if a VAST version does not already exist. &lt;br /&gt;
 cget [options] [VASTpath :] HPSSpath&lt;br /&gt;
|-&lt;br /&gt;
  | cd,mkdir,ls,rm,mv&lt;br /&gt;
  | Operate as one would expect on the contents of HPSS.&lt;br /&gt;
|-&lt;br /&gt;
  | lcd,lls&lt;br /&gt;
  | ''Local'' commands to VAST&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
*There are 3 things to keep in mind about HSI that can generate a bit of confusion when you're first learning how to use it:&lt;br /&gt;
** HSI doesn't currently support renaming directory paths on-the-fly during transfers, so the syntax for cput/cget may not work as one would expect in some scenarios, requiring some workarounds.&lt;br /&gt;
** HSI has an operator &amp;quot;:&amp;quot; which separates the VASTpath and HPSSpath, and must be surrounded by whitespace (one or more space characters)&lt;br /&gt;
** The order for referring to files in HSI syntax is different from FTP. In HSI the general format is always the same, VAST first and HPSS second, for both cput and cget:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
     VASTfile : HPSSfile&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
For example, when using HSI to store the tarball file from VAST into HPSS, then recall it to VAST, the following commands could be used:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
    cput tarball-in-VAST : tarball-in-HPSS&lt;br /&gt;
    cget tarball-recalled : tarball-in-HPSS&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
unlike with FTP, where the following syntax would be used:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
    put tarball-in-VAST tarball-in-HPSS &lt;br /&gt;
    get tarball-in-HPSS tarball-recalled&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
* Simple commands can be executed on a single line.&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
    hsi &amp;quot;mkdir LargeFilesDir; cd LargeFilesDir; cput tarball-in-VAST : tarball-in-HPSS&amp;quot;&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
* More complex sequences can be performed using a here-document, such as this:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
    hsi &amp;lt;&amp;lt;EOF&lt;br /&gt;
      mkdir LargeFilesDir&lt;br /&gt;
      cd LargeFilesDir&lt;br /&gt;
      cput tarball-in-VAST : tarball-in-HPSS&lt;br /&gt;
      lcd $SCRATCH/LargeFilesDir2/&lt;br /&gt;
      cput -Ruph *  &lt;br /&gt;
    end&lt;br /&gt;
    EOF&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
* The commands below are equivalent, but we recommend that you always use full paths and organize the contents of HPSS. The default HSI directory is $ARCHIVE:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
    hsi cput tarball&lt;br /&gt;
    hsi cput tarball : tarball&lt;br /&gt;
    hsi cput $SCRATCH/tarball : $ARCHIVE/tarball&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
* There are no known issues with renaming files on-the-fly:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
    hsi cput $SCRATCH/tarball1 : $ARCHIVE/tarball2&lt;br /&gt;
    hsi cget $SCRATCH/tarball3 : $ARCHIVE/tarball2&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
* However, syntax forms such as the ones below will fail, since they rename the directory paths.&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
   hsi cput -Ruph $SCRATCH/LargeFilesDir : $ARCHIVE/LargeFilesDir     (FAILS)&lt;br /&gt;
OR&lt;br /&gt;
   hsi cget -Ruph $SCRATCH/LargeFilesDir : $ARCHIVE/LargeFilesDir2    (FAILS)&lt;br /&gt;
OR&lt;br /&gt;
   hsi cput -Ruph $SCRATCH/LargeFilesDir/* : $ARCHIVE/LargeFilesDir2  (FAILS)&lt;br /&gt;
OR&lt;br /&gt;
   hsi cget -Ruph $SCRATCH/LargeFilesDir : $ARCHIVE/LargeFilesDir     (FAILS)&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
One workaround is the following 2-step process, where you do an &amp;quot;lcd&amp;quot; in VAST first and recursively transfer the whole directory (-R), keeping the same name. You may use the '-u' option to resume a previously disrupted session, '-p' to preserve timestamps, and '-h' to keep the links.&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
    hsi &amp;lt;&amp;lt;EOF&lt;br /&gt;
      lcd $SCRATCH&lt;br /&gt;
      cget -Ruph LargeFilesDir&lt;br /&gt;
    end&lt;br /&gt;
    EOF&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
Another workaround is to do an &amp;quot;lcd&amp;quot; into the VASTpath first and a &amp;quot;cd&amp;quot; into the HPSSpath, then transfer the files individually with the '*' wildcard. This option lets you change the directory name:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
    hsi &amp;lt;&amp;lt;EOF&lt;br /&gt;
      lcd $SCRATCH/LargeFilesDir&lt;br /&gt;
      mkdir $ARCHIVE/LargeFilesDir2&lt;br /&gt;
      cd $ARCHIVE/LargeFilesDir2&lt;br /&gt;
      cput -Ruph *  &lt;br /&gt;
    end&lt;br /&gt;
    EOF&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
=== Documentation === &lt;br /&gt;
Complete documentation on HSI is available from the Gleicher Enterprises links below. You may peruse those links and come up with alternative syntax forms, and you may already be familiar with HPSS/HSI from other HPC facilities, which may or may not have procedures similar to ours. HSI doesn't always work as expected when you go outside our recommended syntax, so '''we strongly urge that you use the sample scripts we are providing as the basis''' for your job submissions.&lt;br /&gt;
* [http://www.mgleicher.us/index.html/hsi/hsi_reference_manual_2/introduction.html HSI Introduction] (original site inactive)&lt;br /&gt;
* [http://www.mgleicher.us/index.html/hsi/hsi_man_page.html man hsi] (original site inactive)&lt;br /&gt;
* [https://docs.scinet.utoronto.ca/index.php/HSI_help hsi help]&lt;br /&gt;
* [http://www.mgleicher.us/index.html/hsi/hsi-exit-codes.html exit codes] (original site inactive)&lt;br /&gt;
'''Note:''' HSI returns the highest-numbered exit code when multiple operations are performed in the same hsi session. You may use '/scinet/hpss/bin/exit2msg $status' to translate those codes into intelligible messages.&lt;br /&gt;
&lt;br /&gt;
=== Typical Usage Scripts===&lt;br /&gt;
The most common interactions will be ''putting'' data into HPSS, examining the contents (ls,ish), and ''getting'' data back onto VAST for inspection or analysis.&lt;br /&gt;
&lt;br /&gt;
==== Sample data offload ====&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
# This script is named: data-offload.sh&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong &lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J offload&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
trap &amp;quot;echo 'Job script not completed';exit 129&amp;quot; TERM INT&lt;br /&gt;
# individual tarballs already exist&lt;br /&gt;
&lt;br /&gt;
/usr/local/bin/hsi  -v &amp;lt;&amp;lt;EOF1&lt;br /&gt;
mkdir put-away&lt;br /&gt;
cd put-away&lt;br /&gt;
cput $SCRATCH/workarea/finished-job1.tar.gz : finished-job1.tar.gz&lt;br /&gt;
end&lt;br /&gt;
EOF1&lt;br /&gt;
status=$?&lt;br /&gt;
if [ ! $status == 0 ];then&lt;br /&gt;
   echo 'HSI returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&lt;br /&gt;
/usr/local/bin/hsi  -v &amp;lt;&amp;lt;EOF2&lt;br /&gt;
mkdir put-away&lt;br /&gt;
cd put-away&lt;br /&gt;
cput $SCRATCH/workarea/finished-job2.tar.gz : finished-job2.tar.gz&lt;br /&gt;
end&lt;br /&gt;
EOF2&lt;br /&gt;
status=$?&lt;br /&gt;
if [ ! $status == 0 ];then&lt;br /&gt;
   echo 'HSI returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&lt;br /&gt;
trap - TERM INT&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
'''Note:''' as in the above example, we recommend that you capture the (highest-numbered) exit code for each hsi session independently. And remember, you may improve your exit code verbosity by adding the excerpt below to your scripts:&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
if [ ! $status == 0 ];then&lt;br /&gt;
   echo 'HSI returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
==== Sample data list ====&lt;br /&gt;
A very trivial way to list the contents of HPSS would be to just submit the HSI 'ls' command.&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
# This script is named: data-list.sh&lt;br /&gt;
#SBATCH -t 1:00:00&lt;br /&gt;
#SBATCH -p archiveshort&lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J hpss_ls&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
/usr/local/bin/hsi -v &amp;lt;&amp;lt;EOF&lt;br /&gt;
cd put-away&lt;br /&gt;
ls -R&lt;br /&gt;
end&lt;br /&gt;
EOF&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
''Warning: if you have a lot of files, the ls command will take a long time to complete. For instance, about 400,000 files can be listed in about an hour. Adjust the walltime accordingly, to be on the safe side.''&lt;br /&gt;
&lt;br /&gt;
However, we provide a much more useful and convenient way to explore the contents of HPSS with the inventory shell [[ISH]]. This example creates an index of all the files in a user's portion of the namespace. The list is placed in the directory /home/$(whoami)/.ish_register that can be inspected from the login nodes.&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
# This script is named: data-list.sh&lt;br /&gt;
#SBATCH -t 1:00:00&lt;br /&gt;
#SBATCH -p archiveshort&lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J hpss_index&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
INDEX_DIR=$HOME/.ish_register&lt;br /&gt;
if ! [ -e &amp;quot;$INDEX_DIR&amp;quot; ]; then&lt;br /&gt;
  mkdir -p $INDEX_DIR&lt;br /&gt;
fi&lt;br /&gt;
&lt;br /&gt;
export ISHREGISTER=&amp;quot;$INDEX_DIR&amp;quot;&lt;br /&gt;
/scinet/hpss/bin/ish hindex&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
''Note: the above warning on collecting the listing for many files applies here too.''&lt;br /&gt;
&lt;br /&gt;
This index can be browsed or searched with ISH on the development nodes.&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
hpss-archive02-ib:~$  /scinet/hpss/bin/ish ~/.ish_register/hpss.igz &lt;br /&gt;
[ish]hpss.igz&amp;gt; help&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
ISH is a powerful tool that is also useful for creating and browsing indices of tar and htar archives, so please look at the [[ISH|documentation]] or built in help.&lt;br /&gt;
&lt;br /&gt;
==== Sample data recall ====&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
# This script is named: data-recall.sh&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong&lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J recall_files&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
trap &amp;quot;echo 'Job script not completed';exit 129&amp;quot; TERM INT&lt;br /&gt;
mkdir -p $SCRATCH/recalled-from-hpss&lt;br /&gt;
&lt;br /&gt;
# individual tarballs previously organized in HPSS inside the put-away-on-2010/ folder&lt;br /&gt;
hsi  -v &amp;lt;&amp;lt; EOF&lt;br /&gt;
cget $SCRATCH/recalled-from-hpss/Jan-2010-jobs.tar.gz : $ARCHIVE/put-away-on-2010/Jan-2010-jobs.tar.gz&lt;br /&gt;
cget $SCRATCH/recalled-from-hpss/Feb-2010-jobs.tar.gz : $ARCHIVE/put-away-on-2010/Feb-2010-jobs.tar.gz&lt;br /&gt;
end&lt;br /&gt;
EOF&lt;br /&gt;
status=$?&lt;br /&gt;
&lt;br /&gt;
trap - TERM INT&lt;br /&gt;
&lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HSI returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
We should emphasize that a single ''cget'' of multiple files (rather than several separate gets) allows HSI to do optimization, as in the following example:&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
# This script is named: data-recall.sh&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong&lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J recall_files_optimized&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
trap &amp;quot;echo 'Job script not completed';exit 129&amp;quot; TERM INT&lt;br /&gt;
mkdir -p $SCRATCH/recalled-from-hpss&lt;br /&gt;
&lt;br /&gt;
# individual tarballs previously organized in HPSS inside the put-away-on-2010/ folder&lt;br /&gt;
hsi  -v &amp;lt;&amp;lt; EOF&lt;br /&gt;
lcd $SCRATCH/recalled-from-hpss/&lt;br /&gt;
cd $ARCHIVE/put-away-on-2010/&lt;br /&gt;
cget Jan-2010-jobs.tar.gz Feb-2010-jobs.tar.gz&lt;br /&gt;
end&lt;br /&gt;
EOF&lt;br /&gt;
status=$?&lt;br /&gt;
&lt;br /&gt;
trap - TERM INT&lt;br /&gt;
 &lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HSI returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
=== Sample transferring directories ===&lt;br /&gt;
&amp;lt;font color=red&amp;gt;Remember, it's not possible to rename directories or paths on-the-fly:&amp;lt;/font&amp;gt;&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
hsi cget -Ruph $SCRATCH/LargeFiles-recalled : $ARCHIVE/LargeFiles    (FAILS)&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
One workaround is to transfer the whole directory (and sub-directories) recursively:&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
# This script is named: data-recall.sh&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong&lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J recall_directories&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
trap &amp;quot;echo 'Job script not completed';exit 129&amp;quot; TERM INT&lt;br /&gt;
mkdir -p $SCRATCH/recalled&lt;br /&gt;
&lt;br /&gt;
hsi  -v &amp;lt;&amp;lt; EOF&lt;br /&gt;
lcd $SCRATCH/recalled&lt;br /&gt;
cd $ARCHIVE/&lt;br /&gt;
cget -Ruph LargeFiles&lt;br /&gt;
end&lt;br /&gt;
EOF&lt;br /&gt;
status=$?&lt;br /&gt;
 &lt;br /&gt;
trap - TERM INT&lt;br /&gt;
 &lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HSI returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
Another workaround is to transfer files and subdirectories individually with the &amp;quot;*&amp;quot; wildcard:&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
# This script is named: data-recall.sh&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong&lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J recall_directories&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
trap &amp;quot;echo 'Job script not completed';exit 129&amp;quot; TERM INT&lt;br /&gt;
mkdir -p $SCRATCH/LargeFiles-recalled&lt;br /&gt;
&lt;br /&gt;
hsi  -v &amp;lt;&amp;lt; EOF&lt;br /&gt;
lcd $SCRATCH/LargeFiles-recalled&lt;br /&gt;
cd $ARCHIVE/LargeFiles&lt;br /&gt;
cget -Ruph *&lt;br /&gt;
end&lt;br /&gt;
EOF&lt;br /&gt;
status=$?&lt;br /&gt;
 &lt;br /&gt;
trap - TERM INT&lt;br /&gt;
 &lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HSI returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
* For more details please check the '''[http://www.mgleicher.us/GEL/hsi/ HSI Introduction]''', the '''[http://www.mgleicher.us/GEL/hsi/hsi_man_page.html HSI Man Page]''', or the [https://support.scinet.utoronto.ca/wiki/index.php/HSI_help '''hsi help''']&lt;br /&gt;
&lt;br /&gt;
== [[ISH|ISH]] ==&lt;br /&gt;
=== [[ISH|Documentation and Usage]] ===&lt;br /&gt;
&lt;br /&gt;
== File and directory management ==&lt;br /&gt;
=== Moving/renaming ===&lt;br /&gt;
* you may use 'mv' or 'cp' in the same way as the Linux versions.&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong&lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J file_management_script&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
echo &amp;quot;HPSS file and directory management&amp;quot;&lt;br /&gt;
trap &amp;quot;echo 'Job script not completed';exit 129&amp;quot; TERM INT&lt;br /&gt;
&lt;br /&gt;
/usr/local/bin/hsi -v &amp;lt;&amp;lt;EOF1&lt;br /&gt;
    mkdir $ARCHIVE/2011&lt;br /&gt;
    mv $ARCHIVE/oldjobs $ARCHIVE/2011&lt;br /&gt;
    cp -r $ARCHIVE/almostfinished/*done $ARCHIVE/2011&lt;br /&gt;
end&lt;br /&gt;
EOF1&lt;br /&gt;
status=$?&lt;br /&gt;
 &lt;br /&gt;
trap - TERM INT&lt;br /&gt;
 &lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HSI returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
=== Deletions ===&lt;br /&gt;
==== Recommendations ====&lt;br /&gt;
* Be careful with the use of 'cd' commands to non-existing directories before the 'rm' command. Results may be unpredictable&lt;br /&gt;
* Avoid the use of the standalone wildcard '''*'''. If necessary, whenever possible bind it to common patterns, such as '*.tmp', so as to limit unintentional mishaps&lt;br /&gt;
* Avoid using relative paths, even the env variable $ARCHIVE. It is better to explicitly expand the full paths in your scripts&lt;br /&gt;
* Avoid using recursive/looped deletion instructions on $SCRATCH contents from the archive job scripts. Even on $ARCHIVE contents, it may be better to do it as an independent job submission, after you have verified that the original ingestion into HPSS finished without any issues.&lt;br /&gt;
&lt;br /&gt;
==== Typical example ====&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong&lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J deletion_script&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
echo &amp;quot;Deletion of an outdated directory tree in HPSS&amp;quot;&lt;br /&gt;
&lt;br /&gt;
trap &amp;quot;echo 'Job script not completed';exit 129&amp;quot; TERM INT&lt;br /&gt;
# Note that your initial directory in HPSS will be $ARCHIVE&lt;br /&gt;
&lt;br /&gt;
/usr/local/bin/hsi -v &amp;lt;&amp;lt;EOF1&lt;br /&gt;
    rm /archive/scinet/pinto/*.tmp&lt;br /&gt;
    rm -R /archive/scinet/pinto/obsolete&lt;br /&gt;
end&lt;br /&gt;
EOF1&lt;br /&gt;
status=$?&lt;br /&gt;
 &lt;br /&gt;
trap - TERM INT&lt;br /&gt;
 &lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HSI returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
==== Deleting with an interactive HSI session ====&lt;br /&gt;
* You may feel more comfortable acquiring an interactive shell, starting an HSI session and proceeding with your deletions that way. Keep in mind that you're restricted to 1 hour.&lt;br /&gt;
&lt;br /&gt;
* After using the ''salloc -p archiveshort'' command you'll get a standard shell prompt on an archive execution node (hpss-archive02), as you would on any compute node. However you will need to run '''HSI''' or '''HTAR''' to access resources on HPSS.&lt;br /&gt;
&lt;br /&gt;
* HSI will give you a prompt very similar to a standard shell, where you can navigate around using commands such as 'ls', 'cd', 'pwd', etc. NOTE: not every bash command has an equivalent in HSI; for instance, you cannot use 'vi' or 'cat'.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
pinto@nia-login07:~$ salloc -p archiveshort -t 1:00:00&lt;br /&gt;
salloc: Granted job allocation 50359&lt;br /&gt;
salloc: Waiting for resource configuration&lt;br /&gt;
salloc: Nodes hpss-archive02-ib are ready for job&lt;br /&gt;
&lt;br /&gt;
hpss-archive02-ib:~$ hsi&lt;br /&gt;
******************************************************************&lt;br /&gt;
*     Welcome to HPSS@SciNet - High Perfomance Storage System    *&lt;br /&gt;
*                                                                * &lt;br /&gt;
*            INFO: THIS IS THE NEW 7.5.1 HPSS SYSTEM!            *&lt;br /&gt;
*                                                                *&lt;br /&gt;
*        Contact Information: support@scinet.utoronto.ca         *&lt;br /&gt;
*  NOTE: do not transfer SMALL FILES with HSI. Use HTAR instead  *&lt;br /&gt;
*              CHECK THE INTEGRITY OF YOUR TARBALLS              *&lt;br /&gt;
******************************************************************&lt;br /&gt;
&lt;br /&gt;
[HSI]/archive/scinet/pinto-&amp;gt; rm -R junk&lt;br /&gt;
&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
== HPSS for the 'Watchmaker' ==&lt;br /&gt;
=== Efficient alternative to htar ===&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong&lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J tar_create_tarball_in_hpss_with_hsi_by_piping&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
trap &amp;quot;echo 'Job script not completed';exit 129&amp;quot; TERM INT&lt;br /&gt;
# Note that your initial directory in HPSS will be $ARCHIVE&lt;br /&gt;
&lt;br /&gt;
# When using a pipeline like this, make sure a failure in any stage is reported&lt;br /&gt;
set -o pipefail &lt;br /&gt;
&lt;br /&gt;
# to put (cput will fail)&lt;br /&gt;
tar -cf - $SCRATCH/mydir | hsi put - : $ARCHIVE/mydir.tar&lt;br /&gt;
status=$?&lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'TAR+HSI+piping returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&lt;br /&gt;
# to immediately generate an index&lt;br /&gt;
/scinet/hpss/bin/ish hindex $ARCHIVE/mydir.tar&lt;br /&gt;
status=$?&lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'ISH returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&lt;br /&gt;
# to get&lt;br /&gt;
#cd $SCRATCH&lt;br /&gt;
#hsi cget - : $ARCHIVE/mydir.tar | tar -xvf - &lt;br /&gt;
#status=$?&lt;br /&gt;
# if [ ! $status == 0 ]; then&lt;br /&gt;
#   echo 'TAR+HSI+piping returned non-zero code.'&lt;br /&gt;
#   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
#   exit $status&lt;br /&gt;
#else&lt;br /&gt;
#   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
#fi&lt;br /&gt;
&lt;br /&gt;
trap - TERM INT&lt;br /&gt;
 &lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
'''Notes:''' &lt;br /&gt;
* Combining commands in this fashion, besides being HPSS-friendly, should not be noticeably slower than a recursive put with HSI, which stores each file one by one. However, reading the files back from tape in this format will be many times faster. It also overcomes the current 68GB limit on the size of files stored with htar.&lt;br /&gt;
* To top things off, we recommend indexing with ish (in the same script) immediately after the tarball creation, while it still resides in the HPSS disk cache. The result is equivalent to having used htar.&lt;br /&gt;
* To ensure that an error at any stage of the pipeline shows up in the returned status use: ''set -o pipefail'' (The default is to return the status of the last command in the pipeline and this is not what you want.)&lt;br /&gt;
* Optimal performance for aggregated transfers and allocation on tapes is obtained with [[Why not tarballs too large |&amp;lt;font color=red&amp;gt;tarballs of size 500GB or less&amp;lt;/font&amp;gt;]], whether ingested by htar or hsi ([[Why not tarballs too large | &amp;lt;font color=red&amp;gt;WHY?&amp;lt;/font&amp;gt;]]). Be sure to check the total amount of data in the directory tree with 'du' before sending it to the tar+HSI piping, as in the quick check below.&lt;br /&gt;
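A minimal sketch of that check, using the directory from the example above:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
    du -sh $SCRATCH/mydir&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;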
&lt;br /&gt;
=== Multi-threaded gzip'ed compression with pigz ===&lt;br /&gt;
We compiled a multi-threaded implementation of gzip called pigz (http://zlib.net/pigz/). It's now part of the &amp;quot;extras&amp;quot; module, and it can also be used on any compute or devel nodes. This makes the execution of the previous version of the script much quicker than if you were to use 'tar -czf'. In addition, by piggy-backing ISH onto the end of the script, it will know what to do with the just-created mydir.tar.gz compressed tarball.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong&lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J tar_create_compressed_tarball_in_hpss_with_hsi_by_piping&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
 &lt;br /&gt;
trap &amp;quot;echo 'Job script not completed';exit 129&amp;quot; TERM INT&lt;br /&gt;
# Note that your initial directory in HPSS will be $ARCHIVE&lt;br /&gt;
&lt;br /&gt;
# When using a pipeline like this, make sure a failure in any stage is reported&lt;br /&gt;
set -o pipefail &lt;br /&gt;
&lt;br /&gt;
# to put (cput will fail)&lt;br /&gt;
tar -cf - $SCRATCH/mydir | pigz | hsi put - : $ARCHIVE/mydir.tar.gz&lt;br /&gt;
status=$?&lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'TAR+PIGZ+HSI+piping returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
NOTE: Do not use this type of pipeline to serialize recalls with HSI inside a for loop. That is a very inefficient way for HPSS to handle this type of operation, since it forces the system to fetch the tarballs in the sequence of your loop, or in a numeric/alphabetic/chronological order, rather than in the order in which they sit on the tapes. Files are not necessarily written to tapes in the same sequence in which they are ingested, so there will be many unnecessary mounts/dismounts, and a lot of fast-forwarding and rewinding, depending on where the files are on the tapes, which can be extremely stressful to the hardware. This effect is known as &amp;quot;shoe-shining&amp;quot;.&lt;br /&gt;
&lt;br /&gt;
Instead, provide HSI with a full list of tarballs to be recalled, all at once, and HSI will sort the list into the most convenient order possible (the sequence in which the files are laid out on the tapes) and fetch all the files from each tape in one go.&lt;br /&gt;
Afterwards, once all the tarballs are back on your $SCRATCH, you may use a loop to unpigz them, as sketched below.&lt;br /&gt;
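Here is a minimal sketch of that pattern, reusing the tarball names and the ''put-away-on-2010'' folder from the earlier recall examples:&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
# recall all tarballs in a single hsi session so HSI can order the tape reads&lt;br /&gt;
mkdir -p $SCRATCH/recalled-from-hpss&lt;br /&gt;
hsi -v &amp;lt;&amp;lt;EOF&lt;br /&gt;
lcd $SCRATCH/recalled-from-hpss&lt;br /&gt;
cd $ARCHIVE/put-away-on-2010&lt;br /&gt;
cget Jan-2010-jobs.tar.gz Feb-2010-jobs.tar.gz&lt;br /&gt;
end&lt;br /&gt;
EOF&lt;br /&gt;
&lt;br /&gt;
# only after all the recalls have finished, decompress locally in a loop&lt;br /&gt;
# (unpigz comes with the pigz build mentioned above)&lt;br /&gt;
cd $SCRATCH/recalled-from-hpss&lt;br /&gt;
for f in *.tar.gz; do&lt;br /&gt;
    unpigz $f&lt;br /&gt;
done&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;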
&lt;br /&gt;
=== Content Verification ===&lt;br /&gt;
&lt;br /&gt;
==== HTAR CRC checksums ====&lt;br /&gt;
The -Hcrc option specifies that HTAR should generate CRC checksums when creating the archive, and -Hverify=1 (used in the sample below) verifies the archive after creation.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong&lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J htar_create_tarball_in_hpss_with_checksum_verification&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
trap &amp;quot;echo 'Job script not completed';exit 129&amp;quot; TERM INT&lt;br /&gt;
# Note that your initial directory in HPSS will be $ARCHIVE&lt;br /&gt;
 &lt;br /&gt;
cd $SCRATCH/workarea&lt;br /&gt;
&lt;br /&gt;
# to put&lt;br /&gt;
htar -Humask=0137 -cpf $ARCHIVE/finished-job1.tar -Hcrc -Hverify=1 finished-job1/&lt;br /&gt;
&lt;br /&gt;
# to get&lt;br /&gt;
#mkdir $SCRATCH/verification&lt;br /&gt;
#cd $SCRATCH/verification&lt;br /&gt;
#htar -Hcrc -xvpmf $ARCHIVE/finished-job1.tar &lt;br /&gt;
&lt;br /&gt;
status=$?&lt;br /&gt;
 &lt;br /&gt;
trap - TERM INT&lt;br /&gt;
 &lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HTAR returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
==== Current HSI version - Checksum built-in ====&lt;br /&gt;
&lt;br /&gt;
MD5 is the standard hashing algorithm for the HSI build at SciNet. For hsi ingestions with the '-c on' option you should be able to query the md5 hash with the hsi command 'lshash' (as in the sample below). That value is stored as a UDA (User Defined Attribute) for each file (a feature of HPSS starting with 7.4).&lt;br /&gt;
&lt;br /&gt;
[http://www.mgleicher.us/GEL/hsi/hsi/hsi_reference_manual_2/checksum-feature.html More usage details here]&lt;br /&gt;
&lt;br /&gt;
The checksum algorithm is very CPU-intensive. Although the checksum code is compiled with a high level of compiler optimization, transfer rates can be significantly reduced when checksum creation or verification is in effect. The amount of degradation in transfer rates depends on several factors, such as processor speed, network transfer speed, and the speed of the local filesystem (VAST).&lt;br /&gt;
&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong&lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J MD5_checksum_verified_transfer&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
thefile=&amp;lt;VASTpath&amp;gt;&lt;br /&gt;
storedfile=&amp;lt;HPSSpath&amp;gt;&lt;br /&gt;
&lt;br /&gt;
# Generate checksum on fly (-c on)&lt;br /&gt;
hsi -q put -c on $thefile : $storedfile&lt;br /&gt;
&lt;br /&gt;
# Check the exit code of the HSI command&lt;br /&gt;
status=$?&lt;br /&gt;
&lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HSI returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&lt;br /&gt;
# verify checksum&lt;br /&gt;
hsi lshash $storedfile&lt;br /&gt;
status=$?&lt;br /&gt;
 &lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HSI returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&lt;br /&gt;
# get the file back with checksum&lt;br /&gt;
hsi get -c on $storedfile&lt;br /&gt;
status=$?&lt;br /&gt;
 &lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HSI returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
==== Prior to HSI version 4.0.1.1 ====&lt;br /&gt;
&lt;br /&gt;
This will checksum the contents of the HPSSpath against the original VASTpath after the transfer has finished.&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong&lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J checksum_verified_transfer&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
thefile=&amp;lt;VASTpath&amp;gt;&lt;br /&gt;
storedfile=&amp;lt;HPSSpath&amp;gt;&lt;br /&gt;
fname=$(basename $thefile)   # used for the checksum file name below&lt;br /&gt;
&lt;br /&gt;
# Generate checksum on fly using a named pipe so that file is only read from VAST once&lt;br /&gt;
mkfifo /tmp/NPIPE&lt;br /&gt;
cat $thefile  | tee /tmp/NPIPE | hsi -q put - : $storedfile &amp;amp;&lt;br /&gt;
pid=$!&lt;br /&gt;
md5sum /tmp/NPIPE |tee /tmp/$fname.md5&lt;br /&gt;
rm -f  /tmp/NPIPE&lt;br /&gt;
&lt;br /&gt;
# Check the exit code of the HSI process  &lt;br /&gt;
wait $pid&lt;br /&gt;
status=$?&lt;br /&gt;
&lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HSI returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&lt;br /&gt;
# change filename to stdin in checksum file&lt;br /&gt;
sed -i.1 &amp;quot;s+/tmp/NPIPE+-+&amp;quot; /tmp/$fname.md5&lt;br /&gt;
&lt;br /&gt;
# verify checksum&lt;br /&gt;
hsi -q get - : $storedfile  | md5sum -c  /tmp/$fname.md5&lt;br /&gt;
status=$?&lt;br /&gt;
 &lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HSI returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
=== Splitting tarballs ===&lt;br /&gt;
&lt;br /&gt;
By knowing the size of your files or directories you can decide how to divide and organize them into different archives if necessary, so as not to create huge tarballs in the first place. &lt;br /&gt;
&lt;br /&gt;
However, if you find it more convenient to bundle a whole directory into a big tarball, you can still divide it into small chunks prior to ingesting them into HPSS, by using the following syntax:&lt;br /&gt;
&lt;br /&gt;
&amp;lt;code&amp;gt;split -b &amp;lt;Size-in-GB&amp;gt; &amp;lt;tar-file-name&amp;gt; &amp;lt;prefix-name&amp;gt;&amp;lt;/code&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&amp;lt;source lang=&amp;quot;console&amp;quot;&amp;gt;&lt;br /&gt;
$ split -b 500GB results.tar small-chunk&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
The option '''b''' fixes the size of the small chunks, and '''prefix-name''' is the base name for the chunks. The above command will split '''results.tar''' into 500 GB chunks in the current working directory, named small-chunkaa, small-chunkab, small-chunkac, and so on. You may then ingest them into HPSS using any of the 3 methods described above (HSI, VFS, Globus).&lt;br /&gt;
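For instance, a minimal sketch of ingesting the chunks with HSI (this assumes the chunks were created in $SCRATCH/workarea, and ''results-chunks'' is an illustrative HPSS folder name):&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
# store all chunks in one hsi session&lt;br /&gt;
hsi &amp;lt;&amp;lt;EOF&lt;br /&gt;
mkdir results-chunks&lt;br /&gt;
cd results-chunks&lt;br /&gt;
lcd $SCRATCH/workarea&lt;br /&gt;
cput small-chunk*&lt;br /&gt;
end&lt;br /&gt;
EOF&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;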
&lt;br /&gt;
To rebuild the original tarball, recall the individual chunks from HPSS and use the &amp;lt;code&amp;gt;cat&amp;lt;/code&amp;gt; command as follows:&lt;br /&gt;
&lt;br /&gt;
&amp;lt;source lang=&amp;quot;console&amp;quot;&amp;gt;&lt;br /&gt;
$ cat small-chunk* &amp;gt; your_archive_name.tar&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
In case you want numeric suffixes instead of alphabetic ones, use the '''-d''' option in the above split command.&lt;br /&gt;
&lt;br /&gt;
== Access to HPSS using Globus ==&lt;br /&gt;
* You may now transfer data between SciNet's HPSS and an external source&lt;br /&gt;
* Follow the link below &lt;br /&gt;
  https://globus.alliancecan.ca&lt;br /&gt;
: Enter your {{Alliance}} username and password (and use your MFA).&lt;br /&gt;
* In the 'File Manager' tab, enter ''''alliancecan#hpss'''' as one of the Endpoints. If you are seeing 'Missing required data_access_consent', authenticate this endpoint by clicking on 'Continue' and entering your username and password once more.&lt;br /&gt;
* You may read more about the {{Alliance}} Globus Portal here:&lt;br /&gt;
  https://docs.alliancecan.ca/wiki/Globus&lt;br /&gt;
&lt;br /&gt;
== User provided Content/Suggestions ==&lt;br /&gt;
&lt;br /&gt;
== [[HPSS-by-pomes|Packing up large data sets and putting them on HPSS]] ==&lt;br /&gt;
(Pomés group recommendations)&lt;br /&gt;
&lt;br /&gt;
[[Data Management|BACK TO Data Management]]&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7358</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7358"/>
		<updated>2025-12-04T15:21:57Z</updated>

		<summary type="html">&lt;p&gt;Pinto: /* System Status */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Up3 | Trillium|https://docs.alliancecan.ca/wiki/Trillium_Quickstart}}&lt;br /&gt;
|{{Up | OnDemand|Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Up | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | HPSS|HPSS}}&lt;br /&gt;
|{{Up | Balam|Balam}}&lt;br /&gt;
|{{Up | S4H | S4H}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down | Teach|Teach}}&lt;br /&gt;
|{{Up3 | File system|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Storage}}&lt;br /&gt;
|{{Up3 | External Network|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Logging_in}} &lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''Tue Dec 3, 2025, 11:30 am:''' Open OnDemand is fully operational again.&lt;br /&gt;
&lt;br /&gt;
'''Sat Nov 29, 2025, 00:40 am:''' There has been a problem with the water chiller. Some systems are offline.&lt;br /&gt;
&lt;br /&gt;
'''Wed Nov 5, 2025, 12:55 pm:''' Balam is back online.&lt;br /&gt;
&lt;br /&gt;
'''Wed Nov 5, 2025, 10:00 am:''' Open OnDemand is back online.&lt;br /&gt;
&lt;br /&gt;
'''Tue Nov 4, 2025, 11:00 pm:''' Most of the work is done, data movers, Globus, and HPSS are back online. Remaining services will be worked on tomorrow.&lt;br /&gt;
&lt;br /&gt;
'''Tue Nov 4, 2025, 8:30 am:''' Scheduled network maintenance. Trillium cluster is *not* affected.&lt;br /&gt;
&lt;br /&gt;
'''Tue Oct 21, 2025, 17:30 am:''' Balam maintenance finished.&lt;br /&gt;
&lt;br /&gt;
'''Tue Oct 21, 2025, 7:00 am:''' Balam maintenance day.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 15, 2025, 3:55 pm:''' Trillium inbound connections through trillium.alliancecan.ca or trillium.scinet.utoronto.ca are working again.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 15, 2025, 3:05 pm:''' Trillium is experiencing external network issues for both incoming traffic. Please try: ssh USERNAME@tri-login01.scinet.utoronto.ca in the meantime.&lt;br /&gt;
 &lt;br /&gt;
'''Thu Oct 06, 2025, 8:00 pm:''' HPSS is fully functional. You may submit archive jobs from trillium login nodes, datamovers and robots.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 03, 2025, 6:30 pm:''' HPSS is back online, and already accessible via alliancecan#hpss Globus endpoint. Directory tree now follows the other Alliance clusters. We're still working on job submission via Slurm&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 01, 2025, 0:00 am:''' Niagara compute nodes are now unavailable for regular users. The login nodes will remain available for a while to allow a few last data transfers, although transfers from the Niagara file systems to Trillium are best done on nia-dm1.scinet.utoronto.ca.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 01, 2025, 9:30 am:''' HPSS is down for scheduled maintenance, including alliancecan#hpss Globus endpoint&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [https://docs.alliancecan.ca/wiki/Trillium_Quickstart Trillium Quickstart]&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7352</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7352"/>
		<updated>2025-12-04T14:33:12Z</updated>

		<summary type="html">&lt;p&gt;Pinto: /* System Status */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Up3 | Trillium|https://docs.alliancecan.ca/wiki/Trillium_Quickstart}}&lt;br /&gt;
|{{Down | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Down | Teach|Teach}}&lt;br /&gt;
|{{Down | Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | OnDemand|Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Up3 | Scheduler|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Submitting_jobs_to_the_scheduler}}&lt;br /&gt;
|{{Up3 | File system|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Storage}}&lt;br /&gt;
|{{Up | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down | HPSS|HPSS}}&lt;br /&gt;
|{{Up3 | Login Nodes|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up3 | External Network|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up  | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | Balam|Balam}}&lt;br /&gt;
|{{Up3 | Cvmfs|https://docs.alliancecan.ca/wiki/Standard_software_environments}}&lt;br /&gt;
|{{Up | S4H | S4H}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''Tue Dec 3, 2025, 11:30 am:''' Open OnDemand is fully operational again.&lt;br /&gt;
&lt;br /&gt;
'''Sat Nov 29, 2025, 00:40 am:''' There has been a problem with the water chiller. Some systems are offline.&lt;br /&gt;
&lt;br /&gt;
'''Wed Nov 5, 2025, 12:55 pm:''' Balam is back online.&lt;br /&gt;
&lt;br /&gt;
'''Wed Nov 5, 2025, 10:00 am:''' Open OnDemand is back online.&lt;br /&gt;
&lt;br /&gt;
'''Tue Nov 4, 2025, 11:00 pm:''' Most of the work is done, data movers, Globus, and HPSS are back online. Remaining services will be worked on tomorrow.&lt;br /&gt;
&lt;br /&gt;
'''Tue Nov 4, 2025, 8:30 am:''' Scheduled network maintenance. Trillium cluster is *not* affected.&lt;br /&gt;
&lt;br /&gt;
'''Tue Oct 21, 2025, 17:30 am:''' Balam maintenance finished.&lt;br /&gt;
&lt;br /&gt;
'''Tue Oct 21, 2025, 7:00 am:''' Balam maintenance day.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 15, 2025, 3:55 pm:''' Trillium inbound connections through trillium.alliancecan.ca or trillium.scinet.utoronto.ca are working again.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 15, 2025, 3:05 pm:''' Trillium is experiencing external network issues for both incoming traffic. Please try: ssh USERNAME@tri-login01.scinet.utoronto.ca in the meantime.&lt;br /&gt;
 &lt;br /&gt;
'''Thu Oct 06, 2025, 8:00 pm:''' HPSS is fully functional. You may submit archive jobs from trillium login nodes, datamovers and robots.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 03, 2025, 6:30 pm:''' HPSS is back online, and already accessible via alliancecan#hpss Globus endpoint. Directory tree now follows the other Alliance clusters. We're still working on job submission via Slurm&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 01, 2025, 0:00 am:''' Niagara compute nodes are now unavailable for regular users. The login nodes will remain available for a while to allow a few last data transfers, although transfers from the Niagara file systems to Trillium are best done on nia-dm1.scinet.utoronto.ca.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 01, 2025, 9:30 am:''' HPSS is down for scheduled maintenance, including alliancecan#hpss Globus endpoint&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [https://docs.alliancecan.ca/wiki/Trillium_Quickstart Trillium Quickstart]&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7334</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7334"/>
		<updated>2025-12-01T18:47:09Z</updated>

		<summary type="html">&lt;p&gt;Pinto: /* System Status */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Up3 | Trillium|https://docs.alliancecan.ca/wiki/Trillium_Quickstart}}&lt;br /&gt;
|{{Down | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Down | Teach|Teach}}&lt;br /&gt;
|{{Down | Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down | OnDemand|Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Up3 | Scheduler|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Submitting_jobs_to_the_scheduler}}&lt;br /&gt;
|{{Up3 | File system|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Storage}}&lt;br /&gt;
|{{Up | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | HPSS|HPSS}}&lt;br /&gt;
|{{Up3 | Login Nodes|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up3 | External Network|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up  | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | Balam|Balam}}&lt;br /&gt;
|{{Up3 | Cvmfs|https://docs.alliancecan.ca/wiki/Standard_software_environments}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''Sat Nov 29, 2025, 00:40 am:''' There has been a problem with the water chiller. Some systems are offline.&lt;br /&gt;
&lt;br /&gt;
'''Wed Nov 5, 2025, 12:55 pm:''' Balam is back online.&lt;br /&gt;
&lt;br /&gt;
'''Wed Nov 5, 2025, 10:00 am:''' Open OnDemand is back online.&lt;br /&gt;
&lt;br /&gt;
'''Tue Nov 4, 2025, 11:00 pm:''' Most of the work is done, data movers, Globus, and HPSS are back online. Remaining services will be worked on tomorrow.&lt;br /&gt;
&lt;br /&gt;
'''Tue Nov 4, 2025, 8:30 am:''' Scheduled network maintenance. Trillium cluster is *not* affected.&lt;br /&gt;
&lt;br /&gt;
'''Tue Oct 21, 2025, 17:30 am:''' Balam maintenance finished.&lt;br /&gt;
&lt;br /&gt;
'''Tue Oct 21, 2025, 7:00 am:''' Balam maintenance day.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 15, 2025, 3:55 pm:''' Trillium inbound connections through trillium.alliancecan.ca or trillium.scinet.utoronto.ca are working again.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 15, 2025, 3:05 pm:''' Trillium is experiencing external network issues for both incoming traffic. Please try: ssh USERNAME@tri-login01.scinet.utoronto.ca in the meantime.&lt;br /&gt;
 &lt;br /&gt;
'''Thu Oct 06, 2025, 8:00 pm:''' HPSS is fully functional. You may submit archive jobs from trillium login nodes, datamovers and robots.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 03, 2025, 6:30 pm:''' HPSS is back online, and already accessible via alliancecan#hpss Globus endpoint. Directory tree now follows the other Alliance clusters. We're still working on job submission via Slurm&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 01, 2025, 0:00 am:''' Niagara compute nodes are now unavailable for regular users. The login nodes will remain available for a while to allow a few last data transfers, although transfers from the Niagara file systems to Trillium are best done on nia-dm1.scinet.utoronto.ca.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 01, 2025, 9:30 am:''' HPSS is down for scheduled maintenance, including alliancecan#hpss Globus endpoint&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [https://docs.alliancecan.ca/wiki/Trillium_Quickstart Trillium Quickstart]&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7331</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7331"/>
		<updated>2025-12-01T15:47:47Z</updated>

		<summary type="html">&lt;p&gt;Pinto: /* System Status */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Up3 | Trillium|https://docs.alliancecan.ca/wiki/Trillium_Quickstart}}&lt;br /&gt;
|{{Down | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Down | Teach|Teach}}&lt;br /&gt;
|{{Down | Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down | OnDemand|Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Up3 | Scheduler|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Submitting_jobs_to_the_scheduler}}&lt;br /&gt;
|{{Up3 | File system|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Storage}}&lt;br /&gt;
|{{Up | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | HPSS|HPSS}}&lt;br /&gt;
|{{Up3 | Login Nodes|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up3 | External Network|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up  | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down | Balam|Balam}}&lt;br /&gt;
|{{Up3 | Cvmfs|https://docs.alliancecan.ca/wiki/Standard_software_environments}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''Sat Nov 29, 2025, 00:40 am:''' There has been a problem with the water chiller. Some systems are offline.&lt;br /&gt;
&lt;br /&gt;
'''Wed Nov 5, 2025, 12:55 pm:''' Balam is back online.&lt;br /&gt;
&lt;br /&gt;
'''Wed Nov 5, 2025, 10:00 am:''' Open OnDemand is back online.&lt;br /&gt;
&lt;br /&gt;
'''Tue Nov 4, 2025, 11:00 pm:''' Most of the work is done, data movers, Globus, and HPSS are back online. Remaining services will be worked on tomorrow.&lt;br /&gt;
&lt;br /&gt;
'''Tue Nov 4, 2025, 8:30 am:''' Scheduled network maintenance. Trillium cluster is *not* affected.&lt;br /&gt;
&lt;br /&gt;
'''Tue Oct 21, 2025, 5:30 pm:''' Balam maintenance finished.&lt;br /&gt;
&lt;br /&gt;
'''Tue Oct 21, 2025, 7:00 am:''' Balam maintenance day.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 15, 2025, 3:55 pm:''' Trillium inbound connections through trillium.alliancecan.ca or trillium.scinet.utoronto.ca are working again.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 15, 2025, 3:05 pm:''' Trillium is experiencing external network issues for incoming traffic. Please try: ssh USERNAME@tri-login01.scinet.utoronto.ca in the meantime.&lt;br /&gt;
 &lt;br /&gt;
'''Thu Oct 06, 2025, 8:00 pm:''' HPSS is fully functional. You may submit archive jobs from trillium login nodes, datamovers and robots.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 03, 2025, 6:30 pm:''' HPSS is back online, and already accessible via alliancecan#hpss Globus endpoint. Directory tree now follows the other Alliance clusters. We're still working on job submission via Slurm&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 01, 2025, 0:00 am:''' Niagara compute nodes are now unavailable for regular users. The login nodes will remain available for a while to allow a few last data transfers, although transfers from the Niagara file systems to Trillium are best done on nia-dm1.scinet.utoronto.ca.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 01, 2025, 9:30 am:''' HPSS is down for scheduled maintenance, including alliancecan#hpss Globus endpoint&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [https://docs.alliancecan.ca/wiki/Trillium_Quickstart Trillium Quickstart]&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7328</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7328"/>
		<updated>2025-12-01T15:47:26Z</updated>

		<summary type="html">&lt;p&gt;Pinto: /* System Status */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Up3 | Trillium|https://docs.alliancecan.ca/wiki/Trillium_Quickstart}}&lt;br /&gt;
|{{Down | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Down | Teach|Teach}}&lt;br /&gt;
|{{Up | Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down | OnDemand|Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Up3 | Scheduler|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Submitting_jobs_to_the_scheduler}}&lt;br /&gt;
|{{Up3 | File system|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Storage}}&lt;br /&gt;
|{{Down | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | HPSS|HPSS}}&lt;br /&gt;
|{{Up3 | Login Nodes|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up3 | External Network|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up  | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down | Balam|Balam}}&lt;br /&gt;
|{{Up3 | Cvmfs|https://docs.alliancecan.ca/wiki/Standard_software_environments}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''Sat Nov 29, 2025, 00:40 am:''' There has been a problem with the water chiller. Some systems are offline.&lt;br /&gt;
&lt;br /&gt;
'''Wed Nov 5, 2025, 12:55 pm:''' Balam is back online.&lt;br /&gt;
&lt;br /&gt;
'''Wed Nov 5, 2025, 10:00 am:''' Open OnDemand is back online.&lt;br /&gt;
&lt;br /&gt;
'''Tue Nov 4, 2025, 11:00 pm:''' Most of the work is done, data movers, Globus, and HPSS are back online. Remaining services will be worked on tomorrow.&lt;br /&gt;
&lt;br /&gt;
'''Tue Nov 4, 2025, 8:30 am:''' Scheduled network maintenance. Trillium cluster is *not* affected.&lt;br /&gt;
&lt;br /&gt;
'''Tue Oct 21, 2025, 5:30 pm:''' Balam maintenance finished.&lt;br /&gt;
&lt;br /&gt;
'''Tue Oct 21, 2025, 7:00 am:''' Balam maintenance day.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 15, 2025, 3:55 pm:''' Trillium inbound connections through trillium.alliancecan.ca or trillium.scinet.utoronto.ca are working again.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 15, 2025, 3:05 pm:''' Trillium is experiencing external network issues for incoming traffic. Please try: ssh USERNAME@tri-login01.scinet.utoronto.ca in the meantime.&lt;br /&gt;
 &lt;br /&gt;
'''Thu Oct 06, 2025, 8:00 pm:''' HPSS is fully functional. You may submit archive jobs from trillium login nodes, datamovers and robots.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 03, 2025, 6:30 pm:''' HPSS is back online, and already accessible via alliancecan#hpss Globus endpoint. Directory tree now follows the other Alliance clusters. We're still working on job submission via Slurm&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 01, 2025, 0:00 am:''' Niagara compute nodes are now unavailable for regular users. The login nodes will remain available for a while to allow a few last data transfers, although transfers from the Niagara file systems to Trillium are best done on nia-dm1.scinet.utoronto.ca.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 01, 2025, 9:30 am:''' HPSS is down for scheduled maintenance, including alliancecan#hpss Globus endpoint&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [https://docs.alliancecan.ca/wiki/Trillium_Quickstart Trillium Quickstart]&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=HPSS&amp;diff=7214</id>
		<title>HPSS</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=HPSS&amp;diff=7214"/>
		<updated>2025-10-18T16:48:29Z</updated>

		<summary type="html">&lt;p&gt;Pinto: /* HTAR */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;{|align=right&lt;br /&gt;
|align=center|'''Topology Overview'''&lt;br /&gt;
|-&lt;br /&gt;
|[[Image:HPSS-overview.jpg|right|x600px]]&lt;br /&gt;
|-&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
== High Performance Storage System ==&lt;br /&gt;
&lt;br /&gt;
The High Performance Storage System ([https://www.hpss-collaboration.org/index.shtml HPSS] [https://en.wikipedia.org/wiki/High_Performance_Storage_System wikipedia]) is a tape-backed hierarchical storage system that provides a significant portion of the allocated storage space at SciNet. It is a repository for archiving data that is not being actively used. Data can be returned to the active shared parallel filesystem on Trillium (&amp;quot;VAST&amp;quot;) when it is needed.  &lt;br /&gt;
&lt;br /&gt;
In the context of the {{Alliance}} Resource Allocations, it is called 'nearline'.&lt;br /&gt;
&lt;br /&gt;
Since this system is intended for large data storage, it is accessible only to groups who have been awarded storage space at SciNet beyond 5TB in the yearly RAC resource allocation round. However, upon request, any user may be awarded access to HPSS, up to 2TB per group, so that you can get familiar with the system (just email support@scinet.utoronto.ca).&lt;br /&gt;
&lt;br /&gt;
Access and transfer of data into and out of HPSS is done under the control of the user, whose interaction is expected to be scripted and submitted as a batch job, using one or more of the following utilities:&lt;br /&gt;
* [https://www.racf.bnl.gov/Facility/HPSS/Documentation/HSI/doc_intro.html HSI] is a client with an ftp-like functionality which can be used to archive and retrieve large files. It is also useful for browsing the contents of HPSS.&lt;br /&gt;
* [https://www.sdcc.bnl.gov/sites/default/files/2021-09/htar.txt HTAR] is a utility that creates tar formatted archives directly into HPSS. It also creates a separate index file (.idx) that can be accessed and browsed quickly.&lt;br /&gt;
* [https://support.scinet.utoronto.ca/wiki/index.php/ISH ISH] is a TUI utility that can perform an inventory of the files and directories in your tarballs.&lt;br /&gt;
&lt;br /&gt;
We're currently running HPSS v 11.2.&lt;br /&gt;
&lt;br /&gt;
== Why should I use and trust HPSS? ==&lt;br /&gt;
* HPSS is a 25-year-old collaboration between IBM and the DoE labs in the US, and is used by about 45 facilities in the [http://www.top500.org “Top 500”] HPC list (plus some black sites).&lt;br /&gt;
* Over 2.5 exabytes of combined storage worldwide.&lt;br /&gt;
* The top 3 sites in the world reported (fall 2017) having 360PB, 220PB and 125PB in production (ECMWF, UKMO and BNL).&lt;br /&gt;
* Environment Canada also adopted HPSS in 2017 to store Nav Canada data as well as to serve as their own archive. It currently has 2 x 100PB of capacity installed.&lt;br /&gt;
* The SciNet HPSS system has been providing nearline capacity for important research data in Canada since early 2011, and had already reached the 10PB level by 2018.&lt;br /&gt;
* Very reliable, with data redundancy and data insurance built in (dual copies of everything are kept on tapes at SciNet).&lt;br /&gt;
* Data on cache and tapes can be geo-distributed for further resilience and HA.&lt;br /&gt;
* Highly scalable; after the hardware and software upgrades in 2019, aggregated ingest and recall performance at SciNet is ~150 TB/day.&lt;br /&gt;
* The HSI/HTAR clients are also very reliable and are used at several HPSS sites. ISH was written at SciNet.&lt;br /&gt;
* [[Media:HPSS_rationale_SNUG.pdf|HPSS fits well with the Storage Capacity Expansion Plan at SciNet]] (pdf presentation)&lt;br /&gt;
&lt;br /&gt;
== Guidelines ==&lt;br /&gt;
* A large portion of the storage capacity of HPSS is provided on tape -- a medium that is not suited for storing small files. Files smaller than ~200MB should be grouped into tarballs with '''tar''' or '''htar'''.&lt;br /&gt;
* If you intend to use '''HSI''' or '''Globus''' to ingest material, you should favor tarballs over expanded directory trees. &amp;lt;font color=red&amp;gt;Your average file size on archive should be over 1GB/file&amp;lt;/font&amp;gt; (a quick way to estimate this is sketched just after this list).&lt;br /&gt;
* Optimal performance for aggregated transfers and allocation on tapes is obtained with [[Why not tarballs too large |&amp;lt;font color=red&amp;gt;tarballs of size 1TB or less&amp;lt;/font&amp;gt;]], whether ingested by htar or hsi ([[Why not tarballs too large |&amp;lt;font color=red&amp;gt;for good reasons&amp;lt;/font&amp;gt;]])&lt;br /&gt;
* We strongly urge that you use the sample scripts we are providing as the basis for your job submissions.&lt;br /&gt;
* Make sure to check the exit codes of each of the steps in your scripts, and check any returned logs for errors after any data transfer or tarball creation process.&lt;br /&gt;
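&lt;br /&gt;
As a rough way to check the average-file-size guideline before ingesting, you can count the files in a directory tree and divide its total size by that count. The snippet below is only a sketch, not an official SciNet tool: the directory name is hypothetical, it assumes GNU coreutils (du -sb) on the node where you run it, and it assumes the tree contains at least one file.&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
# Sketch: estimate the average file size of a (hypothetical) directory tree&lt;br /&gt;
DIR=$SCRATCH/workarea/finished-job1&lt;br /&gt;
NFILES=$(find $DIR -type f | wc -l)&lt;br /&gt;
NBYTES=$(du -sb $DIR | awk '{print $1}')&lt;br /&gt;
# average file size in MB; aim for an average well above 1000 MB (1 GB)&lt;br /&gt;
echo $NFILES files, $((NBYTES / NFILES / 1024 / 1024)) MB per file on average&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;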
&lt;br /&gt;
== &amp;lt;font color=red&amp;gt;'''#### New to the HPSS/nearline/archive System? ####'''&amp;lt;/font&amp;gt; ==&lt;br /&gt;
&lt;br /&gt;
First, nearline, archive, and HPSS all mean the same thing at SciNet.  HPSS is the true name of this storage system.&lt;br /&gt;
&lt;br /&gt;
Second, HPSS is not a regular file system, and as such is not mounted on the regular login, compute, and datamover nodes. Interaction with the HPSS system is therefore different from using simple copy, move, and tar commands.&lt;br /&gt;
&lt;br /&gt;
Before trying to use the system, you need to confirm you are part of a group that already has an HPSS RAC allocation; otherwise, email SciNet support and request an HPSS account (or else you will get &amp;quot;Error - authentication/initialization failed&amp;quot; messages and exit code 71). &lt;br /&gt;
&lt;br /&gt;
THIS set of instructions on the wiki is the best and most condensed &amp;quot;manual&amp;quot; we have on how to use HPSS. It may seem a bit overwhelming at first, because of all the job script templates we make available below (they are here so you don't have to think &lt;br /&gt;
too much, just copy and paste), but if you approach the index at the top as a &amp;quot;case switch&amp;quot; mechanism for what you intend to do, everything falls into place.&lt;br /&gt;
&lt;br /&gt;
== The 3 ways to access HPSS ==&lt;br /&gt;
* Submitting jobs to the [[HPSS#Access_Through_the_Queue_System | Trillium archive partitions]] (archiveshort or archivelong), and using HTAR or HSI on the archive02 node. These tools are optimized for speed and scale, to efficiently deal with small files, and are very resilient to timeouts due to slow tape access. That is our recommendation for large transfers.&lt;br /&gt;
* Using Globus, which can be a very good trade-off between the efficiency of HSI/HTAR and the convenience of a web GUI. You may transfer data between two endpoints. For instance:&lt;br /&gt;
  alliancecan#trillium&lt;br /&gt;
  alliancecan#hpss&lt;br /&gt;
* Submitting an interactive job to the [[HPSS#Access_Through_the_Queue_System | VFS partition]] (vfsshort), and using standard linux tools (cp, rsync, ls, cd, ...) to access the VFS based /archive mount point on the vfs02 node. We don't recommend this way unless you are transferring only a handful of files, or just navigating the naming-space (directory tree similar to the other Trillium file systems); a short interactive sketch follows this list.&lt;br /&gt;
  /archive&lt;br /&gt;
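&lt;br /&gt;
For example, a short interactive session on the vfsshort partition might look like the sketch below. This is only an illustration: the file and directory names are hypothetical, and such sessions are limited to 1 hour.&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
salloc -p vfsshort -t 1:00:00&lt;br /&gt;
# once on the vfs node, /archive behaves like a regular mount point&lt;br /&gt;
ls /archive&lt;br /&gt;
cp /archive/path/to/small-file.txt $SCRATCH/   # hypothetical path&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;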
&lt;br /&gt;
Try this sequence:&lt;br /&gt;
&lt;br /&gt;
1) [https://docs.scinet.utoronto.ca/index.php/HPSS#Access_Through_an_Interactive_HSI_session take a look around HPSS using an interactive HSI session]&lt;br /&gt;
&lt;br /&gt;
(most linux shell commands have an equivalent in HPSS)&lt;br /&gt;
&lt;br /&gt;
2) [https://docs.scinet.utoronto.ca/index.php/HPSS#Sample_tarball_create archive a small test directory using HTAR]&lt;br /&gt;
&lt;br /&gt;
2a) use step 1) to see what happened&lt;br /&gt;
&lt;br /&gt;
3) [https://docs.scinet.utoronto.ca/index.php/HPSS#Sample_data_offload archive a file using hsi]&lt;br /&gt;
&lt;br /&gt;
3a) use step 1) to see what happened&lt;br /&gt;
&lt;br /&gt;
4) [https://docs.scinet.utoronto.ca/index.php/HPSS#Sample_transferring_directories archive a small test directory using HSI]&lt;br /&gt;
&lt;br /&gt;
4a) use step 1) to see what happened&lt;br /&gt;
&lt;br /&gt;
5) now try the other cases and so on. In a couple of hours you'll be in pretty good shape.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
== Access Through the Queue System  ==&lt;br /&gt;
All access to the archive system is done through the [https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Submitting_jobs_to_the_scheduler Trillium queue system].&lt;br /&gt;
&lt;br /&gt;
* Job submissions should be done to the 'archivelong', 'archiveshort' or 'vfsshort' partitions.&lt;br /&gt;
* Short jobs are limited to 1H walltime by default. Long jobs (&amp;gt; 1H) are limited to 72H walltime.&lt;br /&gt;
* Users are limited to only 2 long jobs and 2 short jobs at the same time, and 10 jobs total on each queue.&lt;br /&gt;
* There can only be 5 long jobs running at any given time overall; remaining submissions will be placed on hold for the time being. So far we have not seen a need for an overall limit on short jobs.&lt;br /&gt;
* Jobs to the 'archivelong' and 'archiveshort' partitions must use [[HPSS#HTAR | htar]] or [[HPSS#HSI | hsi]]. These tools are optimized for speed and scale, to efficiently deal with small files, and are very resilient to timeouts due to slow tape access.&lt;br /&gt;
* The 'vfsshort' partition can only be used for interactive jobs, so that you can navigate the VFS based /archive mount point (Virtual File System), and/or make small file transfers to/from HPSS using standard linux tools (ls, cd, cp, rsync, etc). This access is always limited to 1 hour.&lt;br /&gt;
&lt;br /&gt;
You can submit your job like this:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
 sbatch jobscript.sh (to 'archivelong' or 'archiveshort')&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
And you can get an interactive session like this:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
 salloc -p archiveshort&lt;br /&gt;
    OR&lt;br /&gt;
 salloc -p vfsshort&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
[https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Submitting_jobs_to_the_scheduler You may learn about SLURM in more detail here.]&lt;br /&gt;
&lt;br /&gt;
The status of pending jobs can be monitored with squeue specifying the archive partition:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
squeue -p archiveshort&lt;br /&gt;
  OR&lt;br /&gt;
squeue -p archivelong&lt;br /&gt;
  OR&lt;br /&gt;
squeue -p vfsshort&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
== Access Through an Interactive HSI session  ==&lt;br /&gt;
* You may want to acquire an interactive shell, start an HSI session and navigate the archive naming-space. Keep in mind, you're restricted to 1H.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
pinto@tri-login02:~$ salloc -p archiveshort -t 1:00:00&lt;br /&gt;
salloc: Granted job allocation 50918&lt;br /&gt;
salloc: Waiting for resource configuration&lt;br /&gt;
salloc: Nodes hpss-archive02-ib are ready for job&lt;br /&gt;
hpss-archive02-ib:~$&lt;br /&gt;
&lt;br /&gt;
hpss-archive02-ib:~$ hsi    (DON'T FORGET TO START HSI)&lt;br /&gt;
******************************************************************&lt;br /&gt;
*     Welcome to HPSS@SciNet - High Perfomance Storage System    *&lt;br /&gt;
*                                                                * &lt;br /&gt;
*            INFO: THIS IS THE NEW 7.5.1 HPSS SYSTEM!            *&lt;br /&gt;
*                                                                *&lt;br /&gt;
*        Contact Information: support@scinet.utoronto.ca         *&lt;br /&gt;
*  NOTE: do not transfer SMALL FILES with HSI. Use HTAR instead  *&lt;br /&gt;
*              CHECK THE INTEGRITY OF YOUR TARBALLS              *&lt;br /&gt;
****************************************************************** &lt;br /&gt;
[HSI]/archive/scinet/pinto-&amp;gt; ls&lt;br /&gt;
&lt;br /&gt;
[HSI]/archive/scinet/pinto-&amp;gt; cd &amp;lt;some directory&amp;gt;&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
NOTE: the VFS based /archive mount point is not visible on the archive02 node with the standard linux prompt. You must use HSI (or get an interactive session on the vfsshort partition).&lt;br /&gt;
&lt;br /&gt;
=== Scripted File Transfers ===&lt;br /&gt;
File transfers in and out of the HPSS should be scripted into jobs and submitted to the ''archivelong'' partition or the ''archiveshort'' partition. See generic example below:&lt;br /&gt;
&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash -l&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong &lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J htar_create_tarball_in_hpss&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
echo &amp;quot;Creating a htar of finished-job1/ directory tree into HPSS&amp;quot;&lt;br /&gt;
trap &amp;quot;echo 'Job script not completed';exit 129&amp;quot; TERM INT&lt;br /&gt;
# Note that your initial directory in HPSS will be $ARCHIVE&lt;br /&gt;
 &lt;br /&gt;
DEST=$ARCHIVE/finished-job1.tar&lt;br /&gt;
 &lt;br /&gt;
# htar WILL overwrite an existing file with the same name so check beforehand.&lt;br /&gt;
&lt;br /&gt;
hsi ls $DEST &amp;amp;&amp;gt; /dev/null&lt;br /&gt;
status=$?&lt;br /&gt;
&lt;br /&gt;
if [ $status == 0 ]; then   &lt;br /&gt;
    echo &amp;quot;File $DEST already exists. Nothing has been done.&amp;quot;&lt;br /&gt;
    exit 1&lt;br /&gt;
fi&lt;br /&gt;
&lt;br /&gt;
cd $SCRATCH/workarea/ &lt;br /&gt;
htar -Humask=0137 -cpf $ARCHIVE/finished-job1.tar finished-job1/ &lt;br /&gt;
status=$?&lt;br /&gt;
 &lt;br /&gt;
trap - TERM INT&lt;br /&gt;
 &lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HTAR returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
'''Note:''' Always trap abnormal terminations in your job scripts, and be sure to return the exit code.&lt;br /&gt;
&lt;br /&gt;
=== Job Dependencies ===&lt;br /&gt;
&lt;br /&gt;
Typically data will be recalled to /scratch when it is needed for analysis. Job dependencies can be constructed so that analysis jobs wait in the queue for data recalls to finish before starting. The sbatch flag is&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
--dependency=&amp;lt;type:JOBID&amp;gt;&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
where JOBID is the job number of the archive recalling job that must finish successfully before the analysis job can start.&lt;br /&gt;
&lt;br /&gt;
Here is a shortcut for generating the dependency (lookup [https://docs.scinet.utoronto.ca/index.php/HPSS#Sample_data_recall data-recall.sh samples]):&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
hpss-archive02-ib:~$ sbatch -d afterok:$(sbatch --parsable data-recall.sh) job-to-work-on-recalled-data.sh&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
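&lt;br /&gt;
Equivalently, you can capture the recall job's ID in a variable first and pass it to the dependent job explicitly. This is just a sketch, using the same hypothetical script names as above:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
RECALL_JOBID=$(sbatch --parsable data-recall.sh)&lt;br /&gt;
sbatch --dependency=afterok:$RECALL_JOBID job-to-work-on-recalled-data.sh&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;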
&lt;br /&gt;
== HTAR ==&lt;br /&gt;
''' Please aggregate small files (&amp;lt;~200MB) into tarballs or htar files. '''&lt;br /&gt;
&lt;br /&gt;
''' [[Why not tarballs too large |&amp;lt;font color=red&amp;gt;Keep your tarballs to size 1TB or less&amp;lt;/font&amp;gt;]], whether ingested by htar or hsi ([[Why not tarballs too large | &amp;lt;font color=red&amp;gt;WHY?&amp;lt;/font&amp;gt;]])'''&lt;br /&gt;
&lt;br /&gt;
HTAR is a utility for aggregating a set of files and directories into an archive file that conforms to the POSIX TAR specification. It uses a sophisticated multithreaded buffering scheme to write files directly from VAST into HPSS, thereby achieving a high rate of performance. HTAR does not do gzip compression, but it has a built-in checksum algorithm.&lt;br /&gt;
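&lt;br /&gt;
If you do want compression, one possible approach (a sketch only, with hypothetical names, following the same VAST-first-HPSS-second cput syntax used in the HSI samples below) is to build a compressed tarball on VAST yourself inside an archive job and store it with HSI:&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
# Sketch: compress on VAST first, then store the single .tar.gz with HSI&lt;br /&gt;
cd $SCRATCH/workarea&lt;br /&gt;
tar -czf finished-job1.tar.gz finished-job1/&lt;br /&gt;
hsi cput finished-job1.tar.gz : $ARCHIVE/finished-job1.tar.gz&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;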
&lt;br /&gt;
'''Caution'''&lt;br /&gt;
* Files larger than 68 GB cannot be stored in an HTAR archive (unfortunately an old 32-bit limitation). If you attempt to start a transfer with any files larger than 68GB, the whole HTAR session will fail, and you'll get a notification listing all those files, so that you can transfer them with HSI. A pre-flight check is sketched just after this list.&lt;br /&gt;
* Files with pathnames longer than 100 characters will be skipped, so as to conform with the TAR protocol (POSIX 1003.1 USTAR). Note that HTAR will erroneously indicate success, but will produce exit code 70. For now, you can check for this type of error with &amp;quot;grep WARNING my.output&amp;quot; after the job has completed.&lt;br /&gt;
* With the consolidation and centralization of all {{Alliance}} accounts under CCDB, the new uid/gid numbers are now in the 3000000-6000000 range, in breach of the same TAR POSIX limits mentioned above, so htar will issue a benign notification similar to this: ''ERROR: [Uint32_tToOctal]Octal field [thb_uid] overflow - width=8 value=3000195''. Not to worry if the job still gets a successful exit code from the execution script.&lt;br /&gt;
* Unlike with cput/cget in HSI (&amp;quot;prompt before overwrite&amp;quot;), this protection is not the default with (h)tar. Be careful not to unintentionally overwrite a previous htar destination file in HPSS. A similar situation can occur when extracting material back into VAST and overwriting the originals. Be sure to double-check the logic in your scripts.&lt;br /&gt;
* Check the HTAR exit code and log file before removing any files from the VAST active filesystems.&lt;br /&gt;
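&lt;br /&gt;
Before launching a large htar job, it can help to screen the source tree for the two limits above. The snippet below is only a sketch (the directory name is hypothetical and it assumes GNU find):&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
cd $SCRATCH/workarea&lt;br /&gt;
# files larger than 68GB must be moved with HSI instead of HTAR&lt;br /&gt;
find finished-job1/ -type f -size +68G&lt;br /&gt;
# pathnames longer than 100 characters would be skipped by HTAR&lt;br /&gt;
find finished-job1/ -type f | awk 'length($0) &amp;gt; 100'&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;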
&lt;br /&gt;
&lt;br /&gt;
=== HTAR Usage ===&lt;br /&gt;
* To write the ''file1'' and ''file2'' files to a new archive called ''files.tar'' in the default HPSS home directory, and preserve mask attributes (-p), enter:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
    htar -cpf files.tar file1 file2&lt;br /&gt;
OR&lt;br /&gt;
    htar -cpf $ARCHIVE/files.tar file1 file2&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
* To write the ''subdirA'' directory to a new archive called ''subdirA.tar'' in the default HPSS home directory, enter:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
    htar -cpf subdirA.tar subdirA/&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
* To extract all files from the archive file called ''proj1.tar'' in HPSS into the ''project1/src'' directory in VAST, and use the time of extraction as the modification time, enter:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
    cd  project1/src&lt;br /&gt;
    htar -xpmf proj1.tar&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
* To display the names of the files in the ''out.tar'' archive file within the HPSS home directory, enter (the out.tar.idx file will be queried):&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
    htar -vtf out.tar&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
* To ensure that both the htar file and the .idx file have read permission for other members in your group, use the umask option:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
    htar -Humask=0137 ....&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
For more details please check the '''[http://www.mgleicher.us/GEL/htar/ HTAR - Introduction]''' or the '''[http://www.mgleicher.us/GEL/htar/htar_man_page.html HTAR Man Page]''' online&lt;br /&gt;
&lt;br /&gt;
 &lt;br /&gt;
==== Sample tarball create ====&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash -l&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong &lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J htar_create_tarball_in_hpss&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
trap &amp;quot;echo 'Job script not completed';exit 129&amp;quot; TERM INT&lt;br /&gt;
# Note that your initial directory in HPSS will be $ARCHIVE&lt;br /&gt;
&lt;br /&gt;
DEST=$ARCHIVE/finished-job1.tar&lt;br /&gt;
&lt;br /&gt;
# htar WILL overwrite an existing file with the same name so check beforehand.&lt;br /&gt;
 &lt;br /&gt;
hsi ls $DEST &amp;amp;&amp;gt; /dev/null&lt;br /&gt;
status=$?&lt;br /&gt;
 &lt;br /&gt;
if [ $status == 0 ]; then   &lt;br /&gt;
    echo &amp;quot;File $DEST already exists. Nothing has been done.&amp;quot;&lt;br /&gt;
    exit 1&lt;br /&gt;
fi&lt;br /&gt;
&lt;br /&gt;
cd $SCRATCH/workarea/ &lt;br /&gt;
htar -Humask=0137 -cpf $DEST finished-job1/ &lt;br /&gt;
status=$?&lt;br /&gt;
 &lt;br /&gt;
trap - TERM INT&lt;br /&gt;
 &lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HTAR returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
'''Note:''' If you attempt to start a transfer with any files larger than 68GB the whole HTAR session will fail, and you'll get a notification listing all those files, so that you can transfer them with HSI. &lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
----------------------------------------&lt;br /&gt;
INFO: File too large for htar to handle: finished-job1/file1 (86567185745 bytes)&lt;br /&gt;
INFO: File too large for htar to handle: finished-job1/file2 (71857244579 bytes)&lt;br /&gt;
ERROR: 2 oversize member files found - please correct and retry&lt;br /&gt;
ERROR: [FATAL] error(s) generating filename list &lt;br /&gt;
HTAR: HTAR FAILED&lt;br /&gt;
###WARNING  htar returned non-zero exit status&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
==== Sample tarball list ====&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash -l&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong &lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J htar_list_tarball_in_hpss&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
trap &amp;quot;echo 'Job script not completed';exit 129&amp;quot; TERM INT&lt;br /&gt;
# Note that your initial directory in HPSS will be $ARCHIVE&lt;br /&gt;
&lt;br /&gt;
DEST=$ARCHIVE/finished-job1.tar&lt;br /&gt;
&lt;br /&gt;
htar -tvf $DEST&lt;br /&gt;
status=$?&lt;br /&gt;
&lt;br /&gt;
trap - TERM INT&lt;br /&gt;
 &lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HTAR returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
==== Sample tarball extract ====&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong &lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J htar_extract_tarball_from_hpss&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
 &lt;br /&gt;
trap &amp;quot;echo 'Job script not completed';exit 129&amp;quot; TERM INT&lt;br /&gt;
# Note that your initial directory in HPSS will be $ARCHIVE&lt;br /&gt;
 &lt;br /&gt;
cd $SCRATCH/recalled-from-hpss&lt;br /&gt;
htar -xpmf $ARCHIVE/finished-job1.tar&lt;br /&gt;
status=$?&lt;br /&gt;
 &lt;br /&gt;
trap - TERM INT&lt;br /&gt;
 &lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HTAR returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
== HSI ==&lt;br /&gt;
&lt;br /&gt;
HSI may be the primary client with which some users will interact with HPSS. It provides an ftp-like interface for archiving and retrieving tarballs or [https://docs.scinet.utoronto.ca/index.php/HPSS#Sample_transferring_directories directory trees]. In addition it provides a number of shell-like commands that are useful for examining and manipulating the contents in HPSS. The most commonly used commands will be:&lt;br /&gt;
{|border=&amp;quot;1&amp;quot; cellpadding=&amp;quot;10&amp;quot; cellspacing=&amp;quot;0&amp;quot;&lt;br /&gt;
|-&lt;br /&gt;
  | cput &lt;br /&gt;
  | Conditionally saves or replaces a file from VASTpath to HPSSpath if the VAST version is new or has been updated&lt;br /&gt;
 cput [options] VASTpath [: HPSSpath]&lt;br /&gt;
|-&lt;br /&gt;
  | cget &lt;br /&gt;
  | Conditionally retrieves a copy of a file from HPSS to VAST only if a VAST version does not already exist. &lt;br /&gt;
 cget [options] [VASTpath :] HPSSpath&lt;br /&gt;
|-&lt;br /&gt;
  | cd,mkdir,ls,rm,mv&lt;br /&gt;
  | Operate as one would expect on the contents of HPSS.&lt;br /&gt;
|-&lt;br /&gt;
  | lcd,lls&lt;br /&gt;
  | ''Local'' commands to VAST&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
*There are 3 distinctions about HSI that you should keep in mind, and that can generate a bit of confusion when you're first learning how to use it:&lt;br /&gt;
** HSI doesn't currently support renaming directory paths on-the-fly during transfers, therefore the syntax for cput/cget may not work as one would expect in some scenarios, requiring some workarounds.&lt;br /&gt;
** HSI has an operator &amp;quot;:&amp;quot; which separates the VASTpath and HPSSpath, and must be surrounded by whitespace (one or more space characters)&lt;br /&gt;
** The order for referring to files in HSI syntax is different from FTP. In HSI the general format is always the same, VAST first, HPSS second, cput or cget:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
     VASTfile : HPSSfile&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
For example, when using HSI to store the tarball file from VAST into HPSS, then recall it to VAST, the following commands could be used:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
    cput tarball-in-VAST : tarball-in-HPSS&lt;br /&gt;
    cget tarball-recalled : tarball-in-HPSS&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
unlike with FTP, where the following syntax would be used:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
    put tarball-in-VAST tarball-in-HPSS &lt;br /&gt;
    get tarball-in-HPSS tarball-recalled&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
* Simple commands can be executed on a single line.&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
    hsi &amp;quot;mkdir LargeFilesDir; cd LargeFilesDir; cput tarball-in-VAST : tarball-in-HPSS&amp;quot;&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
* More complex sequences can be performed using a here-document such as this:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
    hsi &amp;lt;&amp;lt;EOF&lt;br /&gt;
      mkdir LargeFilesDir&lt;br /&gt;
      cd LargeFilesDir&lt;br /&gt;
      cput tarball-in-VAST : tarball-in-HPSS&lt;br /&gt;
      lcd $SCRATCH/LargeFilesDir2/&lt;br /&gt;
      cput -Ruph *  &lt;br /&gt;
    end&lt;br /&gt;
    EOF&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
* The commands below are equivalent, but we recommend that you always use full paths and organize the contents of HPSS; the default HSI directory placement is $ARCHIVE:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
    hsi cput tarball&lt;br /&gt;
    hsi cput tarball : tarball&lt;br /&gt;
    hsi cput $SCRATCH/tarball : $ARCHIVE/tarball&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
* There are no known issues renaming files on-the-fly:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
    hsi cput $SCRATCH/tarball1 : $ARCHIVE/tarball2&lt;br /&gt;
    hsi cget $SCRATCH/tarball3 : $ARCHIVE/tarball2&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
* However, syntax forms such as the ones below will fail, since they rename the directory paths.&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
   hsi cput -Ruph $SCRATCH/LargeFilesDir : $ARCHIVE/LargeFilesDir     (FAILS)&lt;br /&gt;
OR&lt;br /&gt;
   hsi cget -Ruph $SCRATCH/LargeFilesDir : $ARCHIVE/LargeFilesDir2    (FAILS)&lt;br /&gt;
OR&lt;br /&gt;
   hsi cput -Ruph $SCRATCH/LargeFilesDir/* : $ARCHIVE/LargeFilesDir2  (FAILS)&lt;br /&gt;
OR&lt;br /&gt;
   hsi cget -Ruph $SCRATCH/LargeFilesDir : $ARCHIVE/LargeFilesDir     (FAILS)&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
One workaround is the following two-step process, where you do an &amp;quot;lcd&amp;quot; in VAST first, and recursively transfer the whole directory (-R), keeping the same name. You may use the '-u' option to resume a previously disrupted session, '-p' to preserve timestamps, and '-h' to keep the links.&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
    hsi &amp;lt;&amp;lt;EOF&lt;br /&gt;
      lcd $SCRATCH&lt;br /&gt;
      cget -Ruph LargeFilesDir&lt;br /&gt;
    end&lt;br /&gt;
    EOF&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
Another workaround is to do an &amp;quot;lcd&amp;quot; into the VASTpath first and a &amp;quot;cd&amp;quot; into the HPSSpath, but transfer the files individually with the '*' wildcard character. This option lets you change the directory name:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
    hsi &amp;lt;&amp;lt;EOF&lt;br /&gt;
      lcd $SCRATCH/LargeFilesDir&lt;br /&gt;
      mkdir $ARCHIVE/LargeFilesDir2&lt;br /&gt;
      cd $ARCHIVE/LargeFilesDir2&lt;br /&gt;
      cput -Ruph *  &lt;br /&gt;
    end&lt;br /&gt;
    EOF&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
=== Documentation === &lt;br /&gt;
Complete documentation on HSI is available from the Gleicher Enterprises links below. You may peruse those links and come up with alternative syntax forms. You may even already be familiar with HPSS/HSI from other HPC facilities, which may or may not have procedures similar to ours. HSI doesn't always work as expected when you go outside of our recommended syntax, so '''we strongly urge that you use the sample scripts we provide as the basis''' for your job submissions.&lt;br /&gt;
* [http://www.mgleicher.us/index.html/hsi/hsi_reference_manual_2/introduction.html HSI Introduction] (original site inactive)&lt;br /&gt;
* [http://www.mgleicher.us/index.html/hsi/hsi_man_page.html man hsi] (original site inactive)&lt;br /&gt;
* [https://docs.scinet.utoronto.ca/index.php/HSI_help hsi help]&lt;br /&gt;
* [http://www.mgleicher.us/index.html/hsi/hsi-exit-codes.html exit codes] (original site inactive)&lt;br /&gt;
'''Note:''' HSI returns the highest-numbered exit code, in case of multiple operations in the same hsi session. You may use '/scinet/hpss/bin/exit2msg $status' to translate those codes into intelligible messages&lt;br /&gt;
&lt;br /&gt;
=== Typical Usage Scripts===&lt;br /&gt;
The most common interactions will be ''putting'' data into HPSS, examining the contents (ls,ish), and ''getting'' data back onto VAST for inspection or analysis.&lt;br /&gt;
&lt;br /&gt;
==== Sample data offload ====&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
# This script is named: data-offload.sh&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong &lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J offload&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
trap &amp;quot;echo 'Job script not completed';exit 129&amp;quot; TERM INT&lt;br /&gt;
# individual tarballs already exist&lt;br /&gt;
&lt;br /&gt;
/usr/local/bin/hsi  -v &amp;lt;&amp;lt;EOF1&lt;br /&gt;
mkdir put-away&lt;br /&gt;
cd put-away&lt;br /&gt;
cput $SCRATCH/workarea/finished-job1.tar.gz : finished-job1.tar.gz&lt;br /&gt;
end&lt;br /&gt;
EOF1&lt;br /&gt;
status=$?&lt;br /&gt;
if [ ! $status == 0 ];then&lt;br /&gt;
   echo 'HSI returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&lt;br /&gt;
/usr/local/bin/hsi  -v &amp;lt;&amp;lt;EOF2&lt;br /&gt;
mkdir put-away&lt;br /&gt;
cd put-away&lt;br /&gt;
cput $SCRATCH/workarea/finished-job2.tar.gz : finished-job2.tar.gz&lt;br /&gt;
end&lt;br /&gt;
EOF2&lt;br /&gt;
status=$?&lt;br /&gt;
if [ ! $status == 0 ];then&lt;br /&gt;
   echo 'HSI returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&lt;br /&gt;
trap - TERM INT&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
'''Note:''' as in the above example, we recommend that you capture the (highest-numbered) exit code for each hsi session independently. And remember, you may improve your exit code verbosity by adding the excerpt below to your scripts:&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
if [ ! $status == 0 ];then&lt;br /&gt;
   echo 'HSI returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
==== Sample data list ====&lt;br /&gt;
A very trivial way to list the contents of HPSS would be to just submit the HSI 'ls' command.&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
# This script is named: data-list.sh&lt;br /&gt;
#SBATCH -t 1:00:00&lt;br /&gt;
#SBATCH -p archiveshort&lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J hpss_ls&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
/usr/local/bin/hsi -v &amp;lt;&amp;lt;EOF&lt;br /&gt;
cd put-away&lt;br /&gt;
ls -R&lt;br /&gt;
end&lt;br /&gt;
EOF&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
''Warning: if you have a lot of files, the ls command will take a long time to complete. For instance, about 400,000 files can be listed in about an hour, so an archive holding 1.2 million files would need roughly 3 hours of walltime just for the listing. Adjust the walltime accordingly, and be on the safe side.''&lt;br /&gt;
&lt;br /&gt;
However, we provide a much more useful and convenient way to explore the contents of HPSS with the inventory shell [[ISH]]. This example creates an index of all the files in a user's portion of the namespace. The list is placed in the directory /home/$(whoami)/.ish_register that can be inspected from the login nodes.&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
# This script is named: data-list.sh&lt;br /&gt;
#SBATCH -t 1:00:00&lt;br /&gt;
#SBATCH -p archiveshort&lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J hpss_index&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
INDEX_DIR=$HOME/.ish_register&lt;br /&gt;
if ! [ -e &amp;quot;$INDEX_DIR&amp;quot; ]; then&lt;br /&gt;
  mkdir -p $INDEX_DIR&lt;br /&gt;
fi&lt;br /&gt;
&lt;br /&gt;
export ISHREGISTER=&amp;quot;$INDEX_DIR&amp;quot;&lt;br /&gt;
/scinet/hpss/bin/ish hindex&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
''Note: the above warning on collecting the listing for many files applies here too.''&lt;br /&gt;
&lt;br /&gt;
This index can be browsed or searched with ISH on the development nodes.&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
hpss-archive02-ib:~$  /scinet/hpss/bin/ish ~/.ish_register/hpss.igz &lt;br /&gt;
[ish]hpss.igz&amp;gt; help&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
ISH is a powerful tool that is also useful for creating and browsing indices of tar and htar archives, so please look at the [[ISH|documentation]] or built in help.&lt;br /&gt;
&lt;br /&gt;
==== Sample data recall ====&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
# This script is named: data-recall.sh&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong&lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J recall_files&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
trap &amp;quot;echo 'Job script not completed';exit 129&amp;quot; TERM INT&lt;br /&gt;
mkdir -p $SCRATCH/recalled-from-hpss&lt;br /&gt;
&lt;br /&gt;
# individual tarballs previously organized in HPSS inside the put-away-on-2010/ folder&lt;br /&gt;
hsi  -v &amp;lt;&amp;lt; EOF&lt;br /&gt;
cget $SCRATCH/recalled-from-hpss/Jan-2010-jobs.tar.gz : $ARCHIVE/put-away-on-2010/Jan-2010-jobs.tar.gz&lt;br /&gt;
cget $SCRATCH/recalled-from-hpss/Feb-2010-jobs.tar.gz : $ARCHIVE/put-away-on-2010/Feb-2010-jobs.tar.gz&lt;br /&gt;
end&lt;br /&gt;
EOF&lt;br /&gt;
status=$?&lt;br /&gt;
&lt;br /&gt;
trap - TERM INT&lt;br /&gt;
&lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HSI returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
We should emphasize that a single ''cget'' of multiple files (rather than several separate gets) allows HSI to do optimization, as in the following example:&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
# This script is named: data-recall.sh&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong&lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J recall_files_optimized&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
trap &amp;quot;echo 'Job script not completed';exit 129&amp;quot; TERM INT&lt;br /&gt;
mkdir -p $SCRATCH/recalled-from-hpss&lt;br /&gt;
&lt;br /&gt;
# individual tarballs previously organized in HPSS inside the put-away-on-2010/ folder&lt;br /&gt;
hsi  -v &amp;lt;&amp;lt; EOF&lt;br /&gt;
lcd $SCRATCH/recalled-from-hpss/&lt;br /&gt;
cd $ARCHIVE/put-away-on-2010/&lt;br /&gt;
cget Jan-2010-jobs.tar.gz Feb-2010-jobs.tar.gz&lt;br /&gt;
end&lt;br /&gt;
EOF&lt;br /&gt;
status=$?&lt;br /&gt;
&lt;br /&gt;
trap - TERM INT&lt;br /&gt;
 &lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HSI returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
=== Sample transferring directories ===&lt;br /&gt;
&amp;lt;font color=red&amp;gt;Remember, it's not possible to rename directories or paths on-the-fly:&amp;lt;/font&amp;gt;&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
hsi cget -Ruph $SCRATCH/LargeFiles-recalled : $ARCHIVE/LargeFiles    (FAILS)&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
One workaround is to transfer the whole directory (and sub-directories) recursively:&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
# This script is named: data-recall.sh&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong&lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J recall_directories&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
trap &amp;quot;echo 'Job script not completed';exit 129&amp;quot; TERM INT&lt;br /&gt;
mkdir -p $SCRATCH/recalled&lt;br /&gt;
&lt;br /&gt;
hsi  -v &amp;lt;&amp;lt; EOF&lt;br /&gt;
lcd $SCRATCH/recalled&lt;br /&gt;
cd $ARCHIVE/&lt;br /&gt;
cget -Ruph LargeFiles&lt;br /&gt;
end&lt;br /&gt;
EOF&lt;br /&gt;
status=$?&lt;br /&gt;
 &lt;br /&gt;
trap - TERM INT&lt;br /&gt;
 &lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HSI returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
Another workaround is to transfer files and subdirectories individually with the &amp;quot;*&amp;quot; wildcard character:&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
# This script is named: data-recall.sh&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong&lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J recall_directories&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
trap &amp;quot;echo 'Job script not completed';exit 129&amp;quot; TERM INT&lt;br /&gt;
mkdir -p $SCRATCH/LargeFiles-recalled&lt;br /&gt;
&lt;br /&gt;
hsi  -v &amp;lt;&amp;lt; EOF&lt;br /&gt;
lcd $SCRATCH/LargeFiles-recalled&lt;br /&gt;
cd $ARCHIVE/LargeFiles&lt;br /&gt;
cget -Ruph *&lt;br /&gt;
end&lt;br /&gt;
EOF&lt;br /&gt;
status=$?&lt;br /&gt;
 &lt;br /&gt;
trap - TERM INT&lt;br /&gt;
 &lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HSI returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
* For more details please check the '''[http://www.mgleicher.us/GEL/hsi/ HSI Introduction]''', the '''[http://www.mgleicher.us/GEL/hsi/hsi_man_page.html HSI Man Page]''', or the [https://support.scinet.utoronto.ca/wiki/index.php/HSI_help '''hsi help''']&lt;br /&gt;
&lt;br /&gt;
== [[ISH|ISH]] ==&lt;br /&gt;
=== [[ISH|Documentation and Usage]] ===&lt;br /&gt;
&lt;br /&gt;
== File and directory management ==&lt;br /&gt;
=== Moving/renaming ===&lt;br /&gt;
* you may use 'mv' or 'cp' in the same way as their Linux counterparts.&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong&lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J file_management_script&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
echo &amp;quot;HPSS file and directory management&amp;quot;&lt;br /&gt;
trap &amp;quot;echo 'Job script not completed';exit 129&amp;quot; TERM INT&lt;br /&gt;
&lt;br /&gt;
/usr/local/bin/hsi -v &amp;lt;&amp;lt;EOF1&lt;br /&gt;
    mkdir $ARCHIVE/2011&lt;br /&gt;
    mv $ARCHIVE/oldjobs $ARCHIVE/2011&lt;br /&gt;
    cp -r $ARCHIVE/almostfinished/*done $ARCHIVE/2011&lt;br /&gt;
end&lt;br /&gt;
EOF1&lt;br /&gt;
status=$?&lt;br /&gt;
 &lt;br /&gt;
trap - TERM INT&lt;br /&gt;
 &lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HSI returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
=== Deletions ===&lt;br /&gt;
==== Recommendations ====&lt;br /&gt;
* Be careful with the use of 'cd' commands to non-existing directories before the 'rm' command. Results may be unpredictable&lt;br /&gt;
* Avoid the use of the stand-alone wildcard character '''*'''. If necessary, whenever possible have it bound to common patterns, such as '*.tmp', so as to limit unintentional mishaps&lt;br /&gt;
* Avoid using relative paths, even the env variable $ARCHIVE. Better to explicitly expand the full paths in your scripts&lt;br /&gt;
* Avoid using recursive/looped deletion instructions on $SCRATCH contents from the archive job scripts. Even on $ARCHIVE contents, it may be better to do it as an independent job submission, after you have verified that the original ingestion into HPSS finished without any issues.&lt;br /&gt;
&lt;br /&gt;
==== Typical example ====&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong&lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J deletion_script&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
echo &amp;quot;Deletion of an outdated directory tree into HPSS&amp;quot;&lt;br /&gt;
&lt;br /&gt;
trap &amp;quot;echo 'Job script not completed';exit 129&amp;quot; TERM INT&lt;br /&gt;
# Note that your initial directory in HPSS will be $ARCHIVE&lt;br /&gt;
&lt;br /&gt;
/usr/local/bin/hsi -v &amp;lt;&amp;lt;EOF1&lt;br /&gt;
    rm /archive/scinet/pinto/*.tmp&lt;br /&gt;
    rm -R /archive/scinet/pinto/obsolete&lt;br /&gt;
end&lt;br /&gt;
EOF1&lt;br /&gt;
status=$?&lt;br /&gt;
 &lt;br /&gt;
trap - TERM INT&lt;br /&gt;
 &lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HSI returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
==== Deleting with an interactive HSI session ====&lt;br /&gt;
* You may feel more comfortable acquiring an interactive shell, starting an HSI session and proceeding with your deletions that way. Keep in mind that interactive archive sessions are restricted to 1 hour.&lt;br /&gt;
&lt;br /&gt;
* After using the ''salloc -p archiveshort'' command you'll get a standard shell prompt on an archive execution node (hpss-archive02), as you would on any compute node. However, you will need to run '''HSI''' or '''HTAR''' to access resources on HPSS.&lt;br /&gt;
&lt;br /&gt;
* HSI will give you a prompt very similar to a standard shell, where you can navigate around using commands such as 'ls', 'cd', 'pwd', etc. NOTE: not every bash command has an equivalent in HSI; for instance, you cannot use 'vi' or 'cat'.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
pinto@nia-login07:~$ salloc -p archiveshort -t 1:00:00&lt;br /&gt;
salloc: Granted job allocation 50359&lt;br /&gt;
salloc: Waiting for resource configuration&lt;br /&gt;
salloc: Nodes hpss-archive02-ib are ready for job&lt;br /&gt;
&lt;br /&gt;
hpss-archive02-ib:~$ hsi&lt;br /&gt;
******************************************************************&lt;br /&gt;
*     Welcome to HPSS@SciNet - High Perfomance Storage System    *&lt;br /&gt;
*                                                                * &lt;br /&gt;
*            INFO: THIS IS THE NEW 7.5.1 HPSS SYSTEM!            *&lt;br /&gt;
*                                                                *&lt;br /&gt;
*        Contact Information: support@scinet.utoronto.ca         *&lt;br /&gt;
*  NOTE: do not transfer SMALL FILES with HSI. Use HTAR instead  *&lt;br /&gt;
*              CHECK THE INTEGRITY OF YOUR TARBALLS              *&lt;br /&gt;
******************************************************************&lt;br /&gt;
&lt;br /&gt;
[HSI]/archive/scinet/pinto-&amp;gt; rm -R junk&lt;br /&gt;
&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
== HPSS for the 'Watchmaker' ==&lt;br /&gt;
=== Efficient alternative to htar ===&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong&lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J tar_create_tarball_in_hpss_with_hsi_by_piping&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
trap &amp;quot;echo 'Job script not completed';exit 129&amp;quot; TERM INT&lt;br /&gt;
# Note that your initial directory in HPSS will be $ARCHIVE&lt;br /&gt;
&lt;br /&gt;
# When using a pipeline like this&lt;br /&gt;
set -o pipefail &lt;br /&gt;
&lt;br /&gt;
# to put (cput will fail)&lt;br /&gt;
tar -c $SCRATCH/mydir | hsi put - : $ARCHIVE/mydir.tar&lt;br /&gt;
status=$?&lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'TAR+HSI+piping returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&lt;br /&gt;
# to immediately generate an index&lt;br /&gt;
ish hindex $ARCHIVE/mydir.tar&lt;br /&gt;
status=$?&lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'ISH returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&lt;br /&gt;
# to get&lt;br /&gt;
#cd $SCRATCH&lt;br /&gt;
#hsi cget - : $ARCHIVE/mydir.tar | tar -xv &lt;br /&gt;
#status=$?&lt;br /&gt;
# if [ ! $status == 0 ]; then&lt;br /&gt;
#   echo 'TAR+HSI+piping returned non-zero code.'&lt;br /&gt;
#   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
#   exit $status&lt;br /&gt;
#else&lt;br /&gt;
#   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
#fi&lt;br /&gt;
&lt;br /&gt;
trap - TERM INT&lt;br /&gt;
 &lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
'''Notes:''' &lt;br /&gt;
* Combining commands in this fashion, besides being HPSS-friendly, should not be noticeably slower than a recursive put with HSI, which stores each file one by one. However, reading the files back from tape in this format will be many times faster. It also overcomes the current 68GB limit on the size of stored files that we have with htar.&lt;br /&gt;
* To top things off, we recommend indexing with ish (in the same script) immediately after the tarball creation, while it still resides in the HPSS cache. The end result is as if htar had been used.&lt;br /&gt;
* To ensure that an error at any stage of the pipeline shows up in the returned status, use ''set -o pipefail'' (by default a pipeline returns the status of its last command, which is not what you want).&lt;br /&gt;
* Optimal performance for aggregated transfers and allocation on tapes is obtained with [[Why not tarballs too large |&amp;lt;font color=red&amp;gt;tarballs of size 500GB or less&amp;lt;/font&amp;gt;]], whether ingested by htar or hsi ([[Why not tarballs too large | &amp;lt;font color=red&amp;gt;WHY?&amp;lt;/font&amp;gt;]]). Be sure to check the total amount of data in the directory tree with 'du' before sending it through the tar+HSI pipe (see the sketch below).&lt;br /&gt;
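&lt;br /&gt;
A minimal sketch of such a size pre-check with 'du' (the 500GB threshold and the directory name ''mydir'' are placeholders for your own values):&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
# total size of the directory tree, in KB&lt;br /&gt;
size_kb=$(du -sk $SCRATCH/mydir | awk '{print $1}')&lt;br /&gt;
# 500 GB expressed in KB&lt;br /&gt;
limit_kb=$((500*1024*1024))&lt;br /&gt;
&lt;br /&gt;
if [ $size_kb -gt $limit_kb ]; then&lt;br /&gt;
   echo &amp;quot;Directory is larger than 500GB; consider splitting it into several tarballs&amp;quot;&lt;br /&gt;
else&lt;br /&gt;
   echo &amp;quot;Directory size is fine for a single tarball&amp;quot;&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;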
&lt;br /&gt;
=== Multi-threaded gzip'ed compression with pigz ===&lt;br /&gt;
We compiled a multi-threaded implementation of gzip called pigz (http://zlib.net/pigz/). It is now part of the &amp;quot;extras&amp;quot; module and can also be used on any compute or devel node. Piping through pigz makes this script run much quicker than compressing with 'tar -czf'. In addition, by piggy-backing ISH onto the end of the script (see the sketch after the example below), ISH will know what to do with the just-created mydir.tar.gz compressed tarball.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong&lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J tar_create_compressed_tarball_in_hpss_with_hsi_by_piping&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
 &lt;br /&gt;
trap &amp;quot;echo 'Job script not completed';exit 129&amp;quot; TERM INT&lt;br /&gt;
# Note that your initial directory in HPSS will be $ARCHIVE&lt;br /&gt;
&lt;br /&gt;
# When using a pipeline like this&lt;br /&gt;
set -o pipefail &lt;br /&gt;
&lt;br /&gt;
# to put (cput will fail)&lt;br /&gt;
tar -c $SCRATCH/mydir | pigz | hsi put - : $ARCHIVE/mydir.tar.gz&lt;br /&gt;
status=$?&lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'TAR+PIGZ+HSI+piping returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
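&lt;br /&gt;
As mentioned above, you may piggy-back the indexing onto the same script; a minimal sketch, mirroring the uncompressed example earlier (it assumes the put step above succeeded):&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
# immediately generate an ISH index for the compressed tarball,&lt;br /&gt;
# while it still resides in the HPSS disk cache&lt;br /&gt;
ish hindex $ARCHIVE/mydir.tar.gz&lt;br /&gt;
status=$?&lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'ISH returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'INDEXING SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;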
&lt;br /&gt;
NOTE: Do not use this type of pipeline to serialize recalls with HSI inside a for loop. That is a very inefficient way for HPSS to handle recalls, since it forces the system to fetch the tarballs in your loop's sequence, or in some numeric/alphabetic/chronological order, whereas files are not necessarily written to tape in the same sequence in which they were ingested. The result is many unnecessary tape mounts/dismounts and a lot of fast-forwarding and rewinding, depending on where the files sit on the tapes, which can be extremely stressful to the hardware. This effect is known as &amp;quot;shoe-shining&amp;quot;.&lt;br /&gt;
&lt;br /&gt;
Instead, provide HSI with the full list of tarballs to be recalled, all at once; HSI will sort the list into the most convenient order, following the sequence in which the files are laid out on the tapes, and fetch all the files from each tape in one go.&lt;br /&gt;
Afterwards, once all the tarballs are back on your $SCRATCH, you may use a loop to unpigz them, as sketched below.&lt;br /&gt;
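&lt;br /&gt;
A minimal sketch of such a bulk recall followed by decompression (the tarball names run1, run2, run3 are placeholders):&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
cd $SCRATCH&lt;br /&gt;
&lt;br /&gt;
# recall all tarballs in a single HSI invocation, so HSI can order&lt;br /&gt;
# the recalls by tape position&lt;br /&gt;
hsi -v &amp;lt;&amp;lt; EOF&lt;br /&gt;
cget $ARCHIVE/run1.tar.gz $ARCHIVE/run2.tar.gz $ARCHIVE/run3.tar.gz&lt;br /&gt;
end&lt;br /&gt;
EOF&lt;br /&gt;
status=$?&lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HSI returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
fi&lt;br /&gt;
&lt;br /&gt;
# only once everything is back on $SCRATCH, decompress in a loop&lt;br /&gt;
for f in run1 run2 run3; do&lt;br /&gt;
   unpigz ${f}.tar.gz&lt;br /&gt;
done&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;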
&lt;br /&gt;
=== Content Verification ===&lt;br /&gt;
&lt;br /&gt;
==== HTAR CRC checksums ====&lt;br /&gt;
The ''-Hcrc'' option, used in the example below, specifies that HTAR should generate CRC checksums when creating the archive.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong&lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J htar_create_tarball_in_hpss_with_checksum_verification&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
trap &amp;quot;echo 'Job script not completed';exit 129&amp;quot; TERM INT&lt;br /&gt;
# Note that your initial directory in HPSS will be $ARCHIVE&lt;br /&gt;
 &lt;br /&gt;
cd $SCRATCH/workarea&lt;br /&gt;
&lt;br /&gt;
# to put&lt;br /&gt;
htar -Humask=0137 -cpf $ARCHIVE/finished-job1.tar -Hcrc -Hverify=1 finished-job1/&lt;br /&gt;
&lt;br /&gt;
# to get&lt;br /&gt;
#mkdir $SCRATCH/verification&lt;br /&gt;
#cd $SCRATCH/verification&lt;br /&gt;
#htar -Hcrc -xvpmf $ARCHIVE/finished-job1.tar &lt;br /&gt;
&lt;br /&gt;
status=$?&lt;br /&gt;
 &lt;br /&gt;
trap - TERM INT&lt;br /&gt;
 &lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HTAR returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
==== Current HSI version - Checksum built-in ====&lt;br /&gt;
&lt;br /&gt;
MD5 is the standard hashing algorithm for the HSI build at SciNet. For hsi ingestions done with the '-c on' option you should be able to query the MD5 hash with the hsi command 'lshash' (as in the example below). That value is stored as a UDA (User Defined Attribute) for each file (a feature of HPSS starting with version 7.4).&lt;br /&gt;
&lt;br /&gt;
[http://www.mgleicher.us/GEL/hsi/hsi/hsi_reference_manual_2/checksum-feature.html More usage details here]&lt;br /&gt;
&lt;br /&gt;
The checksum algorithm is very CPU-intensive. Although the checksum code is compiled with a high level of compiler optimization, transfer rates can be significantly reduced when checksum creation or verification is in effect. The amount of degradation in transfer rates depends on several factors, such as  processor speed, network transfer speed, and speed of the local filesystem (VAST).&lt;br /&gt;
&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong&lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J MD5_checksum_verified_transfer&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
thefile=&amp;lt;VASTpath&amp;gt;&lt;br /&gt;
storedfile=&amp;lt;HPSSpath&amp;gt;&lt;br /&gt;
&lt;br /&gt;
# Generate checksum on fly (-c on)&lt;br /&gt;
hsi -q put -c on $thefile : $storedfile&lt;br /&gt;
&lt;br /&gt;
# Check the exit code of the HSI process&lt;br /&gt;
status=$?&lt;br /&gt;
&lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HSI returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&lt;br /&gt;
# verify checksum&lt;br /&gt;
hsi lshash $storedfile&lt;br /&gt;
status=$?&lt;br /&gt;
 &lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HSI returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&lt;br /&gt;
# get the file back with checksum&lt;br /&gt;
hsi get -c on $storedfile&lt;br /&gt;
status=$?&lt;br /&gt;
 &lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HSI returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
==== Prior to HSI version 4.0.1.1 ====&lt;br /&gt;
&lt;br /&gt;
This will checksum the contents of the HPSSpath against the original VASTpath after the transfer has finished.&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
#SBATCH -t 72:00:00&lt;br /&gt;
#SBATCH -p archivelong&lt;br /&gt;
#SBATCH -N 1&lt;br /&gt;
#SBATCH -J checksum_verified_transfer&lt;br /&gt;
#SBATCH --mail-type=ALL&lt;br /&gt;
&lt;br /&gt;
thefile=&amp;lt;VASTpath&amp;gt;&lt;br /&gt;
storedfile=&amp;lt;HPSSpath&amp;gt;&lt;br /&gt;
# base name used for the temporary checksum file below&lt;br /&gt;
fname=$(basename $thefile)&lt;br /&gt;
&lt;br /&gt;
# Generate checksum on fly using a named pipe so that file is only read from VAST once&lt;br /&gt;
mkfifo /tmp/NPIPE&lt;br /&gt;
cat $thefile  | tee /tmp/NPIPE | hsi -q put - : $storedfile &amp;amp;&lt;br /&gt;
pid=$!&lt;br /&gt;
md5sum /tmp/NPIPE |tee /tmp/$fname.md5&lt;br /&gt;
rm -f  /tmp/NPIPE&lt;br /&gt;
&lt;br /&gt;
# Check the exit code of the HSI process  &lt;br /&gt;
wait $pid&lt;br /&gt;
status=$?&lt;br /&gt;
&lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HSI returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&lt;br /&gt;
# change filename to stdin in checksum file&lt;br /&gt;
sed -i.1 &amp;quot;s+/tmp/NPIPE+-+&amp;quot; /tmp/$fname.md5&lt;br /&gt;
&lt;br /&gt;
# verify checksum&lt;br /&gt;
hsi -q get - : $storedfile  | md5sum -c  /tmp/$fname.md5&lt;br /&gt;
status=$?&lt;br /&gt;
 &lt;br /&gt;
if [ ! $status == 0 ]; then&lt;br /&gt;
   echo 'HSI returned non-zero code.'&lt;br /&gt;
   /scinet/hpss/bin/exit2msg $status&lt;br /&gt;
   exit $status&lt;br /&gt;
else&lt;br /&gt;
   echo 'TRANSFER SUCCESSFUL'&lt;br /&gt;
fi&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
=== Splitting tarballs ===&lt;br /&gt;
&lt;br /&gt;
By knowing the size of your files or directories you can decide how to divide and organize them into different archives if necessary, so as not to create huge tarballs in the first place.&lt;br /&gt;
&lt;br /&gt;
However, if you find it more convenient to bundle a whole directory into a big tarball, you can still divide it into small chunks prior to ingesting them into HPSS, by using the following syntax:&lt;br /&gt;
&lt;br /&gt;
&amp;lt;code&amp;gt;split -b &amp;lt;Size-in-GB&amp;gt; &amp;lt;tar-file-name&amp;gt; &amp;lt;prefix-name&amp;gt;&amp;lt;/code&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&amp;lt;source lang=&amp;quot;console&amp;quot;&amp;gt;&lt;br /&gt;
$ split -b 500GB results.tar small-chunk&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
The '''-b''' option sets the size of the chunks, and '''prefix-name''' is the base name for the chunks. The above command will split '''results.tar''' into 500 GB chunks in the current working directory, named small-chunkaa, small-chunkab, small-chunkac, and so on. You may then ingest them into HPSS using any of the 3 methods described above (HSI, VFS, Globus), for instance with an HSI session as sketched below.&lt;br /&gt;
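&lt;br /&gt;
A minimal sketch of such an ingestion with HSI (the directory names are placeholders; the wildcard is left for HSI itself to expand against the local chunk files):&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
# ingest the chunks from $SCRATCH into a dedicated HPSS directory&lt;br /&gt;
hsi -v &amp;lt;&amp;lt; EOF&lt;br /&gt;
lcd $SCRATCH/results&lt;br /&gt;
mkdir $ARCHIVE/results-chunks&lt;br /&gt;
cd $ARCHIVE/results-chunks&lt;br /&gt;
cput small-chunk*&lt;br /&gt;
end&lt;br /&gt;
EOF&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;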
&lt;br /&gt;
To rebuild the original tarball, recall the individual chunks from HPSS and use the &amp;lt;code&amp;gt;cat&amp;lt;/code&amp;gt; command as follows:&lt;br /&gt;
&lt;br /&gt;
&amp;lt;source lang=&amp;quot;console&amp;quot;&amp;gt;&lt;br /&gt;
$ cat small-chunk* &amp;gt; your_archive_name.tar&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
If you prefer numeric suffixes instead of alphabetic ones, add the '''-d''' option to the split command above.&lt;br /&gt;
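&lt;br /&gt;
For example, reusing the file name from above:&lt;br /&gt;
&amp;lt;source lang=&amp;quot;console&amp;quot;&amp;gt;&lt;br /&gt;
$ split -b 500GB -d results.tar small-chunk&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
which produces small-chunk00, small-chunk01, small-chunk02, and so on.&lt;br /&gt;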
&lt;br /&gt;
== Access to HPSS using Globus ==&lt;br /&gt;
* You may now transfer data between SciNet's HPSS and an external source&lt;br /&gt;
* Follow the link below &lt;br /&gt;
  https://globus.alliancecan.ca&lt;br /&gt;
: Enter your {{Alliance}} username and password (and use your MFA).&lt;br /&gt;
* In the 'File Manager' tab, enter '''alliancecan#hpss''' as one of the endpoints. If you see 'Missing required data_access_consent', authenticate this endpoint by clicking 'Continue' and entering your username and password once more.&lt;br /&gt;
* You may read more about the {{Alliance}} Globus portal here:&lt;br /&gt;
  https://docs.alliancecan.ca/wiki/Globus&lt;br /&gt;
&lt;br /&gt;
== User provided Content/Suggestions ==&lt;br /&gt;
&lt;br /&gt;
== [[HPSS-by-pomes|Packing up large data sets and putting them on HPSS]] ==&lt;br /&gt;
(Pomés group recommendations)&lt;br /&gt;
&lt;br /&gt;
[[Data Management|BACK TO Data Management]]&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7175</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7175"/>
		<updated>2025-10-07T00:07:38Z</updated>

		<summary type="html">&lt;p&gt;Pinto: &lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Up3 | Trillium|https://docs.alliancecan.ca/wiki/Trillium_Quickstart}}&lt;br /&gt;
|{{Down | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Up | Teach|Teach}}&lt;br /&gt;
|{{Up | Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | OnDemand|Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Up3 | Scheduler|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Submitting_jobs_to_the_scheduler}}&lt;br /&gt;
|{{Up3 | File system|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Storage}}&lt;br /&gt;
|{{Up | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | HPSS|HPSS}}&lt;br /&gt;
|{{Up3 | Login Nodes|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up3 | External Network|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up  | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | Balam|Balam}}&lt;br /&gt;
|{{Up3 | Cvmfs|https://docs.alliancecan.ca/wiki/Standard_software_environments}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 06, 2025, 8:00 pm:''' HPSS is fully functional. You may submit archive jobs from trillium login nodes, datamovers and robots.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 03, 2025, 6:30 pm:''' HPSS is back online, and already accessible via alliancecan#hpss Globus endpoint. Directory tree now follows the other Alliance clusters. We're still working on job submission via Slurm&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 01, 2025, 0:00 am:''' Niagara compute nodes are now unavailable for regular users. The login nodes will remain available for a while to allow a few last data transfers, although transfers from the Niagara file systems to Trillium are best done on nia-dm1.scinet.utoronto.ca.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 01, 2025, 9:30 am:''' HPSS is down for scheduled maintenance, including alliancecan#hpss Globus endpoint&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [https://docs.alliancecan.ca/wiki/Trillium_Quickstart Trillium Quickstart]&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7172</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7172"/>
		<updated>2025-10-03T22:37:44Z</updated>

		<summary type="html">&lt;p&gt;Pinto: /* System Status */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Up3 | Trillium|https://docs.alliancecan.ca/wiki/Trillium_Quickstart}}&lt;br /&gt;
|{{Down | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Up | Teach|Teach}}&lt;br /&gt;
|{{Up | Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | OnDemand|Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Up3 | Scheduler|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Submitting_jobs_to_the_scheduler}}&lt;br /&gt;
|{{Up3 | File system|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Storage}}&lt;br /&gt;
|{{Up | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Partial | HPSS|HPSS}}&lt;br /&gt;
|{{Up3 | Login Nodes|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up3 | External Network|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up  | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | Balam|Balam}}&lt;br /&gt;
|{{Up3 | Cvmfs|https://docs.alliancecan.ca/wiki/Standard_software_environments}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 03, 2025, 6:30 pm:''' HPSS is back online, and already accessible via alliancecan#hpss Globus endpoint. Directory tree now follows the other Alliance clusters. We're still working on job submission via Slurm&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 01, 2025, 0:00 am:''' Niagara compute nodes are now unavailable for regular users. The login nodes will remain available for a while to allow a few last data transfers, although transfers from the Niagara file systems to Trillium are best done on nia-dm1.scinet.utoronto.ca.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 01, 2025, 9:30 am:''' HPSS is down for scheduled maintenance, including alliancecan#hpss Globus endpoint&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [https://docs.alliancecan.ca/wiki/Trillium_Quickstart Trillium Quickstart]&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7169</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7169"/>
		<updated>2025-10-03T22:35:45Z</updated>

		<summary type="html">&lt;p&gt;Pinto: /* System Status */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Up3 | Trillium|https://docs.alliancecan.ca/wiki/Trillium_Quickstart}}&lt;br /&gt;
|{{Down | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Up | Teach|Teach}}&lt;br /&gt;
|{{Up | Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | OnDemand|Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Up3 | Scheduler|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Submitting_jobs_to_the_scheduler}}&lt;br /&gt;
|{{Up3 | File system|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Storage}}&lt;br /&gt;
|{{Up | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Partial | HPSS|HPSS}}&lt;br /&gt;
|{{Up3 | Login Nodes|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up3 | External Network|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up  | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | Balam|Balam}}&lt;br /&gt;
|{{Up3 | Cvmfs|https://docs.alliancecan.ca/wiki/Standard_software_environments}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 03, 2025, 6:30 pm:''' HPSS is back online, and already accessible via alliancecan#hpss Globus endpoint. We're still working on job submission via Slurm&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 01, 2025, 0:00 am:''' Niagara compute nodes are now unavailable for regular users. The login nodes will remain available for a while to allow a few last data transfers, although transfers from the Niagara file systems to Trillium are best done on nia-dm1.scinet.utoronto.ca.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 01, 2025, 9:30 am:''' HPSS is down for scheduled maintenance, including alliancecan#hpss Globus endpoint&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [https://docs.alliancecan.ca/wiki/Trillium_Quickstart Trillium Quickstart]&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7166</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7166"/>
		<updated>2025-10-03T22:33:08Z</updated>

		<summary type="html">&lt;p&gt;Pinto: &lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Up3 | Trillium|https://docs.alliancecan.ca/wiki/Trillium_Quickstart}}&lt;br /&gt;
|{{Down | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Up | Teach|Teach}}&lt;br /&gt;
|{{Up | Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | OnDemand|Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Up3 | Scheduler|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Submitting_jobs_to_the_scheduler}}&lt;br /&gt;
|{{Up3 | File system|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Storage}}&lt;br /&gt;
|{{Up | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Partial | HPSS|HPSS}}&lt;br /&gt;
|{{Up3 | Login Nodes|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up3 | External Network|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up  | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | Balam|Balam}}&lt;br /&gt;
|{{Up3 | Cvmfs|https://docs.alliancecan.ca/wiki/Standard_software_environments}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 01, 2025, 0:00 am:''' Niagara compute nodes are now unavailable for regular users. The login nodes will remain available for a while to allow a few last data transfers, although transfers from the Niagara file systems to Trillium are best done on nia-dm1.scinet.utoronto.ca.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 01, 2025, 9:30 am:''' HPSS is down for scheduled maintenance, including alliancecan#hpss Globus endpoint&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [https://docs.alliancecan.ca/wiki/Trillium_Quickstart Trillium Quickstart]&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7163</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7163"/>
		<updated>2025-10-03T22:26:52Z</updated>

		<summary type="html">&lt;p&gt;Pinto: &lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Up3 | Trillium|https://docs.alliancecan.ca/wiki/Trillium_Quickstart}}&lt;br /&gt;
|{{Down | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Up | Teach|Teach}}&lt;br /&gt;
|{{Up | Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | OnDemand|Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Up3 | Scheduler|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Submitting_jobs_to_the_scheduler}}&lt;br /&gt;
|{{Up3 | File system|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Storage}}&lt;br /&gt;
|{{Up | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down | HPSS|HPSS}}&lt;br /&gt;
|{{Up3 | Login Nodes|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up3 | External Network|https://docs.alliancecan.ca/wiki/Trillium_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up  | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | Balam|Balam}}&lt;br /&gt;
|{{Up3 | Cvmfs|https://docs.alliancecan.ca/wiki/Standard_software_environments}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 01, 2025, 0:00 am:''' Niagara compute nodes are now unavailable for regular users. The login nodes will remain available for a while to allow a few last data transfers, although transfers from the Niagara file systems to Trillium are best done on nia-dm1.scinet.utoronto.ca.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 01, 2025, 9:30 am:''' HPSS is down for scheduled maintenance, including alliancecan#hpss Globus endpoint&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [https://docs.alliancecan.ca/wiki/Trillium_Quickstart Trillium Quickstart]&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7151</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7151"/>
		<updated>2025-10-01T13:59:20Z</updated>

		<summary type="html">&lt;p&gt;Pinto: &lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Up3 | Trillium|https://docs.alliancecan.ca/wiki/Trillium_Quickstart}}&lt;br /&gt;
|{{Partial | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Up | Teach|Teach}}&lt;br /&gt;
|{{Up | Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | OnDemand|Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Up | Scheduler|Niagara_Quickstart#Submitting_jobs}}&lt;br /&gt;
|{{Up | File system|Niagara_Quickstart#Storage_and_quotas}}&lt;br /&gt;
|{{Up | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down | HPSS|HPSS}}&lt;br /&gt;
|{{Up | Login Nodes|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up | External Network|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Partial | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | Balam|Balam}}&lt;br /&gt;
|{{Up | Cvmfs|Using_modules}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 01, 2025, 9:30 am:''' HPSS is down for scheduled maintenance, including alliancecan#hpss Globus endpoint&lt;br /&gt;
&lt;br /&gt;
'''Thu Sep 18, 2025, 11:30 am:''' Open OnDemand is fully functional again.&lt;br /&gt;
&lt;br /&gt;
'''Wed Sep 17, 2025, 6:00 pm:''' Niagara is back up as well (including its Globus endpoint).  We are still working on the other systems.&lt;br /&gt;
&lt;br /&gt;
'''Wed Sep 17, 2025, 1:40 pm:''' Trillium is back up (except for its Globus endpoint).  We are working on the other systems still.&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep 16, 2025, 5:45 pm:''' Unfortunately, we cannot bring all systems up yet because we are waiting for a spare part for the cooling system that will be brought tomorrow.  In the meantime, we have managed to keep the Trillium login nodes up, but not other systems.&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep 16, 2025, from 7:00 am to 5:00 pm (EDT):''' The SciNet datacentre will undergo maintenance of several critical parts of the centre.  This will require a full shutdown of all SciNet systems (Trillium, Niagara, Mist, HPSS, Rouge, Teach, as well as hosted equipment). This will also be the time that the Mist cluster gets decommissioned. &lt;br /&gt;
&lt;br /&gt;
'''Fri Sep 12 22:03:17 EDT 2025:''' HPSS software and OS upgrades are finished.&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep  9 17:05:38 EDT 2025:''' Starting tomorrow, Sep/10, and for the following 3 days HPSS will be down for software and OS upgrades. We will strive to finish sooner, at which time we will make the system available to users again.&lt;br /&gt;
&lt;br /&gt;
===Mist/Niagara Decommissioning Schedule===&lt;br /&gt;
&lt;br /&gt;
'''September 4, 2025'''&lt;br /&gt;
* Niagara reduced to 863 compute nodes.&lt;br /&gt;
&lt;br /&gt;
'''September 9, 2025'''&lt;br /&gt;
* Niagara's Open OnDemand decommissioned.&lt;br /&gt;
* Brief data centre connection outage at 9 AM EDT&lt;br /&gt;
* Niagara reduced to 647 compute nodes at end of day.&lt;br /&gt;
&lt;br /&gt;
'''September 11, 2025'''&lt;br /&gt;
* Trillium Open OnDemand goes live.&lt;br /&gt;
&lt;br /&gt;
'''September 16, 2025'''&lt;br /&gt;
* '''Full-day data centre maintenance'''&lt;br /&gt;
* Niagara reduced to 431 compute nodes.&lt;br /&gt;
* Mist decommissioned.&lt;br /&gt;
&lt;br /&gt;
'''September 24, 2025'''&lt;br /&gt;
* Niagara reduced to 215 compute nodes.&lt;br /&gt;
&lt;br /&gt;
'''September 30, 2025'''&lt;br /&gt;
* Niagara decommissioned.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [https://docs.alliancecan.ca/wiki/Trillium_Quickstart Trillium Quickstart]&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7148</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7148"/>
		<updated>2025-10-01T13:58:05Z</updated>

		<summary type="html">&lt;p&gt;Pinto: &lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Up3 | Trillium|https://docs.alliancecan.ca/wiki/Trillium_Quickstart}}&lt;br /&gt;
|{{Partial | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Up | Teach|Teach}}&lt;br /&gt;
|{{Up | Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | OnDemand|Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Up | Scheduler|Niagara_Quickstart#Submitting_jobs}}&lt;br /&gt;
|{{Up | File system|Niagara_Quickstart#Storage_and_quotas}}&lt;br /&gt;
|{{Up | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down | HPSS|HPSS}}&lt;br /&gt;
|{{Up | Login Nodes|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up | External Network|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | Balam|Balam}}&lt;br /&gt;
|{{Up | Cvmfs|Using_modules}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 01, 2025, 9:30 am:''' HPSS is down for scheduled maintenance.&lt;br /&gt;
&lt;br /&gt;
'''Thu Sep 18, 2025, 11:30 am:''' Open OnDemand is fully functional again.&lt;br /&gt;
&lt;br /&gt;
'''Wed Sep 17, 2025, 6:00 pm:''' Niagara is back up as well (including its Globus endpoint).  We are still working on the other systems.&lt;br /&gt;
&lt;br /&gt;
'''Wed Sep 17, 2025, 1:40 pm:''' Trillium is back up (except for its Globus endpoint).  We are working on the other systems still.&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep 16, 2025, 5:45 pm:''' Unfortunately, we cannot bring all systems up yet because we are waiting for a spare part for the cooling system that will be brought tomorrow.  In the meantime, we have managed to keep the Trillium login nodes up, but not other systems.&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep 16, 2025, from 7:00 am to 5:00 pm (EDT):''' The SciNet datacentre will undergo maintenance of several critical parts of the centre.  This will require a full shutdown of all SciNet systems (Trillium, Niagara, Mist, HPSS, Rouge, Teach, as well as hosted equipment). This will also be the time that the Mist cluster gets decommissioned. &lt;br /&gt;
&lt;br /&gt;
'''Fri Sep 12 22:03:17 EDT 2025:''' HPSS software and OS upgrades are finished.&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep  9 17:05:38 EDT 2025:''' Starting tomorrow, Sep/10, and for the following 3 days HPSS will be down for software and OS upgrades. We will strive to finish sooner, at which time we will make the system available to users again.&lt;br /&gt;
&lt;br /&gt;
===Mist/Niagara Decommissioning Schedule===&lt;br /&gt;
&lt;br /&gt;
'''September 4, 2025'''&lt;br /&gt;
* Niagara reduced to 863 compute nodes.&lt;br /&gt;
&lt;br /&gt;
'''September 9, 2025'''&lt;br /&gt;
* Niagara's Open OnDemand decommissioned.&lt;br /&gt;
* Brief data centre connection outage at 9 AM EDT&lt;br /&gt;
* Niagara reduced to 647 compute nodes at end of day.&lt;br /&gt;
&lt;br /&gt;
'''September 11, 2025'''&lt;br /&gt;
* Trillium Open OnDemand goes live.&lt;br /&gt;
&lt;br /&gt;
'''September 16, 2025'''&lt;br /&gt;
* '''Full-day data centre maintenance'''&lt;br /&gt;
* Niagara reduced to 431 compute nodes.&lt;br /&gt;
* Mist decommissioned.&lt;br /&gt;
&lt;br /&gt;
'''September 24, 2025'''&lt;br /&gt;
* Niagara reduced to 215 compute nodes.&lt;br /&gt;
&lt;br /&gt;
'''September 30, 2025'''&lt;br /&gt;
* Niagara decommissioned.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [https://docs.alliancecan.ca/wiki/Trillium_Quickstart Trillium Quickstart]&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Data_Management&amp;diff=7112</id>
		<title>Data Management</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Data_Management&amp;diff=7112"/>
		<updated>2025-09-24T21:24:14Z</updated>

		<summary type="html">&lt;p&gt;Pinto: /* Using mmputacl/mmgetacl */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;Understanding the various file systems, and how to use them properly, is critical to optimizing your workflow and being a good SciNet citizen.  This page describes the various Niagara file systems, and how to properly use them.&lt;br /&gt;
&lt;br /&gt;
=Performance=&lt;br /&gt;
The file systems on SciNet, with the exception of archive, are [http://en.wikipedia.org/wiki/IBM_General_Parallel_File_System GPFS], a high-performance file system which provides rapid reads and writes to large datasets in parallel from many nodes.  As a consequence of this design, however, '''the file system performs quite ''poorly'' at accessing data sets which consist of many, small files.'''  For instance, you will find that reading data in from one 16MB file is enormously faster than from 400 40KB files. Such small files are also quite wasteful of space, as the [https://en.wikipedia.org/wiki/Block_(data_storage) blocksize] for the scratch and project filesystems is 16MB. This is something you should keep in mind when planning your input/output strategy for runs on SciNet.&lt;br /&gt;
&lt;br /&gt;
For instance, if you run multi-process jobs, having each process write to a file of its own is not a scalable I/O solution. A directory gets locked by the first process accessing it, so all other processes have to wait for it. Not only does this make the code considerably less parallel, chances are the file system will time out while waiting for your other processes, causing your program to crash mysteriously.&lt;br /&gt;
Consider using MPI-IO (part of the MPI-2 standard), which allows files to be opened simultaneously by different processes, or using a dedicated process for I/O to which all other processes send their data, and which subsequently writes this data to a single file.&lt;br /&gt;
&lt;br /&gt;
= Purpose of each file system =&lt;br /&gt;
&lt;br /&gt;
Niagara, Mist and Rouge access several different file systems.  Note that not all of these file systems are available to all users.&lt;br /&gt;
&lt;br /&gt;
== /home ($HOME) ==&lt;br /&gt;
/home is intended primarily for individual user files, common software or small datasets used by others in the same group, provided it does not exceed individual quotas. Otherwise you may consider /scratch or /project. /home is read-only on the compute nodes and has daily backups.&lt;br /&gt;
&lt;br /&gt;
== /scratch ($SCRATCH) ==&lt;br /&gt;
/scratch is to be used primarily for temporary or transient files, checkpoint dumps, for all the results of your computations and simulations, or any material that can be easily recreated or reacquired. You may use scratch as well for any intermediate step in your workflow, provided it does not induce too much I/O (Input/Output) or too many small files on this disk-based storage pool, otherwise you should consider burst buffer (/bb). Once you have your final results, those that you want to keep for the long term, you may migrate them to /project or /archive. /scratch is purged on a regular basis and has no backups.&lt;br /&gt;
&lt;br /&gt;
== /project ($PROJECT) ==&lt;br /&gt;
/project is available to groups whose PIs have a storage allocation, and is intended for common group software, large static datasets, or any material very costly to be reacquired or re-generated by the group, associated with jobs currently running on niagara or mist. &amp;lt;font color=red&amp;gt;Material on /project is expected to remain relatively immutable over time.&amp;lt;/font&amp;gt; '''You should think of $PROJECT as if it was $STATIC.''' Temporary or transient files should be kept on scratch, not project. High data turnover induces stress and unnecessary consumption of tapes on the TSM backup system, long after this material has been deleted, due to backup retention policies and the extra versions kept of the same file. Even renaming top directories is enough to trick the system into assuming a completely new directory tree has been created, and the old one deleted, hence think carefully about your naming convention ahead of time, and stick with it. Users abusing the project file system and using it as scratch will be flagged and contacted. Note that on niagara /project is only available to groups with RAC allocation.&lt;br /&gt;
&lt;br /&gt;
== /bb ($BBUFFER) ==&lt;br /&gt;
/bb, the [[Burst_Buffer| burst buffer]], is a very fast, very high performance alternative to /scratch, made of solid-state drives (SSD). You may request this resource if you anticipate a lot of IOPs (Input/Output Operations) or when you notice your job is not performing well running on scratch or project because of I/O (Input/Output) bottlenecks. See [[Burst_Buffer|here]] for more details.&lt;br /&gt;
&lt;br /&gt;
== /archive ($ARCHIVE) ==&lt;br /&gt;
/archive is available to groups whose PIs have a storage allocation, and on niagara it is the 'nearline' storage pool. It's used if you want to temporarily offload semi-active material from any of the above file systems. In practice users will offload/recall material as part of their regular workflow, or when they hit their quotas on scratch or project. That material can remain on HPSS for a few months to a few years. Note that on niagara /archive is only available to groups with RAC allocation.&lt;br /&gt;
&lt;br /&gt;
== /dev/shm (RAM) ==&lt;br /&gt;
On the Niagara nodes a [[User_Ramdisk | ramdisk]] is available. [[User_Ramdisk | Ramdisk]] is much faster than real disk, and faster than Burst Buffer. Up to 70 percent of the RAM on the node (i.e. 202GB) may be used as a temporary '''local''' file system. This is particularly useful in the early stages of migrating desktop-computing codes to an HPC platform such as Niagara, especially those that use a lot of file I/O (Input/Output). Heavy file I/O is a bottleneck in large scale computing, especially on parallel file systems (such as the GPFS used on Niagara), since the files are synchronized across the whole network.&lt;br /&gt;
&lt;br /&gt;
== $SLURM_TMPDIR (RAM) ==&lt;br /&gt;
For consistency with the general purpose clusters (Cedar, Graham, Beluga and Narval), the environment variable $SLURM_TMPDIR will be set on Niagara compute jobs. Note that this variable will point to RAMdisk, not to local hard drives. The $SLURM_TMPDIR directory will be empty when your job starts and its contents get deleted after the job has finished. &lt;br /&gt;
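&lt;br /&gt;
A minimal sketch of staging files through this directory in a job script (the file names are placeholders):&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
# stage input into the ramdisk-backed temporary directory&lt;br /&gt;
cp $SCRATCH/input.dat $SLURM_TMPDIR/&lt;br /&gt;
cd $SLURM_TMPDIR&lt;br /&gt;
&lt;br /&gt;
# ... run your I/O-intensive program here ...&lt;br /&gt;
&lt;br /&gt;
# copy results back before the job ends, since $SLURM_TMPDIR is wiped&lt;br /&gt;
cp results.dat $SCRATCH/&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;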
&lt;br /&gt;
&amp;lt;!--&lt;br /&gt;
== Per-job temporary burst buffer space ($BB_JOB_DIR) ==&lt;br /&gt;
For every job on Niagara, the scheduler creates a temporary directory on the burst buffer called &amp;lt;tt&amp;gt;$BB_JOB_DIR&amp;lt;/tt&amp;gt;.  The &amp;lt;tt&amp;gt;$BB_JOB_DIR&amp;lt;/tt&amp;gt; directory will be empty when your jobs starts and its content gets deleted after the job has finished.  This directory is accessible from all nodes of a job.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;tt&amp;gt;$BB_JOB_DIR&amp;lt;/tt&amp;gt; is intended as a place for applications that generate many small temporary files or that create files that are accessed very frequently (i.e., high IOPS applications), but that do not fit in ramdisk.&lt;br /&gt;
&lt;br /&gt;
It should be emphasized that if the temporary files do fit in ramdisk, then that is generally a better location for them as both the bandwidth and iops of ramdisk far exceeds that of the burst buffer.  To use ramdisk, you can either directly access /dev/shm or use the environment variable &amp;lt;tt&amp;gt;$SLURM_TMPDIR&amp;lt;/tt&amp;gt;. &lt;br /&gt;
--&amp;gt;&lt;br /&gt;
Note that Niagara compute nodes have no local disks, so &amp;lt;tt&amp;gt;$SLURM_TMPDIR&amp;lt;/tt&amp;gt; lives in memory (ramdisk), in contrast to the general purpose systems of the {{Alliance}}, i.e., Cedar, Graham, Beluga and Narval, where this variable points to a directory on a node-local SSD.&lt;br /&gt;
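&lt;br /&gt;
For illustration, here is a minimal job-script sketch of staging temporary files through the ramdisk via &amp;lt;tt&amp;gt;$SLURM_TMPDIR&amp;lt;/tt&amp;gt;; the application name and file names are hypothetical placeholders, not a prescribed workflow:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
#SBATCH --nodes=1&lt;br /&gt;
#SBATCH --ntasks-per-node=40&lt;br /&gt;
#SBATCH --time=1:00:00&lt;br /&gt;
&lt;br /&gt;
# stage the input into the in-memory temporary directory&lt;br /&gt;
cp $SCRATCH/input.dat $SLURM_TMPDIR/&lt;br /&gt;
cd $SLURM_TMPDIR&lt;br /&gt;
&lt;br /&gt;
# run the (hypothetical) application on the local copy&lt;br /&gt;
$SCRATCH/my_app input.dat&lt;br /&gt;
&lt;br /&gt;
# copy results back to scratch; $SLURM_TMPDIR is wiped when the job ends&lt;br /&gt;
cp output.dat $SCRATCH/&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;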
&lt;br /&gt;
= Quotas and purging =&lt;br /&gt;
&lt;br /&gt;
You should familiarize yourself with the [[Data_Management#Purpose_of_each_file_system | various file systems]], what purpose they serve, and how to properly use them.  This table summarizes the various file systems.  &lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
! location&lt;br /&gt;
!colspan=&amp;quot;2&amp;quot;| quota&lt;br /&gt;
!align=&amp;quot;right&amp;quot;| block size&lt;br /&gt;
! expiration time&lt;br /&gt;
! backed up&lt;br /&gt;
! on login nodes&lt;br /&gt;
! on compute nodes&lt;br /&gt;
|-&lt;br /&gt;
| $HOME&lt;br /&gt;
|colspan=&amp;quot;2&amp;quot;| 100 GB / 250,000 files per user&lt;br /&gt;
|align=&amp;quot;right&amp;quot;| 1 MB&lt;br /&gt;
| &lt;br /&gt;
| yes&lt;br /&gt;
| yes&lt;br /&gt;
| read-only&lt;br /&gt;
|-&lt;br /&gt;
|rowspan=&amp;quot;6&amp;quot;| $SCRATCH&lt;br /&gt;
|colspan=&amp;quot;2&amp;quot;| 25 TB / 6,000,000 files per user provided group quota is not reached&lt;br /&gt;
|align=&amp;quot;right&amp;quot; rowspan=&amp;quot;6&amp;quot; | 16 MB&lt;br /&gt;
|rowspan=&amp;quot;6&amp;quot;| 2 months&lt;br /&gt;
|rowspan=&amp;quot;6&amp;quot;| no&lt;br /&gt;
|rowspan=&amp;quot;6&amp;quot;| yes&lt;br /&gt;
|rowspan=&amp;quot;6&amp;quot;| yes&lt;br /&gt;
|-&lt;br /&gt;
|align=&amp;quot;right&amp;quot;|groups of up to 4 users&lt;br /&gt;
|align=&amp;quot;right&amp;quot;|50TB for the group&lt;br /&gt;
|-&lt;br /&gt;
|align=&amp;quot;right&amp;quot;|groups of up to 11 users&lt;br /&gt;
|align=&amp;quot;right&amp;quot;|125TB for the group&lt;br /&gt;
|-&lt;br /&gt;
|align=&amp;quot;right&amp;quot;|groups of up to 28 users&lt;br /&gt;
|align=&amp;quot;right&amp;quot;|250TB for the group&lt;br /&gt;
|-&lt;br /&gt;
|align=&amp;quot;right&amp;quot;|groups of up to 60 users&lt;br /&gt;
|align=&amp;quot;right&amp;quot;|400TB for the group&lt;br /&gt;
|-&lt;br /&gt;
|align=&amp;quot;right&amp;quot;|groups with over 60 users&lt;br /&gt;
|align=&amp;quot;right&amp;quot;|500TB for the group&lt;br /&gt;
|-&lt;br /&gt;
| $PROJECT&lt;br /&gt;
|colspan=&amp;quot;2&amp;quot;| by group allocation&lt;br /&gt;
|align=&amp;quot;right&amp;quot;| 16 MB&lt;br /&gt;
| &lt;br /&gt;
| yes&lt;br /&gt;
| yes&lt;br /&gt;
| yes&lt;br /&gt;
|-&lt;br /&gt;
| $ARCHIVE&lt;br /&gt;
|colspan=&amp;quot;2&amp;quot;| by group allocation&lt;br /&gt;
|align=&amp;quot;right&amp;quot;| &lt;br /&gt;
|&lt;br /&gt;
| dual-copy&lt;br /&gt;
| no&lt;br /&gt;
| no&lt;br /&gt;
|-&lt;br /&gt;
| $BBUFFER&lt;br /&gt;
|colspan=&amp;quot;2&amp;quot;| 10 TB per user&lt;br /&gt;
|align=&amp;quot;right&amp;quot;| 1 MB&lt;br /&gt;
| very short&lt;br /&gt;
| no&lt;br /&gt;
| yes&lt;br /&gt;
| yes&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
&amp;lt;ul&amp;gt;&lt;br /&gt;
&amp;lt;li&amp;gt;[https://docs.scinet.utoronto.ca/images/9/9a/Inode_vs._Space_quota_-_v2x.pdf Inode vs. Space quota (PROJECT and SCRATCH)]&amp;lt;/li&amp;gt;&lt;br /&gt;
&amp;lt;li&amp;gt;[https://docs.scinet.utoronto.ca/images/0/0e/Scratch-quota.pdf dynamic quota per group (SCRATCH)]&amp;lt;/li&amp;gt;&lt;br /&gt;
&amp;lt;li&amp;gt;Compute nodes do not have local storage.&amp;lt;/li&amp;gt;&lt;br /&gt;
&amp;lt;li&amp;gt;Archive space is on [[HPSS|HPSS]], and is not accessible on the Niagara login, compute, or datamover nodes.&amp;lt;/li&amp;gt;&lt;br /&gt;
&amp;lt;li&amp;gt;Backup means a recent snapshot, not a replica of every version of the data that has ever existed.&amp;lt;/li&amp;gt;&lt;br /&gt;
&amp;lt;li&amp;gt;&amp;lt;p&amp;gt;&amp;lt;code&amp;gt;$BBUFFER&amp;lt;/code&amp;gt; stands for the [[Burst Buffer]], a faster parallel storage tier for temporary data.&amp;lt;/p&amp;gt;&amp;lt;/li&amp;gt;&amp;lt;/ul&amp;gt;&lt;br /&gt;
&lt;br /&gt;
== How much Disk Space Do I have left? ==&lt;br /&gt;
&lt;br /&gt;
The &amp;lt;tt&amp;gt;'''/scinet/niagara/bin/diskUsage'''&amp;lt;/tt&amp;gt; command, available on the login nodes and datamovers, provides information in a number of ways on the home, scratch, project and archive file systems. For instance, it can report how much disk space is being used by you and your group (with the -a option), how much your usage has changed over a certain period (&amp;quot;delta information&amp;quot;), and it can generate plots of your usage over time. Please see the usage help below for more details.&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
Usage: diskUsage [-h|-?] [-a] [-u &amp;lt;user&amp;gt;]&lt;br /&gt;
       -h|-?: help&lt;br /&gt;
       -a: list usages of all members on the group&lt;br /&gt;
       -u &amp;lt;user&amp;gt;: as another user on your group&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
Did you know that you can check which of your directories have more than 1000 files with the &amp;lt;tt&amp;gt;'''/scinet/niagara/bin/topUserDirOver1000list'''&amp;lt;/tt&amp;gt; command and which have more than 1GB of material with the &amp;lt;tt&amp;gt;'''/scinet/niagara/bin/topUserDirOver1GBlist'''&amp;lt;/tt&amp;gt; command?&lt;br /&gt;
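&lt;br /&gt;
For example, a quick check from a login node might look like this (output not shown, as it depends on your account):&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
# usage for yourself and all members of your group&lt;br /&gt;
/scinet/niagara/bin/diskUsage -a&lt;br /&gt;
&lt;br /&gt;
# directories with more than 1000 files, and with more than 1GB of material&lt;br /&gt;
/scinet/niagara/bin/topUserDirOver1000list&lt;br /&gt;
/scinet/niagara/bin/topUserDirOver1GBlist&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;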
&lt;br /&gt;
Note: information on usage and quota is only updated every 3 hours!&lt;br /&gt;
&lt;br /&gt;
== Scratch Disk Purging Policy ==&lt;br /&gt;
&lt;br /&gt;
In order to ensure that there is always sufficient space available for running jobs, '''on the 15th of each month we automatically delete files in /scratch that have not been accessed or modified for more than 2 months'''. Note that we recently changed the reference time to be ''MostRecentOf(atime,ctime)''. This policy is subject to revision depending on its effectiveness. More details about the purging process, and how users can check whether their files will be deleted, follow below. If you have files scheduled for deletion, you should move them to a more permanent location, such as your departmental server, your /project space, or HPSS (for PIs who have been allocated storage space on project or HPSS through the RAC).&lt;br /&gt;
&lt;br /&gt;
On the '''first''' of each month, a list of files scheduled for purging is produced, and an email notification is sent to each user on that list. Users also get a shell notification on every login to Niagara. Furthermore, at or about the '''12th''' of each month a second scan produces a more current assessment and another email notification is sent. This way users can double-check that they have indeed taken care of all the files they needed to relocate before the purging deadline. Those files will be automatically deleted on the '''15th''' of the same month unless they have been accessed or relocated in the interim. If you have files scheduled for deletion, they will be listed in a file in /scratch/t/todelete/current, which has your userid and groupid in the filename. For example, if user xxyz wants to check whether they have files scheduled for deletion, they can issue the following command on a system which mounts /scratch (e.g. a Niagara login node): '''ls -1 /scratch/t/todelete/current | grep xxyz'''. In the example below, the name of this file indicates that user xxyz is part of group abc, has 9,560 files scheduled for deletion, and that they take up 1.0TB of space:&lt;br /&gt;
&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
 [xxyz@nia-login03 ~]$ ls -1 /scratch/t/todelete/current |grep xxyz&lt;br /&gt;
 -rw-r----- 1 xxyz     root       1733059 Jan 17 11:46 3110001___xxyz_______abc_________1.00T_____9560files&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
The file itself contains a list of all files scheduled for deletion (in the last column) and can be viewed with standard commands like more/less/cat - e.g. &lt;br /&gt;
&lt;br /&gt;
'''more /scratch/t/todelete/current/3110001___xxyz_______abc_________1.00T_____9560files'''&lt;br /&gt;
&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
[_inode information__] [uidNumber] [__________atime__________]  [__________ctime__________] [size]    [_____file_path_____]&lt;br /&gt;
659919349 1268424780 0 -u 3199999 -a2019-26-11 08:49:27.745412 -c2019-26-11 08:49:27.739630 -s 234 -- /gpfs/fs0/scratch/...&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
Similarly, you can check all other users in your group by using the ls command with grep on your group name. For example, '''ls -1 /scratch/t/todelete/current | grep abc''' will list all other users in the same group as xxyz who have files to be purged on the 15th. Members of the same group have access to each other's contents.&lt;br /&gt;
&lt;br /&gt;
If you access/read/move/delete some of the candidates between the 1st and the 11th, there won't be any changes in the assessment until the 12th.&lt;br /&gt;
&lt;br /&gt;
If there was an assessment file up until the 11th, but no longer on the 12th, it's because you don't have anything to be purged anymore.&lt;br /&gt;
&lt;br /&gt;
If you access/read/move/delete some of the candidates after the 12th, then you have to check for yourself to confirm that your files won't be purged on the 15th (see below).&lt;br /&gt;
&lt;br /&gt;
'''NOTE:''' Preparing these assessments takes several hours. If you change the access/modification time of a file in the interim, that will not be detected until the next cycle. A way for you to get immediate feedback is to use the ''''ls -lu'''' command on the file to verify the atime and ''''ls -lc'''' for the ctime. If the file's atime/ctime has been updated in the meantime, come the purging date on the 15th it will no longer be deleted.&lt;br /&gt;
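&lt;br /&gt;
For instance (with a hypothetical file path), to verify the timestamps of a file you have just read or touched:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
# -u shows the access time (atime), -c shows the change time (ctime)&lt;br /&gt;
ls -lu $SCRATCH/somefile.dat&lt;br /&gt;
ls -lc $SCRATCH/somefile.dat&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;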
&lt;br /&gt;
&amp;lt;font color=red&amp;gt;&amp;lt;b&amp;gt;Purging on niagara is final. Purged files can not be recovered.&amp;lt;/b&amp;gt;&amp;lt;/font&amp;gt;&lt;br /&gt;
&lt;br /&gt;
= Handling large collections of files =&lt;br /&gt;
There are a number of situations in which dealing with a format that encapsulates and compacts very large numbers of small files can be more performant and convenient than handling those small files directly in the file system in expanded form. These alternative file formats also relieve a lot of stress on the backup system, since tape is not designed to deal with a lot of small files, and they help you avoid hitting your inode quota on the file systems.&lt;br /&gt;
&lt;br /&gt;
Please visit this page for more details:&lt;br /&gt;
&lt;br /&gt;
https://docs.alliancecan.ca/wiki/Handling_large_collections_of_files&lt;br /&gt;
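&lt;br /&gt;
As a simple sketch (directory and file names are hypothetical), a directory holding many small files can be bundled into a single compressed tar archive before being moved to project or HPSS, and extracted again only when needed:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
# bundle the many small files into one archive (one large file instead of thousands of inodes)&lt;br /&gt;
tar -czf results_run42.tar.gz results_run42/&lt;br /&gt;
&lt;br /&gt;
# later, list or extract the contents&lt;br /&gt;
tar -tzf results_run42.tar.gz&lt;br /&gt;
tar -xzf results_run42.tar.gz&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;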
&lt;br /&gt;
= Backup Policy =&lt;br /&gt;
Our backup is based on versions, not on date or age:&lt;br /&gt;
&lt;br /&gt;
* In general we keep the 2 most recent versions of a file, one per day, provided it still exists on the file system. Once a file is deleted, we expire the oldest version from the backup and keep the most recent one for 60 days. After that grace period, the remaining version is expired as well.&lt;br /&gt;
&lt;br /&gt;
* We may have 1 or 2 versions of a file on the backup for over 10 years, provided the original has never been deleted from the file system.&lt;br /&gt;
&lt;br /&gt;
* On the other hand, we may not have any backup at all if the user created the file in the morning and deleted it in the afternoon, since the backup system never had a chance to capture the file (it runs once a day, around midnight).&lt;br /&gt;
&lt;br /&gt;
* A user may also generate several versions of a file during the day; only the most recent version present when the backup runs will be captured for that day.&lt;br /&gt;
&lt;br /&gt;
= Moving data =&lt;br /&gt;
&lt;br /&gt;
Data for analysis and final results need to be moved to and from Niagara.  There are several ways to accomplish this.&lt;br /&gt;
&lt;br /&gt;
== Using rsync/scp ==&lt;br /&gt;
&lt;br /&gt;
Move amounts less than 10GB through the login nodes.&lt;br /&gt;
* Niagara login nodes and datamovers are visible from outside SciNet.&lt;br /&gt;
* Use scp or rsync to niagara.scinet.utoronto.ca or niagara.alliancecan.ca (no difference).&lt;br /&gt;
* This will time out for amounts larger than about 10GB.&lt;br /&gt;
&lt;br /&gt;
Move amounts larger than 10GB through the datamover nodes.&lt;br /&gt;
* From a Niagara login node, ssh to &amp;lt;code&amp;gt;nia-datamover1&amp;lt;/code&amp;gt; or  &amp;lt;code&amp;gt;nia-datamover2&amp;lt;/code&amp;gt;.  From there you can transfer to or from Niagara.&lt;br /&gt;
* Alternatively, you may also login/scp/rsync directly to the datamovers from the outside:&lt;br /&gt;
  nia-datamover1.scinet.utoronto.ca&lt;br /&gt;
  nia-datamover2.scinet.utoronto.ca&lt;br /&gt;
* If you do this often, consider using [https://docs.alliancecan.ca/wiki/Globus Globus], a web-based tool for data transfer.&lt;br /&gt;
&lt;br /&gt;
Note that you can only connect 4 times in a 2-minute window to the login nodes or the datamover nodes. So bundle your transfers, i.e., specify multiple files to be copied as arguments to scp or rsync, or copy whole directories, or zip/tar the files up and unzip/untar them on the other end.&lt;br /&gt;
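&lt;br /&gt;
For example (with hypothetical paths), a bundled transfer of a whole directory from your own machine through a datamover could look like:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
# one rsync invocation for the whole directory, rather than one connection per file&lt;br /&gt;
rsync -av my_results/ USERNAME@nia-datamover1.scinet.utoronto.ca:/scratch/g/group/USERNAME/my_results/&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;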
&lt;br /&gt;
If you want to transfer smaller files between other Digital Research Alliance of Canada clusters and Niagara, use the SSH agent forwarding flag &amp;lt;code&amp;gt;-A&amp;lt;/code&amp;gt; when logging into the other cluster. For example, to copy files to Niagara from Cedar, use:&lt;br /&gt;
&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
ssh -A USERNAME@cedar.alliancecan.ca&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
then perform the copy:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
[USERNAME@cedar5 ~]$ scp file USERNAME@niagara.alliancecan.ca:/scratch/g/group/USERNAME/&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
== Using Globus ==&lt;br /&gt;
Please check the comprehensive documentation [https://docs.alliancecan.ca/wiki/Globus here], and [[Globus | here]].&lt;br /&gt;
&lt;br /&gt;
The Niagara endpoint is &amp;quot;computecanada#niagara&amp;quot;.&lt;br /&gt;
&lt;br /&gt;
== Moving data to HPSS/Archive/Nearline ==&lt;br /&gt;
HPSS is for long-term storage of data.&lt;br /&gt;
* [[HPSS]] is a tape-based storage solution, and is SciNet's nearline (a.k.a. archive) facility.&lt;br /&gt;
* Storage space on HPSS is allocated through the annual [https://alliancecan.ca/en/services/advanced-research-computing/accessing-resources/resource-allocation-competition Resource Allocation Competition (RAC) administered by the Digital Research Alliance of Canada].&lt;br /&gt;
&lt;br /&gt;
=File/Ownership Management (ACL)=&lt;br /&gt;
* By default, at SciNet, users within the same group already have read permission to each other's files (but not write permission).&lt;br /&gt;
* You may use access control lists ('''ACLs''') to allow your supervisor (or another user within your group) to manage files for you (i.e., create, move, rename, delete), while still retaining your access and permissions as the original owner of the files/directories. You may also let individual users or whole other groups access (read, execute) your files using this same mechanism. &lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--&lt;br /&gt;
===Using  setfacl/getfacl===&lt;br /&gt;
* To allow [supervisor] to manage files in /project/g/group/[owner] using '''setfacl''' and '''getfacl''' commands, follow the 3-steps below as the [owner] account from a shell:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
1) $ /scinet/gpc/bin/setfacl -d -m user:[supervisor]:rwx /project/g/group/[owner]&lt;br /&gt;
   (every *new* file/directory inside [owner] will inherit [supervisor] ownership by default from now on)&lt;br /&gt;
&lt;br /&gt;
2) $ /scinet/gpc/bin/setfacl -d -m user:[owner]:rwx /project/g/group/[owner]&lt;br /&gt;
   (but will also inherit [owner] ownership, ie, ownership of both by default, for files/directories created by [supervisor])&lt;br /&gt;
&lt;br /&gt;
3) $ /scinet/gpc/bin/setfacl -Rm user:[supervisor]:rwx /project/g/group/[owner]&lt;br /&gt;
   (recursively modify all *existing* files/directories inside [owner] to also be rwx by [supervisor])&lt;br /&gt;
&lt;br /&gt;
   $ /scinet/gpc/bin/getfacl /project/g/group/[owner]&lt;br /&gt;
   (to determine the current ACL attributes)&lt;br /&gt;
&lt;br /&gt;
   $ /scinet/gpc/bin/setfacl -b /project/g/group/[owner]&lt;br /&gt;
   (to remove any previously set ACL)&lt;br /&gt;
&lt;br /&gt;
PS: on the datamovers getfacl, setfacl and chacl will be on your path&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
For more information on using [http://linux.die.net/man/1/setfacl &amp;lt;tt&amp;gt;setfacl&amp;lt;/tt&amp;gt;] or [http://linux.die.net/man/1/getfacl &amp;lt;tt&amp;gt;getfacl&amp;lt;/tt&amp;gt;] see their man pages.&lt;br /&gt;
&lt;br /&gt;
--&amp;gt;&lt;br /&gt;
==Using mmputacl/mmgetacl==&lt;br /&gt;
* You may use GPFS's native '''mmputacl''' and '''mmgetacl''' commands. The advantages are that you can set the &amp;quot;control&amp;quot; permission and that [http://publib.boulder.ibm.com/infocenter/clresctr/vxrx/index.jsp?topic=%2Fcom.ibm.cluster.gpfs.doc%2Fgpfs31%2Fbl1adm1160.html POSIX or NFS v4 style ACLs] are supported. You will first need to create a /tmp/supervisor.acl template with the following contents, as needed:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
user::rwxc&lt;br /&gt;
group::r-xc&lt;br /&gt;
other::----&lt;br /&gt;
mask::rwxc&lt;br /&gt;
user:[owner]:rwxc&lt;br /&gt;
user:[supervisor]:rwxc   #read and WRITE permissions to supervisor (may not be necessary)&lt;br /&gt;
group:[othergroup]:r-xc  #read ONLY permissions to members of other groups (recommended)&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
Then issue the following 2 commands:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
1) $ mmputacl -i /tmp/supervisor.acl /project/g/group/[owner]&lt;br /&gt;
2) $ mmputacl -d -i /tmp/supervisor.acl /project/g/group/[owner]&lt;br /&gt;
   (every *new* file/directory inside [owner] will inherit [supervisor] ownership by default as well as &lt;br /&gt;
   [owner] ownership, ie, ownership of both by default, for files/directories created by [supervisor])&lt;br /&gt;
&lt;br /&gt;
   $ mmgetacl /project/g/group/[owner]&lt;br /&gt;
   (to determine the current ACL attributes)&lt;br /&gt;
&lt;br /&gt;
   $ mmdelacl -d /project/g/group/[owner]&lt;br /&gt;
   (to remove any previously set ACL)&lt;br /&gt;
&lt;br /&gt;
   $ mmeditacl /project/g/group/[owner]&lt;br /&gt;
   (to create or change a GPFS access control list)&lt;br /&gt;
   (for this command to work set the EDITOR environment variable: export EDITOR=/usr/bin/vi)&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
If you want to apply ACL to a folder deep in the tree, as in /project/g/group/owner/dir1/subdir2/subdir3, you will need to also apply ACL to every individual path above the subdir3 level, as in:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
   $ mmputacl -i /tmp/supervisor.acl /project/g/group/owner&lt;br /&gt;
   $ mmputacl -i /tmp/supervisor.acl /project/g/group/owner/dir1&lt;br /&gt;
   $ mmputacl -i /tmp/supervisor.acl /project/g/group/owner/dir1/subdir2&lt;br /&gt;
   $ mmputacl -i /tmp/supervisor.acl /project/g/group/owner/dir1/subdir2/subdir3&lt;br /&gt;
&lt;br /&gt;
   $ mmputacl -d -i /tmp/supervisor.acl /project/g/group/owner/dir1/subdir2/subdir3&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
In addition, you'll need to ask your PI to apply ACL to the group level:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
   $ mmputacl -i /tmp/supervisor.acl /project/g/group&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
NOTES:&lt;br /&gt;
* There is no GPFS built-in command to recursively add or remove ACL attributes on existing files; you'll need to use the -i option as above for each file or directory individually (a minimal sketch using find is shown after these notes). [[Recursive_ACL_script | Here is a sample bash script you may use for that purpose]]&lt;br /&gt;
&lt;br /&gt;
* mmputacl will not overwrite the original Linux group permissions for a directory when copied to another directory that already has ACLs, hence the &amp;quot;#effective:r-x&amp;quot; note you may see from time to time with mmgetacl. If you want to give rwx permissions to everyone in your group, you should simply rely on the plain Unix 'chmod g+rwx' command. You may do that before or after copying the original material to another folder with the ACLs.&lt;br /&gt;
&lt;br /&gt;
* The only latitude you have is with the &amp;quot;w&amp;quot; permission: you may or may not want to let the collaborator/supervisor write to your folder. As for &amp;quot;r-xc&amp;quot;, you don't have a choice; this combination must always be applied. &lt;br /&gt;
&lt;br /&gt;
* In the case of PROJECT, your group's supervisor will need to set a proper ACL at the /project/G/GROUP level in order to let users from other groups access your files.&lt;br /&gt;
&lt;br /&gt;
* ACLs won't let you give away permissions to files or directories that do not belong to you.&lt;br /&gt;
&lt;br /&gt;
* We highly recommend that you never give write permission to other users on the top level of your home directory (/home/G/GROUP/[owner]), since that would seriously compromise your privacy and disable SSH key authentication, among other things. If necessary, create specific sub-directories under your home directory so that other users can manipulate/access files there.&lt;br /&gt;
&lt;br /&gt;
* Just a reminder: setfacl/getfacl only work on Cedar/Graham/Beluga, since those systems use Lustre. On Niagara you have to use the mm* commands specific to GPFS: mmputacl, mmgetacl, mmdelacl, mmeditacl. &lt;br /&gt;
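&lt;br /&gt;
As a minimal sketch of the recursion point made in the notes above (adapt the paths and ACL template to your own case; the linked script remains the recommended reference), &amp;lt;tt&amp;gt;find&amp;lt;/tt&amp;gt; can be used to apply &amp;lt;tt&amp;gt;mmputacl&amp;lt;/tt&amp;gt; to each existing file and directory individually:&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
# apply the access ACL and the default (inherited) ACL to every existing directory&lt;br /&gt;
find /project/g/group/[owner] -type d -exec mmputacl -i /tmp/supervisor.acl {} \; -exec mmputacl -d -i /tmp/supervisor.acl {} \;&lt;br /&gt;
&lt;br /&gt;
# apply the access ACL to every existing file&lt;br /&gt;
find /project/g/group/[owner] -type f -exec mmputacl -i /tmp/supervisor.acl {} \;&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;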
&lt;br /&gt;
For more information on using [https://www.ibm.com/support/knowledgecenter/SSFKCN_4.1.0/com.ibm.cluster.gpfs.v4r1.gpfs100.doc/bl1adm_mmputacl.htm &amp;lt;tt&amp;gt;mmputacl&amp;lt;/tt&amp;gt;] or [https://www.ibm.com/support/knowledgecenter/SSFKCN_4.1.0/com.ibm.cluster.gpfs.v4r1.gpfs100.doc/bl1adm_mmgetacl.htm &amp;lt;tt&amp;gt;mmgetacl&amp;lt;/tt&amp;gt;] see their man pages.&lt;br /&gt;
&lt;br /&gt;
==Recursive ACL script ==&lt;br /&gt;
You may use/adapt '''[[Recursive_ACL_script| this sample bash script]]''' to recursively add or remove ACL attributes using GPFS built-in commands.&lt;br /&gt;
&lt;br /&gt;
Courtesy of Agata Disks (http://csngwinfo.in2p3.fr/mediawiki/index.php/GPFS_ACL)&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7100</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7100"/>
		<updated>2025-09-22T18:59:35Z</updated>

		<summary type="html">&lt;p&gt;Pinto: &lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Up3 | Trillium|https://docs.alliancecan.ca/wiki/Trillium_Quickstart}}&lt;br /&gt;
|{{Partial | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Down | Teach|Teach}}&lt;br /&gt;
|{{Up | Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | OnDemand|Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Up | Scheduler|Niagara_Quickstart#Submitting_jobs}}&lt;br /&gt;
|{{Up | File system|Niagara_Quickstart#Storage_and_quotas}}&lt;br /&gt;
|{{Up | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | HPSS|HPSS}}&lt;br /&gt;
|{{Up | Login Nodes|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up | External Network|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | Balam|Balam}}&lt;br /&gt;
|{{Up | Cvmfs|Using_modules}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''Thu Sep 18, 2025, 11:30 am:''' Open OnDemand is fully functional again.&lt;br /&gt;
&lt;br /&gt;
'''Wed Sep 17, 2025, 6:00 pm:''' Niagara is back up as well (including its Globus endpoint).  We are still working on the other systems.&lt;br /&gt;
&lt;br /&gt;
'''Wed Sep 17, 2025, 1:40 pm:''' Trillium is back up (except for its Globus endpoint).  We are working on the other systems still.&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep 16, 2025, 5:45 pm:''' Unfortunately, we cannot bring all systems up yet because we are waiting for a spare part for the cooling system that will be brought tomorrow.  In the meantime, we have managed to keep the Trillium login nodes up, but not other systems.&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep 16, 2025, from 7:00 am to 5:00 pm (EDT):''' The SciNet datacentre will undergo maintenance of several critical parts of the centre.  This will require a full shutdown of all SciNet systems (Trillium, Niagara, Mist, HPSS, Rouge, Teach, as well as hosted equipment). This will also be the time that the Mist cluster gets decommissioned. &lt;br /&gt;
&lt;br /&gt;
'''Fri Sep 12 22:03:17 EDT 2025:''' HPSS software and OS upgrades are finished.&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep  9 17:05:38 EDT 2025:''' Starting tomorrow, Sep/10, and for the following 3 days HPSS will be down for software and OS upgrades. We will strive to finish sooner, at which time we will make the system available to users again.&lt;br /&gt;
&lt;br /&gt;
===Mist/Niagara Decommissioning Schedule===&lt;br /&gt;
&lt;br /&gt;
'''September 4, 2025'''&lt;br /&gt;
* Niagara reduced to 863 compute nodes.&lt;br /&gt;
&lt;br /&gt;
'''September 9, 2025'''&lt;br /&gt;
* Niagara's Open OnDemand decommissioned.&lt;br /&gt;
* Brief data centre connection outage at 9 AM EDT&lt;br /&gt;
* Niagara reduced to 647 compute nodes at end of day.&lt;br /&gt;
&lt;br /&gt;
'''September 11, 2025'''&lt;br /&gt;
* Trillium Open OnDemand goes live.&lt;br /&gt;
&lt;br /&gt;
'''September 16, 2025'''&lt;br /&gt;
* '''Full-day data centre maintenance'''&lt;br /&gt;
* Niagara reduced to 431 compute nodes.&lt;br /&gt;
* Mist decommissioned.&lt;br /&gt;
&lt;br /&gt;
'''September 24, 2025'''&lt;br /&gt;
* Niagara reduced to 215 compute nodes.&lt;br /&gt;
&lt;br /&gt;
'''September 30, 2025'''&lt;br /&gt;
* Niagara decommissioned.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [https://docs.alliancecan.ca/wiki/Trillium_Quickstart Trillium Quickstart]&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7082</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7082"/>
		<updated>2025-09-18T01:03:16Z</updated>

		<summary type="html">&lt;p&gt;Pinto: /* System Status */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Up3 | Trillium|https://docs.alliancecan.ca/wiki/Trillium_Quickstart}}&lt;br /&gt;
|{{Partial | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Down | Teach|Teach}}&lt;br /&gt;
|{{Down | Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down | OnDemand|Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Up | Scheduler|Niagara_Quickstart#Submitting_jobs}}&lt;br /&gt;
|{{Up | File system|Niagara_Quickstart#Storage_and_quotas}}&lt;br /&gt;
|{{Up | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up| HPSS|HPSS}}&lt;br /&gt;
|{{Up | Login Nodes|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up| External Network|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Partial| Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | Balam|Balam}}&lt;br /&gt;
|{{Up | Cvmfs|Using_modules}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''Wed Sep 17, 2025, 6:00 pm:''' Niagara is back up as well (including its Globus endpoint).  We are still working on the other systems.&lt;br /&gt;
&lt;br /&gt;
'''Wed Sep 17, 2025, 1:40 pm:''' Trillium is back up (except for its Globus endpoint).  We are working on the other systems still.&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep 16, 2025, 5:45 pm:''' Unfortunately, we cannot bring all systems up yet because we are waiting for a spare part for the cooling system that will be brought tomorrow.  In the meantime, we have managed to keep the Trillium login nodes up, but not other systems.&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep 16, 2025, from 7:00 am to 5:00 pm (EDT):''' The SciNet datacentre will undergo maintenance of several critical parts of the centre.  This will require a full shutdown of all SciNet systems (Trillium, Niagara, Mist, HPSS, Rouge, Teach, as well as hosted equipment). This will also be the time that the Mist cluster gets decommissioned. &lt;br /&gt;
&lt;br /&gt;
'''Fri Sep 12 22:03:17 EDT 2025:''' HPSS software and OS upgrades are finished.&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep  9 17:05:38 EDT 2025:''' Starting tomorrow, Sep/10, and for the following 3 days HPSS will be down for software and OS upgrades. We will strive to finish sooner, at which time we will make the system available to users again.&lt;br /&gt;
&lt;br /&gt;
===Mist/Niagara Decommissioning Schedule===&lt;br /&gt;
&lt;br /&gt;
'''September 4, 2025'''&lt;br /&gt;
* Niagara reduced to 863 compute nodes.&lt;br /&gt;
&lt;br /&gt;
'''September 9, 2025'''&lt;br /&gt;
* Niagara's Open OnDemand decommissioned.&lt;br /&gt;
* Brief data centre connection outage at 9 AM EDT&lt;br /&gt;
* Niagara reduced to 647 compute nodes at end of day.&lt;br /&gt;
&lt;br /&gt;
'''September 11, 2025'''&lt;br /&gt;
* Trillium Open OnDemand goes live.&lt;br /&gt;
&lt;br /&gt;
'''September 16, 2025'''&lt;br /&gt;
* '''Full-day data centre maintenance'''&lt;br /&gt;
* Niagara reduced to 431 compute nodes.&lt;br /&gt;
* Mist decommissioned.&lt;br /&gt;
&lt;br /&gt;
'''September 24, 2025'''&lt;br /&gt;
* Niagara reduced to 215 compute nodes.&lt;br /&gt;
&lt;br /&gt;
'''September 30, 2025'''&lt;br /&gt;
* Niagara decommissioned.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [https://docs.alliancecan.ca/wiki/Trillium_Quickstart Trillium Quickstart]&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7031</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7031"/>
		<updated>2025-09-17T00:32:37Z</updated>

		<summary type="html">&lt;p&gt;Pinto: &lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Down | Trillium|https://docs.alliancecan.ca/wiki/Trillium_Quickstart}}&lt;br /&gt;
|{{Down | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Down | Teach|Teach}}&lt;br /&gt;
|{{Down | Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down | OnDemand|Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Down | Scheduler|Niagara_Quickstart#Submitting_jobs}}&lt;br /&gt;
|{{Up   | File system|Niagara_Quickstart#Storage_and_quotas}}&lt;br /&gt;
|{{Up   | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   | HPSS|HPSS}}&lt;br /&gt;
|{{Up   | Login Nodes|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | External Network|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Partial| Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down | Balam|Balam}}&lt;br /&gt;
|{{Up   | Cvmfs|Using_modules}}&lt;br /&gt;
|{{Down | Mist|Mist}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep 16, 2025, 5:45 pm:''' Unfortunately, we cannot bring all systems up yet because we are waiting for a spare part for the cooling system that will be brought tomorrow.  In the meantime, we will try to bring Trillium up, but only the login nodes of other systems.&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep 16, 2025, from 7:00 am to 5:00 pm (EDT):''' The SciNet datacentre will undergo maintenance of several critical parts of the centre.  This will require a full shutdown of all SciNet systems (Trillium, Niagara, Mist, HPSS, Rouge, Teach, as well as hosted equipment). This will also be the time that the Mist cluster gets decommissioned. &lt;br /&gt;
&lt;br /&gt;
'''Fri Sep 12 22:03:17 EDT 2025:''' HPSS software and OS upgrades are finished.&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep  9 17:05:38 EDT 2025:''' Starting tomorrow, Sep/10, and for the following 3 days HPSS will be down for software and OS upgrades. We will strive to finish sooner, at which time we will make the system available to users again.&lt;br /&gt;
&lt;br /&gt;
===Mist/Niagara Decommissioning Schedule===&lt;br /&gt;
&lt;br /&gt;
'''September 4, 2025'''&lt;br /&gt;
* Niagara reduced to 863 compute nodes.&lt;br /&gt;
&lt;br /&gt;
'''September 9, 2025'''&lt;br /&gt;
* Niagara's Open OnDemand decommissioned.&lt;br /&gt;
* Brief data centre connection outage at 9 AM EDT&lt;br /&gt;
* Niagara reduced to 647 compute nodes at end of day.&lt;br /&gt;
&lt;br /&gt;
'''September 11, 2025'''&lt;br /&gt;
* Trillium Open OnDemand goes live.&lt;br /&gt;
&lt;br /&gt;
'''September 16, 2025'''&lt;br /&gt;
* '''Full-day data centre maintenance'''&lt;br /&gt;
* Niagara reduced to 431 compute nodes.&lt;br /&gt;
* Mist decommissioned.&lt;br /&gt;
&lt;br /&gt;
'''September 24, 2025'''&lt;br /&gt;
* Niagara reduced to 215 compute nodes.&lt;br /&gt;
&lt;br /&gt;
'''September 30, 2025'''&lt;br /&gt;
* Niagara decommissioned.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [https://docs.alliancecan.ca/wiki/Trillium_Quickstart Trillium Quickstart]&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Mist| Mist Power 9 GPU cluster]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7028</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=7028"/>
		<updated>2025-09-16T23:14:03Z</updated>

		<summary type="html">&lt;p&gt;Pinto: /* System Status */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Down3  | Trillium|https://docs.alliancecan.ca/wiki/Trillium_Quickstart}}&lt;br /&gt;
|{{Down | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Down | Teach|Teach}}&lt;br /&gt;
|{{Down | Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down | OnDemand|Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Down   | Scheduler|Niagara_Quickstart#Submitting_jobs}}&lt;br /&gt;
|{{Up   | File system|Niagara_Quickstart#Storage_and_quotas}}&lt;br /&gt;
|{{Up   | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   | HPSS|HPSS}}&lt;br /&gt;
|{{Up  | Login Nodes|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up  | External Network|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Down   | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down | Balam|Balam}}&lt;br /&gt;
|{{Up  | Cvmfs|Using_modules}}&lt;br /&gt;
|{{Down  | Mist|Mist}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep 16, 2025, 5:45 pm:''' Unfortunately, we cannot bring all systems up yet because we are waiting for a spare part for the cooling system that will be brought tomorrow.  In the meantime, we will try to bring Trillium up, but only the login nodes of other systems.&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep 16, 2025, from 7:00 am to 5:00 pm (EDT):''' The SciNet datacentre will undergo maintenance of several critical parts of the centre.  This will require a full shutdown of all SciNet systems (Trillium, Niagara, Mist, HPSS, Rouge, Teach, as well as hosted equipment). This will also be the time that the Mist cluster gets decommissioned. &lt;br /&gt;
&lt;br /&gt;
'''Fri Sep 12 22:03:17 EDT 2025:''' HPSS software and OS upgrades are finished.&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep  9 17:05:38 EDT 2025:''' Starting tomorrow, Sep/10, and for the following 3 days HPSS will be down for software and OS upgrades. We will strive to finish sooner, at which time we will make the system available to users again.&lt;br /&gt;
&lt;br /&gt;
===Mist/Niagara Decommissioning Schedule===&lt;br /&gt;
&lt;br /&gt;
'''September 4, 2025'''&lt;br /&gt;
* Niagara reduced to 863 compute nodes.&lt;br /&gt;
&lt;br /&gt;
'''September 9, 2025'''&lt;br /&gt;
* Niagara's Open OnDemand decommissioned.&lt;br /&gt;
* Brief data centre connection outage at 9 AM EDT&lt;br /&gt;
* Niagara reduced to 647 compute nodes at end of day.&lt;br /&gt;
&lt;br /&gt;
'''September 11, 2025'''&lt;br /&gt;
* Trillium Open OnDemand goes live.&lt;br /&gt;
&lt;br /&gt;
'''September 16, 2025'''&lt;br /&gt;
* '''Full-day data centre maintenance'''&lt;br /&gt;
* Niagara reduced to 431 compute nodes.&lt;br /&gt;
* Mist decommissioned.&lt;br /&gt;
&lt;br /&gt;
'''September 24, 2025'''&lt;br /&gt;
* Niagara reduced to 215 compute nodes.&lt;br /&gt;
&lt;br /&gt;
'''September 30, 2025'''&lt;br /&gt;
* Niagara decommissioned.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [https://docs.alliancecan.ca/wiki/Trillium_Quickstart Trillium Quickstart]&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Mist| Mist Power 9 GPU cluster]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6986</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6986"/>
		<updated>2025-09-16T11:20:06Z</updated>

		<summary type="html">&lt;p&gt;Pinto: &lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Up3  | Trillium|https://docs.alliancecan.ca/wiki/Trillium_Quickstart}}&lt;br /&gt;
|{{Partial | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Up   | Teach|Teach}}&lt;br /&gt;
|{{Up   | Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | OnDemand|Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Up   | Scheduler|Niagara_Quickstart#Submitting_jobs}}&lt;br /&gt;
|{{Up   | File system|Niagara_Quickstart#Storage_and_quotas}}&lt;br /&gt;
|{{Up   | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down   | HPSS|HPSS}}&lt;br /&gt;
|{{Up   | Login Nodes|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | External Network|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Down   | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   | Balam|Balam}}&lt;br /&gt;
|{{Up   | Cvmfs|Using_modules}}&lt;br /&gt;
|{{Partial | Mist|Mist}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep 16, 2025, from 7:00 am to 5:00 pm (EDT):''' The SciNet datacentre will undergo maintenance of several critical parts of the centre.  This will require a full shutdown of all SciNet systems (Trillium, Niagara, Mist, HPSS, Rouge, Teach, as well as hosted equipment). This will also be the time that the Mist cluster gets decommissioned. &lt;br /&gt;
&lt;br /&gt;
'''Fri Sep 12 22:03:17 EDT 2025:''' HPSS software and OS upgrades are finished.&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep  9 17:05:38 EDT 2025:''' Starting tomorrow, Sep/10, and for the following 3 days HPSS will be down for software and OS upgrades. We will strive to finish sooner, at which time we will make the system available to users again.&lt;br /&gt;
&lt;br /&gt;
===Mist/Niagara Decommissioning Schedule===&lt;br /&gt;
&lt;br /&gt;
'''September 4, 2025'''&lt;br /&gt;
* Niagara reduced to 863 compute nodes.&lt;br /&gt;
&lt;br /&gt;
'''September 9, 2025'''&lt;br /&gt;
* Niagara's Open OnDemand decommissioned.&lt;br /&gt;
* Brief data centre connection outage at 9 AM EDT&lt;br /&gt;
* Niagara reduced to 647 compute nodes at end of day.&lt;br /&gt;
&lt;br /&gt;
'''September 11, 2025'''&lt;br /&gt;
* Trillium Open OnDemand goes live.&lt;br /&gt;
&lt;br /&gt;
'''September 16, 2025'''&lt;br /&gt;
* '''Full-day data centre maintenance'''&lt;br /&gt;
* Niagara reduced to 431 compute nodes.&lt;br /&gt;
* Mist decommissioned.&lt;br /&gt;
&lt;br /&gt;
'''September 24, 2025'''&lt;br /&gt;
* Niagara reduced to 215 compute nodes.&lt;br /&gt;
&lt;br /&gt;
'''September 30, 2025'''&lt;br /&gt;
* Niagara decommissioned.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [https://docs.alliancecan.ca/wiki/Trillium_Quickstart Trillium Quickstart]&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Mist| Mist Power 9 GPU cluster]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6980</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6980"/>
		<updated>2025-09-13T02:04:49Z</updated>

		<summary type="html">&lt;p&gt;Pinto: &lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Up3  | Trillium|https://docs.alliancecan.ca/wiki/Trillium_Quickstart}}&lt;br /&gt;
|{{Partial | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Up   | Teach|Teach}}&lt;br /&gt;
|{{Up   | Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | OnDemand|Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Up   | Scheduler|Niagara_Quickstart#Submitting_jobs}}&lt;br /&gt;
|{{Up   | File system|Niagara_Quickstart#Storage_and_quotas}}&lt;br /&gt;
|{{Up   | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   | HPSS|HPSS}}&lt;br /&gt;
|{{Up   | Login Nodes|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | External Network|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   | Balam|Balam}}&lt;br /&gt;
|{{Up   | Cvmfs|Using_modules}}&lt;br /&gt;
|{{Partial | Mist|Mist}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''Fri Sep 12 22:03:17 EDT 2025:''' HPSS software and OS upgrades are finished.&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep  9 17:05:38 EDT 2025:''' Starting tomorrow, Sep/10, and for the following 3 days HPSS will be down for software and OS upgrades. We will strive to finish sooner, at which time we will make the system available to users again.&lt;br /&gt;
&lt;br /&gt;
===Mist/Niagara Decommissioning Schedule===&lt;br /&gt;
&lt;br /&gt;
'''September 4, 2025'''&lt;br /&gt;
* Niagara reduced to 863 compute nodes.&lt;br /&gt;
&lt;br /&gt;
'''September 9, 2025'''&lt;br /&gt;
* Niagara's Open OnDemand decommissioned.&lt;br /&gt;
* Brief data centre connection outage at 9 AM EDT&lt;br /&gt;
* Niagara reduced to 647 compute nodes at end of day.&lt;br /&gt;
&lt;br /&gt;
'''September 11, 2025'''&lt;br /&gt;
* Trillium Open OnDemand goes live.&lt;br /&gt;
&lt;br /&gt;
'''September 16, 2025'''&lt;br /&gt;
* '''Full-day data centre maintenance'''&lt;br /&gt;
* Niagara reduced to 431 compute nodes.&lt;br /&gt;
* Mist decommissioned.&lt;br /&gt;
&lt;br /&gt;
'''September 24, 2025'''&lt;br /&gt;
* Niagara reduced to 215 compute nodes.&lt;br /&gt;
&lt;br /&gt;
'''September 30, 2025'''&lt;br /&gt;
* Niagara decommissioned.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [https://docs.alliancecan.ca/wiki/Trillium_Quickstart Trillium Quickstart]&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Mist| Mist Power 9 GPU cluster]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6974</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6974"/>
		<updated>2025-09-09T21:09:16Z</updated>

		<summary type="html">&lt;p&gt;Pinto: &lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Up3  | Trillium|https://docs.alliancecan.ca/wiki/Trillium_Quickstart}}&lt;br /&gt;
|{{Partial | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Up   | Teach|Teach}}&lt;br /&gt;
|{{Up   | Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down | OnDemand|Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Up   | Scheduler|Niagara_Quickstart#Submitting_jobs}}&lt;br /&gt;
|{{Up   | File system|Niagara_Quickstart#Storage_and_quotas}}&lt;br /&gt;
|{{Up   | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   | HPSS|HPSS}}&lt;br /&gt;
|{{Up   | Login Nodes|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | External Network|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   | Balam|Balam}}&lt;br /&gt;
|{{Up   | Cvmfs|Using_modules}}&lt;br /&gt;
|{{Partial | Mist|Mist}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep  9 17:05:38 EDT 2025:''' Starting tomorrow, Sep/10, and for the following 3 days HPSS will be down for software and OS upgrades. We will strive to finish sooner, at which time we will make the system available to users again.&lt;br /&gt;
&lt;br /&gt;
===Mist/Niagara Decommissioning Schedule===&lt;br /&gt;
&lt;br /&gt;
'''September 4, 2025'''&lt;br /&gt;
* Niagara reduced to 863 compute nodes.&lt;br /&gt;
&lt;br /&gt;
'''September 9, 2025'''&lt;br /&gt;
* Niagara's Open OnDemand decommissioned.&lt;br /&gt;
* Brief data centre connection outage at 9 AM EDT&lt;br /&gt;
* Niagara reduced to 647 compute nodes at end of day.&lt;br /&gt;
&lt;br /&gt;
'''September 11, 2025'''&lt;br /&gt;
* Trillium Open OnDemand goes live.&lt;br /&gt;
&lt;br /&gt;
'''September 16, 2025'''&lt;br /&gt;
* '''Full-day data centre maintenance'''&lt;br /&gt;
* Niagara reduced to 431 compute nodes.&lt;br /&gt;
* Mist decommissioned.&lt;br /&gt;
&lt;br /&gt;
'''September 24, 2025'''&lt;br /&gt;
* Niagara reduced to 215 compute nodes.&lt;br /&gt;
&lt;br /&gt;
'''September 30, 2025'''&lt;br /&gt;
* Niagara decommissioned.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [https://docs.alliancecan.ca/wiki/Trillium_Quickstart Trillium Quickstart]&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Mist| Mist Power 9 GPU cluster]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Running_Serial_Jobs_on_Niagara&amp;diff=6962</id>
		<title>Running Serial Jobs on Niagara</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Running_Serial_Jobs_on_Niagara&amp;diff=6962"/>
		<updated>2025-09-05T18:04:59Z</updated>

		<summary type="html">&lt;p&gt;Pinto: &lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;===General considerations===&lt;br /&gt;
&lt;br /&gt;
====Use whole nodes...====&lt;br /&gt;
&lt;br /&gt;
When you submit a job to Niagara, it is run on one (or more than one) entire node - meaning that your job is occupying at least 40 processors for the duration of its run.  The SciNet systems are usually fully utilized, with many researchers waiting in the queue for computational resources, so we require that you make full use of the nodes that your job is allocated, so other researchers don't have to wait unnecessarily, and so that your jobs get as much work done as possible.&lt;br /&gt;
&lt;br /&gt;
Often, the best way to make full use of the node is to run one large parallel computation; but sometimes it is beneficial to run several serial codes at the same time.  On this page, we discuss ways to run suites of serial computations at once, as efficiently as possible, using the full resources of the node.&lt;br /&gt;
&lt;br /&gt;
====... memory permitting====&lt;br /&gt;
&lt;br /&gt;
When running multiple serial jobs on the same node, it is essential to have a good idea of how much memory the jobs will require. The Niagara compute nodes have about 200GB of memory available to user jobs running on the 40 cores, i.e., a bit over 4GB per core.  So the jobs also have to be  bunched in ways that will fit into 200GB.  If they use more than this, it will crash the node, inconveniencing you and other researchers waiting for that node.&lt;br /&gt;
&lt;br /&gt;
If 40 serial jobs would not fit within the 200GB limit -- i.e., each individual job requires significantly in excess of ~4GB -- then it is acceptable to run fewer jobs so that they do fit.  Note that in that case, the jobs are likely candidates for parallelization; you can contact us at [mailto:support@scinet.utoronto.ca &amp;lt;support@scinet.utoronto.ca&amp;gt;] to arrange a meeting with one of the technical analysts to help you with that.&lt;br /&gt;
&lt;br /&gt;
If the memory requirements allow it, you could actually run more than 40 jobs at the same time, up to 80, exploiting the [[Niagara_Quickstart#Hyperthreading:_Logical_CPUs_vs._cores | HyperThreading]] feature of the Intel CPUs.  It may seem counter-intuitive, but running 80 simultaneous jobs on 40 cores for certain types of tasks has increased some users' overall throughput.&lt;br /&gt;
&lt;br /&gt;
====Is your job really serial?====&lt;br /&gt;
&lt;br /&gt;
While your program may not be explicitly parallel, it may use some of Niagara's threaded libraries for numerical computations, which can make use of multiple processors.  In particular, Niagara's [[Python]] and [[R_Statistical_Package | R]] modules are compiled with aggressive optimization and using threaded numerical libraries which by default will make use of multiple cores for computations such as large matrix operations.  This can greatly speed up individual runs, but by less (usually much less) than a factor of 40.  If you do have many such threaded computations to do, you often get more calculations done per unit time if you turn off the threading and run multiple such computations at once (provided that fits in memory, as explained above).  You can turn off threading of these libraries with the shell script line &amp;lt;tt&amp;gt;export OMP_NUM_THREADS=1&amp;lt;/tt&amp;gt;; that line will be included in the scripts below.  &lt;br /&gt;
&lt;br /&gt;
If your calculations implicitly use threading, you may want to experiment to see what gives you the best performance - you may find that running 4 (or even 8) jobs with 10 threads each (&amp;lt;tt&amp;gt;OMP_NUM_THREADS=10&amp;lt;/tt&amp;gt;), or 2 jobs with 20 threads, gives better performance than 40 jobs with 1 thread (and almost certainly better than 1 job with 40 threads).  We'd encourage you to perform exactly such a scaling test to find the combination of number of threads per process and processes per job that maximizes your throughput; for a small up-front investment in time you may significantly speed up all the computations you need to do.&lt;br /&gt;
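&lt;br /&gt;
As a minimal sketch of such a scaling test (the directories &amp;lt;tt&amp;gt;threadedjobdir1&amp;lt;/tt&amp;gt;..&amp;lt;tt&amp;gt;threadedjobdir4&amp;lt;/tt&amp;gt; and the executable &amp;lt;tt&amp;gt;./dothreadedjob&amp;lt;/tt&amp;gt; are hypothetical placeholders), a job script could run 4 subjobs with 10 threads each instead of 40 subjobs with 1 thread, and you could compare the resulting throughput:&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
# SLURM submission script sketch for a thread-count scaling test on Niagara&lt;br /&gt;
#SBATCH --nodes=1&lt;br /&gt;
#SBATCH --ntasks-per-node=40&lt;br /&gt;
#SBATCH --time=1:00:00&lt;br /&gt;
#SBATCH --job-name threaded-scaling-test&lt;br /&gt;
&lt;br /&gt;
# 4 simultaneous subjobs, each using 10 threads (4 x 10 = 40 cores)&lt;br /&gt;
export OMP_NUM_THREADS=10&lt;br /&gt;
&lt;br /&gt;
(cd threadedjobdir1 &amp;amp;&amp;amp; ./dothreadedjob &amp;amp;&amp;amp; echo &amp;quot;subjob 1 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd threadedjobdir2 &amp;amp;&amp;amp; ./dothreadedjob &amp;amp;&amp;amp; echo &amp;quot;subjob 2 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd threadedjobdir3 &amp;amp;&amp;amp; ./dothreadedjob &amp;amp;&amp;amp; echo &amp;quot;subjob 3 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd threadedjobdir4 &amp;amp;&amp;amp; ./dothreadedjob &amp;amp;&amp;amp; echo &amp;quot;subjob 4 finished&amp;quot;) &amp;amp;&lt;br /&gt;
wait&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
Repeating this with different combinations (e.g. &amp;lt;tt&amp;gt;OMP_NUM_THREADS=20&amp;lt;/tt&amp;gt; and 2 subjobs, or &amp;lt;tt&amp;gt;OMP_NUM_THREADS=1&amp;lt;/tt&amp;gt; and 40 subjobs) and timing each run shows which combination gets the most work done per node-hour.&lt;br /&gt;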
&lt;br /&gt;
===Serial jobs of similar duration===&lt;br /&gt;
&lt;br /&gt;
The most straightforward way to run multiple serial jobs is to bunch the serial jobs in groups of 40 or more that will take roughly the same amount of time, and create a job script that looks a &lt;br /&gt;
bit like this&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
# SLURM submission script for multiple serial jobs on Niagara&lt;br /&gt;
#&lt;br /&gt;
#SBATCH --nodes=1&lt;br /&gt;
#SBATCH --ntasks-per-node=40&lt;br /&gt;
#SBATCH --time=1:00:00&lt;br /&gt;
#SBATCH --job-name serialx40&lt;br /&gt;
&lt;br /&gt;
# Turn off implicit threading in Python, R&lt;br /&gt;
export OMP_NUM_THREADS=1&lt;br /&gt;
&lt;br /&gt;
# EXECUTION COMMAND; ampersand off 40 jobs and wait&lt;br /&gt;
(cd serialjobdir01 &amp;amp;&amp;amp; ./doserialjob01 &amp;amp;&amp;amp; echo &amp;quot;job 01 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir02 &amp;amp;&amp;amp; ./doserialjob02 &amp;amp;&amp;amp; echo &amp;quot;job 02 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir03 &amp;amp;&amp;amp; ./doserialjob03 &amp;amp;&amp;amp; echo &amp;quot;job 03 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir04 &amp;amp;&amp;amp; ./doserialjob04 &amp;amp;&amp;amp; echo &amp;quot;job 04 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir05 &amp;amp;&amp;amp; ./doserialjob05 &amp;amp;&amp;amp; echo &amp;quot;job 05 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir06 &amp;amp;&amp;amp; ./doserialjob06 &amp;amp;&amp;amp; echo &amp;quot;job 06 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir07 &amp;amp;&amp;amp; ./doserialjob07 &amp;amp;&amp;amp; echo &amp;quot;job 07 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir08 &amp;amp;&amp;amp; ./doserialjob08 &amp;amp;&amp;amp; echo &amp;quot;job 08 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir09 &amp;amp;&amp;amp; ./doserialjob09 &amp;amp;&amp;amp; echo &amp;quot;job 09 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir10 &amp;amp;&amp;amp; ./doserialjob10 &amp;amp;&amp;amp; echo &amp;quot;job 10 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir11 &amp;amp;&amp;amp; ./doserialjob11 &amp;amp;&amp;amp; echo &amp;quot;job 11 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir12 &amp;amp;&amp;amp; ./doserialjob12 &amp;amp;&amp;amp; echo &amp;quot;job 12 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir13 &amp;amp;&amp;amp; ./doserialjob13 &amp;amp;&amp;amp; echo &amp;quot;job 13 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir14 &amp;amp;&amp;amp; ./doserialjob14 &amp;amp;&amp;amp; echo &amp;quot;job 14 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir15 &amp;amp;&amp;amp; ./doserialjob15 &amp;amp;&amp;amp; echo &amp;quot;job 15 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir16 &amp;amp;&amp;amp; ./doserialjob16 &amp;amp;&amp;amp; echo &amp;quot;job 16 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir17 &amp;amp;&amp;amp; ./doserialjob17 &amp;amp;&amp;amp; echo &amp;quot;job 17 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir18 &amp;amp;&amp;amp; ./doserialjob18 &amp;amp;&amp;amp; echo &amp;quot;job 18 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir19 &amp;amp;&amp;amp; ./doserialjob19 &amp;amp;&amp;amp; echo &amp;quot;job 19 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir20 &amp;amp;&amp;amp; ./doserialjob20 &amp;amp;&amp;amp; echo &amp;quot;job 20 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir21 &amp;amp;&amp;amp; ./doserialjob21 &amp;amp;&amp;amp; echo &amp;quot;job 21 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir22 &amp;amp;&amp;amp; ./doserialjob22 &amp;amp;&amp;amp; echo &amp;quot;job 22 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir23 &amp;amp;&amp;amp; ./doserialjob23 &amp;amp;&amp;amp; echo &amp;quot;job 23 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir24 &amp;amp;&amp;amp; ./doserialjob24 &amp;amp;&amp;amp; echo &amp;quot;job 24 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir25 &amp;amp;&amp;amp; ./doserialjob25 &amp;amp;&amp;amp; echo &amp;quot;job 25 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir26 &amp;amp;&amp;amp; ./doserialjob26 &amp;amp;&amp;amp; echo &amp;quot;job 26 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir27 &amp;amp;&amp;amp; ./doserialjob27 &amp;amp;&amp;amp; echo &amp;quot;job 27 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir28 &amp;amp;&amp;amp; ./doserialjob28 &amp;amp;&amp;amp; echo &amp;quot;job 28 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir29 &amp;amp;&amp;amp; ./doserialjob29 &amp;amp;&amp;amp; echo &amp;quot;job 29 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir30 &amp;amp;&amp;amp; ./doserialjob30 &amp;amp;&amp;amp; echo &amp;quot;job 30 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir31 &amp;amp;&amp;amp; ./doserialjob31 &amp;amp;&amp;amp; echo &amp;quot;job 31 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir32 &amp;amp;&amp;amp; ./doserialjob32 &amp;amp;&amp;amp; echo &amp;quot;job 32 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir33 &amp;amp;&amp;amp; ./doserialjob33 &amp;amp;&amp;amp; echo &amp;quot;job 33 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir34 &amp;amp;&amp;amp; ./doserialjob34 &amp;amp;&amp;amp; echo &amp;quot;job 34 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir35 &amp;amp;&amp;amp; ./doserialjob35 &amp;amp;&amp;amp; echo &amp;quot;job 35 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir36 &amp;amp;&amp;amp; ./doserialjob36 &amp;amp;&amp;amp; echo &amp;quot;job 36 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir37 &amp;amp;&amp;amp; ./doserialjob37 &amp;amp;&amp;amp; echo &amp;quot;job 37 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir38 &amp;amp;&amp;amp; ./doserialjob38 &amp;amp;&amp;amp; echo &amp;quot;job 38 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir39 &amp;amp;&amp;amp; ./doserialjob39 &amp;amp;&amp;amp; echo &amp;quot;job 39 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(cd serialjobdir40 &amp;amp;&amp;amp; ./doserialjob40 &amp;amp;&amp;amp; echo &amp;quot;job 40 finished&amp;quot;) &amp;amp;&lt;br /&gt;
wait&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
There are four important things to take note of here.  First, the &amp;lt;tt&amp;gt;'''wait'''&amp;lt;/tt&amp;gt;&lt;br /&gt;
command at the end is crucial; without it the job will terminate &lt;br /&gt;
immediately, killing the 40 programs you just started.&lt;br /&gt;
&lt;br /&gt;
Second is that every serial job is running in its own directory; this is important because writing to the same directory from different processes can lead to slow down because of directory locking.  How badly your job suffers from this depends on how much I/O your serial jobs are doing, but with 40 jobs on a node, it can quickly add up.&lt;br /&gt;
 &lt;br /&gt;
Third is that it is important to group the programs by how long they &lt;br /&gt;
will take.   If (say) &amp;lt;tt&amp;gt;dojob08&amp;lt;/tt&amp;gt; takes 2 hours and the rest only take 1, &lt;br /&gt;
then for one hour 39 of the 40 cores on that Niagara node are wasted; they are &lt;br /&gt;
sitting idle but are unavailable for other users, and the utilization of &lt;br /&gt;
this node over the whole run is only 51%.   This is the sort of thing &lt;br /&gt;
we'll notice, and users who don't make efficient use of the machine will &lt;br /&gt;
have their ability to use Niagara resources reduced.  If you have many serial jobs of varying length, &lt;br /&gt;
use the submission script to balance the computational load, as explained [[ #Serial jobs of varying duration | below]].&lt;br /&gt;
&lt;br /&gt;
Fourth, if memory requirements allow it, you should try to run more than 40 jobs at once, with a maximum of 80 jobs.&lt;br /&gt;
&lt;br /&gt;
Finally, writing out 80 cases (or even just 40, as in the above example) can become highly tedious, as can keeping track of all these subjobs. You should consider using a tool that automates this, like:&lt;br /&gt;
&lt;br /&gt;
===GNU Parallel===&lt;br /&gt;
&lt;br /&gt;
GNU parallel is a really nice tool written by Ole Tange to run multiple serial jobs in&lt;br /&gt;
parallel. It allows you to keep the processors on each 40-core node busy, if you provide enough jobs to do.&lt;br /&gt;
&lt;br /&gt;
GNU parallel is accessible on Niagara in the module&lt;br /&gt;
&amp;lt;tt&amp;gt;gnu-parallel&amp;lt;/tt&amp;gt;:&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
module load NiaEnv/2019b gnu-parallel&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
This also switches to the newer NiaEnv/2019b stack. The current version of the GNU parallel module in that stack is 20191122. In the older stack, NiaEnv/2018a (which is loaded by default), the version of GNU parallel is 20180322.  &lt;br /&gt;
&lt;br /&gt;
The command &amp;lt;tt&amp;gt;man parallel_tutorial&amp;lt;/tt&amp;gt; shows much of GNU parallel's functionality, while &amp;lt;tt&amp;gt;man parallel&amp;lt;/tt&amp;gt; gives the details of its syntax.&lt;br /&gt;
&lt;br /&gt;
The citation for GNU Parallel is: O. Tange (2018): GNU Parallel 2018, March 2018, https://doi.org/10.5281/zenodo.1146014.&lt;br /&gt;
&lt;br /&gt;
It is easiest to demonstrate the usage of GNU parallel by&lt;br /&gt;
examples.  First, suppose you have 80 jobs to do (similar to the above case), and that these jobs' durations vary quite a bit, but that the average job duration is around 5 hours. You could use the following script (but don't, see below):&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
# SLURM submission script for multiple serial jobs on Niagara&lt;br /&gt;
#&lt;br /&gt;
#SBATCH --nodes=1&lt;br /&gt;
#SBATCH --ntasks-per-node=40&lt;br /&gt;
#SBATCH --time=12:00:00&lt;br /&gt;
#SBATCH --job-name gnu-parallel-example&lt;br /&gt;
&lt;br /&gt;
# Turn off implicit threading in Python, R&lt;br /&gt;
export OMP_NUM_THREADS=1&lt;br /&gt;
&lt;br /&gt;
module load NiaEnv/2019b gnu-parallel&lt;br /&gt;
&lt;br /&gt;
# EXECUTION COMMAND - DON'T USE THIS ONE&lt;br /&gt;
parallel -j $SLURM_TASKS_PER_NODE &amp;lt;&amp;lt;EOF&lt;br /&gt;
  cd serialjobdir01 &amp;amp;&amp;amp; ./doserialjob01 &amp;amp;&amp;amp; echo &amp;quot;job 01 finished&amp;quot;&lt;br /&gt;
  cd serialjobdir02 &amp;amp;&amp;amp; ./doserialjob02 &amp;amp;&amp;amp; echo &amp;quot;job 02 finished&amp;quot;&lt;br /&gt;
  ...&lt;br /&gt;
  cd serialjobdir80 &amp;amp;&amp;amp; ./doserialjob80 &amp;amp;&amp;amp; echo &amp;quot;job 80 finished&amp;quot;&lt;br /&gt;
EOF&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
The &amp;lt;tt&amp;gt;-j $SLURM_TASKS_PER_NODE&amp;lt;/tt&amp;gt; parameter sets the number of jobs to run at the same time on each compute node; it uses the slurm value, which coincides with the &amp;lt;tt&amp;gt;--ntasks-per-node&amp;lt;/tt&amp;gt; parameter.  For gnu-parallel modules starting from version 20191122, if you omit the option &amp;lt;tt&amp;gt;-j $SLURM_TASKS_PER_NODE&amp;lt;/tt&amp;gt;, you will get as many simultaneous subjobs as the &amp;lt;tt&amp;gt;--ntasks-per-node&amp;lt;/tt&amp;gt; parameter you specify in the &amp;lt;tt&amp;gt;#SBATCH&amp;lt;/tt&amp;gt; part of the job script.&lt;br /&gt;
&lt;br /&gt;
Each line in the input given to parallel is a separate subjob, so 80 jobs are lined up to run. Initially, 40 subjobs are given to the 40 processors on the node. When one of the processors is done with its assigned subjob, it will get a next subjob instead of sitting idle until the other processors are done. While you would expect that on average this script should take 10 hours (each processor on average has to complete two jobs of 5 hours), there's a good chance that one of the processors gets two jobs that take more than 5 hours, so the job script requests 12 hours to be safe. How much more time you should ask for in practice depends on the spread in expected run times of the separate jobs.&lt;br /&gt;
&lt;br /&gt;
===Serial jobs of varying duration===&lt;br /&gt;
&lt;br /&gt;
The script above works, and can be extended to more subjobs, which is especially important if you have to do a lot (100+) of relatively short serial runs '''whose walltimes vary'''. But it gets tedious to write out all the cases.  You could write a script to automate this, but you do not have to, because GNU Parallel already has ways of generating subjobs, as we will show below.&lt;br /&gt;
&lt;br /&gt;
GNU Parallel can also keep track of which subjobs succeeded, failed, or never started. For that, you just add &amp;lt;tt&amp;gt;--joblog&amp;lt;/tt&amp;gt; to the parallel command followed by a filename to which to write the status:&lt;br /&gt;
&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot; line start=17&amp;gt;&lt;br /&gt;
# EXECUTION COMMAND - DON'T USE THIS ONE&lt;br /&gt;
parallel --joblog slurm-$SLURM_JOBID.log -j $SLURM_TASKS_PER_NODE &amp;lt;&amp;lt;EOF&lt;br /&gt;
  cd serialjobdir01 &amp;amp;&amp;amp; ./doserialjob01&lt;br /&gt;
  cd serialjobdir02 &amp;amp;&amp;amp; ./doserialjob02&lt;br /&gt;
  ...&lt;br /&gt;
  cd serialjobdir80 &amp;amp;&amp;amp; ./doserialjob80&lt;br /&gt;
EOF&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
In this case, the job log gets written to &amp;quot;slurm-$SLURM_JOBID.log&amp;quot;, where &amp;quot;&amp;lt;tt&amp;gt;$SLURM_JOBID&amp;lt;/tt&amp;gt;&amp;quot; will be replaced by the job number. The joblog can also be used to retry failed jobs (more below).&lt;br /&gt;
&lt;br /&gt;
Second, we can generate that set of subjobs instead of writing them out by hand. The following does the trick:&lt;br /&gt;
&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot; line start=17&amp;gt;&lt;br /&gt;
# EXECUTION COMMAND &lt;br /&gt;
parallel --joblog slurm-$SLURM_JOBID.log -j $SLURM_TASKS_PER_NODE &amp;quot;cd serialjobdir{} &amp;amp;&amp;amp; ./doserialjob{}&amp;quot; ::: {01..80}&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
This works as follows: &amp;lt;tt&amp;gt;&amp;quot;cd serialjobdir{} &amp;amp;&amp;amp; ./doserialjob{}&amp;quot;&amp;lt;/tt&amp;gt; is a template command, with placeholders {}.  &amp;lt;tt&amp;gt;:::&amp;lt;/tt&amp;gt; indicates that a set of parameters follows that are to be put into the template, thus generating the commands for each subjob. After the &amp;lt;tt&amp;gt;:::&amp;lt;/tt&amp;gt; we can place a space-separated set of arguments, which in this case are generated using the bash-specific construct for a range, &amp;lt;tt&amp;gt;{01..80}&amp;lt;/tt&amp;gt;.&lt;br /&gt;
&lt;br /&gt;
The final script now looks like this:&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
# SLURM submission script for multiple serial jobs on Niagara&lt;br /&gt;
#&lt;br /&gt;
#SBATCH --nodes=1&lt;br /&gt;
#SBATCH --ntasks-per-node=40&lt;br /&gt;
#SBATCH --time=12:00:00&lt;br /&gt;
#SBATCH --job-name gnu-parallel-example&lt;br /&gt;
&lt;br /&gt;
# Turn off implicit threading in Python, R&lt;br /&gt;
export OMP_NUM_THREADS=1&lt;br /&gt;
&lt;br /&gt;
module load NiaEnv/2019b gnu-parallel &lt;br /&gt;
&lt;br /&gt;
# EXECUTION COMMAND &lt;br /&gt;
parallel --joblog slurm-$SLURM_JOBID.log &amp;quot;cd serialjobdir{} &amp;amp;&amp;amp; ./doserialjob{}&amp;quot; ::: {01..80}&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
Notes:&lt;br /&gt;
* As before, GNU Parallel keeps 40 jobs running at a time, and if one finishes, starts the next. This is an easy way to do ''load balancing''.&lt;br /&gt;
* The &amp;lt;tt&amp;gt;-j&amp;lt;/tt&amp;gt; option was omitted, which works if using GNU Parallel module version 20191122 or higher. Otherwise, you need to add the &amp;lt;tt&amp;gt;-j $SLURM_TASKS_PER_NODE&amp;lt;/tt&amp;gt; flag to the parallel command. &lt;br /&gt;
* Doing many serial jobs often entails doing many disk reads and writes, which can be detrimental to the performance. In that case, running from the ramdisk may be an option.  &lt;br /&gt;
** When using a ramdisk, make sure you copy your results from the ramdisk back to the scratch after the runs, or when the job is killed because time has run out.&lt;br /&gt;
** More details on how to setup your script to use the ramdisk can be found on the [[User_Ramdisk | Ramdisk page]].&lt;br /&gt;
* This script optimizes resource utility, but can only use 1 node (40 cores) at a time. The next section addresses how to use more nodes.&lt;br /&gt;
* The option &amp;quot;--bar&amp;quot; can be nice for seeing progress when running interactively on the command line, but when running as a batch job you would not see this status bar. &lt;br /&gt;
* The &amp;lt;tt&amp;gt;--joblog&amp;lt;/tt&amp;gt; parameter also keeps track of failed or unfinished jobs, so you can later try to redo those with the same command, but with the option &amp;quot;--resume&amp;quot; added (see the sketch after this list).&lt;br /&gt;
* If it happens that your serial jobs are running out of memory and being killed by the system, the &amp;lt;tt&amp;gt;--memfree size&amp;lt;/tt&amp;gt; option can be helpful. It sets the minimum memory free when starting another job. On Niagara, &amp;lt;tt&amp;gt;size&amp;lt;/tt&amp;gt; could be set to &amp;lt;tt&amp;gt;15000M&amp;lt;/tt&amp;gt; for example to match what the RealMemory slurm configuration provides to users on compute nodes. You might have to adjust it if your jobs do make use of ramdisk to hold data for example.&lt;br /&gt;
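&lt;br /&gt;
As a minimal sketch of such a resubmission (assuming the original run wrote its joblog to &amp;lt;tt&amp;gt;slurm-1234567.log&amp;lt;/tt&amp;gt;, a hypothetical job ID), the execution command of a follow-up job could reuse the same joblog with &amp;lt;tt&amp;gt;--resume&amp;lt;/tt&amp;gt;:&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
# EXECUTION COMMAND - skips subjobs already recorded as done in the joblog&lt;br /&gt;
parallel --resume --joblog slurm-1234567.log &amp;quot;cd serialjobdir{} &amp;amp;&amp;amp; ./doserialjob{}&amp;quot; ::: {01..80}&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
With &amp;lt;tt&amp;gt;--resume&amp;lt;/tt&amp;gt;, subjobs already listed in the joblog are skipped and the remaining ones are run; &amp;lt;tt&amp;gt;--resume-failed&amp;lt;/tt&amp;gt; additionally reruns the subjobs that exited with a non-zero status.&lt;br /&gt;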
&lt;br /&gt;
===Version for more than 1 node at once===&lt;br /&gt;
&lt;br /&gt;
If you have many hundreds of serial jobs that you want to run concurrently and the nodes are available, then the approach above, while useful, would require tens of scripts to be submitted separately.  Alternatively, it is possible to request more than one node and to use the following routine to distribute your processes amongst the cores.&lt;br /&gt;
&lt;br /&gt;
Although it is not recommended to use GNU parallel modules before version 20191122, if you do, the script should look like this:&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
# SLURM submission script for multiple serial jobs on multiple Niagara nodes&lt;br /&gt;
#&lt;br /&gt;
#SBATCH --nodes=4&lt;br /&gt;
#SBATCH --ntasks-per-node=40&lt;br /&gt;
#SBATCH --time=12:00:00&lt;br /&gt;
#SBATCH --job-name gnu-parallel-multinode-example&lt;br /&gt;
 &lt;br /&gt;
# Turn off implicit threading in Python, R&lt;br /&gt;
export OMP_NUM_THREADS=1&lt;br /&gt;
 &lt;br /&gt;
module load gnu-parallel&lt;br /&gt;
&lt;br /&gt;
HOSTS=$(scontrol show hostnames $SLURM_NODELIST | tr '\n' ,)&lt;br /&gt;
NCORES=40&lt;br /&gt;
&lt;br /&gt;
parallel --env OMP_NUM_THREADS,PATH,LD_LIBRARY_PATH --joblog slurm-$SLURM_JOBID.log -j $NCORES -S $HOSTS --wd $PWD &amp;quot;cd serialjobdir{} &amp;amp;&amp;amp; ./doserialjob{}&amp;quot; ::: {001..800}&lt;br /&gt;
&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
* The parameter &amp;lt;tt&amp;gt;-S $HOSTS&amp;lt;/tt&amp;gt; divides the work over different nodes. &amp;lt;tt&amp;gt;$HOSTS&amp;lt;/tt&amp;gt; should be a comma separated list of the node names. These node names are also stored in &amp;lt;tt&amp;gt;$SLURM_NODELIST&amp;lt;/tt&amp;gt;, but with a syntax that allows for ranges, which GNU parallel does not understand. The &amp;lt;tt&amp;gt;scontrol&amp;lt;/tt&amp;gt; command in the script above fixes that.&lt;br /&gt;
* Alternatively, GNU Parallel can be passed a file with the list of nodes to which to ssh, using &amp;lt;tt&amp;gt;--sshloginfile&amp;lt;/tt&amp;gt;, but your jobs script would first have to create that file.&lt;br /&gt;
* The parameter &amp;lt;tt&amp;gt;-j $NCORES&amp;lt;/tt&amp;gt; tells &amp;lt;tt&amp;gt;parallel&amp;lt;/tt&amp;gt; to run 40 subjobs simultaneously on each of the nodes (note: do not use the similarly named variable $SLURM_TASKS_PER_NODE as its format is incompatible with GNU parallel).&lt;br /&gt;
* The parameter &amp;lt;tt&amp;gt;--wd $PWD&amp;lt;/tt&amp;gt; sets the working directory on the other nodes to the working directory on the first node.  &amp;lt;span style=&amp;quot;color:red;&amp;quot;&amp;gt;The &amp;lt;tt&amp;gt;--wd&amp;lt;/tt&amp;gt; argument is essential:&amp;lt;/span&amp;gt; without this, the run tries to start from the wrong place and will most likely fail.&lt;br /&gt;
* If you need an environment variable to be transferred from the job script to the remotely running subjobs, use the &amp;lt;tt&amp;gt;--env ENVIRONMENTVARIABLE&amp;lt;/tt&amp;gt; argument for the parallel command. The example above copies the most common variables that a remote command may need.&lt;br /&gt;
&lt;br /&gt;
Instead of this script, which uses an old version of GNU parallel, we recommend using the gnu-parallel modules from version 20191122 onward, available in NiaEnv/2019b, &lt;br /&gt;
which facilitate automatic distribution of subjobs over nodes.  For these newer versions of the module, the script can look like this:&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
# SLURM submission script for multiple serial jobs on multiple Niagara nodes&lt;br /&gt;
#&lt;br /&gt;
#SBATCH --nodes=4&lt;br /&gt;
#SBATCH --ntasks-per-node=40&lt;br /&gt;
#SBATCH --time=12:00:00&lt;br /&gt;
#SBATCH --job-name gnu-parallel-multinode-example&lt;br /&gt;
 &lt;br /&gt;
# Turn off implicit threading in Python, R&lt;br /&gt;
export OMP_NUM_THREADS=1&lt;br /&gt;
 &lt;br /&gt;
module load NiaEnv/2019b gnu-parallel&lt;br /&gt;
&lt;br /&gt;
parallel --joblog slurm-$SLURM_JOBID.log --wd $PWD &amp;quot;cd serialjobdir{} &amp;amp;&amp;amp; ./doserialjob{}&amp;quot; ::: {001..800}&lt;br /&gt;
&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
* The automatic detection of the number of tasks per node and of the node names that GNU Parallel can use happens through the environment variable &amp;lt;tt&amp;gt;$PARALLEL&amp;lt;/tt&amp;gt;, which is set by the gnu-parallel module.&lt;br /&gt;
* The parameter &amp;lt;tt&amp;gt;--wd $PWD&amp;lt;/tt&amp;gt; sets the working directory on the other nodes to the working directory on the first node.  &amp;lt;span style=&amp;quot;color:red;&amp;quot;&amp;gt;The &amp;lt;tt&amp;gt;--wd&amp;lt;/tt&amp;gt; argument is essential:&amp;lt;/span&amp;gt; without this, the run tries to start from the wrong place and will most likely fail.&lt;br /&gt;
* If you need an environment variable to be transferred from the job script to the remotely running subjobs, use the &amp;lt;tt&amp;gt;--env ENVIRONMENTVARIABLE&amp;lt;/tt&amp;gt; argument for the parallel command. The &amp;lt;tt&amp;gt;$PARALLEL&amp;lt;/tt&amp;gt; environment variable is already set to copy the most common variables &amp;lt;tt&amp;gt;$PATH, $LD_LIBRARY_PATH, and $OMP_NUM_THREADS&amp;lt;/tt&amp;gt;.&lt;br /&gt;
&lt;br /&gt;
Of course, this is just an example of what you could do with gnu parallel. How you set up your specific run depends on how each of the runs would be started. One could for instance also prepare a file of commands to run and make that the input to parallel as well.&lt;br /&gt;
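&lt;br /&gt;
As a minimal sketch of that approach (the file name &amp;lt;tt&amp;gt;commands.txt&amp;lt;/tt&amp;gt; and its contents are hypothetical), you would put one complete command per line in a text file and feed that file to parallel on standard input:&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
# commands.txt contains one subjob per line, e.g.:&lt;br /&gt;
#   cd serialjobdir01 &amp;amp;&amp;amp; ./doserialjob01&lt;br /&gt;
#   cd serialjobdir02 &amp;amp;&amp;amp; ./doserialjob02&lt;br /&gt;
parallel --joblog slurm-$SLURM_JOBID.log &amp;lt; commands.txt&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;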
&lt;br /&gt;
Submitting several bunches to single nodes, as in the section above, is a more fail-safe way of proceeding, since a node failure would only affect one of these bunches, rather than all runs. &lt;br /&gt;
&lt;br /&gt;
We reiterate that if memory requirements allow it, you should try to run more than 40 jobs at once, with a maximum of 80 jobs. The way the above example job scripts are written, you simply change &amp;lt;tt&amp;gt;#SBATCH --ntasks-per-node=40&amp;lt;/tt&amp;gt; to &amp;lt;tt&amp;gt;#SBATCH --ntasks-per-node=80&amp;lt;/tt&amp;gt; to accomplish this.&lt;br /&gt;
&lt;br /&gt;
===More on GNU parallel=== &lt;br /&gt;
* The documentation for GNU parallel can be found at http://www.gnu.org/software/parallel/ .&lt;br /&gt;
* After loading the &amp;lt;tt&amp;gt;gnu-parallel&amp;lt;/tt&amp;gt; module, type &amp;lt;tt&amp;gt;man parallel_tutorial&amp;lt;/tt&amp;gt;&lt;br /&gt;
* After loading the &amp;lt;tt&amp;gt;gnu-parallel&amp;lt;/tt&amp;gt; module, type &amp;lt;tt&amp;gt;man parallel&amp;lt;/tt&amp;gt;&amp;lt;br/&amp;gt;The man page can also be found at http://www.gnu.org/software/parallel/man.html .&lt;br /&gt;
* Watch a [https://www.youtube.com/watch?v=2tVpUfND3LI&amp;amp;t=1852s recording of a Compute Ontario Colloquium on GNU parallel].&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
===Bundling multiple MPI sub-jobs on Trillium===&lt;br /&gt;
&lt;br /&gt;
There may be cases in which your code won't scale well to all 192 cores on a Trillium node. In that case you may still fully utilize the node by running multiple sub-jobs in parallel, even if they are not serial:&lt;br /&gt;
&lt;br /&gt;
&amp;lt;source lang=&amp;quot;bash&amp;quot;&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
# SLURM submission script for multiple MPI jobs on a full Trillium node&lt;br /&gt;
#&lt;br /&gt;
#SBATCH --nodes=1&lt;br /&gt;
#SBATCH --ntasks-per-node=192&lt;br /&gt;
#SBATCH --time=12:00:00&lt;br /&gt;
#SBATCH --job-name gnu-parallel-MPI-example&lt;br /&gt;
&lt;br /&gt;
# EXECUTION COMMAND; ampersand off 3 sub-jobs, 64-tasks-each and wait&lt;br /&gt;
(mpirun -N 64 ./test_solver 3 &amp;amp;&amp;amp; echo &amp;quot;job 1 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(mpirun -N 64 ./test_solver 4 &amp;amp;&amp;amp; echo &amp;quot;job 2 finished&amp;quot;) &amp;amp;&lt;br /&gt;
(mpirun -N 64 ./test_solver 5 &amp;amp;&amp;amp; echo &amp;quot;job 3 finished&amp;quot;) &amp;amp;&lt;br /&gt;
wait&lt;br /&gt;
&lt;br /&gt;
&amp;lt;/source&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
===GNU Parallel Reference===&lt;br /&gt;
&lt;br /&gt;
The author of GNU parallel requests that, when using GNU parallel for a publication, you cite:&lt;br /&gt;
&lt;br /&gt;
* O. Tange (2018): GNU Parallel 2018, March 2018, https://doi.org/10.5281/zenodo.1146014.&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6554</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6554"/>
		<updated>2025-04-10T01:30:04Z</updated>

		<summary type="html">&lt;p&gt;Pinto: &lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Partial   | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Partial   | Mist|Mist}}&lt;br /&gt;
|{{Up   |Teach|Teach}}&lt;br /&gt;
|{{Up   |Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   | OnDemand|Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Up   | Scheduler|Niagara_Quickstart#Submitting_jobs}}&lt;br /&gt;
|{{Up   | File system|Niagara_Quickstart#Storage_and_quotas}}&lt;br /&gt;
|{{Up   | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   | HPSS|HPSS}}&lt;br /&gt;
|{{Up   | Login Nodes|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | External Network|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   |Balam|Balam}}&lt;br /&gt;
|{{Up   |CCEnv|Using_modules}}&lt;br /&gt;
|{{Down   | Jupyter Hub|Jupyter_Hub}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
'''April 9, 2025 9PM:''' HPSS is back online.&lt;br /&gt;
&lt;br /&gt;
'''April 8, 2025 9PM:''' HPSS is being reserved for OS updates on April 9 (Wednesday).&lt;br /&gt;
&lt;br /&gt;
'''April 1, 2025:''' The Jupyter Hub has been replaced by SciNet's [[Open OnDemand Quickstart|Open OnDemand service]].&lt;br /&gt;
&lt;br /&gt;
'''March 1, 2025:''' As of March 1st scratch purging is suspended until after Trillium comes online.&lt;br /&gt;
&lt;br /&gt;
'''January 6, 2025:''' As part of the installation of the new computing cluster Trillium, there is now a permanent reduction in computing capacity of Niagara to 50% and of Mist to 35%.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Mist| Mist Power 9 GPU cluster]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6551</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6551"/>
		<updated>2025-04-10T01:29:48Z</updated>

		<summary type="html">&lt;p&gt;Pinto: /* System Status */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Partial   | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Partial   | Mist|Mist}}&lt;br /&gt;
|{{Up   |Teach|Teach}}&lt;br /&gt;
|{{Up   |Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   | OnDemand|Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Up   | Scheduler|Niagara_Quickstart#Submitting_jobs}}&lt;br /&gt;
|{{Up   | File system|Niagara_Quickstart#Storage_and_quotas}}&lt;br /&gt;
|{{Up   | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down | HPSS|HPSS}}&lt;br /&gt;
|{{Up   | Login Nodes|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | External Network|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   |Balam|Balam}}&lt;br /&gt;
|{{Up   |CCEnv|Using_modules}}&lt;br /&gt;
|{{Down   | Jupyter Hub|Jupyter_Hub}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
'''April 9, 2025 9PM:''' HPSS is back online.&lt;br /&gt;
&lt;br /&gt;
'''April 8, 2025 9PM:''' HPSS is being reserved for OS updates on April 9 (Wednesday).&lt;br /&gt;
&lt;br /&gt;
'''April 1, 2025:''' The Jupyter Hub has been replaced by SciNet's [[Open OnDemand Quickstart|Open OnDemand service]].&lt;br /&gt;
&lt;br /&gt;
'''March 1, 2025:''' As of March 1st scratch purging is suspended until after Trillium comes online.&lt;br /&gt;
&lt;br /&gt;
'''January 6, 2025:''' As part of the installation of the new computing cluster Trillium, there is now a permanent reduction in computing capacity of Niagara to 50% and of Mist to 35%.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Mist| Mist Power 9 GPU cluster]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Previous_messages&amp;diff=6542</id>
		<title>Previous messages</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Previous_messages&amp;diff=6542"/>
		<updated>2025-04-08T21:05:40Z</updated>

		<summary type="html">&lt;p&gt;Pinto: &lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;&lt;br /&gt;
'''March 31, 2025 3:20 pm EDT:''' Mist login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
'''March 31, 2025 2:45 pm EDT:''' Mist login node is under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''March 28, 2025 3:00 pm - 4:00 pm EDT:''' A short maintenance was needed for the Teach compute nodes; you might have experienced some job scheduling delays on that cluster. &lt;br /&gt;
&lt;br /&gt;
'''March 20, 2025 10:30 am EDT:''' Teach compute nodes are back. &lt;br /&gt;
&lt;br /&gt;
'''March 19, 2025 11:00 pm EDT:''' Teach compute nodes are down again. &lt;br /&gt;
&lt;br /&gt;
'''March 19, 2025 5:15pm EDT:''' Maintenance of the cooling system was performed successfully. The cluster is back online.&lt;br /&gt;
&lt;br /&gt;
'''March 19, 2025 8:00 am - 5:00 pm EDT:''' Maintenance of the cooling system as well as preparations for the Trillium cluster will require a shutdown of the compute nodes of all SciNet systems (Niagara, Mist, Rouge, Balam, Teach, as well as hosted equipment). The login nodes, file systems and the HPSS system will remain available. The scheduler will hold jobs that are submitted until the maintenance has finished.&lt;br /&gt;
&lt;br /&gt;
'''March 18, 2025 10:00 am EDT:''' Teach compute nodes are back.&lt;br /&gt;
&lt;br /&gt;
'''March 17, 2025 10:00 pm EDT:''' Teach compute nodes are down. We are working on it. &lt;br /&gt;
&lt;br /&gt;
'''February 27, 2025 9:00 pm EST:''' Access to HPSS via Globus has been restored.&lt;br /&gt;
&lt;br /&gt;
'''February 25, 2025 2:30 pm EST:''' Access to HPSS via Globus is currently suspended (sorry, trivial upgrade has gone wrong).&lt;br /&gt;
&lt;br /&gt;
'''February 25, 2025 12:30 pm EST:''' Mist login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
'''February 25, 2025 11:50 am EST:''' Mist login node is under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''February 7, 2025 2:45 pm EST:''' Systems are back online.&lt;br /&gt;
&lt;br /&gt;
'''Fri Feb  7 01:04:33 EST 2025:''' There has been a problem with the water chiller. Automatic thermal shutdown of the compute nodes&lt;br /&gt;
&lt;br /&gt;
'''January 31, 2025 11:45 am EST:''' Power is back.&lt;br /&gt;
&lt;br /&gt;
'''January 31, 2025 6:00 am EST:''' Power outage in the data center. Many compute jobs will have stopped. Until power gets restored, parts of the systems are  running on the generator. No ETA on full power restoration.&lt;br /&gt;
 &lt;br /&gt;
'''January 28, 2025 9:30 pm EST:''' The CCEnv stack has been restored.&lt;br /&gt;
&lt;br /&gt;
'''January 28, 2025 5:00 pm EST:''' The CCEnv stack from cvmfs has issues and may not work reliably.&lt;br /&gt;
&lt;br /&gt;
'''January 23, 2025 9:00 am - 1:00 pm EST:''' Balam, Rouge and Neptune compute nodes will be shut down from 9 AM to 1 PM EST for additional electrical work.&lt;br /&gt;
&lt;br /&gt;
'''January 22, 2025 12:55 pm EST:''' Compute nodes are back online&lt;br /&gt;
&lt;br /&gt;
'''January 22, 2025 8:00 am - 5:00 pm EST:''' Preparations for the new system Trillium will require a shutdown of the compute nodes of all SciNet systems (Niagara, Mist, Rouge, Teach, as well as hosted equipment) from 8 AM to 5 PM EST. The login nodes, file systems and the HPSS system will remain available. The scheduler will hold jobs that are submitted until the maintenance has finished.&lt;br /&gt;
&lt;br /&gt;
'''January 9, 2025 11:00 am EST:''' Systems are back online&lt;br /&gt;
&lt;br /&gt;
'''January 8, 2025 10:34 pm EST:''' We had some sort of thermal event at the datacenter, and the clusters are down. We're still investigating&lt;br /&gt;
&lt;br /&gt;
'''January 8, 2025 08:00 am EST:''' Balam, Rouge and Neptune are shutdown for electrical upgrades&lt;br /&gt;
&lt;br /&gt;
'''January 6, 2025:''' As part of the installation of the new computing cluster Trillium, there will be a (permanent) reduction in computing capacity of Niagara and Mist. Only 50% of Niagara and 35% of Mist will remain active after January 6th.  The reduction will require Mist to be shutdown for a few hours on January 6th. Balam, Rouge and Neptune will be shutdown on Wednesday January 8th for the same reason.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
'''December 20, 2024 09:00 am EST:''' OpenOnDemand service will not be available on Dec 20 from 9 a.m. to 5 p.m. due to scheduled maintenance.&lt;br /&gt;
&lt;br /&gt;
'''December 16, 2024, 08:21 am EST:''' The Niagara scheduler has been restarted.&lt;br /&gt;
  &lt;br /&gt;
'''December 16, 2024, 00:04 am EST:''' The Niagara scheduler has an issue; we are investigating.&lt;br /&gt;
  &lt;br /&gt;
'''Fri Nov 8, 2024, 09:45 AM EST.''' Balam and Rouge schedulers are back online.&lt;br /&gt;
&lt;br /&gt;
'''Thu Nov 7, 2024, 10:30 PM EST.''' Most systems are up, except for the schedulers on Balam and Rouge (but even their login nodes are up), and a few 'neptune' niagara nodes.&lt;br /&gt;
&lt;br /&gt;
'''Thu Nov 7, 2024, 5:30 PM EST:''' Systems are being brought up, but not yet available for users.&lt;br /&gt;
&lt;br /&gt;
'''Downtime Announcement:''' On Thu Nov 7, 2024, all systems and storage located at the SciNet Datacenter (Niagara, Mist, HPSS, Rouge, Teach, JupyterHub, Balam) will be unavailable from 7 a.m. to 5 p.m. ET.&lt;br /&gt;
This outage is required to install new electrical equipment (UPS) for the upcoming systems refresh. The work is expected to be completed in one day.&lt;br /&gt;
The scheduler will hold jobs that cannot finish before the start of the shutdown. Users are encouraged to submit small and short jobs that can take advantage of this, as the scheduler may be able to fit these jobs in before the maintenance on otherwise idle nodes.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 24 15:05 EDT 2024''': Cooling pump motor has been replaced. All systems are back to normal.&lt;br /&gt;
&lt;br /&gt;
'''Tue Oct 22 16:35 EDT 2024''': The motor is scheduled for replacement on Thursday, Oct 24.&lt;br /&gt;
&lt;br /&gt;
'''Mon Oct 21 17:15 EDT 2024''': Compute nodes will remain down until we can replace the main cooling pump.  This may take several days.  Please see this page for updates.&lt;br /&gt;
&lt;br /&gt;
'''Mon Oct 21 12:15 EDT 2024''': Compute nodes have been shutdown due to a cooling system failure.&lt;br /&gt;
&lt;br /&gt;
'''Fri Oct 18 21:40 EDT 2024''': Systems are back to normal&lt;br /&gt;
&lt;br /&gt;
'''Fri Oct 18 21:15 EDT 2024''': We are experiencing technical difficulties, apparently caused by a glitch in the file systems.&lt;br /&gt;
&lt;br /&gt;
'''Tue Oct 1 10:45 EDT 2024''': The Jupyter Hub service will be rebooted today at around 11:00 am EDT for system upgrades. &lt;br /&gt;
&lt;br /&gt;
'''Tue Sep 3 07:00 EDT 2024''': Intermittent file system issues which may cause issues logging in.  We are in the process of resolving the issue.&lt;br /&gt;
&lt;br /&gt;
'''Sun Sep 1 00:01 - 04:00 EDT 2024''': Network maintenance may cause connection issues to the datacentre.&lt;br /&gt;
&lt;br /&gt;
'''Thu Aug 22 13:30:00 EDT 2024''': Chiller issue caused about 25% of Niagara compute nodes to go down; users should resubmit any affected jobs.&lt;br /&gt;
&lt;br /&gt;
'''Wed Aug 21 16:35:00 EDT 2024''': Maintenance finished; compute nodes are now available for user jobs.&lt;br /&gt;
&lt;br /&gt;
'''Wed Aug 21 7:00:00 EDT 2024''': Maintenance started.&lt;br /&gt;
&lt;br /&gt;
'''Sun Aug 18 19:15:00 EDT 2024''': Issues have been resolved.&lt;br /&gt;
&lt;br /&gt;
'''Sun Aug 18 14:30:00 EDT 2024''': Power issues seem to have brought compute nodes down, and compounded the file system issues we had earlier.&lt;br /&gt;
&lt;br /&gt;
'''Sun Aug 18 10:31:53 EDT 2024''': GPFS is back online, and seems to be holding&lt;br /&gt;
&lt;br /&gt;
'''Sun Aug 18 08:44:40 EDT 2024''': Sorry, problems with GPFS file systems are reoccurring. &lt;br /&gt;
&lt;br /&gt;
'''Sun Aug 18 07:59:02 EDT 2024''': GPFS file systems are back to normal. Many jobs have died and will need to be resubmitted.&lt;br /&gt;
&lt;br /&gt;
'''Sun Aug 18 06:39:12 EDT 2024''': Support staff detected the problem and started to work on the fix&lt;br /&gt;
&lt;br /&gt;
'''Sun Aug 18 00:53:52 EDT 2024''': GPFS file systems (home, scratch, project) started to show initial stages of problems&lt;br /&gt;
&lt;br /&gt;
'''August 21, 2024''': The annual cooling tower maintenance for the SciNet data centre will take place on August 21, 2024 from 7 a.m. EDT until the end of day. This maintenance requires a shutdown of the compute nodes of all SciNet systems (Niagara, Mist, Rouge, Teach, as well as hosted equipment). The login nodes, file systems and the HPSS system will remain available.&lt;br /&gt;
&lt;br /&gt;
The scheduler will hold jobs that cannot finish before the start of the shutdown. Users are encouraged to submit small and short jobs that can take advantage of this, as the scheduler may be able to fit these jobs in before the maintenance on otherwise idle nodes.&lt;br /&gt;
&lt;br /&gt;
'''Thursday, August 1, 10:00 PM EDT''' Filesystem problems resolved.&lt;br /&gt;
&lt;br /&gt;
'''Thursday, August 1, 9:30 PM EDT''' Filesystem problems preventing logins to the systems.  Working on it.&lt;br /&gt;
&lt;br /&gt;
'''Monday, July 22, 11:50 AM EDT''' Systems are back to normal&lt;br /&gt;
&lt;br /&gt;
'''Monday, July 22, 10:50 AM EDT''' Cooling problem has been fixed. Systems are coming up&lt;br /&gt;
&lt;br /&gt;
'''Monday, July 22, 10:20 AM EDT''' Compute nodes have been shutdown due to a cooling tower failure.&lt;br /&gt;
&lt;br /&gt;
'''Friday, July 19, 9:30 AM EDT''' CCEnv modules available on all login nodes again.&lt;br /&gt;
&lt;br /&gt;
'''Friday, July 19, 5:00 AM EDT''' Some login nodes do not have the CCEnv modules available.  We are working on a fix.&lt;br /&gt;
&lt;br /&gt;
'''Monday, Jun 3, 12:55 PM EDT''' All systems are recovered now.&lt;br /&gt;
&lt;br /&gt;
'''Monday, Jun 3, 10:50 AM EDT''' The file system issues affect all nodes, so all systems are inaccessible to users at the moment. No time estimate yet for when the systems may be back.&lt;br /&gt;
&lt;br /&gt;
'''Monday, Jun 3, 7:58 AM EDT''' Login issues for Niagara and Mist. There are file system issues as well. Investigating.&lt;br /&gt;
&lt;br /&gt;
'''Sunday, Jun 2, 12:00 PM EDT''' CCEnv modules missing, investigating.&lt;br /&gt;
&lt;br /&gt;
'''Wednesday May 29, 5:50 PM EDT''' Niagara compute nodes are up.  &lt;br /&gt;
&lt;br /&gt;
'''Wednesday May 29, 4:40 PM EDT''' Niagara compute nodes are coming up.  &lt;br /&gt;
&lt;br /&gt;
'''Wednesday May 29, 4 PM EDT''' Niagara login nodes and jupyterhub are up; file system is now accessible.  &lt;br /&gt;
&lt;br /&gt;
'''Wednesday May 29, 2 PM EDT''' Electricians are checking and testing all junction boxes and connectors under the raised floor for safety.  Some systems are expected to be back up later today (storage, login nodes), and compute systems will be powered up as soon as it is deemed safe.&lt;br /&gt;
&lt;br /&gt;
'''Tuesday May 28, 3 PM EDT''' Cleaning crews are at the datacentre, to pump the water and install dryers.  Once the floors are dry, we need to inspect all electrical boxes to ensure safety.  We do not expect to have a fully functional datacentre before Thursday, although we hope to be able to turn on the storage and login nodes sometime tomorrow, if circumstances permit.  Apologies, and thank you for your patience.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
'''Tuesday May 28, 7 AM EDT''' A water mains break outside our datacentre has caused extensive flooding, and all systems have been shut down preventatively. &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
'''Friday May 17, 10 PM EDT - Saturday May 18, 2 AM EDT:''' The external network will be unavailable for maintenance. Running and queued jobs on the systems will not be affected.&lt;br /&gt;
&lt;br /&gt;
'''Tuesday May 14, 6:45 PM EDT:''' All systems are recovered now.&lt;br /&gt;
&lt;br /&gt;
'''Tuesday May 14, 5 PM EDT:''' Power loss at the datacentre resulted in loss of cooling.  Systems are being restored.&lt;br /&gt;
&lt;br /&gt;
'''Friday May 3, 10 PM EDT - Saturday May 4, 2 AM EDT:''' The external network will be unavailable for maintenance. Running and queued jobs on the systems will not be affected.&lt;br /&gt;
&lt;br /&gt;
'''Tuesday April 17, 2024: 11:00 ''' The restart of the Niagara login nodes has been completed successfully.&lt;br /&gt;
&lt;br /&gt;
'''Tuesday April 17, 2024: 09:40 ''' Niagara login nodes will be rebooted &lt;br /&gt;
&lt;br /&gt;
'''Tuesday April 16, 2024: 12:45 ''' mist-login01  recovered now&lt;br /&gt;
&lt;br /&gt;
'''Tuesday April 16, 2024: 11:45 ''' mist-login01  will be unavailable due to maintenance from 12:15 to 12:45. Following the completion of maintenance, login access should be restored &lt;br /&gt;
&lt;br /&gt;
'''Monday April 15, 2024: 13:02 ''' Balam-login01 will be unavailable due to maintenance from 13:00 to 13:30. Following the completion of maintenance, login access should be restored and available once more. &lt;br /&gt;
&lt;br /&gt;
'''Monday March 18, 2024: 14:45 ''' File system issue resolved.  Users are advised to check if their running jobs were affected, and if so, to resubmit.&lt;br /&gt;
&lt;br /&gt;
'''Monday March 18, 2024: 13:02 ''' File system issues.  This affects the ability to log in. We are investigating.&lt;br /&gt;
&lt;br /&gt;
'''Monday March 11, 2024: 14:05 ''' All systems are recovered now&lt;br /&gt;
&lt;br /&gt;
'''Monday March 11, 2024:''' There will be a shutdown of the file system at SciNet for an emergency repair. As a consequence, the login nodes and compute nodes of all SciNet clusters using the file system (Niagara, Mist, Balam, Rouge, and Teach) will be down from 11 am EST until later in the afternoon. &lt;br /&gt;
&lt;br /&gt;
'''February 28, 2024, 16:30 PM EDT:''' All systems are recovered now.&lt;br /&gt;
&lt;br /&gt;
'''February 28, 2024, 1:00 PM EDT:''' A loop pump fault caused many compute nodes to overheat. If your jobs failed around this time, please resubmit. Once the root cause has been addressed, the cluster will be brought up completely. Please report issues to support@scinet.utoronto.ca.&lt;br /&gt;
&lt;br /&gt;
'''February 22, 2024, 5:45 PM EDT:''' Maintenance finished and system restored. Please report issues to support@scinet.utoronto.ca.&lt;br /&gt;
&lt;br /&gt;
'''February 21, 2024, 7:00 AM EDT:''' Maintenance starting.  Niagara login nodes and the file system are kept up as much as possible, but will be rebooted at some point.&lt;br /&gt;
&lt;br /&gt;
'''February 20, 2024, 3:45 PM EDT:''' Cooling tower has been restored, all systems are in production. &lt;br /&gt;
&lt;br /&gt;
'''February 20, 2024, 1:30 AM EDT:''' Cooling tower malfunction, all compute nodes are shutdown, the root cause will be addressed earliest in the morning.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;span style=&amp;quot;color:red&amp;quot;&amp;gt;&amp;lt;b&amp;gt; February 21 and 22, 2024: SciNet Data Centre Maintenance:&amp;lt;/b&amp;gt;&amp;lt;/span&amp;gt;&amp;lt;br/&amp;gt;&lt;br /&gt;
This annual winter maintenance involves a full data centre shutdown&lt;br /&gt;
starting at 7:00 am EST on Wednesday, February 21st.  None of the&lt;br /&gt;
SciNet systems (Niagara, Mist, Rouge, Teach, the file systems, as&lt;br /&gt;
well as hosted equipment) will be accessible.  All systems should be&lt;br /&gt;
fully available again in the late afternoon of the 22nd.&lt;br /&gt;
&lt;br /&gt;
The scheduler will hold jobs that cannot finish before the start of&lt;br /&gt;
the shutdown. Users are encouraged to submit small and short jobs&lt;br /&gt;
that can take advantage of this, as the scheduler may be able to fit&lt;br /&gt;
these jobs in before the maintenance on otherwise idle nodes.&lt;br /&gt;
&lt;br /&gt;
'''Mon January 29, 08:20    (EST):''' Access to Niagara login nodes restored (it was an internal routing issue).&lt;br /&gt;
&lt;br /&gt;
'''Mon January 29, 07:35    (EST):''' No access to Niagara login nodes.  We are investigating.  Use the Mist login to get access to SciNet systems.&lt;br /&gt;
&lt;br /&gt;
'''Wed January 24, 15:20    (EST):''' maintenance on rouge-login01 &lt;br /&gt;
&lt;br /&gt;
'''Wed January 24, 14:55    (EST):''' Rebooting rouge-login01 &lt;br /&gt;
&lt;br /&gt;
'''Tue January 23, 10:25 am (EST):''' Mist-login01 maintenance done &lt;br /&gt;
&lt;br /&gt;
'''Tue January 23, 10:10 am (EST):''' Rebooting Mist-login01 to deploy new image&lt;br /&gt;
&lt;br /&gt;
'''Tue January 22, 21:00 (EST):''' HPSS performance for hsi &amp;amp; htar clients is back to normal.&lt;br /&gt;
&lt;br /&gt;
'''Tue January 20, 11:50 am (EST):''' HPSS hsi/htar/VFS jobs will remain in the PD (pending) state in the queue over the weekend, so we may work on archive02/vfs02 on Monday and try to improve transfer performance. In the meantime you may use Globus (computecanada#hpss) if your workflow is suitable. &lt;br /&gt;
&lt;br /&gt;
'''Tue January 14, 13:20 (EST):''' The ongoing HPSS jobs from Friday finished earlier, so we restarted HPSS sooner and released the PD jobs on the queue. &lt;br /&gt;
&lt;br /&gt;
'''Tue January 12, 10:40 am (EST):''' We have applied some tweaks to the HPSS configuration to improve performance, but they won't take effect until we restart the services, which is scheduled for Monday morning. If over the weekend we notice that there are no HPSS jobs running on the queue, we may restart HPSS sooner. &lt;br /&gt;
&lt;br /&gt;
'''Tue January 09, 9:10 am (EST):''' Remaining cvmfs issues cleared.&lt;br /&gt;
&lt;br /&gt;
'''Tue January 09, 8:00 am (EST):''' We're investigating remaining issues with cvmfs access on login nodes.&lt;br /&gt;
&lt;br /&gt;
'''Mon January 08, 9:50 pm (EST):''' File systems are back to normal. Please resubmit your jobs.  &lt;br /&gt;
&lt;br /&gt;
'''Mon January 08, 9:10 pm (EST):''' We had a severe deadlock, and some disk volumes went down. The file systems are being recovered now. It could take another hour.&lt;br /&gt;
&lt;br /&gt;
'''Mon January 08, 7:20 pm (EST):''' We seem to have a problem with the file system, and are investigating.&lt;br /&gt;
&lt;br /&gt;
'''Tue December 19, 2:45 pm (EST):''' Compute nodes are available again.  &lt;br /&gt;
&lt;br /&gt;
'''Tue December 19, 12:09 pm (EST):''' Maintenance was postponed by one hour. &lt;br /&gt;
&lt;br /&gt;
'''Tue December 19, 12 noon - 1 pm (EST):''' There will be a shutdown of the compute nodes of the Niagara, Mist and Rouge clusters to allow for an emergency repair to the cooling tower.  Login nodes will remain available but no jobs will run during that time.  Updates will be posted here.&lt;br /&gt;
&lt;br /&gt;
'''Mon Dec  11 11:17:00 EST 2023:''' File systems recovered; Niagara and Mist are operational again.&lt;br /&gt;
&lt;br /&gt;
'''Mon Dec  11 7:51:00 EST 2023:''' Niagara's login nodes are being overwhelmed.  We are investigating. Likely file-system related.&lt;br /&gt;
&lt;br /&gt;
'''Thu Dec  6 10:01:24 EST 2023:''' Niagara's scheduler rebooting for security patches.&lt;br /&gt;
&lt;br /&gt;
'''Wed Dec  6 13:06:46 EST 2023:''' Endpoint computecanada#niagara transition from Globus GCSv4 to GCSv5 is completed. computecanada#niagara-GCSv4 has been deactivated&lt;br /&gt;
&lt;br /&gt;
'''Mon Dec  4 16:35:07 EST 2023:''' Endpoint computecanada#niagara has now been upgraded to Globus GCSv5. The old endpoint is still available as computecanada#niagara-GCSv4 on nia-datamover2, only until Wednesday, at which time we'll disable it as well.&lt;br /&gt;
&lt;br /&gt;
'''Mon Dec  4 11:54:49 EST 2023:''' The nia-datamover1 node will be offline this Monday afternoon for the Globus GCSv5 upgrade. Endpoint computecanada#niagara-GCSv4 will still be available via nia-datamover2&lt;br /&gt;
&lt;br /&gt;
'''Tue Nov 28 16:29:14 EST 2023:''' The computecanada#hpss Globus endpoint is now running GCSv5. We'll find a window of opportunity next week to upgrade computecanada#niagara to GCSv5 as well.&lt;br /&gt;
&lt;br /&gt;
'''Tue Nov 28 14:20:30 EST 2023:''' The computecanada#hpss Globus endpoint will be offline for the next few hours for the GCSv5 upgrade.&lt;br /&gt;
&lt;br /&gt;
'''Fri Nov 10, 2023, 6:00 PM EDT:''' The HPSS upgrade is finished. We didn't have time to update Globus to GCSv5, so we'll find a window of opportunity to do this next week. &lt;br /&gt;
&lt;br /&gt;
Please be advised that starting this &amp;lt;B&amp;gt;Friday morning, Nov/10, we'll be upgrading the HPSS system from version 8.3 to 9.3 and the HPSS Globus server from GCSv4 to GCSv5.&amp;lt;/B&amp;gt; If everything goes well, we expect to be back online by the end of the day.  &lt;br /&gt;
&lt;br /&gt;
'''Fri Nov 3, 2023, 12:20 PM EDT:''' The &amp;quot;Niagara at Scale&amp;quot; event has finished. Niagara is available again for all users.&lt;br /&gt;
&lt;br /&gt;
'''Tue Oct 31, 2023, 12 PM EDT:''' The &amp;quot;Niagara at Scale&amp;quot; event has started.&lt;br /&gt;
&lt;br /&gt;
'''Tue Oct 31, 2023, 12:00 PM EDT - Fri Nov 3, 2023, 12:00 PM EDT:''' Three-day reservation for the &amp;quot;Niagara at Scale&amp;quot; event. Only &amp;quot;Niagara at Scale&amp;quot; projects will run on the compute nodes. Users are encouraged to submit small and short jobs that could run before this event.  Throughout the event, users can still log in, access their data, and submit jobs, but these jobs will not run until after the event. Note that the debugjob queue will remain available to everyone as well.&lt;br /&gt;
&lt;br /&gt;
''' Thu Oct 27 11:16 AM EDT:''' SSH keys are gradually being restored, estimated to complete by 1:15 PM.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 27, 2023, 8:00 EDT:''' SSH key login authentication with CCDB keys is currently not working on many Alliance systems.  It appears this started last night. The issue is being investigated.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 26, 2023, 12:35 EDT:''' Mist login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 26, 2023, 12:05 EDT:''' Mist login node is under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''Wed Oct 25 7:54 PM EDT:''' slurm-*.out now outputs job info for last array job.&lt;br /&gt;
&lt;br /&gt;
'''Tue Oct 24 12:00 PM EDT:''' network appears to be up&lt;br /&gt;
&lt;br /&gt;
'''Tue Oct 24 11:32 AM EDT:''' campus network issues&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 05, 2023, 12:05 PM EDT:''' Niagara scheduler is back online.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 05, 2023, 11:50 AM EDT:''' Niagara scheduler is temporarily under maintenance for security updates. &lt;br /&gt;
&lt;br /&gt;
''' Thu Sep 28, 2023 11:00 am''': Niagara scheduler is back online.&lt;br /&gt;
&lt;br /&gt;
''' Thu Sep 28, 2023 10:50 am''': Niagara scheduler is temporarily under maintenance for security updates.&lt;br /&gt;
&lt;br /&gt;
''' Wed Sep 27, 2023 11:35 am''': Mist login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
''' Wed Sep 27, 2023 11:00 am''': Mist login node is under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
''' Wed Sep 6, 2023 11:30 am''': Mist login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
''' Wed Sep 6, 2023 11:00 am''': Mist login node is under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
''' Fri Aug 25, 2023 12:19 am''': A power glitch brought some compute nodes down; users should resubmit any affected jobs. The Jupyterhub had to be restarted for the same reason.&lt;br /&gt;
&lt;br /&gt;
''' Mon Aug 14, 2023 12:10 pm''': Network problems with Teach cluster are now resolved and it is again available for users.&lt;br /&gt;
&lt;br /&gt;
''' Mon Aug 14, 2023 11:40 am''': Network problems with Teach cluster. We are investigating.&lt;br /&gt;
&lt;br /&gt;
''' Thu Aug 3, 2023 11:10 am''': Mist login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
''' Thu Aug 3, 2023 10:40 am''': Mist login node is under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
''' Tue Aug 1, 2023 2:43 pm''': To recover from the power glitch, all servers on the SciNet jupyterhub have been stopped. Please restart your server if you need to.&lt;br /&gt;
&lt;br /&gt;
''' Tue Aug 1, 2023 11:46 am''': There was a power glitch at 11:46 Aug 1, 2023, causing a significant number of job losses. Please resubmit your jobs.&lt;br /&gt;
&lt;br /&gt;
'''Summer Maintenance Shutdown Finished''' -- Slurm upgraded to version 23.02.3.&lt;br /&gt;
Change to be aware of: SLURM_NTASKS is now only set if the --ntasks option is set.&lt;br /&gt;
Details at: https://bugs.schedmd.com/show_bug.cgi?id=17108&lt;br /&gt;
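&lt;br /&gt;
For illustration only, a minimal job-script sketch (node and task counts, and the application name, are placeholders) with --ntasks set explicitly so that SLURM_NTASKS remains defined after this upgrade:&lt;br /&gt;
&lt;br /&gt;
 #!/bin/bash&lt;br /&gt;
 #SBATCH --nodes=1&lt;br /&gt;
 #SBATCH --ntasks=40               # without this line, SLURM_NTASKS is no longer set in Slurm 23.02&lt;br /&gt;
 #SBATCH --time=01:00:00&lt;br /&gt;
 srun -n $SLURM_NTASKS ./my_app    # my_app is a placeholder for your executable&lt;br /&gt;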
&lt;br /&gt;
'''July 17 and 18, 2023''':  Announcement: Summer Maintenance Shutdown&lt;br /&gt;
&lt;br /&gt;
'''July 17th, 2023''' This maintenance involves a full data centre shutdown starting at 7:00 a.m. ET on Monday July 17th, 2023. None of the SciNet systems (Niagara, Mist, Rouge, Teach, the file systems, as well as hosted equipment) will be accessible.&lt;br /&gt;
&lt;br /&gt;
'''July 18th, 2023''' The shutdown will last until Tuesday July 18th, 2023. Systems are expected to be fully available in the evening of that day.&lt;br /&gt;
&lt;br /&gt;
The scheduler will hold jobs that cannot finish before the start of the shutdown. Users are encouraged to submit small and short jobs that can take advantage of this, as the scheduler may be able to fit these jobs in before the maintenance on otherwise idle nodes.&lt;br /&gt;
&lt;br /&gt;
'''Wed Jun 21 16:03:45 EDT 2023:''' Niagara's scheduler maintenance is finished.&lt;br /&gt;
&lt;br /&gt;
'''Wed Jun 21 15:42:00 EDT 2023:''' Niagara's scheduler is rebooting in 10 minutes for a short maintenance down time.&lt;br /&gt;
&lt;br /&gt;
'''Wed Jun 21, 2023, 11:25 AM EDT:''' Maintenance is finished and Teach cluster is accessible again.&lt;br /&gt;
&lt;br /&gt;
'''Tue Jun 20, 2023, 9:55 AM EDT:''' Teach cluster is powered off for maintenance.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;span style='color:red'&amp;gt;'''Tue June 20, 2023:'''  Announcement:&amp;lt;/span&amp;gt;&amp;lt;br&amp;gt; The Teach cluster at SciNet will undergo a maintenance shutdown starting on Tuesday June 20, 2023.  It will likely take a few days before it will be available again.  Check here for updates.&lt;br /&gt;
&lt;br /&gt;
'''Mon Jun 5, 2023, 2:35 PM EDT:''' All systems are operational again.&lt;br /&gt;
&lt;br /&gt;
'''Mon Jun 5, 2023, 11:55 AM EDT:''' There were issues with the cooling system.  The login nodes and file systems are now accessible again, but compute nodes are still off.&lt;br /&gt;
&lt;br /&gt;
'''Mon Jun 5, 2023, 6:55 AM EDT:''' Issues at the data center, we are investigating.&lt;br /&gt;
&lt;br /&gt;
'''Sat May 27, 2023, 21:00 EDT:''' We have been able to mitigate the UPS issue for now, until new parts arrive sometime during the week. The system will be accessible soon.&lt;br /&gt;
&lt;br /&gt;
'''Sat May 27, 2023, 16:00 EDT:''' We identified a UPS/power-related issue at the datacentre that is adversely affecting several components, in particular all file systems. Out of an abundance of caution we are shutting down the cluster until the UPS situation is resolved. Ongoing jobs will be canceled.&lt;br /&gt;
&lt;br /&gt;
'''Sat May 27, 2023, 11:18 AM EDT:''' Filesystem issues, investigating.&lt;br /&gt;
&lt;br /&gt;
'''Wed May 24, 2023, 11:40AM EDT:''' Mist login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
'''Wed May 24, 2023, 11:10 AM EDT:''' Mist login node is under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''Mon May 15, 2023, 10:08 AM EDT''' rebooting Mist-login node again &lt;br /&gt;
&lt;br /&gt;
'''Mon May 15, 2023, 09:15 AM EDT''' rebooting Mist-login node&lt;br /&gt;
&lt;br /&gt;
'''Mon May 01, 2023, 04:00 PM EDT''' done rebooting nia-login nodes&lt;br /&gt;
&lt;br /&gt;
'''Mon May 01, 2023, 12:00 PM EDT''' rebooting all nia-login nodes one at a time &lt;br /&gt;
&lt;br /&gt;
'''Mon May 01, 2023, 11:00 AM EDT''' nia-login07 is going to be rebooted.&lt;br /&gt;
&lt;br /&gt;
'''Thu Apr 20, 2023, 12:05 PM EDT:''' Mist login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
'''Thu Apr 20, 2023, 11:30 AM EDT:''' Mist login node is under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''Thu Apr 20, 2023, 8:27 AM EDT:''' Intermittent file system issues. We are investigating.  For now (10:45 AM), the file systems appear operational.&lt;br /&gt;
&lt;br /&gt;
'''Fri 14 Apr 2023 10:25 AM EDT:''' Switch problem resolved.&lt;br /&gt;
&lt;br /&gt;
'''Fri 14 Apr 2023 10:10 AM EDT:''' A switch problem is affecting access to certain equipment at the SciNet data center, including the Teach cluster.  Niagara and Mist are accessible.&lt;br /&gt;
&lt;br /&gt;
'''Fri 14 Apr 2023 09:55 AM EDT:''' SciNet Jupyter Hub maintenance is finished and it is again available for users.&lt;br /&gt;
&lt;br /&gt;
'''Fri 14 Apr 2023:''' SciNet Jupyter Hub will be restarted for system updates this morning.  Remember to save your notebooks!&lt;br /&gt;
&lt;br /&gt;
'''Thu 06 Apr 2023 03:40 PM EDT:''' Rouge cluster is accessible again.&lt;br /&gt;
&lt;br /&gt;
'''Thu 06 Apr 2023 01:00 PM EDT:''' Rouge cluster is temporarily inaccessible to users due to the electrical work.&lt;br /&gt;
&lt;br /&gt;
'''Sun 02 Apr 2023 03:37 AM EDT:''' IO/read errors on the file system seem to have been fixed. Please resubmit your jobs, and report any further problems to support. Burst Buffer will remain offline for now.&lt;br /&gt;
&lt;br /&gt;
'''Sun 02 Apr 2023 00:18 AM EDT:''' File System is back up, but there seems to be some IO/read errors. All running jobs have been killed. Please hold off on submitting jobs until further notice.&lt;br /&gt;
&lt;br /&gt;
'''Sat 01 Apr 2023 10:17 PM EDT:''' We are having issues with the File System. Currently investigating the cause.&lt;br /&gt;
&lt;br /&gt;
'''Fri 31 Mar 2023 11:00 PM EDT:''' Burst Buffer may be the culprit. We are investigating but may have to take Burst Buffer offline. &lt;br /&gt;
&lt;br /&gt;
'''Fri 31 Mar 2023 01:30 PM EDT:''' File system issues causing trouble for some jobs on Niagara and Mist&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
'''Tue 28 Mar 2023 11:05 AM EDT:''' Mist login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
'''Tue 28 Mar 2023 10:35 AM EDT:''' Mist login node is under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''Fri 17 Mar 2023 2:50 PM EDT:''' All systems online.&lt;br /&gt;
&lt;br /&gt;
'''Fri 17 Mar 2023 11:00 AM EDT:''' Problem identified and repaired. Starting to bring up systems, but not available to users yet.&lt;br /&gt;
&lt;br /&gt;
'''Fri 17 Mar 2023 09:15:39 EDT:''' Staff on site and ticket opened with cooling contractor, cause of failure unclear &lt;br /&gt;
&lt;br /&gt;
'''Fri 17 Mar 2023 01:47:43 EDT:''' Cooling system malfunction, datacentre is shut down. &lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Tue Feb 28, 16:40 EST:&amp;lt;/b&amp;gt; All systems are back online.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Tue Feb 28, 15:30 EST:&amp;lt;/b&amp;gt; Maintenance is complete. Bringing up systems.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Tue Feb 28, 7:10 AM EST:&amp;lt;/b&amp;gt; Maintenance shutdown resuming.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Mon Feb 27, 3:55 PM EST:&amp;lt;/b&amp;gt; Maintenance paused as parts were delayed. The maintenance will resume tomorrow (Tue Feb 28) at 7AM EST for about 5 hours.  In the meantime, the login nodes of the systems will be brought online.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Mon Feb 27, 7:20 AM EST:&amp;lt;/b&amp;gt; Maintenance shutdown started.&lt;br /&gt;
 &lt;br /&gt;
&amp;lt;span style=&amp;quot;color:red&amp;quot;&amp;gt;&amp;lt;b&amp;gt; February 27 and 28, 2023: SciNet Data Centre Maintenance:&amp;lt;/b&amp;gt;&amp;lt;/span&amp;gt;&amp;lt;br/&amp;gt;&lt;br /&gt;
This annual winter maintenance involves a full data centre shutdown&lt;br /&gt;
starting at 7:00 a.m. EST on Monday, February 27. None of the SciNet&lt;br /&gt;
systems (Niagara, Mist, Rouge, Teach, the file systems, as well as&lt;br /&gt;
hosted equipment) will be accessible.&lt;br /&gt;
&lt;br /&gt;
On the second day of the maintenance, Niagara, Mist, and their file&lt;br /&gt;
systems are expected to become partially available for users.  All&lt;br /&gt;
systems should be fully available in the evening of the 28th.&lt;br /&gt;
&lt;br /&gt;
The scheduler will hold jobs that cannot finish before the start of&lt;br /&gt;
the shutdown. Users are encouraged to submit small and short jobs&lt;br /&gt;
that can take advantage of this, as the scheduler may be able to fit&lt;br /&gt;
these jobs in before the maintenance on otherwise idle nodes.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Feb 17, 2023, 11:15 PM EST:&amp;lt;/b&amp;gt; File system issues on Teach fixed and Teach is accessible again. Note that the file system of Teach is not very good at handling many remote vscode connections.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Feb 17, 2023, 11:02 PM EST:&amp;lt;/b&amp;gt; File system issues on Teach.  We are working on a fix.  &lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Sun Feb 12, 2023, 3:05 PM EST&amp;lt;/b&amp;gt; All systems are back online.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Sun Feb 12, 2023, 2:10 PM EST&amp;lt;/b&amp;gt; Power restored, clusters are being started.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Sat Feb 11, 2023, 2:35 PM EST&amp;lt;/b&amp;gt; Power interruption started. All compute nodes will be down, likely until Sunday &lt;br /&gt;
afternoon.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Sat Feb 11, 2023, 1:20 PM EST&amp;lt;/b&amp;gt; There is to be an emergency power repair on the adjacent street. The datacentre will be &lt;br /&gt;
switching over to generator. All compute nodes will be down.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Fri Feb 10, 2023, 10:55 AM EST&amp;lt;/b&amp;gt; All systems are back online.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Fri Feb 10, 2023, 10:00 AM EST&amp;lt;/b&amp;gt; Cooling issue resolved, cluster is being started.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Wed Jan 25, 2023, 02:15 PM EST&amp;lt;/b&amp;gt; Mist login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Wed Jan 25, 2023, 10:30 AM EST&amp;lt;/b&amp;gt; Mist login node is under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Mon Jan 23, 2023, around 7-8 AM EST&amp;lt;/b&amp;gt; Intermittent file system issues may have killed your job. Users are advised to resubmit.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Sat Jan 21, 2023, 00:50 EST&amp;lt;/b&amp;gt; Niagara, Mist, Rouge and the filesystems are up&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Fri Jan 20, 2023, 11:19 PM EST&amp;lt;/b&amp;gt; Systems are coming up. We have determined that there was a general power glitch in the area of our datacentre. The power has been fully restored.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Fri Jan 20, 2023, 10:34 PM EST&amp;lt;/b&amp;gt; Cooling is back. Systems are slowly coming up.  &lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Fri Jan 20, 2023, 8:20 PM EST&amp;lt;/b&amp;gt; A cooling failure at the data center, possibly due to a power glitch. We are investigating.  &lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Thu Jan 12, 2023, 9:30 AM EST&amp;lt;/b&amp;gt; File system is experiencing issues. Issues have stabilized, but jobs running around this time may have been affected.&lt;br /&gt;
&lt;br /&gt;
'''Wed Dec 21, 2022, 12:00 PM: ''' Please note that SciNet is on vacation, together with the University of Toronto. Full service will resume on Jan 2, 2023. We will endeavour to keep systems running, and answer tickets, on a best-effort basis.  Happy Holidays!!!&lt;br /&gt;
&lt;br /&gt;
'''Fri Dec 16, 2022, 2:19 PM: ''' City power glitch caused all compute nodes to reboot. Please resubmit your jobs.&lt;br /&gt;
&lt;br /&gt;
'''Mon Dec 12, 2022, 9:30 AM - 11:30:''' File system issues caused login issues and may have affected running jobs.  System back to normal now, but users may want to check any jobs they had running. &lt;br /&gt;
&lt;br /&gt;
'''Wed Dec 7, 2022, 11:40 AM EST:''' Systems are being brought back online.&lt;br /&gt;
&lt;br /&gt;
'''Wed Dec 7, 2022, 09:00 AM EST:''' Maintenance is underway.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;span style='color:red'&amp;gt;&amp;lt;b&amp;gt;Announcement:&amp;lt;/b&amp;gt;&amp;lt;/span&amp;gt;&lt;br /&gt;
&lt;br /&gt;
On '''Wednesday December 7th, 2022''', the file systems of the SciNet's systems, Niagara, Mist, HPSS, Teach cluster, will undergo maintenance from 9:00 am EST.  During the maintenance, there will be no access to any of these systems, as it requires all file system operations to have stopped.  The maintenance should take about 1 hour, and all systems are expected to become available again later that morning.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
'''Wed Nov 30, 2022, 2:45 PM EST:''' Mist login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
'''Wed Nov 30, 2022, 2:15 PM EST:''' Mist login node is under maintenance and temporarily inaccessible to users. &lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 20, 2022, 6:00 PM EDT:''' Systems are back online &lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 20, 2022, 09:40 AM EDT:''' About half of Niagara compute nodes are up. Note that only jobs that can finish by 5:00 PM will run.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 20, 2022, 07:50 AM EDT:''' Jupyter Hub is available again.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 20, 2022, 07:35 AM EDT:''' Jupyter Hub is being updated and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 20, 2022, 07:30 AM EDT:''' Maintenance is underway.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;span style='color:red'&amp;gt;&amp;lt;b&amp;gt;Announcement:&amp;lt;/b&amp;gt;&amp;lt;/span&amp;gt;&lt;br /&gt;
&lt;br /&gt;
On '''Thursday October 20th, 2022''', the SciNet datacentre (which hosts Niagara and Mist) will undergo transformer maintenance from 7:30 am EDT to 5:00 pm EDT.  At both the start and end of this maintenance window, all systems will need to be briefly shut down and will not be accessible.  Apart from that, during this window, login nodes will be accessible and part of Niagara will be available to run jobs. The Mist and Rouge clusters will be off for the entirety of this maintenance. &lt;br /&gt;
&lt;br /&gt;
Users are encouraged to submit Niagara jobs of about 1 to 2 hours in the days before the maintenance, as these could be run within the&lt;br /&gt;
window of 8 AM to 5 PM EDT.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
'''Wed Oct 5, 2022, 12:10 PM EDT:''' A grid power glitch caused all compute nodes to reboot. Please resubmit your jobs.&lt;br /&gt;
&lt;br /&gt;
'''Mon Oct 3, 2022, 11:20 PM EDT:'''  Niagara login nodes are accessible from outside again.&lt;br /&gt;
&lt;br /&gt;
'''Mon Oct 3, 2022, 9:20 PM EDT:'''  Niagara login nodes are inaccessible from outside of the datacentre at the moment. As a work-around, ssh into mist.scinet.utoronto.ca and then ssh into e.g. nia-login01.&lt;br /&gt;
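&lt;br /&gt;
For illustration only (USER is a placeholder for your own account name), the two hops can also be combined into a single OpenSSH command:&lt;br /&gt;
&lt;br /&gt;
 # jump through the Mist login node to reach a Niagara login node&lt;br /&gt;
 ssh -J USER@mist.scinet.utoronto.ca USER@nia-login01&lt;br /&gt;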
&lt;br /&gt;
'''Wed Sep 28, 2022, 1:15 PM EDT:''' The JupyterHub maintenance is finished and it is now accessible to users.&lt;br /&gt;
&lt;br /&gt;
'''Wed Sep 28, 2022, 1:00 PM EDT:''' The JupyterHub is to be rebooted for system upgrades. Running processes and notebooks will be closed. The service is expected to be back around 1:30 PM EDT.&lt;br /&gt;
 &lt;br /&gt;
'''Tue Sep 27, 2022, 11:50 AM EDT:''' Mist login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep 27, 2022, 11:25 AM EDT:''' Mist login node is under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''Mon Sep 26, 2022, 11:35 AM EDT:''' Rouge and Teach login nodes are accessible again.&lt;br /&gt;
&lt;br /&gt;
'''Mon Sep 26, 2022, 11:05 AM EDT:''' Rouge and Teach login nodes are under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''Fri Sep 22, 2022, 0:46 AM EDT:''' The CCEnv software stack is back to normal.&lt;br /&gt;
&lt;br /&gt;
'''Thu Sep 22, 2022, 8:15 PM EDT:''' The CCEnv software stack is inaccessible due to an issue with CVMFS.&lt;br /&gt;
 &lt;br /&gt;
'''Tue Sep 20, 2022, 4:00 PM EDT:''' Rouge login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep 20, 2022, 10:20 AM EDT:''' Rouge login node is under maintenance and temporarily inaccessible to users (hardware upgrade).&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep 20, 2022, 9:41 AM EDT:''' Rouge login node is back up.&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep 20, 2022, 8:25 AM EDT:''' Rouge login node down, we are investigating.&lt;br /&gt;
&lt;br /&gt;
'''Fri Sept 16, 2022, 9:30 AM EDT:''' Login nodes are accessible again.&lt;br /&gt;
&lt;br /&gt;
'''Fri Sept 16, 2022, 9:00 AM EDT:''' Login nodes are not accessible.  We are investigating.&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep 13, 2022, 11:00 AM EDT:''' Mist login node is available again.&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep 13, 2022, 10:00 AM EDT:''' Mist login node is under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''Fri Sep 2, 2022, 11:25 AM EDT:''' Rouge login node is back up.&lt;br /&gt;
&lt;br /&gt;
'''Fri Sep 2, 2022, 10:25 AM EDT:''' Issues with the Rouge login node; we are investigating.&lt;br /&gt;
&lt;br /&gt;
'''Tue Aug 23, 2022, 1:15 PM EDT:''' Jupyter Hub is available again.&lt;br /&gt;
&lt;br /&gt;
'''Tue Aug 23, 2022, 1:00 PM EDT:''' Jupyter Hub is being updated and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''Fri Aug 12, 2022, 6:30 PM EDT:''' File system issues are resolved.&lt;br /&gt;
&lt;br /&gt;
'''Fri Aug 12, 2022, 5:06 PM EDT:''' File system issues. We are investigating.&lt;br /&gt;
&lt;br /&gt;
'''Thu Aug 11, 2022, 9:20 AM EDT:''' The login node issues have been resolved.&lt;br /&gt;
&lt;br /&gt;
'''Thu Aug 11, 2022, 7:50 AM EDT:''' We are having problems accessing the Niagara login nodes.  Until fixed, please login to Mist and then ssh to a Niagara login node to access Niagara (&amp;quot;ssh nia-login02&amp;quot;, for example).&lt;br /&gt;
&lt;br /&gt;
'''Fri July 15, 2022, 10:50 AM EDT:''' Jupyter Hub is available again.&lt;br /&gt;
&lt;br /&gt;
'''Fri July 15, 2022, 10:30 AM EDT:''' Jupyter Hub is being updated and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''Thu June 16, 2022, 3:45 PM EDT:''' File system is stable now. We're gradually opening the systems up.&lt;br /&gt;
&lt;br /&gt;
'''Thu June 16, 2022, 10:15 AM EDT:''' Emergency maintenance shutdown of filesystem. Running jobs will be affected.&lt;br /&gt;
&lt;br /&gt;
'''Wed June 15, 2022, 7:35 PM EDT:''' Maintenance shutdown finished. Most systems are available again.&lt;br /&gt;
&lt;br /&gt;
'''Wed June 15, 2022, 7:00 AM EDT:''' Maintenance shutdown of the SciNet datacentre. There will be no access to any of the SciNet systems during this time. We expect to be able to bring the systems back online in the evening of June 15th.&lt;br /&gt;
&lt;br /&gt;
'''Mon June 13, 2022, 7:00 AM EDT - Wed June 15, 2022, 7:00 AM EDT:''' Two-day reservation for the &amp;quot;Niagara at Scale&amp;quot; event. Only &amp;quot;Niagara at Scale&amp;quot; projects will run on the compute nodes (as well as SOSCIP projects, on a subset of nodes). Users are encouraged to submit small and short jobs that could run before this event.  Throughout the event, users can still log in, access their data, and submit jobs, but these jobs will not run until after the subsequent maintenance (see below). Note that the debugjob queue will remain available to everyone as well.&lt;br /&gt;
&lt;br /&gt;
'''Mon May 30th, 2022, 12:42:00 EDT:''' Mist login node is available again.&lt;br /&gt;
&lt;br /&gt;
'''Mon May 30th, 2022, 10:22:00 EDT:''' Mist login node is being upgraded and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''Wed May 25th, 2022, 13:30:00 EDT:''' Niagara operating at 100% again.&lt;br /&gt;
&lt;br /&gt;
'''Tue May 24th, 2022, 21:30:00 EDT:''' Jupyter Hub up.  Part of Niagara can run compute jobs.&lt;br /&gt;
&lt;br /&gt;
'''Tue May 24th, 2022, 19:00:00 EDT:''' Systems are up. Users can login, BUT cannot submit jobs yet.&lt;br /&gt;
&lt;br /&gt;
'''Tue May 24th, 2022, 10:00:00 EDT:''' We are still performing system checks.&lt;br /&gt;
&lt;br /&gt;
'''Mon May 23rd, 2022, 16:44:30 EDT:''' Systems still down. Filesystems are working, but there are quite a number of drive failures - no data loss - so out of an abundance of caution we are keeping the systems down at least until tomorrow.  The long weekend has also been disruptive for service response, and we prefer to err on the safe side.&lt;br /&gt;
&lt;br /&gt;
'''Mon May 23rd, 2022, 08:12:14 EDT:''' Systems still down. Filesystems being checked to ensure no heat damage.&lt;br /&gt;
&lt;br /&gt;
'''Sun May 22nd, 2022, 10.16 am EDT:''' Electrician dispatched to replace blown fuses.&lt;br /&gt;
&lt;br /&gt;
'''Sun May 22nd, 2022, 2:54 am EDT:''' Automatic shutdown due to power/cooling.&lt;br /&gt;
&lt;br /&gt;
'''Fri May 6th, 2022, 11:35 am EDT:''' HPSS scheduler upgrade also finished.&lt;br /&gt;
&lt;br /&gt;
'''Thu May 5th, 2022, 7:45 pm EDT:''' Upgrade of the scheduler has finished, with the exception of HPSS.&lt;br /&gt;
&lt;br /&gt;
'''Thu May 5th, 2022, 7:00 am - 3:00 pm EDT (approx):''' Starting from 7:00 am EDT, an upgrade of the scheduler of the Niagara, Mist, and Rouge clusters will be applied.  This requires the scheduler to be down for about 5-6 hours, and all compute and login nodes to be rebooted.&lt;br /&gt;
Jobs cannot be submitted during this maintenance, but jobs submitted beforehand will remain in the queue.  For most of the time, the login nodes of the clusters will be available so that users may access their files on the home, scratch, and project file systems.&lt;br /&gt;
&lt;br /&gt;
'''Monday May 2nd, 2022, 9:30 - 11:00 am EDT:''' the Niagara login nodes, the jupyter hub, and nia-datamover2 will get rebooted for updates.  In the process, any login sessions will get disconnected, and servers on the jupyterhub will stop. Jobs in the Niagara queue will not be affected.&lt;br /&gt;
&lt;br /&gt;
'''Tue Apr 26, 11:20 AM EDT:''' A rolling update of the Mist cluster is taking a bit longer than expected, affecting logins to Mist. &lt;br /&gt;
 &lt;br /&gt;
'''Announcement:''' On Thursday April 14th, 2022, the connectivity to the SciNet datacentre will be disrupted at 11:00 AM EDT  for a few minutes, in order to deploy a new network core switch.  Any SSH connections or data transfers to SciNet systems (Niagara, Mist, etc.) may be terminated at that time.&lt;br /&gt;
&lt;br /&gt;
'''Thu March 24, 6:54 AM EST:''' HPSS is back online&lt;br /&gt;
&lt;br /&gt;
'''Thu March 24, 8:15 AM EST:''' HPSS has a hardware problem&lt;br /&gt;
&lt;br /&gt;
'''Wed March 2, 4:50 PM EST:''' The CCEnv software stack is available again on Niagara.&lt;br /&gt;
&lt;br /&gt;
'''Wed March 2, 7:50 AM EST:''' The CCEnv software stack on Niagara has issues; we are investigating.&lt;br /&gt;
 &lt;br /&gt;
'''Sat Feb 12 2022, 12:59 EST:''' Jupyterhub is back up, but may have hardware issue.&lt;br /&gt;
&lt;br /&gt;
'''Sat Feb 12 2022, 10:36 EST:''' Issue with the Jupyterhub, since last night.  We're investigating.&lt;br /&gt;
&lt;br /&gt;
'''Tue Feb 1 2022 19:20 EST:''' Maintenance finished successfully. Systems are up. &lt;br /&gt;
&lt;br /&gt;
'''Tue Feb 1 2022 13:00 EST:''' Maintenance downtime started.&lt;br /&gt;
&lt;br /&gt;
'''Mon Jan 31 2022 13:15:00 EST:''' The SciNet datacentre's cooling system needs an '''emergency repair''' as soon as possible.  During this repair, all systems hosted at SciNet (Niagara, Mist, Rouge, HPSS, and Teach) will need to be switched off and will be unavailable to users. Repairs will start '''Tuesday February 1st, at 1:00 pm EST''', and could take until the end of the next day.  Please check here for updates.&lt;br /&gt;
&lt;br /&gt;
'''Sat Jan 29 2022 16:45:38 EST:''' Fibre repaired.&lt;br /&gt;
&lt;br /&gt;
'''Sat 29 Jan 2022 11:22:27 EST:''' Fibre repair is underway.  Expect to have connectivity restored later today.&lt;br /&gt;
&lt;br /&gt;
'''Fri 28 Jan 2022 07:35:01 EST:''' The fibre optics cable that connects the SciNet datacentre was severed by uncoordinated digging at York University.  We expect repairs to happen as soon as possible.&lt;br /&gt;
&lt;br /&gt;
'''Thu Jan 27 12:46 PM EST 2022:''' Network issues to and from the datacentre. We are investigating.&lt;br /&gt;
&lt;br /&gt;
'''Sun Jan 23 11:05 AM EST 2022:''' Filesystem issues appear to have resolved.&lt;br /&gt;
&lt;br /&gt;
'''Sun Jan 23 10:30 AM EST 2022:''' Filesystem issues -- investigating.&lt;br /&gt;
&lt;br /&gt;
'''Sat Jan 8 11:42 AM EST 2022:''' The emergency maintenance is complete. Systems are up and available.&lt;br /&gt;
&lt;br /&gt;
'''Fri Jan 7 2:34 PM EST 2022:''' The SciNet shutdown is in progress. Systems are expected back on Saturday, Jan 8.&lt;br /&gt;
&lt;br /&gt;
'''&amp;lt;span style=&amp;quot;color:red&amp;quot;&amp;gt;Emergency shutdown Friday January 7, 2022&amp;lt;/span&amp;gt;''': An emergency shutdown of all SciNet to replace a crucial file system component is planned to take place on Friday January 7, 2022, starting at 8am EST, and will require at least 12 hours of downtime.  Updates will be posted during the day.&lt;br /&gt;
&lt;br /&gt;
'''Thu Jan 6 08:20 AM EST 2022''' The SciNet filesystem is having issues.  We are investigating.&lt;br /&gt;
&lt;br /&gt;
'''Fri Dec 24 1:31 PM EST 2021''' Please note the following scheduled network maintenance, which will result in loss of connectivity to the SciNet datacentre.  Start time: Dec 29, 00:30 EST.  Estimated duration: 4 hours and 30 minutes. &lt;br /&gt;
&lt;br /&gt;
'''Mon Dec 20 4:29 PM EST 2021''' Filesystem is back to normal. &lt;br /&gt;
&lt;br /&gt;
'''Mon Dec 20 2:53 PM EST 2021''' Filesystem problem - we are investigating. &lt;br /&gt;
&lt;br /&gt;
'''Thu Sep 23 12:30 EDT 2021 ''' Cooling restored.  Systems should be available later this afternoon.  &lt;br /&gt;
&lt;br /&gt;
'''Thu Sep 23 9:30 EDT 2021 ''' Technicians on site working on cooling system. &lt;br /&gt;
&lt;br /&gt;
'''Thu Sep 23 3:30 EDT 2021 ''' Cooling system issues still unresolved. &lt;br /&gt;
&lt;br /&gt;
'''Wed Sep 22 23:27:48 EDT 2021 ''' Shutdown of the datacenter due to a problem with the cooling system.&lt;br /&gt;
&lt;br /&gt;
'''Wed Sep 22 09:30 EDT 2021 ''': File system issues, resolved.&lt;br /&gt;
&lt;br /&gt;
'''Wed Sep 22 07:30 EDT 2021 ''': File system issues, investigating.&lt;br /&gt;
&lt;br /&gt;
'''Sun Sep 19 10:00 EDT 2021''': Power glitch interrupted all compute jobs; please resubmit any jobs you had running.&lt;br /&gt;
&lt;br /&gt;
'''Wed Sep 15 17:35 EDT 2021''': filesystem issues resolved&lt;br /&gt;
&lt;br /&gt;
'''Wed Sep 15 16:39 EDT 2021''': filesystem issues&lt;br /&gt;
&lt;br /&gt;
'''Mon Sep 13 13:15:07 EDT 2021''' HPSS is back online.&lt;br /&gt;
&lt;br /&gt;
'''Fri Sep 10 17:57:23 EDT 2021''' HPSS is offline due to unscheduled maintenance.&lt;br /&gt;
&lt;br /&gt;
'''Wed Aug 18 16:13:42 EDT 2021''' The HPSS upgrade is complete.&lt;br /&gt;
&lt;br /&gt;
'''HPSS Downtime August 17th and 18th, 2021 (Tuesday and Wednesday):''' We'll be upgrading the HPSS software to version 8.3, along with all the clients (htar/hsi, vfs and Globus/dsi)&lt;br /&gt;
&lt;br /&gt;
'''July 24, 2021, 6:00 PM EDT:''' There appear to be file system issues, which may affect users' ability to log in.  We are investigating.&lt;br /&gt;
&lt;br /&gt;
''' July 23rd, 2021, 9:00 AM EDT:''' ''' Security update: ''' Due to a severe vulnerability in the Linux kernel (CVE-2021-33909), our team is currently patching and rebooting all login nodes and compute nodes, as well as the JupyterHub.  There should be no effect on running jobs; however, sessions on login and datamover nodes will be disrupted. &lt;br /&gt;
&lt;br /&gt;
''' July 20th, 2021, 7:00 PM EDT:''' ''' SLURM configuration''' - Changed the default behaviour to kill a job step if any task exits with a non-zero exit code. If your code is able to handle failures gracefully, please add srun's option --no-kill to recover the previous default behaviour.&lt;br /&gt;
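&lt;br /&gt;
For illustration only (the executable name is a placeholder), this is how the flag mentioned above would be added to an existing srun line in a job script:&lt;br /&gt;
&lt;br /&gt;
 # keep the job step going even if an individual task fails, per the note above&lt;br /&gt;
 srun --no-kill -n $SLURM_NTASKS ./my_resilient_app&lt;br /&gt;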
&lt;br /&gt;
''' July 20th, 2021, 7:00 PM EDT:''' Maintenance finished, systems are back online.   &lt;br /&gt;
&lt;br /&gt;
'''SciNet Downtime July 20th, 2021 (Tuesday):''' There will be a maintenance shutdown of the SciNet data center on Tuesday July 20th, starting at 7 am EDT. There will be no access to any of the SciNet systems (Niagara, Mist, HPSS, Teach cluster, or the file systems) during this time.  We expect to be able to bring the systems back online in the evening of July 20th.  The status of the Niagara cluster can be checked on status.computecanada.ca. For up-to-date and more detailed information on the status of all the SciNet systems, you can always check back here.&lt;br /&gt;
&lt;br /&gt;
'''June 29th, 2021, 2:00 PM:''' Thunderstorm-related power fluctuations are causing some Niagara compute nodes and their jobs to crash.  Please resubmit if your jobs seem to have crashed for no apparent reason.&lt;br /&gt;
&lt;br /&gt;
'''June 28th, 2021, 4:06 PM:''' Mist OS upgrade is complete.&lt;br /&gt;
&lt;br /&gt;
'''June 28th, 2021, 9:00 AM:''' Mist is under maintenance. OS upgrading from RHEL 7 to 8.&lt;br /&gt;
&lt;br /&gt;
'''June 11th, 2021, 8:30 AM:''' Maintenance complete. Systems are up.&lt;br /&gt;
&lt;br /&gt;
'''June 9th to 10th, 2021:''' The SciNet datacentre will have a scheduled maintenance shutdown.  Niagara, Mist, Rouge, HPSS, login nodes, the file systems, and hosted systems will all be offline during the shutdown starting at 7AM EDT on Wednesday June 9th. We expect the systems to be back up in the morning of Friday June 11th.  Check here for updates.&lt;br /&gt;
&lt;br /&gt;
'''May 27, 2021:''' Datamover addresses have changed to improve high bandwidth connectivity and cybersecurity. The new addresses are 142.1.174.227 for nia-datamover1.scinet.utoronto.ca, and 142.1.174.228 for nia-datamover2.scinet.utoronto.ca.&lt;br /&gt;
&lt;br /&gt;
If you have jobs that need to connect to a software license server using an ssh tunnel through nia-gw (which actually resolves to datamover1 or datamover2), you may need to ask the system administrators of that license server to allow incoming connections from the new addresses above.&lt;br /&gt;
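&lt;br /&gt;
As an illustration only (the license server hostname, port numbers, and environment variable are hypothetical; nia-gw is the gateway mentioned above), such a tunnel is typically opened from within the job script along these lines:&lt;br /&gt;
&lt;br /&gt;
 # open a background tunnel: local port 1234 forwards to a hypothetical license server via nia-gw&lt;br /&gt;
 ssh -N -f -L 1234:license.example.ca:27000 nia-gw&lt;br /&gt;
 # point the application at the local end of the tunnel (the variable name is application-specific)&lt;br /&gt;
 export MY_APP_LICENSE_SERVER=1234@localhost&lt;br /&gt;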
&lt;br /&gt;
'''May 27th, 20:00.''' All systems are up and running &lt;br /&gt;
&lt;br /&gt;
'''May 27th, 19:30.''' Most systems are up&lt;br /&gt;
&lt;br /&gt;
'''May 27th, 19:00:''' Cooling is back. Powering up systems&lt;br /&gt;
&lt;br /&gt;
'''May 27th, 2021, 11:30am:'''  The cooling tower issue has been identified as a wiring issue and is being repaired.  We don't have an ETA on when cooling will be restored; however, we are hopeful it will be by the end of the day.  &lt;br /&gt;
&lt;br /&gt;
'''May 27th, 2021, 12:30am:''' The cooling tower motor is not working properly and may need to be replaced.  It's the primary motor and the cooling system cannot run without it, so at least until tomorrow all equipment at the datacenter will remain unavailable.  Updates about expected repair times will be posted when they are known.&lt;br /&gt;
&lt;br /&gt;
'''May 26th, 2021, 9:20pm:''' we are currently experiencing cooling issues at the SciNet data centre.  Updates will be posted as we determine the cause of the problem.&lt;br /&gt;
&lt;br /&gt;
'''From Tue Mar 30 at 12 noon EST to Thu Apr 1 at 12 noon EST,''' there will be a two-day reservation for the &amp;quot;Niagara at Scale&amp;quot; pilot event.  During these 48 hours, only &amp;quot;Niagara at Scale&amp;quot; projects will run on the compute nodes (as well as SOSCIP projects, on a subset of nodes).  All other users can still log in, access their data, and submit jobs throughout this event, but the jobs will not run until after the event.  The debugjob queue will remain available to everyone as well.&lt;br /&gt;
&lt;br /&gt;
The scheduler will not start batch jobs that cannot finish before the start of this event. Users can submit small and short jobs that can take advantage of this, as the scheduler may be able to fit these jobs in before the event starts on the otherwise idle nodes.&lt;br /&gt;
&lt;br /&gt;
'''Tue 23 Mar 2021 12:19:07 PM EDT''' - Planned external network maintenance 12pm-1pm Tuesday, March 23rd. &lt;br /&gt;
&lt;br /&gt;
'''Thu Jan 28 17:35:16 EST 2021:''' HPSS services are back online&lt;br /&gt;
&lt;br /&gt;
'''Thu Jan 28 12:36:21 EST 2021:''' HPSS services offline&lt;br /&gt;
&lt;br /&gt;
We need a small maintenance window as early as possible this afternoon to perform a small configuration change. Ongoing jobs will be allowed to finish, but we are keeping new submissions on hold in the queue.&lt;br /&gt;
&lt;br /&gt;
'''Mon Jan 25 13:16:33 EST 2021:''' HPSS services are back online&lt;br /&gt;
&lt;br /&gt;
'''Sat Jan 23 10:03:33 EST 2021:''' HPSS services offline&lt;br /&gt;
&lt;br /&gt;
We detected some type of hardware failure on our HPSS equipment overnight, so access has been disabled pending further investigation.&lt;br /&gt;
&lt;br /&gt;
'''Fri Jan 22 10:49:29 EST 2021:''' The Globus transition to OAuth is finished&lt;br /&gt;
&lt;br /&gt;
Please deactivate any previous sessions to the niagara endpoint (in the last 7 days), and activate/login again. &lt;br /&gt;
&lt;br /&gt;
For more details check https://docs.scinet.utoronto.ca/index.php/Globus#computecandada.23niagara&lt;br /&gt;
&lt;br /&gt;
'''Jan 21, 2021:''' Globus access disruption on Fri, Jan/22/2021 10AM: Please be advised that we will have a maintenance window starting tomorrow at 10AM to roll out the transition of services to OAuth-based authentication.&lt;br /&gt;
&lt;br /&gt;
'''Jan 15, 2021:''' Globus access update on Mon, Jan/18/2021 and Tue, Jan/19/2021:&lt;br /&gt;
Please be advised that we will start preparations on Monday to perform an update to Globus access on Tuesday. We'll be adopting OAuth instead of MyProxy from that point on. During this period expect sporadic disruptions of service. On Monday we'll already block access to nia-dm2, so starting this weekend please refrain from initiating new login sessions or ssh tunnels via nia-dm2.&lt;br /&gt;
&lt;br /&gt;
''' December 11, 2020, 12:00 AM EST: ''' Cooling issue resolved. Systems back.&lt;br /&gt;
&lt;br /&gt;
''' December 11, 2020, 6:00 PM EST: ''' Cooling issue at datacenter. All systems down.&lt;br /&gt;
&lt;br /&gt;
''' December 7, 2020, 7:25 PM EST: '''All systems back; users can log in again.&lt;br /&gt;
&lt;br /&gt;
''' December 7, 2020, 6:46 PM EST: '''User connectivity to data center not yet ready, but queued jobs on Mist and Niagara have been started.&lt;br /&gt;
 &lt;br /&gt;
''' December 7, 2020, 7:00 AM EST: '''Maintenance shutdown in effect. This is a one-day maintenance shutdown.  There will be no access to Niagara, Mist, HPSS or teach, nor to their file systems during this time.  We expect to be able to bring the systems back online this evening.&lt;br /&gt;
&lt;br /&gt;
''' December 2, 2020, 9:10 PM EST: '''Power is back, systems are coming up. Please resubmit any jobs that failed because of this incident.&lt;br /&gt;
&lt;br /&gt;
''' December 2, 2020, 6:00 PM EST: '''Power glitch at the data center, caused about half of the compute nodes to go down.  Power issue not yet resolved.&lt;br /&gt;
&lt;br /&gt;
'''&amp;lt;span style=&amp;quot;color:#dd1111&amp;quot;&amp;gt;Announcing a Maintenance Shutdown on December 7th, 2020&amp;lt;/span&amp;gt;''' &amp;lt;br/&amp;gt;There will be a one-day maintenance shutdown on December 7th 2020, starting at 7 am EST.  There will be no access to Niagara, Mist, HPSS or teach, nor to their file systems during this time.  We expect to be able to bring the systems back online in the evening of the same day.&lt;br /&gt;
&lt;br /&gt;
''' November 6, 2020, 8:00 PM EST: ''' Systems are coming back online.&lt;br /&gt;
&lt;br /&gt;
''' November 6, 2020, 9:49 AM EST: ''' Repairs on the cooling system are underway.  No ETA, but the systems will likely be back some time today.&lt;br /&gt;
&lt;br /&gt;
''' November 6, 2020, 4:27 AM EST: '''Cooling system failure, datacentre is shut down.&lt;br /&gt;
&lt;br /&gt;
''' October 9, 2020, 12:57 PM: ''' A short power glitch caused many of the Niagara compute nodes to lose power; jobs running on them would have failed. Please check your jobs and resubmit.&lt;br /&gt;
&lt;br /&gt;
''' October 8, 2020, 9:50 PM: ''' Jupyterhub service is back up.&lt;br /&gt;
&lt;br /&gt;
''' October 8, 2020, 5:40 PM: ''' Jupyterhub service is down. We are investigating.&lt;br /&gt;
&lt;br /&gt;
''' September 28, 2020, 11:00 AM EST: ''' A short power glitch caused many of the Niagara compute nodes to lose power; jobs running on them would have failed. Please check your jobs and resubmit.&lt;br /&gt;
&lt;br /&gt;
''' September 1, 2020, 2:15 PM EST: ''' A short power glitch caused about half of the Niagara compute nodes to lose power; jobs running on them would have failed. Please check your jobs and resubmit.&lt;br /&gt;
&lt;br /&gt;
''' September 1, 2020, 9:27 AM EST: ''' The Niagara cluster has moved to a new default software stack, NiaEnv/2019b.  If your job scripts used the previous default software stack (NiaEnv/2018a), please put the command &amp;quot;module load NiaEnv/2018a&amp;quot; before other module commands in those scripts, to ensure they will continue to work, or try the new stack (recommended).&lt;br /&gt;
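&lt;br /&gt;
As a minimal sketch only (the resource requests are placeholders), the relevant change is to load NiaEnv/2018a before any other module commands in the job script:&lt;br /&gt;
&lt;br /&gt;
 #!/bin/bash&lt;br /&gt;
 #SBATCH --nodes=1&lt;br /&gt;
 #SBATCH --time=01:00:00&lt;br /&gt;
 module load NiaEnv/2018a   # pin the previous default stack first&lt;br /&gt;
 # module load ...          # the module commands the script already used follow, unchanged&lt;br /&gt;
&lt;br /&gt;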
''' August 24, 2020, 7:37 PM EST: ''' Connectivity is back to normal&lt;br /&gt;
&lt;br /&gt;
''' August 24, 2020, 6:35 PM EST: ''' We have partial connectivity back, but are still investigating.&lt;br /&gt;
&lt;br /&gt;
''' August 24, 2020, 3:15 PM EST: ''' There are issues connecting to the data centre. We're investigating.&lt;br /&gt;
&lt;br /&gt;
''' August 21, 2020, 6:00 PM EST: ''' The pump has been repaired, cooling is restored, systems are up.  &amp;lt;br/&amp;gt;Scratch purging is postponed until the evening of Friday Aug 28th, 2020.&lt;br /&gt;
&lt;br /&gt;
'''August 19, 2020, 4:40 PM EST:''' Update: The current estimate is to have the cooling restored on Friday and we hope to have the systems available for users on Saturday August 22, 2020.&lt;br /&gt;
&lt;br /&gt;
'''August 17, 2020, 4:00 PM EST:''' Unfortunately after taking the pump apart it was determined there was a more serious failure of the main drive shaft, not just the seal. As a new one will need to be sourced or fabricated we're estimating that it will take at least a few more days to get the part and repairs done to restore cooling. Sorry for the inconvenience. &lt;br /&gt;
&lt;br /&gt;
'''August 15, 2020, 1:00 PM EST:''' Due to the availability of parts to repair the failed pump and cooling system, it is unlikely that systems will be restored before Monday afternoon at the earliest. &lt;br /&gt;
&lt;br /&gt;
'''August 15, 2020, 00:04 EST:'''  A primary pump seal in the cooling infrastructure has blown, and parts availability cannot be determined until tomorrow. All systems are shut down as there is no cooling.  If parts are available, systems may be back late tomorrow at the earliest. Check here for updates.  &lt;br /&gt;
&lt;br /&gt;
'''August 14, 2020, 21:04 EST:''' Tomorrow's /scratch purge has been postponed.&lt;br /&gt;
&lt;br /&gt;
'''August 14, 2020, 21:00 EST:''' Staff at the datacenter. Looks like one of the pumps has a seal that is leaking badly.&lt;br /&gt;
&lt;br /&gt;
'''August 14, 2020, 20:37 EST:''' We seem to be undergoing a thermal shutdown at the datacenter.&lt;br /&gt;
&lt;br /&gt;
'''August 14, 2020, 20:20 EST:''' Network problems to niagara/mist. We are investigating.&lt;br /&gt;
 &lt;br /&gt;
'''August 13, 2020, 10:40 AM EST:''' Network is fixed, scheduler and other services are back.&lt;br /&gt;
&lt;br /&gt;
'''August 13, 2020, 8:20 AM EST:''' We had an IB switch failure, which is affecting a subset of nodes, including the scheduler nodes.&lt;br /&gt;
&lt;br /&gt;
'''August 10, 2020, 7:30 PM EST:''' Scheduler fully operational again.&lt;br /&gt;
&lt;br /&gt;
'''August 10, 2020, 3:00 PM EST:''' Scheduler partially functional: jobs can be submitted and are running.&lt;br /&gt;
&lt;br /&gt;
'''August 10, 2020, 2:00 PM EST:''' Scheduler is temporarily non-operational.&lt;br /&gt;
&lt;br /&gt;
'''August 7, 2020, 9:15 PM EST:''' Network is fixed, scheduler and other services are coming back.&lt;br /&gt;
&lt;br /&gt;
'''August 7, 2020, 8:20 PM EST:''' Disruption of part of the network in the data centre.  Causes issue with the scheduler, the mist login node, and possibly others. We are investigating.&lt;br /&gt;
&lt;br /&gt;
'''July 30, 2020, 9:00 AM''' Project backup in progress but incomplete: please be aware that after we deployed the new, larger storage appliance for scratch and project two months ago, we started a full backup of project (1.5PB). This backup is taking a while to complete, and there are still a few areas which have not been backed up fully. Please be careful to not delete things from project that you still need, in particular if they are recently added material.&lt;br /&gt;
&lt;br /&gt;
'''July 27, 2020, 5:00 PM:''' Scheduler issues resolved.&lt;br /&gt;
&lt;br /&gt;
'''July 27, 2020, 3:00 PM:''' Scheduler issues. We are investigating.&lt;br /&gt;
&lt;br /&gt;
'''July 13, 4:40 PM:''' Most systems are available again. Only Mist is still being brought up.&lt;br /&gt;
&lt;br /&gt;
'''July 13, 10:00 AM:''' '''SciNet/Niagara Downtime In Progress'''&lt;br /&gt;
&lt;br /&gt;
'''SciNet/Niagara Downtime Announcement, July 13, 2020'''&amp;lt;br/&amp;gt;&lt;br /&gt;
All resources at SciNet will undergo a maintenance shutdown on Monday July 13, 2020, starting at 10:00 am EDT, for file system and scheduler upgrades.  There will be no access to any of the SciNet systems (Niagara, Mist, HPSS, Teach cluster, or the file systems) during this time.&lt;br /&gt;
We expect to be able to bring the systems back around 3 PM (EST) on the same day.&lt;br /&gt;
&lt;br /&gt;
''' June 29, 6:21:00  PM:''' Systems are available again.  &lt;br /&gt;
&lt;br /&gt;
''' June 29, 12:30:00  PM:''' Power Outage caused thermal shutdown.&lt;br /&gt;
&lt;br /&gt;
'''June 20, 2020, 10:24 PM:''' File systems are back up.  Unfortunately, all running jobs would have died and users are asked to resubmit them.&lt;br /&gt;
&lt;br /&gt;
'''June 20, 2020, 9:48 PM:''' An issue with the file systems is causing trouble.  We are investigating the cause.&lt;br /&gt;
&lt;br /&gt;
'''June 15, 2020, 10:30 PM:''' A '''power glitch''' caused some compute nodes to be rebooted: jobs running at the time may have failed; users are asked to resubmit these jobs.&lt;br /&gt;
&lt;br /&gt;
'''June 12, 2020, 6:15 PM:''' Two '''power glitches''' during the night caused some compute nodes to be rebooted: jobs running at the time may have failed; users are asked to resubmit these jobs.&lt;br /&gt;
&lt;br /&gt;
'''June 6, 2020, 6:06 AM:''' A '''power glitch''' caused some compute nodes to be rebooted: jobs running at the time may have failed; users are asked to resubmit these jobs.&lt;br /&gt;
&lt;br /&gt;
'''May 24, 2020, 8:20 AM:''' A '''power glitch''' this morning caused all compute nodes to be rebooted: jobs running at the time may have failed; users are asked to resubmit these jobs.&lt;br /&gt;
&lt;br /&gt;
'''May 7, 2020, 6:05 PM:''' Maintenance shutdown is finished.  Most systems are back in production.&lt;br /&gt;
&lt;br /&gt;
'''May 6, 2020, 7:08 AM:''' Two-day datacentre maintenance shutdown has started.&lt;br /&gt;
&lt;br /&gt;
''' SciNet/Niagara Downtime Announcement, May 6-7, 2020'''&lt;br /&gt;
&lt;br /&gt;
All resources at SciNet will undergo a two-day maintenance shutdown on May 6th and 7th 2020, starting at 7 am EDT on Wednesday May 6th.  There will be no access to any of the SciNet systems (Niagara, Mist, HPSS, Teach cluster, or the file systems) or systems hosted at the SciNet data centre.  We expect to be able to bring the systems back online the evening of May 7th.&lt;br /&gt;
&lt;br /&gt;
'''May 4, 2020, 7:51 AM:''' A power glitch this morning caused compute nodes to be rebooted: jobs running at the time may have failed; users are asked to resubmit these jobs.&lt;br /&gt;
&lt;br /&gt;
'''May 3, 2020, 8:20 AM:''' A power glitch this morning caused all compute nodes to be rebooted: jobs running at the time may have failed; users are asked to resubmit these jobs.&lt;br /&gt;
&lt;br /&gt;
'''April 28, 2020, 7:20 AM:''' A power glitch this morning caused all compute nodes to be rebooted: jobs running at the time have failed; users are asked to resubmit these jobs.&lt;br /&gt;
 &lt;br /&gt;
'''April 20, 2020: Security Incident at Cedar; implications for Niagara users'''&lt;br /&gt;
&lt;br /&gt;
Last week, it became evident that the Cedar GP cluster had been&lt;br /&gt;
compromised for several weeks.  The passwords of at least two&lt;br /&gt;
Compute Canada users were known to the attackers. One of these was&lt;br /&gt;
used to escalate privileges on Cedar, as explained on&lt;br /&gt;
https://status.computecanada.ca/view_incident?incident=423.&lt;br /&gt;
&lt;br /&gt;
These accounts were used to login to Niagara as well, but Niagara&lt;br /&gt;
did not have the same security loophole as Cedar (which has been&lt;br /&gt;
fixed), and no further escalation was observed on Niagara.&lt;br /&gt;
&lt;br /&gt;
Reassuring as that may sound, it is not known how the passwords of&lt;br /&gt;
the two user accounts were obtained. Given this uncertainty, the&lt;br /&gt;
SciNet team *strongly* recommends that you change your password on&lt;br /&gt;
https://ccdb.computecanada.ca/security/change_password, and remove&lt;br /&gt;
any SSH keys and regenerate new ones (see&lt;br /&gt;
https://docs.scinet.utoronto.ca/index.php/SSH_keys).&lt;br /&gt;
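&lt;br /&gt;
As a hedged sketch of the key-rotation step only (the key file name is an example; see the SSH keys page linked above for the supported workflow):&lt;br /&gt;
&lt;br /&gt;
 # on the cluster: remove previously authorized keys (back them up first if unsure)&lt;br /&gt;
 rm -f ~/.ssh/authorized_keys&lt;br /&gt;
 # on your own machine: generate a fresh key pair&lt;br /&gt;
 ssh-keygen -t ed25519 -f ~/.ssh/id_ed25519_scinet&lt;br /&gt;
 # then register the new public key (~/.ssh/id_ed25519_scinet.pub) as described on the SSH keys page&lt;br /&gt;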
&lt;br /&gt;
''' Tue 30 Mar 2020 14:55:14 EDT'''  Burst Buffer available again.&lt;br /&gt;
&lt;br /&gt;
''' Fri Mar 27 15:29:00 EDT 2020:''' SciNet systems are back up. Only the Burst Buffer remains offline, its maintenance is expected to be finished early next week.&lt;br /&gt;
&lt;br /&gt;
''' Thu Mar 26 23:05:00 EDT 2020:'''  Some aspects of the maintenance took longer than expected. The systems will not be back up until some time tomorrow, Friday March 27, 2020.  &lt;br /&gt;
&lt;br /&gt;
''' Wed Mar 25 7:00:00 EDT 2020:'''  SciNet/Niagara downtime started.&lt;br /&gt;
&lt;br /&gt;
''' Mon Mar 23 18:45:10 EDT 2020:'''  File system issues were resolved.&lt;br /&gt;
&lt;br /&gt;
''' Mon Mar 23 18:01:19 EDT 2020:''' There is currently an issue with the main Niagara filesystems. This affects all systems; all jobs have been killed. The issue is being investigated. &lt;br /&gt;
&lt;br /&gt;
''' Fri Mar 20 13:15:33 EDT 2020: ''' There was a power glitch at the datacentre at 8:50 AM, which resulted in jobs getting killed.  Please resubmit failed jobs. &lt;br /&gt;
&lt;br /&gt;
''' COVID-19 Impact on SciNet Operations, March 18, 2020'''&lt;br /&gt;
&lt;br /&gt;
Although the University of Toronto is closing some of its&lt;br /&gt;
research operations on Friday March 20 at 5 pm EDT, this does not&lt;br /&gt;
affect the SciNet systems (such as Niagara, Mist, and HPSS), which&lt;br /&gt;
will remain operational.&lt;br /&gt;
&lt;br /&gt;
''' SciNet/Niagara Downtime Announcement, March 25-26, 2020'''&lt;br /&gt;
&lt;br /&gt;
All resources at SciNet will undergo a two-day maintenance shutdown on March 25th and 26th 2020, starting at 7 am EDT on Wednesday March 25th.  There will be no access to any of the SciNet systems (Niagara, Mist, HPSS, Teach cluster, or the file systems) during this time.&lt;br /&gt;
&lt;br /&gt;
This shutdown is necessary to finish the expansion of the Niagara cluster and its storage system.&lt;br /&gt;
&lt;br /&gt;
We expect to be able to bring the systems back online the evening of March 26th.&lt;br /&gt;
&lt;br /&gt;
''' March 9, 2020, 11:24 PM:''' HPSS services are temporarily suspended for emergency maintenance.&lt;br /&gt;
&lt;br /&gt;
''' March 7, 2020, 10:15 PM:''' File system issues have been cleared.&lt;br /&gt;
&lt;br /&gt;
''' March 6, 2020, 7:30 PM:''' File system issues; we are investigating&lt;br /&gt;
&lt;br /&gt;
''' March 2, 2020, 1:30 PM:''' For the extension of Niagara, the operating system on all Niagara nodes has been upgraded&lt;br /&gt;
from CentOS 7.4 to 7.6.  This required all&lt;br /&gt;
nodes to be rebooted. Running compute jobs are allowed to finish&lt;br /&gt;
before the compute node gets rebooted. Login nodes have all been rebooted, as have the datamover nodes and the jupyterhub service.&lt;br /&gt;
&lt;br /&gt;
''' Feb 24, 2020, 1:30PM: ''' The [[Mist]] login node got rebooted.  It is back, but we are still monitoring the situation.&lt;br /&gt;
&lt;br /&gt;
''' Feb 12, 2020, 11:00AM: ''' The [[Mist]] GPU cluster now available to users.&lt;br /&gt;
&lt;br /&gt;
''' Feb 11, 2020, 2:00PM: ''' The Niagara compute nodes were accidentally rebooted, killing all running jobs.&lt;br /&gt;
&lt;br /&gt;
''' Feb 10, 2020, 7:00PM: ''' HPSS is back to normal.&lt;br /&gt;
&lt;br /&gt;
''' Jan 30, 2020, 12:01PM: ''' We are having an issue with HPSS, in which the disk-cache is full. We put a reservation on the whole system (Globus, plus archive and vfs queues), until it has had a chance to clear some space on the cache.&lt;br /&gt;
&lt;br /&gt;
''' Jan 21, 2020, 4:05PM: '''   There was a partial power outage that took down a large number of the compute nodes.  If your job died during this period, please resubmit.  &lt;br /&gt;
&lt;br /&gt;
'''Jan 13, 2020, 7:35 PM:''' Maintenance finished.&lt;br /&gt;
&lt;br /&gt;
'''Jan 13, 2020, 8:20 AM:''' The announced maintenance downtime started (see below).&lt;br /&gt;
&lt;br /&gt;
'''Jan 9 2020, 11:30 AM:''' External ssh connectivity restored, issue related to the university network.&lt;br /&gt;
&lt;br /&gt;
'''Jan 9 2020, 9:24 AM:''' We received reports of users having trouble connecting to the SciNet data centre; we're investigating.  Systems are up and running and jobs are fine.&amp;lt;p&amp;gt;&lt;br /&gt;
As a workaround, in the meantime, it appears to be possible to log into graham, cedar or beluga, and then ssh to niagara.&amp;lt;/p&amp;gt;&lt;br /&gt;
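&lt;br /&gt;
A minimal sketch of this workaround (the username and the exact graham and niagara hostnames below are assumptions, not part of the announcement; substitute your own account details):&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
# hop through another cluster (e.g. graham), then on to Niagara&lt;br /&gt;
ssh -J myuser@graham.computecanada.ca myuser@niagara.scinet.utoronto.ca&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;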
&lt;br /&gt;
'''Downtime announcement:'''&lt;br /&gt;
To prepare for the upcoming expansion of Niagara, there will be a&lt;br /&gt;
one-day maintenance shutdown on '''January 13th 2020, starting at 8 am&lt;br /&gt;
EST'''.  There will be no access to Niagara, Mist, HPSS or teach, nor&lt;br /&gt;
to their file systems during this time.&lt;br /&gt;
&lt;br /&gt;
2019&lt;br /&gt;
&lt;br /&gt;
'''December 13, 9:00 AM EST:''' Issues resolved.&lt;br /&gt;
&lt;br /&gt;
'''December 13, 8:20 AM EST:''' An overnight issue is now preventing logins to Niagara and other services. It is possibly a file system issue; we are investigating.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;p&amp;gt; '''Fri, Nov 15 2019, 11:00 PM (EST)'''  Niagara and most of the main systems are now available. &lt;br /&gt;
&amp;lt;/p&amp;gt;&amp;lt;p&amp;gt; '''Fri, Nov 15 2019, 7:50 PM (EST)'''  SOSCIP GPU cluster is up and accessible.  Work on the other systems continues.&lt;br /&gt;
&amp;lt;/p&amp;gt;&amp;lt;p&amp;gt; '''Fri, Nov 15 2019, 5:00 PM (EST)'''  Infrastructure maintenance done, upgrades still in process.&lt;br /&gt;
&amp;lt;/p&amp;gt;&amp;lt;p&amp;gt;&lt;br /&gt;
'''Fri, Nov 15 2019, 7:00 AM (EST)'''  Maintenance shutdown of the SciNet data centre has started.  Note: scratch purging has been postponed until Nov 17.&amp;lt;br/&amp;gt; &lt;br /&gt;
&amp;lt;/p&amp;gt;&lt;br /&gt;
&amp;lt;p&amp;gt;&lt;br /&gt;
'''Announcement:''' &lt;br /&gt;
The SciNet datacentre will undergo a maintenance shutdown on&lt;br /&gt;
Friday November 15th 2019, from 7 am to 11 pm (EST), with no access&lt;br /&gt;
to any of the SciNet systems (Niagara, P8, SGC, HPSS, Teach cluster,&lt;br /&gt;
or the filesystems) during that time. &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
'''Sat, Nov 2 2019, 1:30 PM (update):'''  Chiller has been fixed, all systems are operational.    &lt;br /&gt;
&amp;lt;/p&amp;gt;&lt;br /&gt;
'''Fri, Nov 1 2019, 4:30 PM (update):'''  We are operating in free cooling, so we have brought up about half of the Niagara compute nodes to reduce the cooling load.  Access, storage, and other systems should now be available.   &lt;br /&gt;
&lt;br /&gt;
'''Fri, Nov 1 2019, 12:05 PM (update):''' A power module in the chiller has failed and needs to be replaced.   We should be able to operate in free cooling if the temperature stays cold enough, but we may not be able to run all systems. No ETA yet on when users will be able to log back in. &lt;br /&gt;
&lt;br /&gt;
'''Fri, Nov 1 2019, 9:15 AM (update):''' There was an automated shutdown because of rising temperatures, causing all systems to go down. We are investigating; check here for updates.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;p&amp;gt;'''Fri, Nov 1 2019, 8:16 AM:''' Unexpected data centre issue: Check here for updates.&lt;br /&gt;
&amp;lt;/p&amp;gt;&lt;br /&gt;
&lt;br /&gt;
''' Thu 1 Aug 2019 5:00:00 PM ''' Systems are up and operational.   &lt;br /&gt;
&lt;br /&gt;
'''Thu 1 Aug 2019 7:00:00 AM: ''' Scheduled Downtime Maintenance of the SciNet Datacenter.  All systems will be down and unavailable from 7 am until the evening. &lt;br /&gt;
&lt;br /&gt;
'''Fri 26 Jul 2019, 16:02:26 EDT:''' There was an issue with the Burst Buffer at around 3PM, and it was recently solved. BB is OK again.&lt;br /&gt;
&lt;br /&gt;
''' Sun 30 Jun 2019 ''' The '''SOSCIP BGQ''' and '''P7''' systems were decommissioned on '''June 30th, 2019'''.  The BGQdev front end node and storage are still available.  &lt;br /&gt;
&lt;br /&gt;
'''Wed 19 Jun 2019, 1:20:00 PM:''' The BGQ is back online.&lt;br /&gt;
&lt;br /&gt;
'''Wed 19 Jun 2019, 10:00:00 AM:''' The BGQ is still down, but the SOSCIP GPU nodes should be back up. &lt;br /&gt;
&lt;br /&gt;
'''Wed 19 Jun 2019, 1:40:00 AM:''' There was an issue with the SOSCIP BGQ and GPU Cluster last night at about 1:42 am, probably a power fluctuation that took them down.  &lt;br /&gt;
&lt;br /&gt;
'''Wed 12 Jun 2019, 3:30 AM - 7:40 AM''' Intermittent system issues on Niagara's project and scratch file systems because the file-number limit was reached. We increased the total number of files allowed on the file system. &lt;br /&gt;
&lt;br /&gt;
'''Thu 30 May 2019, 11:00:00 PM:'''&lt;br /&gt;
The maintenance downtime of SciNet's data center has finished, and systems are being brought online now.  You can check the progress here. Some systems might not be available until Friday morning.&amp;lt;br/&amp;gt;&lt;br /&gt;
Some action on the part of users will be required when they first connect again to a Niagara login node or datamover.  This is due to the security upgrade of the Niagara cluster, which is now in line with currently accepted best practices.&amp;lt;br/&amp;gt;&lt;br /&gt;
The details of the required actions can be found on the [[SSH Changes in May 2019]] wiki page.&lt;br /&gt;
&lt;br /&gt;
'''Wed 29-30 May 2019''' The SciNet datacentre will undergo a two-day maintenance shutdown, starting at 7 am EDT on Wednesday May 29th.  There will be no access to any of the SciNet systems (Niagara, P7, P8, BGQ, SGC, HPSS, Teach cluster, or the file systems) during this time.&lt;br /&gt;
&lt;br /&gt;
'''SCHEDULED SHUTDOWN''': &lt;br /&gt;
&lt;br /&gt;
Please be advised that on '''Wednesday May 29th through Thursday May 30th''', the SciNet datacentre will undergo a two-day maintenance shutdown, starting at 7 am EDT on Wednesday May 29th.  There will be no access to any of the SciNet systems (Niagara, P7, P8, BGQ, SGC, HPSS, Teach cluster, or the file systems) during this time.&lt;br /&gt;
&lt;br /&gt;
This is necessary to finish the installation of an emergency power generator, to perform the annual cooling tower maintenance, and to enhance login security.&lt;br /&gt;
&lt;br /&gt;
We expect to be able to bring the systems back online the evening of May 30th.  Due to the enhanced login security, users' ssh clients will need to update their known-hosts lists. More detailed information on this procedure will be sent shortly before the systems are back online.&lt;br /&gt;
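&lt;br /&gt;
In practice, updating the known-hosts list usually amounts to removing the outdated host key so that the new one can be accepted on the next login; a minimal sketch (the niagara hostname below is an assumption, adjust it to the host you actually connect to):&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
# remove the outdated host key from ~/.ssh/known_hosts&lt;br /&gt;
ssh-keygen -R niagara.scinet.utoronto.ca&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;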
&lt;br /&gt;
'''Fri 5 Apr 2019:''' Software updates on Niagara: The default CCEnv software stack now uses avx512 on Niagara, and there is now a NiaEnv/2019b stack (&amp;quot;epoch&amp;quot;). &lt;br /&gt;
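&lt;br /&gt;
A minimal sketch of switching between the two stacks on a login node (the NiaEnv/2019b and CCEnv module names come from the note above; the StdEnv module shown for the CC stack is an assumption):&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
# use the new Niagara software stack epoch&lt;br /&gt;
module load NiaEnv/2019b&lt;br /&gt;
&lt;br /&gt;
# or switch to the Compute Canada stack (default builds now use avx512)&lt;br /&gt;
module load CCEnv&lt;br /&gt;
module load StdEnv&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;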
&lt;br /&gt;
'''Thu 4 Apr 2019:''' The 2019 compute and storage allocations have taken effect on Niagara.&lt;br /&gt;
&lt;br /&gt;
'''NOTE''':  There is scheduled network maintenance for '''Friday April 26th 12am-8am''' on the SciNet datacentre's external network connection.   This will not affect internal connections or running jobs; however, remote connections may see interruptions during this period.&lt;br /&gt;
&lt;br /&gt;
'''Wed 24 Apr 2019 14:14 EDT:''' HPSS is back on service. Library and robot arm maintenance finished.&lt;br /&gt;
&lt;br /&gt;
'''Wed 24 Apr 2019 08:35 EDT:''' HPSS out of service this morning for library and robot arm maintenance.&lt;br /&gt;
&lt;br /&gt;
'''Fri 19 Apr 2019 17:40 EDT:''' HPSS robot arm has been released and is back to normal operations.&lt;br /&gt;
&lt;br /&gt;
'''Fri 19 Apr 2019 14:00 EDT:''' Problems with the HPSS library robot have been detected.&lt;br /&gt;
&lt;br /&gt;
'''Wed 17 Apr 2019 15:35 EDT:''' Network connection is back.&lt;br /&gt;
&lt;br /&gt;
'''Wed 17 Apr 2019 15:12 EDT:''' Network connection down.  Investigating.&lt;br /&gt;
&lt;br /&gt;
'''Tue 9 Apr 2019 22:24:14 EDT:'''  Network connection restored.&lt;br /&gt;
&lt;br /&gt;
'''Tue 9 Apr 2019, 15:20:''' Network connection down.  Investigating.&lt;br /&gt;
&lt;br /&gt;
'''Fri 5 Apr 2019:''' Planned, short outage in connectivity to the SciNet datacentre from 7:30 am to 8:55 am EST for maintenance of the network.  This outage will not affect running or queued jobs. It may be necessary to reboot the login nodes at some point tomorrow, which could result in a short interruption of connectivity, but which will have no effect on running or queued jobs.&lt;br /&gt;
&lt;br /&gt;
'''April 4, 2019:'''  The 2019 compute and storage allocations will take effect on Niagara. Running jobs will not be affected by this change and will run their course.  Queued jobs' priorities will be updated to reflect the new fairshare values later in the day.  The queue should fully reflect the new fairshare values in about 24 hours.   &lt;br /&gt;
&lt;br /&gt;
It may be necessary to reboot the login nodes at some point tomorrow, which could result in a short interruption of connectivity, but which will have no effect on running or queued jobs.&lt;br /&gt;
&lt;br /&gt;
There will be updates to the software stack on this day as well.&lt;br /&gt;
&lt;br /&gt;
'''March 25, 3:05 PM EST:'''  Most systems back online, other services should be back shortly. &lt;br /&gt;
&lt;br /&gt;
'''March 25, 12:05 PM EST:''' Power is back at the datacentre, but it is not yet known when all systems will be back up.  Keep checking here for updates.&lt;br /&gt;
&lt;br /&gt;
'''March 25, 11:27 AM EST:''' A power outage in the datacentre occurred and caused all services to go down.  Check here for updates.&lt;br /&gt;
&lt;br /&gt;
'''Thu Mar 21 10:37:28 EDT 2019:''' HPSS is back in service&lt;br /&gt;
&lt;br /&gt;
HPSS out of service on '''Tue, Mar/19 at 9AM''', for tape library expansion and relocation. It's possible the downtime will extend to Wed, Mar/20.&lt;br /&gt;
&lt;br /&gt;
'''January 21, 4:00 PM''': HPSS is back in service. Thank you for your patience.&lt;br /&gt;
&lt;br /&gt;
'''January 18, 5:00 PM''': We did practically all of the HPSS upgrades (software/hardware); however, the main client node - archive02 - is presenting an issue we have not been able to resolve yet. We will try to resume work over the weekend with cool heads, or on Monday. Sorry, but this is an unforeseen delay. Jobs in the queue will remain there, and we'll delay the scratch purging by 1 week.&lt;br /&gt;
&lt;br /&gt;
'''January 16, 11:00 PM''': HPSS is being upgraded, as announced.&lt;br /&gt;
&lt;br /&gt;
'''January 16, 8:00 PM''': Systems are coming back up and should be accessible to users now.&lt;br /&gt;
&lt;br /&gt;
'''January 15, 8:00 AM''': Data centre downtime in effect.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;font color=red&amp;gt;&amp;lt;b&amp;gt;Downtime Announcement for January 15 and 16, 2019&amp;lt;/b&amp;gt;&amp;lt;/font&amp;gt;&amp;lt;br&amp;gt;&lt;br /&gt;
The SciNet datacentre will need to undergo a two-day maintenance shutdown in order to perform electrical work, repairs and maintenance.  The electrical work is in preparation for the upcoming installation of an emergency power generator and a larger UPS, which will result in increased resilience to power glitches and outages.  The shutdown is scheduled to start on '''Tuesday January 15, 2019, at 7 am''' and will last until '''Wednesday January 16, 2019''', some time in the evening. There will be no access to any of the SciNet systems (Niagara, P7, P8, BGQ, SGC, HPSS, Teach cluster, or the filesystems) during this time.&lt;br /&gt;
Check back here for up-to-date information on the status of the systems.&lt;br /&gt;
&lt;br /&gt;
Note: this downtime was originally scheduled for Dec. 18, 2018, but has been postponed and combined with the annual maintenance downtime.&lt;br /&gt;
&lt;br /&gt;
'''December 24, 2018, 11:35 AM EST:''' Most systems are operational again. If you had compute jobs running yesterday at around 3:30PM, they likely crashed - please check them and resubmit if needed.&lt;br /&gt;
&lt;br /&gt;
'''December 24, 2018, 10:40 AM EST:''' Repairs have been made, and the file systems are starting to be mounted on the cluster. &lt;br /&gt;
&lt;br /&gt;
'''December 23, 2018, 3:38 PM EST:''' Issues with the file systems (home, scratch and project). We are investigating; it looks like a hardware issue that we are trying to work around. Note that the absence of /home means you cannot log in with ssh keys. All compute jobs crashed around 3:30 PM EST on Dec 23. Once the system is properly up again, please resubmit your jobs.  Unfortunately, at this time of year, it is not possible to give an estimate on when the system will be operational again.&lt;br /&gt;
&lt;br /&gt;
'''Thu Nov 22 14:20:00 EDT 2018''': &amp;lt;font color=green&amp;gt;HPSS back in service&amp;lt;/font&amp;gt;&lt;br /&gt;
&lt;br /&gt;
'''Thu Nov 22 08:55:00 EDT 2018''': &amp;lt;font color=red&amp;gt;HPSS offline for scheduled maintenance&amp;lt;/font&amp;gt;&lt;br /&gt;
&lt;br /&gt;
'''Tue Nov 20 16:30:00 EDT 2018''':  HPSS offline on Thursday 9AM for installation of new LTO8 drives in the tape library.&lt;br /&gt;
&lt;br /&gt;
'''Tue Oct  9 12:16:00 EDT 2018''':  BGQ compute nodes are up.  &lt;br /&gt;
&lt;br /&gt;
'''Sun Oct  7 20:24:26 EDT 2018''':  SGC and the BGQ front end are available; BGQ compute nodes are down due to a cooling issue.  &lt;br /&gt;
&lt;br /&gt;
'''Sat Oct  6 23:16:44 EDT 2018''':  There were some problems bringing up SGC &amp;amp; BGQ, they will remain offline for now.&lt;br /&gt;
&lt;br /&gt;
'''Sat Oct  6 18:36:35 EDT 2018''':  Electrical work finished, power restored. Systems are coming online.&lt;br /&gt;
&lt;br /&gt;
'''July 18, 2018:''' login.scinet.utoronto.ca is now disabled, GPC $SCRATCH and $HOME are decommissioned.&lt;br /&gt;
&lt;br /&gt;
'''July 12, 2018:''' There was a short power interruption around 10:30 am which caused most of the systems (Niagara, SGC, BGQ) to reboot and any running jobs to fail. &lt;br /&gt;
&lt;br /&gt;
'''July 11, 2018:''' P7's moved to BGQ filesystem, P8's moved to Niagara filesystem.&lt;br /&gt;
&lt;br /&gt;
'''May 24, 2018, 9:25 PM EST:''' The data center is up, and all systems are operational again.&lt;br /&gt;
&lt;br /&gt;
'''May 24, 2018, 7:00 AM EST:''' The data centre is under annual maintenance. All systems are offline. Systems are expected to be back late afternoon today; check for updates on this page.&lt;br /&gt;
&lt;br /&gt;
'''May 18, 2018:''' Announcement: Annual scheduled maintenance downtime: Thursday May 24, starting 7:00 AM&lt;br /&gt;
&lt;br /&gt;
'''May 16, 2018:''' Cooling  restored, systems online&lt;br /&gt;
&lt;br /&gt;
'''May 16, 2018:''' Cooling issue at datacentre again, all systems down&lt;br /&gt;
&lt;br /&gt;
'''May 15, 2018:''' Cooling restored, systems coming online&lt;br /&gt;
&lt;br /&gt;
'''May 15, 2018''' Cooling issue at datacentre, all systems down&lt;br /&gt;
&lt;br /&gt;
'''May 4, 2018:''' [[HPSS]] is now operational on Niagara.&lt;br /&gt;
&lt;br /&gt;
'''May 3, 2018:''' [[Burst Buffer]] is available upon request.&lt;br /&gt;
&lt;br /&gt;
'''May 3, 2018:''' The [https://docs.computecanada.ca/wiki/Globus Globus] endpoint for Niagara is available: computecanada#niagara.&lt;br /&gt;
&lt;br /&gt;
'''May 1, 2018:''' System status moved here.&lt;br /&gt;
&lt;br /&gt;
'''Apr 23, 2018:''' GPC-compute is decommissioned, GPC-storage available until 30 May 2018.&lt;br /&gt;
&lt;br /&gt;
'''April 10, 2018:''' Niagara commissioned.&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6539</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6539"/>
		<updated>2025-04-08T21:05:35Z</updated>

		<summary type="html">&lt;p&gt;Pinto: &lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Partial   | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Partial   | Mist|Mist}}&lt;br /&gt;
|{{Up   |Teach|Teach}}&lt;br /&gt;
|{{Up   |Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   | OnDemand|Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Up   | Scheduler|Niagara_Quickstart#Submitting_jobs}}&lt;br /&gt;
|{{Up   | File system|Niagara_Quickstart#Storage_and_quotas}}&lt;br /&gt;
|{{Up   | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down | HPSS|HPSS}}&lt;br /&gt;
|{{Up   | Login Nodes|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | External Network|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   |Balam|Balam}}&lt;br /&gt;
|{{Up   |CCEnv|Using_modules}}&lt;br /&gt;
|{{Down   | Jupyter Hub|Jupyter_Hub}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''April 8, 2025:''' HPSS is being reserved for OS updates on April 9 (Wednesday).&lt;br /&gt;
&lt;br /&gt;
'''April 1, 2025:''' The Jupyter Hub has been replaced by SciNet's [[Open OnDemand Quickstart|Open OnDemand service]].&lt;br /&gt;
&lt;br /&gt;
'''March 1, 2025:''' As of March 1st scratch purging is suspended until after Trillium comes online.&lt;br /&gt;
&lt;br /&gt;
'''January 6, 2025:''' As part of the installation of the new computing cluster Trillium, there is now a permanent reduction in computing capacity of Niagara to 50% and of Mist to 35%.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Mist| Mist Power 9 GPU cluster]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6536</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6536"/>
		<updated>2025-04-08T21:04:22Z</updated>

		<summary type="html">&lt;p&gt;Pinto: /* System Status */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Partial   | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Partial   | Mist|Mist}}&lt;br /&gt;
|{{Up   |Teach|Teach}}&lt;br /&gt;
|{{Up   |Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   | OnDemand|Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Up   | Scheduler|Niagara_Quickstart#Submitting_jobs}}&lt;br /&gt;
|{{Up   | File system|Niagara_Quickstart#Storage_and_quotas}}&lt;br /&gt;
|{{Up   | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down | HPSS|HPSS}}&lt;br /&gt;
|{{Up   | Login Nodes|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | External Network|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   |Balam|Balam}}&lt;br /&gt;
|{{Up   |CCEnv|Using_modules}}&lt;br /&gt;
|{{Down   | Jupyter Hub|Jupyter_Hub}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''April 8, 2025 5PM:''' HPSS is being reserved for OS updates on April 9 (Wednesday).&lt;br /&gt;
&lt;br /&gt;
'''March 1, 2025 9PM:''' As of March 1st scratch purging is suspended until after Trillium comes online.&lt;br /&gt;
&lt;br /&gt;
'''January 6, 2025:''' As part of the installation of the new computing cluster Trillium, there is now a permanent reduction in computing capacity of Niagara to 50% and of Mist to 35%.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Mist| Mist Power 9 GPU cluster]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6533</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6533"/>
		<updated>2025-04-08T21:03:27Z</updated>

		<summary type="html">&lt;p&gt;Pinto: &lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Partial   | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Partial   | Mist|Mist}}&lt;br /&gt;
|{{Up   |Teach|Teach}}&lt;br /&gt;
|{{Up   |Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   | OnDemand|Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Up   | Scheduler|Niagara_Quickstart#Submitting_jobs}}&lt;br /&gt;
|{{Up   | File system|Niagara_Quickstart#Storage_and_quotas}}&lt;br /&gt;
|{{Up   | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down | HPSS|HPSS}}&lt;br /&gt;
|{{Up   | Login Nodes|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | External Network|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   |Balam|Balam}}&lt;br /&gt;
|{{Up   |CCEnv|Using_modules}}&lt;br /&gt;
|{{Down   | Jupyter Hub|Jupyter_Hub}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''April 8, 2025 5PM:''' HPSS is being reserved for OS updates on April 9 (Wednesday).&lt;br /&gt;
&lt;br /&gt;
'''March 01, 2025 9:00 pm EST:''' As of March 1st scratch purging is suspended until after Trillium comes online.&lt;br /&gt;
&lt;br /&gt;
'''January 6, 2025:''' As part of the installation of the new computing cluster Trillium, there is now a permanent reduction in computing capacity of Niagara to 50% and of Mist to 35%.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Mist| Mist Power 9 GPU cluster]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Previous_messages&amp;diff=6530</id>
		<title>Previous messages</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Previous_messages&amp;diff=6530"/>
		<updated>2025-04-08T21:03:15Z</updated>

		<summary type="html">&lt;p&gt;Pinto: &lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;'''April 1, 2025:''' The Jupyter Hub has been replaced by SciNet's [[Open OnDemand Quickstart|Open OnDemand service]].&lt;br /&gt;
&lt;br /&gt;
'''March 31, 2025 3:20 pm EDT:''' Mist login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
'''March 31, 2025 2:45 pm EDT:''' Mist login node is under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''March 28, 2025 3:00 pm - 4:00 pm EDT:''' A short maintenance was needed for the Teach compute nodes; you might have experienced some job scheduling delays on that cluster. &lt;br /&gt;
&lt;br /&gt;
'''March 20, 2025 10:30 am EDT:''' Teach compute nodes are back. &lt;br /&gt;
&lt;br /&gt;
'''March 19, 2025 11:00 pm EDT:''' Teach compute nodes are down again. &lt;br /&gt;
&lt;br /&gt;
'''March 19, 2025 5:15 pm EDT:''' Maintenance of the cooling system was performed successfully. The cluster is back online.&lt;br /&gt;
&lt;br /&gt;
'''March 19, 2025 8:00 am - 5:00 pm EDT:''' Maintenance of the cooling system as well as preparations for the Trillium cluster will require a shutdown of the compute nodes of all SciNet systems (Niagara, Mist, Rouge, Balam, Teach, as well as hosted equipment). The login nodes, file systems and the HPSS system will remain available. The scheduler will hold jobs that are submitted until the maintenance has finished.&lt;br /&gt;
&lt;br /&gt;
'''March 18, 2025 10:00 am EDT:''' Teach compute nodes are back.&lt;br /&gt;
&lt;br /&gt;
'''March 17, 2025 10:00 pm EDT:''' Teach compute nodes are down. We are working on it. &lt;br /&gt;
&lt;br /&gt;
'''February 27, 2025 9:00 pm EST:''' Access to HPSS via Globus has been restored.&lt;br /&gt;
&lt;br /&gt;
'''February 25, 2025 2:30 pm EST:''' Access to HPSS via Globus is currently suspended (sorry, trivial upgrade has gone wrong).&lt;br /&gt;
&lt;br /&gt;
'''February 25, 2025 12:30 pm EST:''' Mist login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
'''February 25, 2025 11:50 am EST:''' Mist login node is under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''February 7, 2025 2:45 pm EST:''' Systems are back online.&lt;br /&gt;
&lt;br /&gt;
'''Fri Feb  7 01:04:33 EST 2025:''' There has been a problem with the water chiller. An automatic thermal shutdown of the compute nodes has been triggered.&lt;br /&gt;
&lt;br /&gt;
'''January 31, 2025 11:45 am EST:''' Power is back.&lt;br /&gt;
&lt;br /&gt;
'''January 31, 2025 6:00 am EST:''' Power outage in the data center. Many compute jobs will have stopped. Until power gets restored, parts of the systems are  running on the generator. No ETA on full power restoration.&lt;br /&gt;
 &lt;br /&gt;
'''January 28, 2025 9:30 pm EST:''' The CCEnv stack has been restored.&lt;br /&gt;
&lt;br /&gt;
'''January 28, 2025 5:00 pm EST:''' The CCEnv stack from cvmfs has issues and may not work reliably.&lt;br /&gt;
&lt;br /&gt;
'''January 23, 2025 9:00 am - 1:00 pm EST:''' Balam, Rouge and Neptune compute nodes will be shut down from 9 AM to 1 PM EST for additional electrical work.&lt;br /&gt;
&lt;br /&gt;
'''January 22, 2025 12:55 pm EST:''' Compute nodes are back online&lt;br /&gt;
&lt;br /&gt;
'''January 22, 2025 8:00 am - 5:00 pm EST:''' Preparations for the new system Trillium will require a shutdown of the compute nodes of all SciNet systems (Niagara, Mist, Rouge, Teach, as well as hosted equipment) from 8 AM to 5 PM EST. The login nodes, file systems and the HPSS system will remain available. The scheduler will hold jobs that are submitted until the maintenance has finished.&lt;br /&gt;
&lt;br /&gt;
'''January 9, 2025 11:00 am EST:''' Systems are back online&lt;br /&gt;
&lt;br /&gt;
'''January 8, 2025 10:34 pm EST:''' We had some sort of thermal event at the datacenter, and the clusters are down. We're still investigating&lt;br /&gt;
&lt;br /&gt;
'''January 8, 2025 08:00 am EST:''' Balam, Rouge and Neptune are shut down for electrical upgrades.&lt;br /&gt;
&lt;br /&gt;
'''January 6, 2025:''' As part of the installation of the new computing cluster Trillium, there will be a (permanent) reduction in computing capacity of Niagara and Mist. Only 50% of Niagara and 35% of Mist will remain active after January 6th.  The reduction will require Mist to be shut down for a few hours on January 6th. Balam, Rouge and Neptune will be shut down on Wednesday January 8th for the same reason.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
'''December 20, 2024 09:00 am EST:''' OpenOnDemand service will not be available on Dec 20 from 9 a.m. to 5 p.m. due to scheduled maintenance.&lt;br /&gt;
&lt;br /&gt;
'''December 16, 2024, 08:21 am EST:''' The Niagara scheduler has been restarted.&lt;br /&gt;
  &lt;br /&gt;
'''December 16, 2024, 00:04 am EST:''' The Niagara scheduler has an issue; we are investigating.&lt;br /&gt;
  &lt;br /&gt;
'''Fri Nov 8, 2024, 09:45 AM EST.''' Balam and Rouge schedulers are back online.&lt;br /&gt;
&lt;br /&gt;
'''Thu Nov 7, 2024, 10:30 PM EST.''' Most systems are up, except for the schedulers on Balam and Rouge (their login nodes are up) and a few 'neptune' Niagara nodes.&lt;br /&gt;
&lt;br /&gt;
'''Thu Nov 7, 2024, 5:30 PM EST:''' Systems are being brought up, but not yet available for users.&lt;br /&gt;
&lt;br /&gt;
'''Downtime Announcement:''' On Thu Nov 7, 2024, all systems and storage located at the SciNet Datacenter (Niagara, Mist, HPSS, Rouge, Teach, JupyterHub, Balam) will be unavailable from 7 a.m. to 5 p.m. ET.&lt;br /&gt;
This outage is required to install new electrical equipment (UPS) for the upcoming systems refresh. The work is expected to be completed in one day.&lt;br /&gt;
The scheduler will hold jobs that cannot finish before the start of the shutdown. Users are encouraged to submit small and short jobs that can take advantage of this, as the scheduler may be able to fit these jobs in before the maintenance on otherwise idle nodes.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 24 15:05 EDT 2024''': Cooling pump motor has been replaced. All systems are back to normal.&lt;br /&gt;
&lt;br /&gt;
'''Tue Oct 22 16:35 EDT 2024''': The motor is scheduled for replacement on Thursday, Oct 24.&lt;br /&gt;
&lt;br /&gt;
'''Mon Oct 21 17:15 EDT 2024''': Compute nodes will remain down until we can replace the main cooling pump.  This may take several days.  Please see this page for updates.&lt;br /&gt;
&lt;br /&gt;
'''Mon Oct 21 12:15 EDT 2024''': Compute nodes have been shutdown due to a cooling system failure.&lt;br /&gt;
&lt;br /&gt;
'''Fri Oct 18 21:40 EDT 2024''': Systems are back to normal&lt;br /&gt;
&lt;br /&gt;
'''Fri Oct 18 21:15 EDT 2024''': We are experiencing technical difficulties, apparently caused by a glitch in the file systems.&lt;br /&gt;
&lt;br /&gt;
'''Tue Oct 1 10:45 EDT 2024''': The Jupyter Hub service will be rebooted today at around 11:00 am EDT for system upgrades. &lt;br /&gt;
&lt;br /&gt;
'''Tue Sep 3 07:00 EDT 2024''': Intermittent file system issues which may cause issues logging in.  We are in the process of resolving the issue.&lt;br /&gt;
&lt;br /&gt;
'''Sun Sep 1 00:01 - 04:00 EDT 2024''': Network maintenance may cause connection issues to the datacentre.&lt;br /&gt;
&lt;br /&gt;
'''Thu Aug 22 13:30:00 EDT 2024''': Chiller issue caused about 25% of Niagara compute nodes to go down; users should resubmit any affected jobs.&lt;br /&gt;
&lt;br /&gt;
'''Wed Aug 21 16:35:00 EDT 2024''': Maintenance finished; compute nodes are now available for user jobs.&lt;br /&gt;
&lt;br /&gt;
'''Wed Aug 21 7:00:00 EDT 2024''': Maintenance started.&lt;br /&gt;
&lt;br /&gt;
'''Sun Aug 18 19:15:00 EDT 2024''': Issues have been resolved.&lt;br /&gt;
&lt;br /&gt;
'''Sun Aug 18 14:30:00 EDT 2024''': Power issues seem to have brought compute nodes down, compounding the file system issues we had earlier.&lt;br /&gt;
&lt;br /&gt;
'''Sun Aug 18 10:31:53 EDT 2024''': GPFS is back online and seems to be holding.&lt;br /&gt;
&lt;br /&gt;
'''Sun Aug 18 08:44:40 EDT 2024''': Sorry, problems with GPFS file systems are reoccurring. &lt;br /&gt;
&lt;br /&gt;
'''Sun Aug 18 07:59:02 EDT 2024''': GPFS file systems are back to normal. Many jobs have died and will need to be resubmitted.&lt;br /&gt;
&lt;br /&gt;
'''Sun Aug 18 06:39:12 EDT 2024''': Support staff detected the problem and started to work on the fix&lt;br /&gt;
&lt;br /&gt;
'''Sun Aug 18 00:53:52 EDT 2024''': GPFS file systems (home, scratch, project) started to show initial stages of problems&lt;br /&gt;
&lt;br /&gt;
'''August 21, 2024''': The annual cooling tower maintenance for the SciNet data centre will take place on August 21, 2024 from 7 a.m. EDT until the end of day. This maintenance requires a shutdown of the compute nodes of all SciNet systems (Niagara, Mist, Rouge, Teach, as well as hosted equipment). The login nodes, file systems and the HPSS system will remain available.&lt;br /&gt;
&lt;br /&gt;
The scheduler will hold jobs that cannot finish before the start of the shutdown. Users are encouraged to submit small and short jobs that can take advantage of this, as the scheduler may be able to fit these jobs in before the maintenance on otherwise idle nodes.&lt;br /&gt;
&lt;br /&gt;
'''Thursday, August 1, 10:00 PM EDT''' Filesystem problems resolved.&lt;br /&gt;
&lt;br /&gt;
'''Thursday, August 1, 9:30 PM EDT''' Filesystem problems preventing logins to the systems.  Working on it.&lt;br /&gt;
&lt;br /&gt;
'''Monday, July 22, 11:50 AM EDT''' Systems are back to normal&lt;br /&gt;
&lt;br /&gt;
'''Monday, July 22, 10:50 AM EDT''' Cooling problem has been fixed. Systems are coming up&lt;br /&gt;
&lt;br /&gt;
'''Monday, July 22, 10:20 AM EDT''' Compute nodes have been shutdown due to a cooling tower failure.&lt;br /&gt;
&lt;br /&gt;
'''Friday, July 19, 9:30 AM EDT''' CCEnv modules available on all login nodes again.&lt;br /&gt;
&lt;br /&gt;
'''Friday, July 19, 5:00 AM EDT''' Some login nodes do not have the CCEnv modules available.  We are working on a fix.&lt;br /&gt;
&lt;br /&gt;
'''Monday, Jun 3, 12:55 PM EDT''' All systems are recovered now.&lt;br /&gt;
&lt;br /&gt;
'''Monday, Jun 3, 10:50 AM EDT''' The file system issues affect all nodes, so all systems are inaccessible to users at the moment. No time estimate yet for when the systems may be back.&lt;br /&gt;
&lt;br /&gt;
'''Monday, Jun 3, 7:58 AM EDT''' Login issues for Niagara and Mist. There are file system issues as well. Investigating.&lt;br /&gt;
&lt;br /&gt;
'''Sunday, Jun 2, 12:00 PM EDT''' CCEnv modules missing, investigating.&lt;br /&gt;
&lt;br /&gt;
'''Wednesday May 29, 5:50 PM EDT''' Niagara compute nodes are up.  &lt;br /&gt;
&lt;br /&gt;
'''Wednesday May 29, 4:40 PM EDT''' Niagara compute nodes are coming up.  &lt;br /&gt;
&lt;br /&gt;
'''Wednesday May 29, 4 PM EDT''' Niagara login nodes and jupyterhub are up; file system is now accessible.  &lt;br /&gt;
&lt;br /&gt;
'''Wednesday May 29, 2 PM EDT''' Electricians are checking and testing all junction boxes and connectors under the raised floor for safety.  Some systems are expected to be back up later today (storage, login nodes), and compute systems will be powered up as soon as it is deemed safe.&lt;br /&gt;
&lt;br /&gt;
'''Tuesday May 28, 3 PM EDT''' Cleaning crews are at the datacentre, to pump the water and install dryers.  Once the floors are dry, we need to inspect all electrical boxes to ensure safety.  We do not expect to have a fully functional datacentre before Thursday, although we hope to be able to turn on the storage and login nodes sometime tomorrow, if circumstances permit.  Apologies, and thank you for your patience.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
'''Tuesday May 28, 7 AM EDT''' A water mains break outside our datacentre has caused extensive flooding, and all systems have been shut down preventatively. &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
'''Friday May 17, 10 PM EDT - Saturday May 18, 2 AM EDT:''' The external network will be unavailable for maintenance. Running and queued jobs on the systems will not be affected.&lt;br /&gt;
&lt;br /&gt;
'''Tuesday May 14, 6:45 PM EDT:''' All systems are recovered now.&lt;br /&gt;
&lt;br /&gt;
'''Tuesday May 14, 5 PM EDT:''' Power loss at the datacentre resulted in loss of cooling.  Systems are being restored.&lt;br /&gt;
&lt;br /&gt;
'''Friday May 3, 10 PM EDT - Saturday May 4, 2 AM EDT:''' The external network will be unavailable for maintenance. Running and queued jobs on the systems will not be affected.&lt;br /&gt;
&lt;br /&gt;
'''Tuesday April 17, 2024: 11:00 ''' The restart of the Niagara login nodes has been completed successfully.&lt;br /&gt;
&lt;br /&gt;
'''Tuesday April 17, 2024: 09:40 ''' Niagara login nodes will be rebooted &lt;br /&gt;
&lt;br /&gt;
'''Tuesday April 16, 2024: 12:45 ''' mist-login01  recovered now&lt;br /&gt;
&lt;br /&gt;
'''Tuesday April 16, 2024: 11:45 ''' mist-login01  will be unavailable due to maintenance from 12:15 to 12:45. Following the completion of maintenance, login access should be restored &lt;br /&gt;
&lt;br /&gt;
'''Monday April 15, 2024: 13:02 ''' Balam-login01 will be unavailable due to maintenance from 13:00 to 13:30. Following the completion of maintenance, login access should be restored and available once more. &lt;br /&gt;
&lt;br /&gt;
'''Monday March 18, 2024: 14:45 ''' File system issue resolved.  Users are advised to check if their running jobs were affected, and if so, to resubmit.&lt;br /&gt;
&lt;br /&gt;
'''Monday March 18, 2024: 13:02 ''' File system issues.  This affects the ability to log in. We are investigating.&lt;br /&gt;
&lt;br /&gt;
'''Monday March 11, 2024: 14:05 ''' All systems are recovered now&lt;br /&gt;
&lt;br /&gt;
'''Monday March 11, 2024:''' There will be a shutdown of the file system at SciNet for an emergency repair. As a consequence, the login nodes and compute nodes of all SciNet clusters using the file system (Niagara, Mist, Balam, Rouge, and Teach) will be down from 11 am EST until later in the afternoon. &lt;br /&gt;
&lt;br /&gt;
'''February 28, 2024, 16:30 PM EDT:''' All systems are recovered now.&lt;br /&gt;
&lt;br /&gt;
'''February 28, 2024, 1:00 PM EDT:''' A loop pump fault caused many compute nodes to overheat. If your jobs failed around this time, please resubmit. Once the root cause has been addressed, the cluster will be brought up completely. Please report issues to support@scinet.utoronto.ca.&lt;br /&gt;
&lt;br /&gt;
'''February 22, 2024, 5:45 PM EDT:''' Maintenance finished and system restored. Please report issues to support@scinet.utoronto.ca.&lt;br /&gt;
&lt;br /&gt;
'''February 21, 2024, 7:00 AM EDT:''' Maintenance starting.  Niagara login nodes and the file system are kept up as much as possible, but will be rebooted at some point.&lt;br /&gt;
&lt;br /&gt;
'''February 20, 2024, 3:45 PM EDT:''' Cooling tower has been restored, all systems are in production. &lt;br /&gt;
&lt;br /&gt;
'''February 20, 2024, 1:30 AM EDT:''' Cooling tower malfunction, all compute nodes are shutdown, the root cause will be addressed earliest in the morning.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;span style=&amp;quot;color:red&amp;quot;&amp;gt;&amp;lt;b&amp;gt; February 21 and 22, 2024: SciNet Data Centre Maintenance:&amp;lt;/b&amp;gt;&amp;lt;/span&amp;gt;&amp;lt;br/&amp;gt;&lt;br /&gt;
This annual winter maintenance involves a full data centre shutdown&lt;br /&gt;
starting at 7:00 am EST on Wednesday, February 21st.  None of the&lt;br /&gt;
SciNet systems (Niagara, Mist, Rouge, Teach, the file systems, as&lt;br /&gt;
well as hosted equipment) will be accessible.  All systems should be&lt;br /&gt;
fully available again in the late afternoon of the 22nd.&lt;br /&gt;
&lt;br /&gt;
The scheduler will hold jobs that cannot finish before the start of&lt;br /&gt;
the shutdown. Users are encouraged to submit small and short jobs&lt;br /&gt;
that can take advantage of this, as the scheduler may be able to fit&lt;br /&gt;
these jobs in before the maintenance on otherwise idle nodes.&lt;br /&gt;
&lt;br /&gt;
'''Mon January 29, 08:20    (EST):''' Access to Niagara login nodes restored (it was an internal routing issue).&lt;br /&gt;
&lt;br /&gt;
'''Mon January 29, 07:35    (EST):''' No access to Niagara login nodes.  We are investigating.  Use the Mist login to get access to SciNet systems.&lt;br /&gt;
&lt;br /&gt;
'''Wed January 24, 15:20    (EST):''' maintenance on rouge-login01 &lt;br /&gt;
&lt;br /&gt;
'''Wed January 24, 14:55    (EST):''' Rebooting rouge-login01 &lt;br /&gt;
&lt;br /&gt;
'''Tue January 23, 10:25 am (EST):''' Mist-login01 maintenance done &lt;br /&gt;
&lt;br /&gt;
'''Tue January 23, 10:10 am (EST):''' Rebooting Mist-login01 to deploy new image&lt;br /&gt;
&lt;br /&gt;
'''Tue January 22, 21:00 (EST):''' HPSS performance for hsi &amp;amp; htar clients is back to normal.&lt;br /&gt;
&lt;br /&gt;
'''Tue January 20, 11:50 am (EST):''' HPSS hsi/htar/VFS jobs will remain in the PD (pending) state in the queue over the weekend, so that we may work on archive02/vfs02 on Monday and try to improve transfer performance. In the meantime you may use Globus (computecanada#hpss) if your workflow is suitable. &lt;br /&gt;
&lt;br /&gt;
'''Tue January 14, 13:20 am (EST):''' The ongoing HPSS jobs from Friday finished earlier, so we restarted HPSS sooner and released the PD jobs on the queue. &lt;br /&gt;
&lt;br /&gt;
'''Tue January 12, 10:40 am (EST):''' We have applied some tweaks to the HPSS configuration to improve performance, but they won't take effect until we restart the services, which is scheduled for Monday morning. If over the weekend we notice that there are no HPSS jobs running in the queue, we may restart HPSS sooner. &lt;br /&gt;
&lt;br /&gt;
'''Tue January 09, 9:10 am (EST):''' Remaining cvmfs issues cleared.&lt;br /&gt;
&lt;br /&gt;
'''Tue January 09, 8:00 am (EST):''' We're investigating remaining issues with cvmfs access on login nodes.&lt;br /&gt;
&lt;br /&gt;
'''Mon January 08, 21:50 pm (EST):''' File systems are back to normal. Please resubmit your jobs.  &lt;br /&gt;
&lt;br /&gt;
'''Mon January 08, 9:10 pm (EST):''' We had a severe deadlock, and some disk volumes went down. The file systems are being recovered now. It could take another hour.&lt;br /&gt;
&lt;br /&gt;
'''Mon January 08, 7:20 pm (EST):''' We seem to have a problem with the file system, and are investigating.&lt;br /&gt;
&lt;br /&gt;
'''Tue December 19, 2:45 pm (EST):''' Compute nodes are available again.  &lt;br /&gt;
&lt;br /&gt;
'''Tue December 19, 12:09 pm (EST):''' Maintenance was postponed by one hour. &lt;br /&gt;
&lt;br /&gt;
'''Tue December 19, 12 noon - 1 pm (EST):''' There will be a shutdown of the compute nodes of the Niagara, Mist and Rouge clusters to allow for an emergency repair to the cooling tower.  Login nodes will remain available, but no jobs will run during that time.  Updates will be posted here.&lt;br /&gt;
&lt;br /&gt;
'''Mon Dec  11 11:17:00 EST 2023:''' File systems recovered; Niagara and Mist are operational again.&lt;br /&gt;
&lt;br /&gt;
'''Mon Dec  11 7:51:00 EST 2023:''' Niagara's login nodes are being overwhelmed.  We are investigating. Likely file-system related.&lt;br /&gt;
&lt;br /&gt;
'''Thu Dec  6 10:01:24 EST 2023:''' Niagara's scheduler rebooting for security patches.&lt;br /&gt;
&lt;br /&gt;
'''Wed Dec  6 13:06:46 EST 2023:''' Endpoint computecanada#niagara transition from Globus GCSv4 to GCSv5 is completed. computecanada#niagara-GCSv4 has been deactivated&lt;br /&gt;
&lt;br /&gt;
'''Mon Dec  4 16:35:07 EST 2023:''' Endpoint computecanada#niagara has now been upgraded to Globus GCSv5. The old endpoint is still available as computecanada#niagara-GCSv4 on nia-datamover2, only until Wednesday, at which time we'll disable it as well.&lt;br /&gt;
&lt;br /&gt;
'''Mon Dec  4 11:54:49 EST 2023:''' The nia-datamover1 node will be offline this Monday afternoon for the Globus GCSv5 upgrade. Endpoint computecanada#niagara-GCSv4 will still be available via nia-datamover2.&lt;br /&gt;
&lt;br /&gt;
'''Tue Nov 28 16:29:14 EST 2023:''' The computecanada#hpss Globus endpoint is now running GCSv5. We'll find a window of opportunity next week to upgrade computecanada#niagara to GCSv5 as well.&lt;br /&gt;
&lt;br /&gt;
'''Tue Nov 28 14:20:30 EST 2023:''' The computecanada#hpss Globus endpoint will be offline for the next few hours for the GCSv5 upgrade.&lt;br /&gt;
&lt;br /&gt;
'''Fri Nov 10, 2023, 18:00 PM EDT:''' The HPSS upgrade is finished. We didn't have time to update Globus to GCSv5, so we'll find a window of opportunity to do this next week. &lt;br /&gt;
&lt;br /&gt;
Please be advised that starting this &amp;lt;B&amp;gt;Friday morning, Nov/10, we'll be upgrading the HPSS system from version 8.3 to 9.3 and the HPSS Globus server from GCSv4 to GCSv5.&amp;lt;/B&amp;gt; Everything going well we expect to be back online by the end of the day.  &lt;br /&gt;
&lt;br /&gt;
'''Fri Nov 3, 2023, 12:20 PM EDT:''' The &amp;quot;Niagara at Scale&amp;quot; event has finished. Niagara is available again for all users.&lt;br /&gt;
&lt;br /&gt;
'''Tue Oct 31, 2023, 12 PM EDT:''' The &amp;quot;Niagara at Scale&amp;quot; event has started.&lt;br /&gt;
&lt;br /&gt;
'''Tue Oct 31, 2023, 12:00 PM EDT - Fri Nov 3, 2023, 12:00 PM EDT:''' Three-day reservation for the &amp;quot;Niagara at Scale&amp;quot; event. Only &amp;quot;Niagara at Scale&amp;quot; projects will run on the compute nodes. Users are encouraged to submit small and short jobs that could run before this event.  Throughout the event, users can still log in, access their data, and submit jobs, but these jobs will not run until after the event. Note that the debugjob queue will remain available to everyone as well.&lt;br /&gt;
&lt;br /&gt;
''' Thu Oct 27 11:16 AM EDT:''' SSH keys are gradually being restored, estimated to complete by 1:15 PM.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 27, 2023, 8:00 EDT:''' SSH key login authentication with CCDB keys is currently not working, on many Alliance systems.  It appears this started last night. Issue is being investigated.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 26, 2023, 12:35 EDT:''' Mist login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 26, 2023, 12:05 EDT:''' Mist login node is under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''Wed Oct 25 7:54 PM EDT:''' slurm-*.out now outputs job info for last array job.&lt;br /&gt;
&lt;br /&gt;
'''Tue Oct 24 12:00 PM EDT:''' Network appears to be up.&lt;br /&gt;
&lt;br /&gt;
'''Tue Oct 24 11:32 AM EDT:''' Campus network issues.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 05, 2023, 12:05 PM EDT:''' Niagara scheduler is back online.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 05, 2023, 11:50 AM EDT:''' Niagara scheduler is temporarily under maintenance for security updates. &lt;br /&gt;
&lt;br /&gt;
''' Thu Sep 28, 2023 11:00 am''': Niagara scheduler is back online.&lt;br /&gt;
&lt;br /&gt;
''' Thu Sep 28, 2023 10:50 am''': Niagara scheduler is temporarily under maintenance for security updates.&lt;br /&gt;
&lt;br /&gt;
''' Wed Sep 27, 2023 11:35 am''': Mist login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
''' Wed Sep 27, 2023 11:00 am''': Mist login node is under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
''' Wed Sep 6, 2023 11:30 am''': Mist login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
''' Wed Sep 6, 2023 11:00 am''': Mist login node is under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
''' Fri Aug 25, 2023 0:19 am''': A power glitch brought some compute nodes down; users should resubmit any affected jobs. The Jupyterhub had to be restarted for the same reason.&lt;br /&gt;
&lt;br /&gt;
''' Mon Aug 14, 2023 12:10 pm''': Network problems with Teach cluster are now resolved and it is again available for users.&lt;br /&gt;
&lt;br /&gt;
''' Mon Aug 14, 2023 11:40 am''': Network problems with Teach cluster. We are investigating.&lt;br /&gt;
&lt;br /&gt;
''' Thu Aug 3, 2023 11:10 am''': Mist login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
''' Thu Aug 3, 2023 10:40 am''': Mist login node is under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
''' Tue Aug 1, 2023 2:43 pm''': To recover from the power glitch, all servers on the SciNet jupyterhub have been stopped. Please restart your server if you need to.&lt;br /&gt;
&lt;br /&gt;
''' Tue Aug 1, 2023 11:46 am''': There was a power glitch at 11:46 Aug 1, 2023, causing a significant number of job losses. Please resubmit your jobs.&lt;br /&gt;
&lt;br /&gt;
'''Summer Maintenance Shutdown Finished''' -- Slurm upgraded to version 23.02.3.&lt;br /&gt;
Change to be aware of: SLURM_NTASKS is only set if the --ntasks option is set.&lt;br /&gt;
Details at: https://bugs.schedmd.com/show_bug.cgi?id=17108&lt;br /&gt;
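&lt;br /&gt;
A minimal job-script sketch illustrating the change (the resource values and job name below are only illustrative): SLURM_NTASKS is defined only because --ntasks is requested explicitly; without that line the variable is now left unset.&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
#!/bin/bash&lt;br /&gt;
#SBATCH --nodes=1&lt;br /&gt;
#SBATCH --ntasks=4          # without this line, SLURM_NTASKS is no longer set&lt;br /&gt;
#SBATCH --time=00:10:00&lt;br /&gt;
#SBATCH --job-name=ntasks-check&lt;br /&gt;
&lt;br /&gt;
# guard against the variable being unset under the new behaviour&lt;br /&gt;
echo &amp;quot;SLURM_NTASKS=${SLURM_NTASKS:-unset}&amp;quot;&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;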
&lt;br /&gt;
'''July 17 and 18, 2023''':  Announcement: Summer Maintenance Shutdown&amp;lt;br&amp;gt; &lt;br /&gt;
&lt;br /&gt;
'''July 17th, 2023''' This maintenance involves a full data centre shutdown starting at 7:00 a.m. ET on Monday July 17th, 2023. None of the SciNet systems (Niagara, Mist, Rouge, Teach, the file systems, as well as hosted equipment) will be accessible.&lt;br /&gt;
&lt;br /&gt;
'''July 18th, 2023''' The shutdown will last until Tuesday July 18th, 2023. Systems are expected to be fully available in the evening of that day.&lt;br /&gt;
&lt;br /&gt;
The scheduler will hold jobs that cannot finish before the start of the shutdown. Users are encouraged to submit small and short jobs that can take advantage of this, as the scheduler may be able to fit these jobs in before the maintenance on otherwise idle nodes.&lt;br /&gt;
&lt;br /&gt;
'''Wed Jun 21 16:03:45 EDT 2023:''' Niagara's scheduler maintenance is finished.&lt;br /&gt;
&lt;br /&gt;
'''Wed Jun 21 15:42:00 EDT 2023:''' Niagara's scheduler is rebooting in 10 minutes for a short maintenance down time.&lt;br /&gt;
&lt;br /&gt;
'''Wed Jun 21, 2023, 11:25 AM EDT:''' Maintenance is finished and Teach cluster is accessible again.&lt;br /&gt;
&lt;br /&gt;
'''Tue Jun 20, 2023, 9:55 AM EDT:''' Teach cluster is powered off for maintenance.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;span style='color:red'&amp;gt;'''Tue June 20, 2023:'''  Announcement:&amp;lt;/span&amp;gt;&amp;lt;br&amp;gt; The Teach cluster at SciNet will undergo a maintenance shutdown starting on Tuesday June 20, 2023.  It will likely take a few days before it will be available again.  Check here for updates.&lt;br /&gt;
&lt;br /&gt;
'''Mon Jun 5, 2023, 2:35 PM EDT:''' All systems are operational again.&lt;br /&gt;
&lt;br /&gt;
'''Mon Jun 5, 2023, 11:55 AM EDT:''' There were issues with the cooling system.  The login nodes and file systems are now accessible again, but compute nodes are still off.&lt;br /&gt;
&lt;br /&gt;
'''Mon Jun 5, 2023, 6:55 AM EDT:''' Issues at the data center, we are investigating.&lt;br /&gt;
&lt;br /&gt;
'''Sat May 27, 2023, 21:00 EDT:''' We have been able to mitigate the UPS issue for now, until new parts arrive sometime during the week. Systems will be accessible soon.&lt;br /&gt;
&lt;br /&gt;
'''Sat May 27, 2023, 16:00 EDT:''' We identified a UPS/power-related issue in the datacentre that is adversely affecting several components, in particular all file systems. Out of an abundance of caution we are shutting down the cluster until the UPS situation is resolved. Ongoing jobs will be canceled.&lt;br /&gt;
&lt;br /&gt;
'''Sat May 27, 2023, 11:18AM EDT:''' Filesystem issues, investigating.&lt;br /&gt;
&lt;br /&gt;
'''Wed May 24, 2023, 11:40AM EDT:''' Mist login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
'''Wed May 24, 2023, 11:10 AM EDT:''' Mist login node is under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''Mon May 15, 2023, 10:08 AM EDT''' rebooting Mist-login node again &lt;br /&gt;
&lt;br /&gt;
'''Mon May 15, 2023, 09:15 AM EDT''' rebooting Mist-login node&lt;br /&gt;
&lt;br /&gt;
'''Mon May 01, 2023, 04:00 PM EDT''' done rebooting nia-login nodes&lt;br /&gt;
&lt;br /&gt;
'''Mon May 01, 2023, 12:00 PM EDT''' rebooting all nia-login nodes one at a time &lt;br /&gt;
&lt;br /&gt;
'''Mon May 01, 2023, 11:00 AM EDT''' nia-login07 is going to be rebooted.&lt;br /&gt;
&lt;br /&gt;
'''Thu Apr 20, 2023, 12:05 PM EDT:''' Mist login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
'''Thu Apr 20, 2023, 11:30 AM EDT:''' Mist login node is under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''Thu Apr 20, 2023, 8:27 AM EDT:''' Intermittent file system issues. We are investigating.  For now (10:45 AM), the file systems appear operational.&lt;br /&gt;
&lt;br /&gt;
'''Fri 14 Apr 2023 10:25 AM EDT:''' Switch problem resolved.&lt;br /&gt;
&lt;br /&gt;
'''Fri 14 Apr 2023 10:10 AM EDT:''' A switch problem is affecting access to certain equipment at the SciNet data center, including the Teach cluster.  Niagara and Mist are accessible.&lt;br /&gt;
&lt;br /&gt;
'''Fri 14 Apr 2023 09:55 AM EDT:''' SciNet Jupyter Hub maintenance is finished and it is again available for users.&lt;br /&gt;
&lt;br /&gt;
'''Fri 14 Apr 2023:''' SciNet Jupyter Hub will be restarted for system updates this morning.  Remember to save your notebooks!&lt;br /&gt;
&lt;br /&gt;
'''Thu 06 Apr 2023 03:40 PM EDT:''' Rouge cluster is accessible again.&lt;br /&gt;
&lt;br /&gt;
'''Thu 06 Apr 2023 01:00 PM EDT:''' Rouge cluster is temporarily inaccessible to users due to the electrical work.&lt;br /&gt;
&lt;br /&gt;
'''Sun 02 Apr 2023 03:37 AM EDT:''' IO/read errors on the file system seem to have been fixed. Please resubmit your jobs, and report any further problems to support. Burst Buffer will remain offline for now.&lt;br /&gt;
&lt;br /&gt;
'''Sun 02 Apr 2023 00:18 AM EDT:''' File System is back up, but there seems to be some IO/read errors. All running jobs have been killed. Please hold off on submitting jobs until further notice.&lt;br /&gt;
&lt;br /&gt;
'''Sat 01 Apr 2023 10:17 PM EDT:''' We are having issues with the File System. Currently investigating the cause.&lt;br /&gt;
&lt;br /&gt;
'''Fri 31 Mar 2023 11:00 PM EDT:''' Burst Buffer may be the culprit. We are investigating but may have to take Burst Buffer offline. &lt;br /&gt;
&lt;br /&gt;
'''Fri 31 Mar 2023 01:30 PM EDT:''' File system issues causing trouble for some jobs on Niagara and Mist&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
'''Tue 28 Mar 2023 11:05 AM EDT:''' Mist login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
'''Tue 28 Mar 2023 10:35 AM EDT:''' Mist login node is under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''Fri 17 Mar 2023 2:50 PM EDT:''' All systems online.&lt;br /&gt;
&lt;br /&gt;
'''Fri 17 Mar 2023 11:00 AM EDT:''' Problem identified and repaired. Starting to bring up systems, but not available to users yet.&lt;br /&gt;
&lt;br /&gt;
'''Fri 17 Mar 2023 09:15:39 EDT:''' Staff on site and ticket opened with cooling contractor; cause of failure unclear. &lt;br /&gt;
&lt;br /&gt;
'''Fri 17 Mar 2023 01:47:43 EDT:''' Cooling system malfunction, datacentre is shut down. &lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Tue Feb 28, 16:40 EST:&amp;lt;/b&amp;gt; All systems are back online.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Tue Feb 28, 15:30 EST:&amp;lt;/b&amp;gt; Maintenance is complete. Bringing up systems.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Tue Feb 28, 7:10 AM EST:&amp;lt;/b&amp;gt; Maintenance shutdown resuming.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Mon Feb 27, 3:55 PM EST:&amp;lt;/b&amp;gt; Maintenance paused as parts were delayed. The maintenance will resume tomorrow (Tue Feb 28) at 7AM EST for about 5 hours.  In the meantime, the login nodes of the systems will be brought online.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Mon Feb 27, 7:20 AM EST:&amp;lt;/b&amp;gt; Maintenance shutdown started.&lt;br /&gt;
 &lt;br /&gt;
&amp;lt;span style=&amp;quot;color:red&amp;quot;&amp;gt;&amp;lt;b&amp;gt; February 27 and 28, 2023: SciNet Data Centre Maintenance:&amp;lt;/b&amp;gt;&amp;lt;/span&amp;gt;&amp;lt;br/&amp;gt;&lt;br /&gt;
This annual winter maintenance involves a full data centre shutdown&lt;br /&gt;
starting at 7:00 a.m. EST on Monday, February 27. None of the SciNet&lt;br /&gt;
systems (Niagara, Mist, Rouge, Teach, the file systems, as well as&lt;br /&gt;
hosted equipment) will be accessible.&lt;br /&gt;
&lt;br /&gt;
On the second day of the maintenance, Niagara, Mist, and their file&lt;br /&gt;
systems are expected to become partially available for users.  All&lt;br /&gt;
systems should be fully available in the evening of the 28th.&lt;br /&gt;
&lt;br /&gt;
The scheduler will hold jobs that cannot finish before the start of&lt;br /&gt;
the shutdown. Users are encouraged to submit small and short jobs&lt;br /&gt;
that can take advantage of this, as the scheduler may be able to fit&lt;br /&gt;
these jobs in before the maintenance on otherwise idle nodes.&lt;br /&gt;
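&lt;br /&gt;
For illustration, a minimal sketch of such a small, short job (the resource values, job name, and program are placeholders):&lt;br /&gt;
 #!/bin/bash&lt;br /&gt;
 #SBATCH --nodes=1&lt;br /&gt;
 #SBATCH --ntasks-per-node=40&lt;br /&gt;
 #SBATCH --time=01:00:00       # short walltime, so the scheduler can fit the job in before the shutdown&lt;br /&gt;
 #SBATCH --job-name=short_run  # placeholder job name&lt;br /&gt;
 ./my_program                  # placeholder executable&lt;br /&gt;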
&lt;br /&gt;
&amp;lt;b&amp;gt;Feb 17, 2023, 11:15 PM EST:&amp;lt;/b&amp;gt; File system issues on Teach fixed and Teach is accessible again. Note that the file system of Teach is not very good at handling many remote vscode connections.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Feb 17, 2023, 11:02 PM EST:&amp;lt;/b&amp;gt; File system issues on Teach.  We are working on a fix.  &lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Sun Feb 12, 2023, 3:05 PM EST&amp;lt;/b&amp;gt; All systems are back online.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Sun Feb 12, 2023, 2:10 PM EST&amp;lt;/b&amp;gt; Power restored, clusters are being started.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Sat Feb 11, 2023, 2:35 PM EST&amp;lt;/b&amp;gt; Power interruption started. All compute nodes will be down, likely until Sunday &lt;br /&gt;
afternoon.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Sat Feb 11, 2023, 1:20 PM EST&amp;lt;/b&amp;gt; There is to be an emergency power repair on the adjacent street. The datacentre will be &lt;br /&gt;
switching over to generator. All compute nodes will be down.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Fri Feb 10, 2023, 10:55 AM EST&amp;lt;/b&amp;gt; All systems are back online.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Fri Feb 10, 2023, 10:00 AM EST&amp;lt;/b&amp;gt; Cooling issue resolved, cluster is being started.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Wed Jan 25, 2023, 02:15 PM EST&amp;lt;/b&amp;gt; Mist login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Wed Jan 25, 2023, 10:30 AM EST&amp;lt;/b&amp;gt; Mist login node is under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Mon Jan 23, 2023, around 7-8 AM EST&amp;lt;/b&amp;gt; Intermittent file system issues may have killed your job. Users are advised to resubmit.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Sat Jan 21, 2023, 00:50 EST&amp;lt;/b&amp;gt; Niagara, Mist, Rouge and the file systems are up.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Fri Jan 20, 2023, 11:19 PM EST&amp;lt;/b&amp;gt; Systems are coming up. We have determined that there was a general power glitch in the area of our datacentre. The power has been fully restored.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Fri Jan 20, 2023, 10:34 PM EST&amp;lt;/b&amp;gt; Cooling is back. Systems are slowly coming up.  &lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Fri Jan 20, 2023, 8:20 PM EST&amp;lt;/b&amp;gt; A cooling failure at the data centre, possibly due to a power glitch. We are investigating.  &lt;br /&gt;
&lt;br /&gt;
&amp;lt;b&amp;gt;Thu Jan 12, 2023, 9:30 AM EST&amp;lt;/b&amp;gt; File system is experiencing issues. Issues have stabilized, but jobs running around this time may have been affected.&lt;br /&gt;
&lt;br /&gt;
'''Wed Dec 21, 2022, 12:00 PM: ''' Please note that SciNet is on vacation, together with the University of Toronto. Full service will resume on Jan 2, 2023. We will endeavour to keep systems running, and answer tickets, on a best-effort basis.  Happy Holidays!!!&lt;br /&gt;
&lt;br /&gt;
'''Fri Dec 16, 2022, 2:19 PM: ''' City power glitch caused all compute nodes to reboot. Please resubmit your jobs.&lt;br /&gt;
&lt;br /&gt;
'''Mon Dec 12, 2022, 9:30 AM - 11:30:''' File system issues caused login issues and may have affected running jobs.  System back to normal now, but users may want to check any jobs they had running. &lt;br /&gt;
&lt;br /&gt;
'''Wed Dec 7, 2022, 11:40 AM EST:''' Systems are being brought back online.&lt;br /&gt;
&lt;br /&gt;
'''Wed Dec 7, 2022, 09:00 AM EST:''' Maintenance is underway.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;span style='color:red'&amp;gt;&amp;lt;b&amp;gt;Announcement:&amp;lt;/b&amp;gt;&amp;lt;/span&amp;gt;&lt;br /&gt;
&lt;br /&gt;
On '''Wednesday December 7th, 2022''', the file systems of SciNet's systems (Niagara, Mist, HPSS, and the Teach cluster) will undergo maintenance from 9:00 am EST.  During the maintenance, there will be no access to any of these systems, as it requires all file system operations to have stopped.  The maintenance should take about 1 hour, and all systems are expected to become available again later that morning.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
'''Wed Nov 30, 2022, 2:45 PM EST:''' Mist login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
'''Wed Nov 30, 2022, 2:15 PM EST:''' Mist login node is under maintenance and temporarily inaccessible to users. &lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 20, 2022, 6:00 PM EDT:''' Systems are back online. &lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 20, 2022, 09:40 AM EDT:''' About half of Niagara compute nodes are up. Note that only jobs that can finish by 5:00 PM will run.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 20, 2022, 07:50 AM EDT:''' Jupyter Hub is available again.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 20, 2022, 07:35 AM EDT:''' Jupyter Hub is being updated and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''Thu Oct 20, 2022, 07:30 AM EDT:''' Maintenance is underway.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;span style='color:red'&amp;gt;&amp;lt;b&amp;gt;Announcement:&amp;lt;/b&amp;gt;&amp;lt;/span&amp;gt;&lt;br /&gt;
&lt;br /&gt;
On '''Thursday October 20th, 2022''', the SciNet datacentre (which hosts Niagara and Mist) will undergo transformer maintenance from 7:30 am EDT to 5:00 pm EDT.  At both the start and end of this maintenance window, all systems will need to be briefly shutdown and will not be accessible.  Apart from that, during this window, login nodes will be accessible and part of Niagara will be available to run jobs. The Mist and Rouge clusters will be off for the entirety of this maintenance. &lt;br /&gt;
&lt;br /&gt;
Users are encouraged to submit Niagara jobs of about 1 to 2 hours in the days before the maintenance, as these could be run within the&lt;br /&gt;
window of 8 AM to 5 PM EDT.&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
'''Wed Oct 5, 2022, 12:10 PM EDT:''' A grid power glitch caused all compute nodes to reboot. Please resubmit your jobs.&lt;br /&gt;
&lt;br /&gt;
'''Mon Oct 3, 2022, 11:20 PM EDT:'''  Niagara login nodes are accessible from outside again.&lt;br /&gt;
&lt;br /&gt;
'''Mon Oct 3, 2022, 9:20 PM EDT:'''  Niagara login nodes are inaccessible from outside of the datacentre at the moment. As a work-around, ssh into mist.scinet.utoronto.ca and then ssh into e.g. nia-login01.&lt;br /&gt;
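&lt;br /&gt;
For illustration, a minimal sketch of this two-hop login (the username is a placeholder; the single-command form assumes a recent OpenSSH client):&lt;br /&gt;
 # Log in to the Mist login node first, then hop to a Niagara login node:&lt;br /&gt;
 ssh USERNAME@mist.scinet.utoronto.ca&lt;br /&gt;
 ssh nia-login01&lt;br /&gt;
 # With a recent OpenSSH client, the two hops can be combined using a jump host:&lt;br /&gt;
 ssh -J USERNAME@mist.scinet.utoronto.ca USERNAME@nia-login01&lt;br /&gt;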
&lt;br /&gt;
'''Wed Sep 28, 2022, 1:15 PM EDT:''' The JupyterHub maintenance is finished and it is now accessible to users.&lt;br /&gt;
&lt;br /&gt;
'''Wed Sep 28, 2022, 1:00 PM EDT:''' The JupyterHub is to be rebooted for system upgrades. Running processes and notebooks will be closed. The service is expected to be back around 1:30 PM EDT.&lt;br /&gt;
 &lt;br /&gt;
'''Tue Sep 27, 2022, 11:50 AM EDT:''' Mist login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep 27, 2022, 11:25 AM EDT:''' Mist login node is under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''Mon Sep 26, 2022, 11:35 AM EDT:''' Rouge and Teach login nodes are accessible again.&lt;br /&gt;
&lt;br /&gt;
'''Mon Sep 26, 2022, 11:05 AM EDT:''' Rouge and Teach login nodes are under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''Fri Sep 23, 2022, 12:46 AM EDT:''' The CCEnv software stack is back to normal.&lt;br /&gt;
&lt;br /&gt;
'''Thu Sep 22, 2022, 8:15 PM EDT:''' The CCEnv software stack is inaccessible due to an issue with CVMFS.&lt;br /&gt;
 &lt;br /&gt;
'''Tue Sep 20, 2022, 4:00 PM EDT:''' Rouge login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep 20, 2022, 10:20 AM EDT:''' Rouge login node is under maintenance and temporarily inaccessible to users (hardware upgrade).&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep 20, 2022, 9:41 AM EDT:''' Rouge login node is back up.&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep 20, 2022, 8:25 AM EDT:''' Rouge login node down, we are investigating.&lt;br /&gt;
&lt;br /&gt;
'''Fri Sept 16, 2022, 9:30 AM EDT:''' Login nodes are accessible again.&lt;br /&gt;
&lt;br /&gt;
'''Fri Sept 16, 2022, 9:00 AM EDT:''' Login nodes are not accessible.  We are investigating.&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep 13, 2022, 11:00 AM EDT:''' Mist login node is available again.&lt;br /&gt;
&lt;br /&gt;
'''Tue Sep 13, 2022, 10:00 AM EDT:''' Mist login node is under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''Fri Sep 2, 2022, 11:25 AM EDT:''' Rouge login node is back up.&lt;br /&gt;
&lt;br /&gt;
'''Fri Sep 2, 2022, 10:25 AM EDT:''' Issues with the Rouge login node; we are investigating.&lt;br /&gt;
&lt;br /&gt;
'''Tue Aug 23, 2022, 1:15 PM EDT:''' Jupyter Hub is available again.&lt;br /&gt;
&lt;br /&gt;
'''Tue Aug 23, 2022, 1:00 PM EDT:''' Jupyter Hub is being updated and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''Fri Aug 12, 2022, 6:30 PM EDT:''' File system issues are resolved.&lt;br /&gt;
&lt;br /&gt;
'''Fri Aug 12, 2022, 5:06 PM EDT:''' File system issues. We are investigating.&lt;br /&gt;
&lt;br /&gt;
'''Thu Aug 11, 2022, 9:20 AM EDT:''' The login node issues have been resolved.&lt;br /&gt;
&lt;br /&gt;
'''Thu Aug 11, 2022, 7:50 AM EDT:''' We are having problems accessing the Niagara login nodes.  Until fixed, please login to Mist and then ssh to a Niagara login node to access Niagara (&amp;quot;ssh nia-login02&amp;quot;, for example).&lt;br /&gt;
&lt;br /&gt;
'''Fri July 15, 2022, 10:50 AM EDT:''' Jupyter Hub is available again.&lt;br /&gt;
&lt;br /&gt;
'''Fri July 15, 2022, 10:30 AM EDT:''' Jupyter Hub is being updated and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''Thu June 16, 2022, 3:45 PM EDT:''' File system is stable now. We're gradually opening the systems up.&lt;br /&gt;
&lt;br /&gt;
'''Thu June 16, 2022, 10:15 AM EDT:''' Emergency maintenance shutdown of filesystem. Running jobs will be affected.&lt;br /&gt;
&lt;br /&gt;
'''Wed June 15, 2022, 7:35 PM EDT:''' Maintenance shutdown finished. Most systems are available again.&lt;br /&gt;
&lt;br /&gt;
'''Wed June 15, 2022, 7:00 AM EDT:''' Maintenance shutdown of the SciNet datacentre. There will be no access to any of the SciNet systems during this time. We expect to be able to bring the systems back online in the evening of June 15th.&lt;br /&gt;
&lt;br /&gt;
'''Mon June 13, 2022, 7:00 AM EDT - Wed June 15, 2022, 7:00 AM EDT:''' Two-day reservation for the &amp;quot;Niagara at Scale&amp;quot; event. Only &amp;quot;Niagara at Scale&amp;quot; projects will run on the compute nodes (as well as SOSCIP projects, on a subset of nodes). Users are encouraged to submit small and short jobs that could run before this event.  Throughout the event, users can still login, access their data, and submit jobs, but these jobs will not run until after the subsequent maintenance (see below). Note that the debugjob queue will remain available to everyone as well.&lt;br /&gt;
&lt;br /&gt;
'''Mon May 30th, 2022, 12:42:00 EDT:''' Mist login node is available again.&lt;br /&gt;
&lt;br /&gt;
'''Mon May 30th, 2022, 10:22:00 EDT:''' Mist login node is being upgraded and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''Wed May 25th, 2022, 13:30:00 EDT:''' Niagara operating at 100% again.&lt;br /&gt;
&lt;br /&gt;
'''Tue May 24th, 2022, 21:30:00 EDT:''' Jupyter Hub up.  Part of Niagara can run compute jobs.&lt;br /&gt;
&lt;br /&gt;
'''Tue May 24th, 2022, 19:00:00 EDT:''' Systems are up. Users can login, BUT cannot submit jobs yet.&lt;br /&gt;
&lt;br /&gt;
'''Tue May 24th, 2022, 10:00:00 EDT:''' We are still performing system checks.&lt;br /&gt;
&lt;br /&gt;
'''Mon May 23rd, 2022, 16:44:30 EDT:''' Systems still down. Filesystems are working, but there are quite a number of drive failures - no data loss - so out of an abundance of caution we are keeping the systems down at least until tomorrow.  The long weekend has also been disruptive for service response, and we prefer to err on the safe side.&lt;br /&gt;
&lt;br /&gt;
'''Mon May 23rd, 2022, 08:12:14 EDT:''' Systems still down. Filesystems being checked to ensure no heat damage.&lt;br /&gt;
&lt;br /&gt;
'''Sun May 22nd, 2022, 10.16 am EDT:''' Electrician dispatched to replace blown fuses.&lt;br /&gt;
&lt;br /&gt;
'''Sun May 22nd, 2022, 2:54 am EDT:''' Automatic shutdown down due to power/cooling.&lt;br /&gt;
&lt;br /&gt;
'''Fri May 6th, 2022, 11:35 am EDT:''' HPSS scheduler upgrade also finished.&lt;br /&gt;
&lt;br /&gt;
'''Thu May 5th, 2022, 7:45 pm EDT:''' Upgrade of the scheduler has finished, with the exception of HPSS.&lt;br /&gt;
&lt;br /&gt;
'''Thu May 5th, 2022, 7:00 am - 3:00 pm EDT (approx):''' Starting from 7:00 am EDT, an upgrade of the scheduler of the Niagara, Mist, and Rouge clusters will be applied.  This requires the scheduler to be down for about 5-6 hours, and all compute and login nodes to be rebooted.&lt;br /&gt;
Jobs cannot be submitted during this maintenance, but jobs submitted beforehand will remain in the queue.  For most of the time, the login nodes of the clusters will be available so that users may access their files on the home, scratch, and project file systems.&lt;br /&gt;
&lt;br /&gt;
'''Monday May 2nd, 2022, 9:30 - 11:00 am EDT:''' the Niagara login nodes, the jupyter hub, and nia-datamover2 will get rebooted for updates.  In the process, any login sessions will get disconnected, and servers on the jupyterhub will stop. Jobs in the Niagara queue will not be affected.&lt;br /&gt;
&lt;br /&gt;
'''Tue Apr 26, 11:20 AM EDT:''' A Rolling update of the Mist cluster is taking a bit longer than expected, affecting logins to Mist. &lt;br /&gt;
 &lt;br /&gt;
'''Announcement:''' On Thursday April 14th, 2022, the connectivity to the SciNet datacentre will be disrupted at 11:00 AM EDT  for a few minutes, in order to deploy a new network core switch.  Any SSH connections or data transfers to SciNet systems (Niagara, Mist, etc.) may be terminated at that time.&lt;br /&gt;
&lt;br /&gt;
'''Thu March 24, 6:54 AM EST:''' HPSS is back online&lt;br /&gt;
&lt;br /&gt;
'''Thu March 24, 8:15 AM EST:''' HPSS has a hardware problem&lt;br /&gt;
&lt;br /&gt;
'''Wed March 2, 4:50 PM EST:''' The CCEnv software stack is available again on Niagara.&lt;br /&gt;
&lt;br /&gt;
'''Wed March 2, 7:50 AM EST:''' The CCEnv software stack on Niagara has issues; we are investigating.&lt;br /&gt;
 &lt;br /&gt;
'''Sat Feb 12 2022, 12:59 EST:''' Jupyterhub is back up, but may have a hardware issue.&lt;br /&gt;
&lt;br /&gt;
'''Sat Feb 12 2022, 10:36 EST:''' Issue with the Jupyterhub, since last night.  We're investigating.&lt;br /&gt;
&lt;br /&gt;
'''Tue Feb 1 2022 19:20 EST:''' Maintenance finished successfully. Systems are up. &lt;br /&gt;
&lt;br /&gt;
'''Tue Feb 1 2022 13:00 EST:''' Maintenance downtime started.&lt;br /&gt;
&lt;br /&gt;
'''Mon Jan 31 2022 13:15:00 EST:''' The SciNet datacentre's cooling system needs an '''emergency repair''' as soon as possible.  During this repair, all systems hosted at SciNet (Niagara, Mist, Rouge, HPSS, and Teach) will need to be switched off and will be unavailable to users. Repairs will start '''Tuesday February 1st, at 1:00 pm EST''', and could take until the end of the next day.  Please check here for updates.&lt;br /&gt;
&lt;br /&gt;
'''Sat Jan 29 2022 16:45:38 EST:''' Fibre repaired.&lt;br /&gt;
&lt;br /&gt;
'''Sat 29 Jan 2022 11:22:27 EST:''' Fibre repair is underway.  Expect to have connectivity restored later today.&lt;br /&gt;
&lt;br /&gt;
'''Fri 28 Jan 2022 07:35:01 EST:''' The fibre optics cable that connects the SciNet datacentre was severed by uncoordinated digging at York University.  We expect repairs to happen as soon as possible.&lt;br /&gt;
&lt;br /&gt;
'''Thu Jan 27 12:46 PM EST 2022:''' Network issues to and from the datacentre. We are investigating.&lt;br /&gt;
&lt;br /&gt;
'''Sun Jan 23 11:05 AM EST 2022:''' Filesystem issues appear to have resolved.&lt;br /&gt;
&lt;br /&gt;
'''Sun Jan 23 10:30 AM EST 2022:''' Filesystem issues -- investigating.&lt;br /&gt;
&lt;br /&gt;
'''Sat Jan 8 11:42 AM EST 2022:''' The emergency maintenance is complete. Systems are up and available.&lt;br /&gt;
&lt;br /&gt;
'''Fri Jan 7 14:34 EST 2022:''' The SciNet shutdown is in progress. Systems are expected back on Saturday, Jan 8.&lt;br /&gt;
&lt;br /&gt;
'''&amp;lt;span style=&amp;quot;color:red&amp;quot;&amp;gt;Emergency shutdown Friday January 7, 2022&amp;lt;/span&amp;gt;''': An emergency shutdown of all SciNet systems to replace a crucial file system component is planned to take place on Friday January 7, 2022, starting at 8 am EST, and will require at least 12 hours of downtime.  Updates will be posted during the day.&lt;br /&gt;
&lt;br /&gt;
'''Thu Jan 6 08:20 AM EST 2022:''' The SciNet filesystem is having issues.  We are investigating.&lt;br /&gt;
&lt;br /&gt;
'''Fri Dec 24 13:31 EST 2021:''' Please note the following scheduled network maintenance, which will result in loss of connectivity to the SciNet datacentre.&lt;br /&gt;
Start time: Dec 29, 00:30 EST.  Estimated duration: 4 hours and 30 minutes. &lt;br /&gt;
&lt;br /&gt;
'''Mon Dec 20 4:29 PM EST 2021:''' Filesystem is back to normal. &lt;br /&gt;
&lt;br /&gt;
'''Mon Dec 20 2:53 PM EST 2021:''' Filesystem problem - we are investigating. &lt;br /&gt;
&lt;br /&gt;
'''Thu Sep 23 12:30 EDT 2021 ''' Cooling restored.  Systems should be available later this afternoon.  &lt;br /&gt;
&lt;br /&gt;
'''Thu Sep 23 9:30 EDT 2021 ''' Technicians on site working on cooling system. &lt;br /&gt;
&lt;br /&gt;
'''Thu Sep 23 3:30 EDT 2021 ''' Cooling system issues still unresolved. &lt;br /&gt;
&lt;br /&gt;
'''Wed Sep 22 23:27:48 EDT 2021 ''' Shutdown of the datacenter due to a problem with the cooling system.&lt;br /&gt;
&lt;br /&gt;
'''Wed Sep 22 09:30 EDT 2021 ''': File system issues, resolved.&lt;br /&gt;
&lt;br /&gt;
'''Wed Sep 22 07:30 EDT 2021 ''': File system issues, investigating.&lt;br /&gt;
&lt;br /&gt;
'''Sun Sep 19 10:00 EDT 2021''': Power glitch interrupted all compute jobs; please resubmit any jobs you had running.&lt;br /&gt;
&lt;br /&gt;
'''Wed Sep 15 17:35 EDT 2021''': filesystem issues resolved&lt;br /&gt;
&lt;br /&gt;
'''Wed Sep 15 16:39 EDT 2021''': filesystem issues&lt;br /&gt;
&lt;br /&gt;
'''Mon Sep 13 13:15:07 EDT 2021''' HPSS is back online.&lt;br /&gt;
&lt;br /&gt;
'''Fri Sep 10 17:57:23 EDT 2021''' HPSS is offline due to unscheduled maintenance.&lt;br /&gt;
&lt;br /&gt;
'''Wed Aug 18 16:13:42 EDT 2021''' The HPSS upgrade is complete.&lt;br /&gt;
&lt;br /&gt;
'''HPSS Downtime August 17th and 18th, 2021 (Tuesday and Wednesday):''' We'll be upgrading the HPSS software to version 8.3, along with all the clients (htar/hsi, vfs and Globus/dsi)&lt;br /&gt;
&lt;br /&gt;
'''July 24, 2021, 6:00 PM EDT:''' There appear to be file system issues, which may affect users' ability to login.  We are investigating.&lt;br /&gt;
&lt;br /&gt;
''' July 23rd, 2021, 9:00 AM EDT:''' ''' Security update: ''' Due to a severe vulnerability in the Linux kernel (CVE-2021-33909), our team is currently patching and rebooting all login nodes and compute nodes, as well as the JupyterHub.  There should be no effect on running jobs; however, sessions on login and datamover nodes will be disrupted. &lt;br /&gt;
&lt;br /&gt;
''' July 20th, 2021, 7:00 PM EDT:''' ''' SLURM configuration''' - Changed the default behaviour to kill a job step if any task exits with a non-zero exit code. If your code is able to handle failures gracefully, please add srun's option --no-kill to recover the previous default behaviour.&lt;br /&gt;
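&lt;br /&gt;
For illustration, a minimal sketch of where this option goes in a job script (the executable name is a placeholder):&lt;br /&gt;
 # As described in the announcement above, add --no-kill to srun if your code handles failures gracefully:&lt;br /&gt;
 srun --no-kill ./my_mpi_program&lt;br /&gt;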
&lt;br /&gt;
''' July 20th, 2021, 7:00 PM EDT:''' Maintenance finished, systems are back online.   &lt;br /&gt;
&lt;br /&gt;
'''SciNet Downtime July 20th, 2021 (Tuesday):''' There will be a maintenance shutdown of the SciNet data center on Tuesday July 20th, starting at 7 am EDT. There will be no access to any of the SciNet systems (Niagara, Mist, HPSS, Teach cluster, or the file systems) during this time.  We expect to be able to bring the systems back online in the evening of July 20th.  The status of the Niagara cluster can be checked on status.computecanada.ca. For up-to-date and more detailed information on the status of all the SciNet systems, you can always check back here.&lt;br /&gt;
&lt;br /&gt;
'''June 29th, 2021, 2:00 PM:''' Thunderstorm-related power fluctuations are causing some Niagara compute nodes and their jobs to crash.  Please resubmit if your jobs seem to have crashed for no apparent reason.&lt;br /&gt;
&lt;br /&gt;
'''June 28th, 2021, 4:06 PM:''' Mist OS upgrade is complete.&lt;br /&gt;
&lt;br /&gt;
'''June 28th, 2021, 9:00 AM:''' Mist is under maintenance. OS upgrading from RHEL 7 to 8.&lt;br /&gt;
&lt;br /&gt;
'''June 11th, 2021, 8:30 AM:''' Maintenance complete. Systems are up.&lt;br /&gt;
&lt;br /&gt;
'''June 9th to 10th, 2021:''' The SciNet datacentre will have a scheduled maintenance shutdown.  Niagara, Mist, Rouge, HPSS, login nodes, the file systems, and hosted systems will all be offline during the shutdown starting at 7AM EDT on Wednesday June 9th. We expect the systems to be back up in the morning of Friday June 11th.  Check here for updates.&lt;br /&gt;
&lt;br /&gt;
'''May 27, 2021:''' Datamovers addresses have changed to improve high bandwidth connectivity and cybersecurity. The new addresses are 142.1.174.227 for nia-datamover1.scinet.utoronto.ca, and 142.1.174.228 for nia-datamover2.scinet.utoronto.ca.&lt;br /&gt;
&lt;br /&gt;
If you have jobs that need to connect to a software license server using an ssh tunnel through nia-gw (which actually resolves to datamover1 or datamover2), you may need to ask the system administrators of that license server to allow incoming connections from the new addresses above.&lt;br /&gt;
&lt;br /&gt;
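For illustration, a minimal sketch of such a tunnel (the license server host and port are placeholders; adapt them to your own setup):&lt;br /&gt;
 # Forward a local port through nia-gw to a hypothetical external license server:&lt;br /&gt;
 ssh -N -f -L 27000:license.example.com:27000 USERNAME@nia-gw&lt;br /&gt;
 # Jobs can then point their license settings at localhost:27000 on the node holding the tunnel.&lt;br /&gt;
&lt;br /&gt;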
'''May 27th, 20:00.''' All systems are up and running &lt;br /&gt;
&lt;br /&gt;
'''May 27th, 19:30.''' Most systems are up&lt;br /&gt;
&lt;br /&gt;
'''May 27th, 19:00:''' Cooling is back. Powering up systems&lt;br /&gt;
&lt;br /&gt;
'''May 27th, 2021, 11:30am:'''  The cooling tower issue has been identified as a wiring issue and is being repaired.  We don't have an ETA on when cooling will be restored, however we are hopeful it will be by the end of the day.  &lt;br /&gt;
&lt;br /&gt;
'''May 27th, 2021, 12:30am:''' Cooling tower motor is not working properly and may need to be replaced.  It's the primary motor and the cooling system cannot run without it, so at least until tomorrow all equipment at the data centre will remain unavailable.  Updates about expected repair times will be posted when they are known.&lt;br /&gt;
&lt;br /&gt;
'''May 26th, 2021, 9:20pm:''' We are currently experiencing cooling issues at the SciNet data centre.  Updates will be posted as we determine the cause of the problem.&lt;br /&gt;
&lt;br /&gt;
'''From Tue Mar 30 at 12 noon EST to Thu Apr 1 at 12 noon EST,''' there will be a two-day reservation for the &amp;quot;Niagara at Scale&amp;quot; pilot event.  During these 48 hours, only &amp;quot;Niagara at Scale&amp;quot; projects will run on the compute nodes (as well as SOSCIP projects, on a subset of nodes).  All other users can still login, access their data, and submit jobs throughout this event, but the jobs will not run until after the event.  The debugjob queue will remain available to everyone as well.&lt;br /&gt;
&lt;br /&gt;
The scheduler will not start batch jobs that cannot finish before the start of this event. Users can submit small and short jobs that can take advantage of this, as the scheduler may be able to fit these jobs in before the event starts on the otherwise idle nodes.&lt;br /&gt;
&lt;br /&gt;
'''Tue 23 Mar 2021 12:19:07 PM EDT''' - Planned external network maintenance 12pm-1pm Tuesday, March 23rd. &lt;br /&gt;
&lt;br /&gt;
'''Thu Jan 28 17:35:16 EST 2021:''' HPSS services are back online&lt;br /&gt;
&lt;br /&gt;
'''Thu Jan 28 12:36:21 EST 2021:''' HPSS services offline&lt;br /&gt;
&lt;br /&gt;
We need a short maintenance window this afternoon, as early as possible, to perform a small configuration change. Ongoing jobs will be allowed to finish, but we are keeping new submissions on hold in the queue.&lt;br /&gt;
&lt;br /&gt;
'''Mon Jan 25 13:16:33 EST 2021:''' HPSS services are back online&lt;br /&gt;
&lt;br /&gt;
'''Sat Jan 23 10:03:33 EST 2021:''' HPSS services offline&lt;br /&gt;
&lt;br /&gt;
We detected some type of hardware failure on our HPSS equipment overnight, so access has been disabled pending further investigation.&lt;br /&gt;
&lt;br /&gt;
'''Fri Jan 22 10:49:29 EST 2021:''' The Globus transition to oauth is finished&lt;br /&gt;
&lt;br /&gt;
Please deactivate any previous sessions to the niagara endpoint (in the last 7 days), and activate/login again. &lt;br /&gt;
&lt;br /&gt;
For more details check https://docs.scinet.utoronto.ca/index.php/Globus#computecandada.23niagara&lt;br /&gt;
&lt;br /&gt;
'''Jan 21, 2021:''' Globus access disruption on Fri, Jan/22/2021 10AM: Please be advised that we will have a maintenance window starting tomorrow at 10AM to roll out the transition of services to oauth based authentication.&lt;br /&gt;
&lt;br /&gt;
'''Jan 15, 2021:''' Globus access update on Mon, Jan/18/2021 and Tue, Jan/19/2021:&lt;br /&gt;
Please be advised that we will start preparations on Monday to perform an update to Globus access on Tuesday. We'll be adopting oauth instead of myproxy from that point on. During this period, expect sporadic disruptions of service. Access to nia-dm2 will already be blocked on Monday, so please refrain from starting new login sessions or ssh tunnels via nia-dm2 as of this weekend.&lt;br /&gt;
&lt;br /&gt;
''' December 11, 2020, 12:00 AM EST: ''' Cooling issue resolved. Systems back.&lt;br /&gt;
&lt;br /&gt;
''' December 11, 2020, 6:00 PM EST: ''' Cooling issue at datacenter. All systems down.&lt;br /&gt;
&lt;br /&gt;
''' December 7, 2020, 7:25 PM EST: '''All systems back; users can log in again.&lt;br /&gt;
&lt;br /&gt;
''' December 7, 2020, 6:46 PM EST: '''User connectivity to data center not yet ready, but queued jobs on Mist and Niagara have been started.&lt;br /&gt;
 &lt;br /&gt;
''' December 7, 2020, 7:00 AM EST: '''Maintenance shutdown in effect. This is a one-day maintenance shutdown.  There will be no access to Niagara, Mist, HPSS or teach, nor to their file systems during this time.  We expect to be able to bring the systems back online this evening.&lt;br /&gt;
&lt;br /&gt;
''' December 2, 2020, 9:10 PM EST: '''Power is back, systems are coming up. Please resubmit any jobs that failed because of this incident.&lt;br /&gt;
&lt;br /&gt;
''' December 2, 2020, 6:00 PM EST: '''Power glitch at the data center, caused about half of the compute nodes to go down.  Power issue not yet resolved.&lt;br /&gt;
&lt;br /&gt;
'''&amp;lt;span style=&amp;quot;color:#dd1111&amp;quot;&amp;gt;Announcing a Maintenance Shutdown on December 7th, 2020&amp;lt;/span&amp;gt;''' &amp;lt;br/&amp;gt;There will be a one-day maintenance shutdown on December 7th 2020, starting at 7 am EST.  There will be no access to Niagara, Mist, HPSS or teach, nor to their file systems during this time.  We expect to be able to bring the systems back online in the evening of the same day.&lt;br /&gt;
&lt;br /&gt;
''' November 6, 2020, 8:00 PM EST: ''' Systems are coming back online.&lt;br /&gt;
&lt;br /&gt;
''' November 6, 2020, 9:49 AM EST: ''' Repairs on the cooling system are underway.  No ETA, but the systems will likely be back some time today.&lt;br /&gt;
&lt;br /&gt;
''' November 6, 2020, 4:27 AM EST: '''Cooling system failure, datacentre is shut down.&lt;br /&gt;
&lt;br /&gt;
''' October 9, 2020, 12:57 PM: ''' A short power glitch caused many of the Niagara compute nodes to lose power; jobs running on them would have failed. Please check your jobs and resubmit.&lt;br /&gt;
&lt;br /&gt;
''' October 8, 2020, 9:50 PM: ''' Jupyterhub service is back up.&lt;br /&gt;
&lt;br /&gt;
''' October 8, 2020, 5:40 PM: ''' Jupyterhub service is down. We are investigating.&lt;br /&gt;
&lt;br /&gt;
''' September 28, 2020, 11:00 AM EST: ''' A short power glitch caused many of the Niagara compute nodes to lose power; jobs running on them would have failed. Please check your jobs and resubmit.&lt;br /&gt;
&lt;br /&gt;
''' September 1, 2020, 2:15 PM EST: ''' A short power glitch caused about half of the Niagara compute nodes to lose power; jobs running on them would have failed. Please check your jobs and resubmit.&lt;br /&gt;
&lt;br /&gt;
''' September 1, 2020, 9:27 AM EST: ''' The Niagara cluster has moved to a new default software stack, NiaEnv/2019b.  If your job scripts used the previous default software stack before (NiaEnv/2018a), please put the command &amp;quot;module load NiaEnv/2018a&amp;quot; before other module commands in those scripts, to ensure they will continue to work, or try the new stack (recommended).&lt;br /&gt;
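&lt;br /&gt;
For illustration, a minimal sketch of a job script pinned to the old stack (modules other than NiaEnv/2018a and the program name are placeholders):&lt;br /&gt;
 #!/bin/bash&lt;br /&gt;
 #SBATCH --nodes=1&lt;br /&gt;
 #SBATCH --time=1:00:00&lt;br /&gt;
 module load NiaEnv/2018a   # pin the previous default stack, as described above&lt;br /&gt;
 module load intel          # placeholder; load your usual modules after the stack&lt;br /&gt;
 ./my_program               # placeholder executable&lt;br /&gt;
&lt;br /&gt;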
''' August 24, 2020, 7:37 PM EST: ''' Connectivity is back to normal&lt;br /&gt;
&lt;br /&gt;
''' August 24, 2020, 6:35 PM EST: ''' We have partial connectivity back, but are still investigating.&lt;br /&gt;
&lt;br /&gt;
''' August 24, 2020, 3:15 PM EST: ''' There are issues connecting to the data centre. We're investigating.&lt;br /&gt;
&lt;br /&gt;
''' August 21, 2020, 6:00 PM EST: ''' The pump has been repaired, cooling is restored, systems are up.  &amp;lt;br/&amp;gt;Scratch purging is postponed until the evening of Friday Aug 28th, 2020.&lt;br /&gt;
&lt;br /&gt;
'''August 19, 2020, 4:40 PM EST:''' Update: The current estimate is to have the cooling restored on Friday and we hope to have the systems available for users on Saturday August 22, 2020.&lt;br /&gt;
&lt;br /&gt;
'''August 17, 2020, 4:00 PM EST:''' Unfortunately after taking the pump apart it was determined there was a more serious failure of the main drive shaft, not just the seal. As a new one will need to be sourced or fabricated we're estimating that it will take at least a few more days to get the part and repairs done to restore cooling. Sorry for the inconvenience. &lt;br /&gt;
&lt;br /&gt;
'''August 15, 2020, 1:00 PM EST:''' Due to parts availability to repair the failed pump and cooling system, it is unlikely that systems will be able to be restored until Monday afternoon at the earliest. &lt;br /&gt;
&lt;br /&gt;
'''August 15, 2020, 12:04 AM EST:'''  A primary pump seal in the cooling infrastructure has blown, and parts availability cannot be determined until tomorrow. All systems are shut down as there is no cooling.  If parts are available, systems may be back at the earliest late tomorrow. Check here for updates.  &lt;br /&gt;
&lt;br /&gt;
'''August 14, 2020, 9:04 PM EST:''' Tomorrow's /scratch purge has been postponed.&lt;br /&gt;
&lt;br /&gt;
'''August 14, 2020, 9:00 PM EST:''' Staff at the datacenter. Looks like one of the pumps has a seal that is leaking badly.&lt;br /&gt;
&lt;br /&gt;
'''August 14, 2020, 8:37 PM EST:''' We seem to be undergoing a thermal shutdown at the datacenter.&lt;br /&gt;
&lt;br /&gt;
'''August 14, 2020, 8:20 PM EST:''' Network problems to niagara/mist. We are investigating.&lt;br /&gt;
 &lt;br /&gt;
'''August 13, 2020, 10:40 AM EST:''' Network is fixed, scheduler and other services are back.&lt;br /&gt;
&lt;br /&gt;
'''August 13, 2020, 8:20 AM EST:''' We had an IB switch failure, which is affecting a subset of nodes, including the scheduler nodes.&lt;br /&gt;
&lt;br /&gt;
'''August 10, 2020, 7:30 PM EST:''' Scheduler fully operational again.&lt;br /&gt;
&lt;br /&gt;
'''August 10, 2020, 3:00 PM EST:''' Scheduler partially functional: jobs can be submitted and are running.&lt;br /&gt;
&lt;br /&gt;
'''August 10, 2020, 2:00 PM EST:''' Scheduler is temporarily not operational.&lt;br /&gt;
&lt;br /&gt;
'''August 7, 2020, 9:15 PM EST:''' Network is fixed, scheduler and other services are coming back.&lt;br /&gt;
&lt;br /&gt;
'''August 7, 2020, 8:20 PM EST:''' Disruption of part of the network in the data centre.  This is causing issues with the scheduler, the Mist login node, and possibly other services. We are investigating.&lt;br /&gt;
&lt;br /&gt;
'''July 30, 2020, 9:00 AM''' Project backup in progress but incomplete: please be aware that after we deployed the new, larger storage appliance for scratch and project two months ago, we started a full backup of project (1.5PB). This backup is taking a while to complete, and there are still a few areas which have not been backed up fully. Please be careful to not delete things from project that you still need, in particular if they are recently added material.&lt;br /&gt;
&lt;br /&gt;
'''July 27, 2020, 5:00 PM:''' Scheduler issues resolved.&lt;br /&gt;
&lt;br /&gt;
'''July 27, 2020, 3:00 PM:''' Scheduler issues. We are investigating.&lt;br /&gt;
&lt;br /&gt;
'''July 13, 4:40 PM:''' Most systems are available again. Only Mist is still being brought up.&lt;br /&gt;
&lt;br /&gt;
'''July 13, 10:00 AM:''' '''SciNet/Niagara Downtime In Progress'''&lt;br /&gt;
&lt;br /&gt;
'''SciNet/Niagara Downtime Announcement, July 13, 2020'''&amp;lt;br/&amp;gt;&lt;br /&gt;
All resources at SciNet will undergo a maintenance shutdown on Monday July 13, 2020, starting at 10:00 am EDT, for file system and scheduler upgrades.  There will be no access to any of the SciNet systems (Niagara, Mist, HPSS, Teach cluster, or the file systems) during this time.&lt;br /&gt;
We expect to be able to bring the systems back around 3 PM (EST) on the same day.&lt;br /&gt;
&lt;br /&gt;
''' June 29, 6:21:00  PM:''' Systems are available again.  &lt;br /&gt;
&lt;br /&gt;
''' June 29, 12:30:00  PM:''' Power Outage caused thermal shutdown.&lt;br /&gt;
&lt;br /&gt;
'''June 20, 2020, 10:24 PM:''' File systems are back up.  Unfortunately, all running jobs would have died and users are asked to resubmit them.&lt;br /&gt;
&lt;br /&gt;
'''June 20, 2020, 9:48 PM:''' An issue with the file systems is causing trouble.  We are investigating the cause.&lt;br /&gt;
&lt;br /&gt;
'''June 15, 2020, 10:30 PM:''' A '''power glitch''' caused some compute nodes to be rebooted: jobs running at the time may have failed; users are asked to resubmit these jobs.&lt;br /&gt;
&lt;br /&gt;
'''June 12, 2020, 6:15 PM:''' Two '''power glitches''' during the night caused some compute nodes to be rebooted: jobs running at the time may have failed; users are asked to resubmit these jobs.&lt;br /&gt;
&lt;br /&gt;
'''June 6, 2020, 6:06 AM:''' A '''power glitch''' caused some compute nodes to be rebooted: jobs running at the time may have failed; users are asked to resubmit these jobs.&lt;br /&gt;
&lt;br /&gt;
'''May 24, 2020, 8:20 AM:''' A '''power glitch''' this morning caused all compute nodes to be rebooted: jobs running at the time may have failed; users are asked to resubmit these jobs.&lt;br /&gt;
&lt;br /&gt;
'''May 7, 2020, 6:05 PM:''' Maintenance shutdown is finished.  Most systems are back in production.&lt;br /&gt;
&lt;br /&gt;
'''May 6, 2020, 7:08 AM:''' Two-day datacentre maintenance shutdown has started.&lt;br /&gt;
&lt;br /&gt;
''' SciNet/Niagara Downtime Announcement, May 6-7, 2020'''&lt;br /&gt;
&lt;br /&gt;
All resources at SciNet will undergo a two-day maintenance shutdown on May 6th and 7th 2020, starting at 7 am EDT on Wednesday May 6th.  There will be no access to any of the SciNet systems (Niagara, Mist, HPSS, Teach cluster, or the file systems) or systems hosted at the SciNet data centre.  We expect to be able to bring the systems back online the evening of May 7th.&lt;br /&gt;
&lt;br /&gt;
'''May 4, 2020, 7:51 AM:''' A power glitch this morning caused compute nodes to be rebooted: jobs running at the time may have failed; users are asked to resubmit these jobs.&lt;br /&gt;
&lt;br /&gt;
'''May 3, 2020, 8:20 AM:''' A power glitch this morning caused all compute nodes to be rebooted: jobs running at the time may have failed; users are asked to resubmit these jobs.&lt;br /&gt;
&lt;br /&gt;
'''April 28, 2020, 7:20 AM:''' A power glitch this morning caused all compute nodes to be rebooted: jobs running at the time have failed; users are asked to resubmit these jobs.&lt;br /&gt;
 &lt;br /&gt;
'''April 20, 2020: Security Incident at Cedar; implications for Niagara users'''&lt;br /&gt;
&lt;br /&gt;
Last week, it became evident that the Cedar GP cluster had been&lt;br /&gt;
compromised for several weeks.  The passwords of at least two&lt;br /&gt;
Compute Canada users were known to the attackers. One of these was&lt;br /&gt;
used to escalate privileges on Cedar, as explained on&lt;br /&gt;
https://status.computecanada.ca/view_incident?incident=423.&lt;br /&gt;
&lt;br /&gt;
These accounts were used to login to Niagara as well, but Niagara&lt;br /&gt;
did not have the same security loophole as Cedar (which has been&lt;br /&gt;
fixed), and no further escalation was observed on Niagara.&lt;br /&gt;
&lt;br /&gt;
Reassuring as that may sound, it is not known how the passwords of&lt;br /&gt;
the two user accounts were obtained. Given this uncertainty, the&lt;br /&gt;
SciNet team *strongly* recommends that you change your password on&lt;br /&gt;
https://ccdb.computecanada.ca/security/change_password, and remove&lt;br /&gt;
any SSH keys and regenerate new ones (see&lt;br /&gt;
https://docs.scinet.utoronto.ca/index.php/SSH_keys).&lt;br /&gt;
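&lt;br /&gt;
For illustration, a minimal sketch of regenerating a key pair (the key file name and login host are placeholders; see the SSH keys page linked above for the full procedure):&lt;br /&gt;
 # Generate a fresh key pair, protected by a strong passphrase:&lt;br /&gt;
 ssh-keygen -t ed25519 -f ~/.ssh/id_ed25519_scinet&lt;br /&gt;
 # After removing the old key, install the new public key on the cluster, e.g.:&lt;br /&gt;
 ssh-copy-id -i ~/.ssh/id_ed25519_scinet.pub USERNAME@niagara.scinet.utoronto.ca&lt;br /&gt;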
&lt;br /&gt;
''' Tue 30 Mar 2020 14:55:14 EDT'''  Burst Buffer available again.&lt;br /&gt;
&lt;br /&gt;
''' Fri Mar 27 15:29:00 EDT 2020:''' SciNet systems are back up. Only the Burst Buffer remains offline, its maintenance is expected to be finished early next week.&lt;br /&gt;
&lt;br /&gt;
''' Thu Mar 26 23:05:00 EDT 2020:'''  Some aspects of the maintenance took longer than expected. The systems will not be back up until some time tomorrow, Friday March 27, 2020.  &lt;br /&gt;
&lt;br /&gt;
''' Wed Mar 25 7:00:00 EDT 2020:'''  SciNet/Niagara downtime started.&lt;br /&gt;
&lt;br /&gt;
''' Mon Mar 23 18:45:10 EDT 2020:'''  File system issues were resolved.&lt;br /&gt;
&lt;br /&gt;
''' Mon Mar 23 18:01:19 EDT 2020:''' There is currently an issue with the main Niagara filesystems. This affects all systems; all jobs have been killed. The issue is being investigated. &lt;br /&gt;
&lt;br /&gt;
''' Fri Mar 20 13:15:33 EDT 2020: ''' There was a power glitch at the datacentre at 8:50 AM, which resulted in jobs getting killed.  Please resubmit failed jobs. &lt;br /&gt;
&lt;br /&gt;
''' COVID-19 Impact on SciNet Operations, March 18, 2020'''&lt;br /&gt;
&lt;br /&gt;
Although the University of Toronto is closing some of its&lt;br /&gt;
research operations on Friday March 20 at 5 pm EDT, this does not&lt;br /&gt;
affect the SciNet systems (such as Niagara, Mist, and HPSS), which&lt;br /&gt;
will remain operational.&lt;br /&gt;
&lt;br /&gt;
''' SciNet/Niagara Downtime Announcement, March 25-26, 2020'''&lt;br /&gt;
&lt;br /&gt;
All resources at SciNet will undergo a two-day maintenance shutdown on March 25th and 26th 2020, starting at 7 am EDT on Wednesday March 25th.  There will be no access to any of the SciNet systems (Niagara, Mist, HPSS, Teach cluster, or the file systems) during this time.&lt;br /&gt;
&lt;br /&gt;
This shutdown is necessary to finish the expansion of the Niagara cluster and its storage system.&lt;br /&gt;
&lt;br /&gt;
We expect to be able to bring the systems back online the evening of March 26th.&lt;br /&gt;
&lt;br /&gt;
''' March 9, 2020, 11:24 PM:''' HPSS services are temporarily suspended for emergency maintenance.&lt;br /&gt;
&lt;br /&gt;
''' March 7, 2020, 10:15 PM:''' File system issues have been cleared.&lt;br /&gt;
&lt;br /&gt;
''' March 6, 2020, 7:30 PM:''' File system issues; we are investigating&lt;br /&gt;
&lt;br /&gt;
''' March 2, 2020, 1:30 PM:''' For the extension of Niagara, the operating system on all Niagara nodes has been upgraded&lt;br /&gt;
from CentOS 7.4 to 7.6.  This required all&lt;br /&gt;
nodes to be rebooted. Running compute jobs are allowed to finish&lt;br /&gt;
before the compute node gets rebooted. Login nodes have all been rebooted, as have the datamover nodes and the jupyterhub service.&lt;br /&gt;
&lt;br /&gt;
''' Feb 24, 2020, 1:30PM: ''' The [[Mist]] login node got rebooted.  It is back, but we are still monitoring the situation.&lt;br /&gt;
&lt;br /&gt;
''' Feb 12, 2020, 11:00AM: ''' The [[Mist]] GPU cluster is now available to users.&lt;br /&gt;
&lt;br /&gt;
''' Feb 11, 2020, 2:00PM: ''' The Niagara compute nodes were accidentally rebooted, killing all running jobs.&lt;br /&gt;
&lt;br /&gt;
''' Feb 10, 2020, 7:00 PM: ''' HPSS is back to normal.&lt;br /&gt;
&lt;br /&gt;
''' Jan 30, 2020, 12:01PM: ''' We are having an issue with HPSS, in which the disk-cache is full. We put a reservation on the whole system (Globus, plus archive and vfs queues), until it has had a chance to clear some space on the cache.&lt;br /&gt;
&lt;br /&gt;
''' Jan 21, 2020, 4:05 PM: '''   There was a partial power outage that took down a large number of the compute nodes.  If your job died during this period, please resubmit.  &lt;br /&gt;
&lt;br /&gt;
'''Jan 13, 2020, 7:35 PM:''' Maintenance finished.&lt;br /&gt;
&lt;br /&gt;
'''Jan 13, 2020, 8:20 AM:''' The announced maintenance downtime started (see below).&lt;br /&gt;
&lt;br /&gt;
'''Jan 9 2020, 11:30 AM:''' External ssh connectivity restored, issue related to the university network.&lt;br /&gt;
&lt;br /&gt;
'''Jan 9 2020, 9:24 AM:''' We received reports of users having trouble connecting into the SciNet data centre; we're investigating.  Systems are up and running and jobs are fine.&lt;br /&gt;
As a workaround, in the meantime, it appears to be possible to log into graham, cedar or beluga, and then ssh to niagara.&lt;br /&gt;
&lt;br /&gt;
'''Downtime announcement:'''&lt;br /&gt;
To prepare for the upcoming expansion of Niagara, there will be a&lt;br /&gt;
one-day maintenance shutdown on '''January 13th 2020, starting at 8 am&lt;br /&gt;
EST'''.  There will be no access to Niagara, Mist, HPSS or teach, nor&lt;br /&gt;
to their file systems during this time.&lt;br /&gt;
&lt;br /&gt;
2019&lt;br /&gt;
&lt;br /&gt;
'''December 13, 9:00 AM EST:''' Issues resolved.&lt;br /&gt;
&lt;br /&gt;
'''December 13, 8:20 AM EST:''' Overnight issue is now preventing logins to Niagara and other services. Possibly a file system issue, we are investigating.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;p&amp;gt; '''Fri, Nov 15 2019, 11:00 PM (EST)'''  Niagara and most of the main systems are now available. &lt;br /&gt;
&amp;lt;/p&amp;gt;&amp;lt;p&amp;gt; '''Fri, Nov 15 2019, 7:50 PM (EST)'''  SOSCIP GPU cluster is up and accessible.  Work on the other systems continues.&lt;br /&gt;
&amp;lt;/p&amp;gt;&amp;lt;p&amp;gt; '''Fri, Nov 15 2019, 5:00 PM (EST)'''  Infrastructure maintenance done, upgrades still in process.&lt;br /&gt;
&amp;lt;/p&amp;gt;&amp;lt;p&amp;gt;&lt;br /&gt;
'''Fri, Nov 15 2019, 7:00 AM (EST)'''  Maintenance shutdown of the SciNet data centre has started.  Note: scratch purging has been postponed until Nov 17.&amp;lt;br/&amp;gt; &lt;br /&gt;
&amp;lt;/p&amp;gt;&lt;br /&gt;
&amp;lt;p&amp;gt;&lt;br /&gt;
'''Announcement:''' &lt;br /&gt;
The SciNet datacentre will undergo a maintenance shutdown on&lt;br /&gt;
Friday November 15th 2019, from 7 am to 11 pm (EST), with no access&lt;br /&gt;
to any of the SciNet systems (Niagara, P8, SGC, HPSS, Teach cluster,&lt;br /&gt;
or the filesystems) during that time. &lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
'''Sat, Nov 2 2019, 1:30 PM (update):'''  Chiller has been fixed, all systems are operational.    &lt;br /&gt;
&amp;lt;/p&amp;gt;&lt;br /&gt;
'''Fri, Nov 1 2019, 4:30 PM (update):'''  We are operating in free cooling so have brought up about 1/2 of the Niagara compute nodes to reduce the cooling load.  Access, storage, and other systems should now be available.   &lt;br /&gt;
&lt;br /&gt;
'''Fri, Nov 1 2019, 12:05 PM (update):''' A power module in the chiller has failed and needs to be replaced.   We should be able to operate in free cooling if the temperature stays cold enough, but we may not be able to run all systems. No ETA yet on when users will be able to log back in. &lt;br /&gt;
&lt;br /&gt;
'''Fri, Nov 1 2019, 9:15 AM (update):''' There was an automated shutdown because of rising temperatures, causing all systems to go down. We are investigating; check here for updates.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;p&amp;gt;'''Fri, Nov 1 2019, 8:16 AM:''' Unexpected data centre issue: Check here for updates.&lt;br /&gt;
&amp;lt;/p&amp;gt;&lt;br /&gt;
&lt;br /&gt;
''' Thu 1 Aug 2019 5:00:00 PM ''' Systems are up and operational.   &lt;br /&gt;
&lt;br /&gt;
'''Thu 1 Aug 2019 7:00:00 AM: ''' Scheduled Downtime Maintenance of the SciNet Datacenter.  All systems will be down and unavailable starting 7am until the evening. &lt;br /&gt;
&lt;br /&gt;
'''Fri 26 Jul 2019, 16:02:26 EDT:''' There was an issue with the Burst Buffer at around 3PM, and it was recently solved. BB is OK again.&lt;br /&gt;
&lt;br /&gt;
''' Sun 30 Jun 2019 ''' The '''SOSCIP BGQ''' and '''P7''' systems were decommissioned on '''June 30th, 2019'''.  The BGQdev front end node and storage are still available.  &lt;br /&gt;
&lt;br /&gt;
'''Wed 19 Jun 2019, 1:20:00 PM:''' The BGQ is back online.&lt;br /&gt;
&lt;br /&gt;
'''Wed 19 Jun 2019, 10:00:00 AM:''' The BGQ is still down; the SOSCIP GPU nodes should be back up. &lt;br /&gt;
&lt;br /&gt;
'''Wed 19 Jun 2019, 1:40:00 AM:''' There was an issue with the SOSCIP BGQ and GPU Cluster last night about 1:42am, probably a power fluctuation that took it down.  &lt;br /&gt;
&lt;br /&gt;
'''Wed 12 Jun 2019, 3:30 AM - 7:40 AM''' Intermittent system issues on Niagara's project and scratch as the file number limit was reached. We increased the number of files allowed in total on the file system. &lt;br /&gt;
&lt;br /&gt;
'''Thu 30 May 2019, 11:00:00 PM:'''&lt;br /&gt;
The maintenance downtime of SciNet's data center has finished, and systems are being brought online now.  You can check the progress here. Some systems might not be available until Friday morning.&amp;lt;br/&amp;gt;&lt;br /&gt;
Some action on the part of users will be required when they first connect again to a Niagara login node or datamover.  This is due to the security upgrade of the Niagara cluster, which is now in line with currently accepted best practices.&amp;lt;br/&amp;gt;&lt;br /&gt;
The details of the required actions can be found on the [[SSH Changes in May 2019]] wiki page.&lt;br /&gt;
&lt;br /&gt;
'''Wed 29-30 May 2019''' The SciNet datacentre will undergo a two-day maintenance shutdown, starting at 7 am EDT on Wednesday May 29th.  There will be no access to any of the SciNet systems (Niagara, P7, P8, BGQ, SGC, HPSS, Teach cluster, or the file systems) during this time.&lt;br /&gt;
&lt;br /&gt;
'''SCHEDULED SHUTDOWN''': &lt;br /&gt;
&lt;br /&gt;
Please be advised that on '''Wednesday May 29th through Thursday May 30th''', the SciNet datacentre will undergo a two-day maintenance shutdown, starting at 7 am EDT on Wednesday May 29th.  There will be no access to any of the SciNet systems (Niagara, P7, P8, BGQ, SGC, HPSS, Teach cluster, or the file systems) during this time.&lt;br /&gt;
&lt;br /&gt;
This is necessary to finish the installation of an emergency power generator, to perform the annual cooling tower maintenance, and to enhance login security.&lt;br /&gt;
&lt;br /&gt;
We expect to be able to bring the systems back online the evening of May 30th.  Due to the enhanced login security, the ssh applications of users will need to update their known host list. More detailed information on this procedure will be sent shortly before the systems are back online.&lt;br /&gt;
&lt;br /&gt;
'''Fri 5 Apr 2019:''' Software updates on Niagara: The default CCEnv software stack now uses avx512 on Niagara, and there is now a NiaEnv/2019b stack (&amp;quot;epoch&amp;quot;). &lt;br /&gt;
&lt;br /&gt;
'''Thu 4 Apr 2019:''' The 2019 compute and storage allocations have taken effect on Niagara.&lt;br /&gt;
&lt;br /&gt;
'''NOTE''':  There is scheduled network maintenance for '''Friday April 26th 12am-8am''' on the SciNet datacenter external network connection.   This will not affect internal connections or running jobs; however, remote connections may see interruptions during this period.&lt;br /&gt;
&lt;br /&gt;
'''Wed 24 Apr 2019 14:14 EDT:''' HPSS is back on service. Library and robot arm maintenance finished.&lt;br /&gt;
&lt;br /&gt;
'''Wed 24 Apr 2019 08:35 EDT:''' HPSS out of service this morning for library and robot arm maintenance.&lt;br /&gt;
&lt;br /&gt;
'''Fri 19 Apr 2019 17:40 EDT:''' HPSS robot arm has been released and is back to normal operations.&lt;br /&gt;
&lt;br /&gt;
'''Fri 19 Apr 2019 14:00 EDT:''' Problems with the HPSS library robot have been detected.&lt;br /&gt;
&lt;br /&gt;
'''Wed 17 Apr 2019 15:35 EDT:''' Network connection is back.&lt;br /&gt;
&lt;br /&gt;
'''Wed 17 Apr 2019 15:12 EDT:''' Network connection down.  Investigating.&lt;br /&gt;
&lt;br /&gt;
'''Tue 9 Apr 2019 22:24:14 EDT:'''  Network connection restored.&lt;br /&gt;
&lt;br /&gt;
'''Tue 9 Apr 2019, 15:20:''' Network connection down.  Investigating.&lt;br /&gt;
&lt;br /&gt;
'''Fri 5 Apr 2019:''' Planned, short outage in connectivity to the SciNet datacentre from 7:30 am to 8:55 am EST for maintenance of the network.  This outage will not affect running or queued jobs. It may be necessary to reboot the login nodes at some point tomorrow, which could result in a short interruption of connectivity, but which will have no effect on running or queued jobs.&lt;br /&gt;
&lt;br /&gt;
'''April 4, 2019:'''  The 2019 compute and storage allocations will take effect on Niagara. Running jobs will not be affected by this change and will run their course.  Queued jobs' priorities will be updated to reflect the new fairshare values later in the day.  The queue should fully reflect the new fairshare values in about 24 hours.   &lt;br /&gt;
&lt;br /&gt;
It may be necessary to reboot the login nodes at some point tomorrow, which could result in a short interruption of connectivity, but which will have no effect on running or queued jobs.&lt;br /&gt;
&lt;br /&gt;
There will be updates to the software stack on this day as well.&lt;br /&gt;
&lt;br /&gt;
'''March 25, 3:05 PM EST:'''  Most systems back online, other services should be back shortly. &lt;br /&gt;
&lt;br /&gt;
'''March 25, 12:05 PM EST:''' Power is back at the datacentre, but it is not yet known when all systems will be back up.  Keep checking here for updates.&lt;br /&gt;
&lt;br /&gt;
'''March 25, 11:27 AM EST:''' A power outage in the datacentre occurred and caused all services to go down.  Check here for updates.&lt;br /&gt;
&lt;br /&gt;
'''Thu Mar 21 10:37:28 EDT 2019:''' HPSS is back in service&lt;br /&gt;
&lt;br /&gt;
HPSS out of service on '''Tue, Mar/19 at 9AM''', for tape library expansion and relocation. It's possible the downtime will extend to Wed, Mar/20.&lt;br /&gt;
&lt;br /&gt;
'''January 21, 4:00 PM''': HPSS is back in service. Thank you for your patience.&lt;br /&gt;
&lt;br /&gt;
'''January 18, 5:00 PM''': We did practically all of the HPSS upgrades (software/hardware); however, the main client node - archive02 - is presenting an issue we just couldn't resolve yet. We will try to resume work over the weekend with cool heads, or on Monday. Sorry, but this is an unforeseen delay. Jobs in the queue will remain there, and we'll delay the scratch purging by 1 week.&lt;br /&gt;
&lt;br /&gt;
'''January 16, 11:00 PM''': HPSS is being upgraded, as announced.&lt;br /&gt;
&lt;br /&gt;
'''January 16, 8:00 PM''': Systems are coming back up and should be accessible for users now.&lt;br /&gt;
&lt;br /&gt;
'''January 15, 8:00 AM''': Data centre downtime in effect.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;font color=red&amp;gt;&amp;lt;b&amp;gt;Downtime Announcement for January 15 and 16, 2019&amp;lt;/b&amp;gt;&amp;lt;/font&amp;gt;&amp;lt;br&amp;gt;&lt;br /&gt;
The SciNet datacentre will need to undergo a two-day maintenance shutdown in order to perform electrical work, repairs and maintenance.  The electrical work is in preparation for the upcoming installation of an emergency power generator and a larger UPS, which will result in increased resilience to power glitches and outages.  The shutdown is scheduled to start on '''Tuesday January 15, 2019, at 7 am''' and will last until '''Wednesday January 16, 2019''', some time in the evening. There will be no access to any of the SciNet systems (Niagara, P7, P8, BGQ, SGC, HPSS, Teach cluster, or the filesystems) during this time.&lt;br /&gt;
Check back here for up-to-date information on the status of the systems.&lt;br /&gt;
&lt;br /&gt;
Note: this downtime was originally scheduled for Dec. 18, 2018, but has been postponed and combined with the annual maintenance downtime.&lt;br /&gt;
&lt;br /&gt;
'''December 24, 2018, 11:35 AM EST:''' Most systems are operational again. If you had compute jobs running yesterday at around 3:30PM, they likely crashed - please check them and resubmit if needed.&lt;br /&gt;
&lt;br /&gt;
'''December 24, 2018, 10:40 AM EST:''' Repairs have been made, and the file systems are starting to be mounted on the cluster. &lt;br /&gt;
&lt;br /&gt;
'''December 23, 2018, 3:38 PM EST:''' Issues with the file systems (home, scratch and project). We are investigating, it looks like a hardware issue that we are trying to work around. Note that the absence of /home means you cannot log in with ssh keys. All compute jobs crashed around 3:30 PM EST on Dec 23. Once the system is properly up again, please resubmit your jobs.  Unfortunately, at this time of year, it is not possible to give an estimate on when the system will be operational again.&lt;br /&gt;
&lt;br /&gt;
'''Tue Nov 22 14:20:00 EDT 2018''': &amp;lt;font color=green&amp;gt;HPSS back in service&amp;lt;/font&amp;gt;&lt;br /&gt;
&lt;br /&gt;
'''Tue Nov 22 08:55:00 EDT 2018''': &amp;lt;font color=red&amp;gt;HPSS offline for scheduled maintenance&amp;lt;/font&amp;gt;&lt;br /&gt;
&lt;br /&gt;
'''Tue Nov 20 16:30:00 EDT 2018''':  HPSS offline on Thursday 9AM for installation of new LTO8 drives in the tape library.&lt;br /&gt;
&lt;br /&gt;
'''Tue Oct  9 12:16:00 EDT 2018''':  BGQ compute nodes are up.  &lt;br /&gt;
&lt;br /&gt;
'''Sun Oct  7 20:24:26 EDT 2018''':  SGC and BGQ front end are available; BGQ compute nodes are down due to a cooling issue.  &lt;br /&gt;
&lt;br /&gt;
'''Sat Oct  6 23:16:44 EDT 2018''':  There were some problems bringing up SGC &amp;amp; BGQ; they will remain offline for now.&lt;br /&gt;
&lt;br /&gt;
'''Sat Oct  6 18:36:35 EDT 2018''':  Electrical work finished, power restored. Systems are coming online.&lt;br /&gt;
&lt;br /&gt;
'''July 18, 2018:''' login.scinet.utoronto.ca is now disabled, GPC $SCRATCH and $HOME are decommissioned.&lt;br /&gt;
&lt;br /&gt;
'''July 12, 2018:''' There was a short power interruption around 10:30 am which caused most of the systems (Niagara, SGC, BGQ) to reboot and any running jobs to fail. &lt;br /&gt;
&lt;br /&gt;
'''July 11, 2018:''' P7's moved to BGQ filesystem, P8's moved to Niagara filesystem.&lt;br /&gt;
&lt;br /&gt;
'''May 24, 2018, 9:25 PM EST:''' The data center is up, and all systems are operational again.&lt;br /&gt;
&lt;br /&gt;
'''May 24, 2018, 7:00 AM EST:''' The data centre is under annual maintenance. All systems are offline. Systems are expected to be back late afternoon today; check for updates on this page.&lt;br /&gt;
&lt;br /&gt;
'''May 18, 2018:''' Announcement: Annual scheduled maintenance downtime: Thursday May 24, starting 7:00 AM&lt;br /&gt;
&lt;br /&gt;
'''May 16, 2018:''' Cooling  restored, systems online&lt;br /&gt;
&lt;br /&gt;
'''May 16, 2018:''' Cooling issue at datacentre again, all systems down&lt;br /&gt;
&lt;br /&gt;
'''May 15, 2018:''' Cooling restored, systems coming online&lt;br /&gt;
&lt;br /&gt;
'''May 15, 2018''' Cooling issue at datacentre, all systems down&lt;br /&gt;
&lt;br /&gt;
'''May 4, 2018:''' [[HPSS]] is now operational on Niagara.&lt;br /&gt;
&lt;br /&gt;
'''May 3, 2018:''' [[Burst Buffer]] is available upon request.&lt;br /&gt;
&lt;br /&gt;
'''May 3, 2018:''' The [https://docs.computecanada.ca/wiki/Globus Globus] endpoint for Niagara is available: computecanada#niagara.&lt;br /&gt;
&lt;br /&gt;
'''May 1, 2018:''' System status moved here.&lt;br /&gt;
&lt;br /&gt;
'''Apr 23, 2018:''' GPC-compute is decommissioned, GPC-storage available until 30 May 2018.&lt;br /&gt;
&lt;br /&gt;
'''April 10, 2018:''' Niagara commissioned.&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6527</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6527"/>
		<updated>2025-04-08T21:00:42Z</updated>

		<summary type="html">&lt;p&gt;Pinto: &lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Partial   | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Partial   | Mist|Mist}}&lt;br /&gt;
|{{Up   |Teach|Teach}}&lt;br /&gt;
|{{Up   |Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   | OnDemand|Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Up   | Scheduler|Niagara_Quickstart#Submitting_jobs}}&lt;br /&gt;
|{{Up   | File system|Niagara_Quickstart#Storage_and_quotas}}&lt;br /&gt;
|{{Up   | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down | HPSS|HPSS}}&lt;br /&gt;
|{{Up   | Login Nodes|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | External Network|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   |Balam|Balam}}&lt;br /&gt;
|{{Up   |CCEnv|Using_modules}}&lt;br /&gt;
|{{Down   | Jupyter Hub|Jupyter_Hub}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''April 8, 2025 5PM:''' HPSS is being reserved for OS updates on April 9 (Wednesday).&lt;br /&gt;
&lt;br /&gt;
'''April 1, 2025:''' The Jupyter Hub has been replaced by SciNet's [[Open OnDemand Quickstart|Open OnDemand service]].&lt;br /&gt;
&lt;br /&gt;
'''March 31, 2025 3:20 pm EDT:''' Mist login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
'''March 31, 2025 2:45 pm EDT:''' Mist login node is under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''March 28, 2025 3:00 pm - 4:00 pm EDT:''' A short maintenance was needed for the Teach compute nodes; you might have experienced some job scheduling delays on that cluster. &lt;br /&gt;
&lt;br /&gt;
'''March 20, 2025 10:30 am EDT:''' Teach compute nodes are back. &lt;br /&gt;
&lt;br /&gt;
'''March 19, 2025 11:00 pm EDT:''' Teach compute nodes are down again. &lt;br /&gt;
&lt;br /&gt;
'''March 19, 2025 5:15 pm EDT:''' Maintenance of the cooling system was performed successfully. The cluster is back online.&lt;br /&gt;
&lt;br /&gt;
'''March 19, 2025 8:00 am - 5:00 pm EDT:''' Maintenance of the cooling system as well as preparations for the Trillium cluster will require a shutdown of the compute nodes of all SciNet systems (Niagara, Mist, Rouge, Balam, Teach, as well as hosted equipment). The login nodes, file systems and the HPSS system will remain available. The scheduler will hold jobs that are submitted until the maintenance has finished.&lt;br /&gt;
&lt;br /&gt;
'''March 18, 2025 10:00 am EDT:''' Teach compute nodes are back.&lt;br /&gt;
&lt;br /&gt;
'''March 17, 2025 10:00 pm EDT:''' Teach compute nodes are down. We are working on it. &lt;br /&gt;
&lt;br /&gt;
'''March 01, 2025 9:00 pm EST:''' As of March 1st, scratch purging is suspended until after Trillium comes online.&lt;br /&gt;
&lt;br /&gt;
'''January 6, 2025:''' As part of the installation of the new computing cluster Trillium, there is now a permanent reduction in computing capacity of Niagara to 50% and of Mist to 35%.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Mist| Mist Power 9 GPU cluster]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6524</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6524"/>
		<updated>2025-04-08T20:58:31Z</updated>

		<summary type="html">&lt;p&gt;Pinto: &lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Partial   | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Partial   | Mist|Mist}}&lt;br /&gt;
|{{Up   |Teach|Teach}}&lt;br /&gt;
|{{Up   |Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   | OnDemand|Open_OnDemand_Quickstart}}&lt;br /&gt;
|{{Up   | Scheduler|Niagara_Quickstart#Submitting_jobs}}&lt;br /&gt;
|{{Up   | File system|Niagara_Quickstart#Storage_and_quotas}}&lt;br /&gt;
|{{Up   | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down | HPSS|HPSS}}&lt;br /&gt;
|{{Up   | Login Nodes|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | External Network|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   |Balam|Balam}}&lt;br /&gt;
|{{Up   |CCEnv|Using_modules}}&lt;br /&gt;
|{{Down   | Jupyter Hub|Jupyter_Hub}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''April 8, 2025:''' HPSS is being reserved for OS updates on April 9 (Wednesday).&lt;br /&gt;
&lt;br /&gt;
'''April 1, 2025:''' The Jupyter Hub has been replaced by SciNet's [[Open OnDemand Quickstart|Open OnDemand service]].&lt;br /&gt;
&lt;br /&gt;
'''March 31, 2025 3:20 pm EDT:''' Mist login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
'''March 31, 2025 2:45 pm EDT:''' Mist login node is under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''March 28, 2025 3:00 pm - 4:00 pm EDT:''' A short maintenance was needed for the Teach compute nodes; you might have experienced some job scheduling delays on that cluster. &lt;br /&gt;
&lt;br /&gt;
'''March 20, 2025 10:30 am EDT:''' Teach compute nodes are back. &lt;br /&gt;
&lt;br /&gt;
'''March 19, 2025 11:00 pm EDT:''' Teach compute nodes are down again. &lt;br /&gt;
&lt;br /&gt;
'''March 19, 2025 5:15 pm EDT:''' Maintenance of the cooling system was performed successfully. The cluster is back online.&lt;br /&gt;
&lt;br /&gt;
'''March 19, 2025 8:00 am - 5:00 pm EDT:''' Maintenance of the cooling system as well as preparations for the Trillium cluster will require a shutdown of the compute nodes of all SciNet systems (Niagara, Mist, Rouge, Balam, Teach, as well as hosted equipment). The login nodes, file systems and the HPSS system will remain available. The scheduler will hold jobs that are submitted until the maintenance has finished.&lt;br /&gt;
&lt;br /&gt;
'''March 18, 2025 10:00 am EDT:''' Teach compute nodes are back.&lt;br /&gt;
&lt;br /&gt;
'''March 17, 2025 10:00 pm EDT:''' Teach compute nodes are down. We are working on it. &lt;br /&gt;
&lt;br /&gt;
'''March 01, 2025 9:00 pm EST:''' As of March 1st, scratch purging is suspended until after Trillium comes online.&lt;br /&gt;
&lt;br /&gt;
'''January 6, 2025:''' As part of the installation of the new computing cluster Trillium, there is now a permanent reduction in computing capacity of Niagara to 50% and of Mist to 35%.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Mist| Mist Power 9 GPU cluster]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6419</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6419"/>
		<updated>2025-03-01T14:38:31Z</updated>

		<summary type="html">&lt;p&gt;Pinto: /* System Status */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Partial   | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Partial   | Mist|Mist}}&lt;br /&gt;
|{{Up   |Teach|Teach}}&lt;br /&gt;
|{{Up   |Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   | Jupyter Hub|Jupyter_Hub}}&lt;br /&gt;
|{{Up   | Scheduler|Niagara_Quickstart#Submitting_jobs}}&lt;br /&gt;
|{{Up   | File system|Niagara_Quickstart#Storage_and_quotas}}&lt;br /&gt;
|{{Up   | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   | HPSS|HPSS}}&lt;br /&gt;
|{{Up   | Login Nodes|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | External Network|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   |Balam|Balam}}&lt;br /&gt;
|{{Up   |CCEnv|Using_modules}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''March 01, 2025 9:00 pm EST:''' As of March 1st, scratch purging is suspended until after Trillium comes online.&lt;br /&gt;
&lt;br /&gt;
'''February 27, 2025 9:00 pm EST:''' Access to HPSS via Globus has been restored.&lt;br /&gt;
&lt;br /&gt;
'''February 25, 2025 2:30 pm EST:''' Access to HPSS via Globus is currently suspended (sorry, a trivial upgrade has gone wrong).&lt;br /&gt;
&lt;br /&gt;
'''February 25, 2025 12:30 pm EST:''' Mist login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
'''February 25, 2025 11:50 am EST:''' Mist login node is under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''February 7, 2025 2:45 pm EST:''' Systems are back online.&lt;br /&gt;
&lt;br /&gt;
'''Fri Feb  7 01:04:33 EST 2025:''' There has been a problem with the water chiller. An automatic thermal shutdown of the compute nodes was triggered.&lt;br /&gt;
&lt;br /&gt;
'''January 31, 2025 11:45 am EST:''' Power is back.&lt;br /&gt;
&lt;br /&gt;
'''January 31, 2025 6:00 am EST:''' Power outage in the data center. Many compute jobs will have stopped. Until power gets restored, parts of the systems are  running on the generator. No ETA on full power restoration.&lt;br /&gt;
 &lt;br /&gt;
'''January 28, 2025 9:30 pm EST:''' The CCEnv stack has been restored.&lt;br /&gt;
&lt;br /&gt;
'''January 28, 2025 5:00 pm EST:''' The CCEnv stack from cvmfs has issues and may not work reliably.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Mist| Mist Power 9 GPU cluster]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6416</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6416"/>
		<updated>2025-02-28T02:02:50Z</updated>

		<summary type="html">&lt;p&gt;Pinto: /* System Status */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Partial   | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Partial   | Mist|Mist}}&lt;br /&gt;
|{{Up   |Teach|Teach}}&lt;br /&gt;
|{{Up   |Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   | Jupyter Hub|Jupyter_Hub}}&lt;br /&gt;
|{{Up   | Scheduler|Niagara_Quickstart#Submitting_jobs}}&lt;br /&gt;
|{{Up   | File system|Niagara_Quickstart#Storage_and_quotas}}&lt;br /&gt;
|{{Up   | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   | HPSS|HPSS}}&lt;br /&gt;
|{{Up   | Login Nodes|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | External Network|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   |Balam|Balam}}&lt;br /&gt;
|{{Up   |CCEnv|Using_modules}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''February 27, 2025 9:00 pm EST:''' Access to HPSS via Globus has been restored.&lt;br /&gt;
&lt;br /&gt;
'''February 25, 2025 2:30 pm EST:''' Access to HPSS via Globus is currently suspended (sorry, a trivial upgrade has gone wrong).&lt;br /&gt;
&lt;br /&gt;
'''February 25, 2025 12:30 pm EST:''' Mist login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
'''February 25, 2025 11:50 am EST:''' Mist login node is under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''February 7, 2025 2:45 pm EST:''' Systems are back online.&lt;br /&gt;
&lt;br /&gt;
'''Fri Feb  7 01:04:33 EST 2025:''' There has been a problem with the water chiller. An automatic thermal shutdown of the compute nodes was triggered.&lt;br /&gt;
&lt;br /&gt;
'''January 31, 2025 11:45 am EST:''' Power is back.&lt;br /&gt;
&lt;br /&gt;
'''January 31, 2025 6:00 am EST:''' Power outage in the data center. Many compute jobs will have stopped. Until power gets restored, parts of the systems are  running on the generator. No ETA on full power restoration.&lt;br /&gt;
 &lt;br /&gt;
'''January 28, 2025 9:30 pm EST:''' The CCEnv stack has been restored.&lt;br /&gt;
&lt;br /&gt;
'''January 28, 2025 5:00 pm EST:''' The CCEnv stack from cvmfs has issues and may not work reliably.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Mist| Mist Power 9 GPU cluster]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6413</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6413"/>
		<updated>2025-02-25T19:39:49Z</updated>

		<summary type="html">&lt;p&gt;Pinto: /* System Status */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Partial   | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Partial   | Mist|Mist}}&lt;br /&gt;
|{{Up   |Teach|Teach}}&lt;br /&gt;
|{{Up   |Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   | Jupyter Hub|Jupyter_Hub}}&lt;br /&gt;
|{{Up   | Scheduler|Niagara_Quickstart#Submitting_jobs}}&lt;br /&gt;
|{{Up   | File system|Niagara_Quickstart#Storage_and_quotas}}&lt;br /&gt;
|{{Up   | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   | HPSS|HPSS}}&lt;br /&gt;
|{{Up   | Login Nodes|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | External Network|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Partial   | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   |Balam|Balam}}&lt;br /&gt;
|{{Up   |CCEnv|Using_modules}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''February 25, 2025 2:30 pm EST:''' Access to HPSS via Globus is currently suspended (sorry, a trivial upgrade has gone wrong).&lt;br /&gt;
&lt;br /&gt;
'''February 25, 2025 12:30 pm EST:''' Mist login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
'''February 25, 2025 11:50 am EST:''' Mist login node is under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''February 7, 2025 2:45 pm EST:''' Systems are back online.&lt;br /&gt;
&lt;br /&gt;
'''Fri Feb  7 01:04:33 EST 2025:''' There has been a problem with the water chiller. An automatic thermal shutdown of the compute nodes was triggered.&lt;br /&gt;
&lt;br /&gt;
'''January 31, 2025 11:45 am EST:''' Power is back.&lt;br /&gt;
&lt;br /&gt;
'''January 31, 2025 6:00 am EST:''' Power outage in the data center. Many compute jobs will have stopped. Until power gets restored, parts of the systems are  running on the generator. No ETA on full power restoration.&lt;br /&gt;
 &lt;br /&gt;
'''January 28, 2025 9:30 pm EST:''' The CCEnv stack has been restored.&lt;br /&gt;
&lt;br /&gt;
'''January 28, 2025 5:00 pm EST:''' The CCEnv stack from cvmfs has issues and may not work reliably.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Mist| Mist Power 9 GPU cluster]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6410</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6410"/>
		<updated>2025-02-25T19:39:26Z</updated>

		<summary type="html">&lt;p&gt;Pinto: /* System Status */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Partial   | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Partial   | Mist|Mist}}&lt;br /&gt;
|{{Up   |Teach|Teach}}&lt;br /&gt;
|{{Up   |Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   | Jupyter Hub|Jupyter_Hub}}&lt;br /&gt;
|{{Up   | Scheduler|Niagara_Quickstart#Submitting_jobs}}&lt;br /&gt;
|{{Up   | File system|Niagara_Quickstart#Storage_and_quotas}}&lt;br /&gt;
|{{Up   | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   | HPSS|HPSS}}&lt;br /&gt;
|{{Up   | Login Nodes|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | External Network|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Partial   | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   |Balam|Balam}}&lt;br /&gt;
|{{Up   |CCEnv|Using_modules}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''February 25, 2025 2:30 pm EST:''' Access to HPSS via Globus is currently suspended (sorry, a trivial upgrade has gone wrong).&lt;br /&gt;
&lt;br /&gt;
'''February 25, 2025 12:30 pm EST:''' Mist login node is accessible again.&lt;br /&gt;
&lt;br /&gt;
'''February 25, 2025 11:50 am EST:''' Mist login node is under maintenance and temporarily inaccessible to users.&lt;br /&gt;
&lt;br /&gt;
'''February 7, 2025 2:45 pm EST:''' Systems are back online.&lt;br /&gt;
&lt;br /&gt;
'''Fri Feb  7 01:04:33 EST 2025:''' There has been a problem with the water chiller. An automatic thermal shutdown of the compute nodes was triggered.&lt;br /&gt;
&lt;br /&gt;
'''January 31, 2025 11:45 am EST:''' Power is back.&lt;br /&gt;
&lt;br /&gt;
'''January 31, 2025 6:00 am EST:''' Power outage in the data center. Many compute jobs will have stopped. Until power gets restored, parts of the systems are  running on the generator. No ETA on full power restoration.&lt;br /&gt;
 &lt;br /&gt;
'''January 28, 2025 9:30 pm EST:''' The CCEnv stack has been restored.&lt;br /&gt;
&lt;br /&gt;
'''January 28, 2025 5:00 pm EST:''' The CCEnv stack from cvmfs has issues and may not work reliably.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Mist| Mist Power 9 GPU cluster]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6392</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6392"/>
		<updated>2025-02-07T17:37:32Z</updated>

		<summary type="html">&lt;p&gt;Pinto: /* System Status */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Down   | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Down   | Mist|Mist}}&lt;br /&gt;
|{{Down   |Teach|Teach}}&lt;br /&gt;
|{{Down   |Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   | Jupyter Hub|Jupyter_Hub}}&lt;br /&gt;
|{{Up   | Scheduler|Niagara_Quickstart#Submitting_jobs}}&lt;br /&gt;
|{{Up   | File system|Niagara_Quickstart#Storage_and_quotas}}&lt;br /&gt;
|{{Up   | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   | HPSS|HPSS}}&lt;br /&gt;
|{{Up   | Login Nodes|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | External Network|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down   |Balam|Balam}}&lt;br /&gt;
|{{Up   |CCEnv|Using_modules}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''Fri Feb  7 01:04:33 EST 2025:''' There has been a problem with the water chiller. An automatic thermal shutdown of the compute nodes was triggered.&lt;br /&gt;
&lt;br /&gt;
'''January 31, 2025 11:45 am EST:''' Power is back.&lt;br /&gt;
&lt;br /&gt;
'''January 31, 2025 6:00 am EST:''' Power outage in the data center. Many compute jobs will have stopped. Until power gets restored, parts of the systems are  running on the generator. No ETA on full power restoration.&lt;br /&gt;
 &lt;br /&gt;
'''January 28, 2025 9:30 pm EST:''' The CCEnv stack has been restored.&lt;br /&gt;
&lt;br /&gt;
'''January 28, 2025 5:00 pm EST:''' The CCEnv stack from cvmfs has issues and may not work reliably.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Mist| Mist Power 9 GPU cluster]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6386</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6386"/>
		<updated>2025-02-07T06:17:03Z</updated>

		<summary type="html">&lt;p&gt;Pinto: /* System Status */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Down   | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Down   | Mist|Mist}}&lt;br /&gt;
|{{Down   |Teach|Teach}}&lt;br /&gt;
|{{Down   |Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down   | Jupyter Hub|Jupyter_Hub}}&lt;br /&gt;
|{{Down   | Scheduler|Niagara_Quickstart#Submitting_jobs}}&lt;br /&gt;
|{{Down   | File system|Niagara_Quickstart#Storage_and_quotas}}&lt;br /&gt;
|{{Down   | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down   | HPSS|HPSS}}&lt;br /&gt;
|{{Down   | Login Nodes|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Down   | External Network|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Down   | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down   |Balam|Balam}}&lt;br /&gt;
|{{Down   |CCEnv|Using_modules}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''Fri Feb  7 01:04:33 EST 2025:''' There has been a problem with the water chiller. An automatic thermal shutdown of the compute nodes was triggered.&lt;br /&gt;
&lt;br /&gt;
'''January 31, 2025 11:45 am EST:''' Power is back.&lt;br /&gt;
&lt;br /&gt;
'''January 31, 2025 6:00 am EST:''' Power outage in the data center. Many compute jobs will have stopped. Until power gets restored, parts of the systems are  running on the generator. No ETA on full power restoration.&lt;br /&gt;
 &lt;br /&gt;
'''January 28, 2025 9:30 pm EST:''' The CCEnv stack has been restored.&lt;br /&gt;
&lt;br /&gt;
'''January 28, 2025 5:00 pm EST:''' The CCEnv stack from cvmfs has issues and may not work reliably.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Mist| Mist Power 9 GPU cluster]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6383</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6383"/>
		<updated>2025-02-07T06:15:54Z</updated>

		<summary type="html">&lt;p&gt;Pinto: /* System Status */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Partial | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Partial | Mist|Mist}}&lt;br /&gt;
|{{Partial |Teach|Teach}}&lt;br /&gt;
|{{Up |Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up | Jupyter Hub|Jupyter_Hub}}&lt;br /&gt;
|{{Up | Scheduler|Niagara_Quickstart#Submitting_jobs}}&lt;br /&gt;
|{{Up   | File system|Niagara_Quickstart#Storage_and_quotas}}&lt;br /&gt;
|{{Up   | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   | HPSS|HPSS}}&lt;br /&gt;
|{{Up   | Login Nodes|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | External Network|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up |Balam|Balam}}&lt;br /&gt;
|{{Up |CCEnv|Using_modules}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''Fri Feb  7 01:04:33 EST 2025:''' There has been a problem with the water chiller. An automatic thermal shutdown of the compute nodes was triggered.&lt;br /&gt;
&lt;br /&gt;
'''January 31, 2025 11:45 am EST:''' Power is back.&lt;br /&gt;
&lt;br /&gt;
'''January 31, 2025 6:00 am EST:''' Power outage in the data center. Many compute jobs will have stopped. Until power gets restored, parts of the systems are  running on the generator. No ETA on full power restoration.&lt;br /&gt;
 &lt;br /&gt;
'''January 28, 2025 9:30 pm EST:''' The CCEnv stack has been restored.&lt;br /&gt;
&lt;br /&gt;
'''January 28, 2025 5:00 pm EST:''' The CCEnv stack from cvmfs has issues and may not work reliably.&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Mist| Mist Power 9 GPU cluster]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6269</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6269"/>
		<updated>2025-01-09T16:06:24Z</updated>

		<summary type="html">&lt;p&gt;Pinto: /* System Status */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Up   | Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Down | Mist|Mist}}&lt;br /&gt;
|{{Down |Teach|Teach}}&lt;br /&gt;
|{{Down |Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down | Jupyter Hub|Jupyter_Hub}}&lt;br /&gt;
|{{Down | Scheduler|Niagara_Quickstart#Submitting_jobs}}&lt;br /&gt;
|{{Up   | File system|Niagara_Quickstart#Storage_and_quotas}}&lt;br /&gt;
|{{Up   | Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up   | HPSS|HPSS}}&lt;br /&gt;
|{{Up   | Login Nodes|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | External Network|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up   | Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down |Balam|Balam}}&lt;br /&gt;
|{{Down |CCEnv|Using_modules}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
'''January 9, 2025 11:00 am EST:''' Systems are back online&lt;br /&gt;
&lt;br /&gt;
'''January 8, 2025 10:34 pm EST:''' We had some sort of thermal event at the datacenter, and the clusters are down. We're still investigating.&lt;br /&gt;
&lt;br /&gt;
'''January 8, 2025 08:00 am EST:''' Balam, Rouge and Neptune are shut down for electrical upgrades.&lt;br /&gt;
&lt;br /&gt;
'''January 6, 2025:''' As part of the installation of the new computing cluster Trillium, there will be a (permanent) reduction in computing capacity of Niagara and Mist. Only 50% of Niagara and 35% of Mist will remain active after January 6th.  The reduction will require Mist to be shut down for a few hours on January 6th. Balam, Rouge and Neptune will be shut down on Wednesday January 8th for the same reason.&lt;br /&gt;
&lt;br /&gt;
'''December 20, 2024 09:00 am EST:''' OpenOnDemand service will not be available on Dec 20 from 9 a.m. to 5 p.m. due to scheduled maintenance.&lt;br /&gt;
&lt;br /&gt;
'''December 16, 2024, 08:21 am EST:''' The Niagara scheduler has been restarted.&lt;br /&gt;
  &lt;br /&gt;
'''December 16, 2024, 00:04 am EST:''' The Niagara scheduler has an issue; we are investigating.&lt;br /&gt;
  &lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Mist| Mist Power 9 GPU cluster]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6263</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6263"/>
		<updated>2025-01-09T03:51:19Z</updated>

		<summary type="html">&lt;p&gt;Pinto: /* System Status */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Partial |Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Partial |Mist|Mist}}&lt;br /&gt;
|{{Down |Teach|Teach}}&lt;br /&gt;
|{{Down |Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up |Jupyter Hub|Jupyter_Hub}}&lt;br /&gt;
|{{Up |Scheduler|Niagara_Quickstart#Submitting_jobs}}&lt;br /&gt;
|{{Up |File system|Niagara_Quickstart#Storage_and_quotas}}&lt;br /&gt;
|{{Up |Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up |HPSS|HPSS}}&lt;br /&gt;
|{{Up |Login Nodes|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up |External Network|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up |Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down |Balam|Balam}}&lt;br /&gt;
|{{Up |CCEnv|Using_modules}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''January 8, 2025 10:34 pm EST:''' We had some sort of thermal event at the datacenter, and the cluster is down. We're still investigating.&lt;br /&gt;
&lt;br /&gt;
'''January 8, 2025 08:00 am EST:''' Balam, Rouge and Neptune are shut down for electrical upgrades.&lt;br /&gt;
&lt;br /&gt;
'''January 6, 2025:''' As part of the installation of the new computing cluster Trillium, there will be a (permanent) reduction in computing capacity of Niagara and Mist. Only 50% of Niagara and 35% of Mist will remain active after January 6th.  The reduction will require Mist to be shut down for a few hours on January 6th. Balam, Rouge and Neptune will be shut down on Wednesday January 8th for the same reason.&lt;br /&gt;
&lt;br /&gt;
'''December 20, 2024 09:00 am EST:''' OpenOnDemand service will not be available on Dec 20 from 9 a.m. to 5 p.m. due to scheduled maintenance.&lt;br /&gt;
&lt;br /&gt;
'''December 16, 2024, 08:21 am EST:''' The Niagara scheduler has been restarted.&lt;br /&gt;
  &lt;br /&gt;
'''December 16, 2024, 00:04 am EST:''' The Niagara scheduler has an issue; we are investigating.&lt;br /&gt;
  &lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Mist| Mist Power 9 GPU cluster]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
	<entry>
		<id>https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6260</id>
		<title>Main Page</title>
		<link rel="alternate" type="text/html" href="https://docs.scinet.utoronto.ca/index.php?title=Main_Page&amp;diff=6260"/>
		<updated>2025-01-09T03:50:35Z</updated>

		<summary type="html">&lt;p&gt;Pinto: /* System Status */&lt;/p&gt;
&lt;hr /&gt;
&lt;div&gt;__NOTOC__&lt;br /&gt;
{| style=&amp;quot;border-spacing:10px; width: 95%&amp;quot;&lt;br /&gt;
| style=&amp;quot;padding:1em; padding-top:.1em; border:2px solid #0645ad; background-color:#f6f6f6; border-radius:7px&amp;quot;|&lt;br /&gt;
&lt;br /&gt;
==System Status==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;!-- Use &amp;quot;Up&amp;quot;, &amp;quot;Partial&amp;quot; or &amp;quot;Down&amp;quot;; these are templates. --&amp;gt;&lt;br /&gt;
{|style=&amp;quot;width:100%&amp;quot; &lt;br /&gt;
|{{Partial |Niagara|Niagara_Quickstart}}&lt;br /&gt;
|{{Partial |Mist|Mist}}&lt;br /&gt;
|{{Down |Teach|Teach}}&lt;br /&gt;
|{{Down |Rouge|Rouge}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up |Jupyter Hub|Jupyter_Hub}}&lt;br /&gt;
|{{Up |Scheduler|Niagara_Quickstart#Submitting_jobs}}&lt;br /&gt;
|{{Up |File system|Niagara_Quickstart#Storage_and_quotas}}&lt;br /&gt;
|{{Up |Burst Buffer|Burst_Buffer}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Up |HPSS|HPSS}}&lt;br /&gt;
|{{Up |Login Nodes|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up |External Network|Niagara_Quickstart#Logging_in}} &lt;br /&gt;
|{{Up |Globus |Globus}}&lt;br /&gt;
|-&lt;br /&gt;
|{{Down |Balam|Balam}}&lt;br /&gt;
|{{Up |CCEnv|Using_modules}}&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
'''January 8, 2025 10:34 pm EST:''' We had some sort of thermal event at the datacenter, and the cluster is down. We're still investigating.&lt;br /&gt;
&lt;br /&gt;
'''January 8, 2025 08:00 am EST:''' Balam, Rouge and Neptune are shut down for electrical upgrades.&lt;br /&gt;
&lt;br /&gt;
'''January 6, 2025:''' As part of the installation of the new computing cluster Trillium, there will be a (permanent) reduction in computing capacity of Niagara and Mist. Only 50% of Niagara and 35% of Mist will remain active after January 6th.  The reduction will require Mist to be shut down for a few hours on January 6th. Balam, Rouge and Neptune will be shut down on Wednesday January 8th for the same reason.&lt;br /&gt;
&lt;br /&gt;
'''December 20, 2024 09:00 am EST:''' OpenOnDemand service will not be available on Dec 20 from 9 a.m. to 5 p.m. due to scheduled maintenance.&lt;br /&gt;
&lt;br /&gt;
'''December 16, 2024, 08:21 am EST:''' The Niagara scheduler has been restarted.&lt;br /&gt;
  &lt;br /&gt;
'''December 16, 2024, 00:04 am EST:''' The Niagara scheduler has an issue; we are investigating.&lt;br /&gt;
  &lt;br /&gt;
&amp;lt;!--  When removing system status entries, please archive them to: --&amp;gt;&lt;br /&gt;
[[Previous messages]]&lt;br /&gt;
&lt;br /&gt;
{|style=&amp;quot;border-spacing: 10px;width: 100%&amp;quot;&lt;br /&gt;
|valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== QuickStart Guides ==&lt;br /&gt;
* [[Niagara Quickstart]]&lt;br /&gt;
* [[HPSS | HPSS archival storage]]&lt;br /&gt;
* [[Mist| Mist Power 9 GPU cluster]]&lt;br /&gt;
* [[Teach|Teach cluster]]&lt;br /&gt;
* [[FAQ | FAQ (frequently asked questions)]]&lt;br /&gt;
* [[Acknowledging SciNet]]&lt;br /&gt;
| valign=&amp;quot;top&amp;quot; style=&amp;quot;margin: 1em; padding:1em; padding-top:.1em; border:2px solid #000; background-color:#fff; border-radius:7px; width: 49.5%&amp;quot; |&lt;br /&gt;
&lt;br /&gt;
== Tutorials, Manuals, etc. ==&lt;br /&gt;
* [https://education.scinet.utoronto.ca SciNet education material]&lt;br /&gt;
* [https://www.youtube.com/c/SciNetHPCattheUniversityofToronto SciNet's YouTube channel]&lt;br /&gt;
* [[Modules specific to Niagara|Software Modules specific to Niagara]] &lt;br /&gt;
* [[Modules for Mist]] &lt;br /&gt;
* [[Commercial software]]&lt;br /&gt;
* [[Burst Buffer]]&lt;br /&gt;
* [[SSH#SSH Keys|SSH keys]]&lt;br /&gt;
* [[SSH Tunneling]]&lt;br /&gt;
* [[Visualization]]&lt;br /&gt;
* [[Running Serial Jobs on Niagara]]&lt;br /&gt;
* [[Jupyter Hub]]&lt;br /&gt;
|}&lt;/div&gt;</summary>
		<author><name>Pinto</name></author>
	</entry>
</feed>